rubylexer 0.7.5 → 0.7.6

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,3 +1,16 @@
1
+ === 0.7.6/7-01-2009
2
+ * 5 Bugfixes:
3
+ * don't treat <, <=, <=> as starting variables (only << for here header)
4
+ * space between break/return/next and following open paren is ignored
5
+ * fixed unusual whitespace in unlikely places (module header)
6
+ * some parentheses weren't being decorated right
7
+ * rescue should not end implicit parameter lists... unless it's an op
8
+
9
+ * 3 new ruby 1.9 features:
10
+ * stabby blocks
11
+ * dot at beginning of line
12
+ * !, !=, !~ are now valid method/symbol names
13
+
1
14
  === 0.7.5/5-23-2009
2
15
  * 1 Bugfix:
3
16
  * fixed problem with parsing shebang lines
data/Rakefile CHANGED
@@ -30,7 +30,7 @@ require 'lib/rubylexer/version.rb'
30
30
  _.test_globs=["test/code/regression.rb"]
31
31
  _.description=desc
32
32
  _.summary=desc[/\A[^.]+\./]
33
- _.spec_extras={:bindir=>'',:rdoc_options=>'-x lib/rubylexer/test'}
33
+ _.spec_extras={:bindir=>'',:rdoc_options=>'-x lib/rubylexer/test/oneliners.rb'}
34
34
  #_.rdoc_pattern=/\A(howtouse\.txt|testing\.txt|README\.txt|lib\/[^\/]*\.rb|lib\/rubylexer\/[^\d][^\/]*\.rb)\Z/
35
35
  end
36
36
 
@@ -170,6 +170,11 @@ class RubyLexer
170
170
  @progress_thread=nil
171
171
  @rubyversion=options[:rubyversion]
172
172
  @encoding=options[:encoding]||:detect
173
+ @method_operators=if @rubyversion>=1.9
174
+ /#{RUBYSYMOPERATORREX}|\A![=~]?\Z/o
175
+ else
176
+ RUBYSYMOPERATORREX
177
+ end
173
178
 
174
179
  @toptable=CharHandler.new(self, :illegal_char, CHARMAPPINGS)
175
180
 
@@ -670,9 +675,9 @@ private
670
675
  maybe_local=false
671
676
  lastid=lasttok&&lasttok.ident
672
677
  case lastid
673
- when /\A[;(]|do\Z/: was_after_nonid_op=false
674
- when '|': was_after_nonid_op=false unless BlockParamListLhsContext===@parsestack.last
675
- when '{': was_after_nonid_op=false if BlockContext===@parsestack.last or BeginEndContext===@parsestack.last
678
+ when /\A[;(]|do\Z/; was_after_nonid_op=false
679
+ when '|'; was_after_nonid_op=false unless BlockParamListLhsContext===@parsestack.last
680
+ when '{'; was_after_nonid_op=false if BlockContext===@parsestack.last or BeginEndContext===@parsestack.last
676
681
  end if KeywordToken===lasttok
677
682
  was_after_nonid_op=false if NewlineToken===lasttok or lasttok.nil?
678
683
  want_parens=!(ws_toks.empty? or was_after_nonid_op) #or
@@ -752,20 +757,21 @@ private
752
757
  result.unshift ImplicitParamListStartToken.new(oldpos),
753
758
  ImplicitParamListEndToken.new(oldpos)
754
759
  when 1,3;
755
- arr,pass=*param_list_coming_with_2_or_more_params?
756
- result.push( *arr )
757
- unless pass
760
+ if /^(break|next|return)$/===name and
761
+ !(KeywordToken===lasttok and /^(\.|::)$/===lasttok.ident)
758
762
  #only 1 param in list
759
763
  result.unshift ImplicitParamListStartToken.new(oldpos)
760
- last=result.last
761
- last.set_callsite! false if last.respond_to? :callsite? and last.callsite? #KeywordToken===last and last.ident==')'
762
- if /^(break|next|return)$/===name and
763
- !(KeywordToken===lasttok and /^(\.|::)$/===lasttok.ident)
764
- ty=KWParamListContextNoParen
765
- else
766
- ty=ParamListContextNoParen
764
+ @parsestack.push ParamListContextNoParen.new(@linenum)
765
+ else
766
+ arr,pass=*param_list_coming_with_2_or_more_params?
767
+ result.push( *arr )
768
+ unless pass
769
+ #only 1 param in list
770
+ result.unshift ImplicitParamListStartToken.new(oldpos)
771
+ last=result.last
772
+ last.set_callsite! false if last.respond_to? :callsite? and last.callsite?
773
+ @parsestack.push ParamListContextNoParen.new(@linenum)
767
774
  end
768
- @parsestack.push ty.new(@linenum)
769
775
  end
770
776
  when 0; #do nothing
771
777
  else raise 'invalid value of implicit_parens_to_emit'
@@ -782,8 +788,11 @@ private
782
788
  end
783
789
 
784
790
  #-----------------------------------
791
+ #read ahead to see if there's a method param list (with real parentheses)
792
+ #and 2 or more parameters (and hence a comma to separate them)
793
+ #ugly, lexer recursion
785
794
  def param_list_coming_with_2_or_more_params?
786
- WHSPCHARS[prevchar] && (?(==nextchar) or return [[],false]
795
+ return [[],false] unless WHSPCHARS[prevchar] && (?(==nextchar)
787
796
  basesize=@parsestack.size
788
797
  result=[get1token]
789
798
  pass=loop{
@@ -799,6 +808,8 @@ private
799
808
  lexerror tok, "unexpected eof in parameter list"
800
809
  end
801
810
  }
811
+ result.concat @moretokens
812
+ @moretokens.replace []
802
813
  return [result,pass]
803
814
  end
804
815
 
@@ -907,22 +918,22 @@ private
907
918
  #-----------------------------------
908
919
  @@SPACES=/[\ \t\v\f\v]/
909
920
  @@WSTOK=/\r?\n|\r*#@@SPACES+(?:#@@SPACES|\r(?!\n))*|\#[^\n]*\n|\\\r?\n|
910
- ^=begin[\s\n](?:(?!=end).*\n)*=end[\s\n].*\n/x
921
+ ^=begin(?:[\s].*)?\n(?:(?!=end).*\n)*=end[\s\n].*\n/x
911
922
  @@WSTOKS=/(?!=begin)#@@WSTOK+/o
912
923
  def divide_ws(ws,offset)
913
924
  result=[]
914
925
  ws.scan(/\G#@@WSTOK/o){|ws|
915
926
  incr= $~.begin(0)
916
927
  klass=case ws
917
- when /\A[\#=]/: CommentToken
918
- when /\n\Z/: EscNlToken
928
+ when /\A[\#=]/; CommentToken
929
+ when /\n\Z/; EscNlToken
919
930
  else WsToken
920
931
  end
921
932
  result << klass.new(ws,offset+incr)
922
933
  }
923
934
  result.each_with_index{|ws,i|
924
935
  if WsToken===ws
925
- ws.ident << result.delete(i+1).ident while WsToken===result[i+1]
936
+ ws.ident << result.delete_at(i+1).ident while WsToken===result[i+1]
926
937
  end
927
938
  }
928
939
  return result
@@ -1052,15 +1063,21 @@ private
1052
1063
 
1053
1064
  def keyword_do(str,offset,result)
1054
1065
  result.unshift(*abort_noparens_for_do!(str))
1055
- if ExpectDoOrNlContext===@parsestack.last
1066
+ ctx=@parsestack.last
1067
+ if ExpectDoOrNlContext===ctx
1056
1068
  @parsestack.pop
1057
1069
  assert WantsEndContext===@parsestack.last
1058
1070
  result.last.as=";"
1059
1071
  else
1060
1072
  result.last.has_end!
1061
- @parsestack.push WantsEndContext.new(str,@linenum)
1062
- localvars.start_block
1063
- block_param_list_lookahead
1073
+ if BlockContext===ctx and ctx.wanting_stabby_block_body
1074
+ ctx.wanting_stabby_block_body=false
1075
+ ctx.starter,ctx.ender="do","end"
1076
+ else
1077
+ @parsestack.push WantsEndContext.new(str,@linenum)
1078
+ localvars.start_block
1079
+ block_param_list_lookahead
1080
+ end
1064
1081
  end
1065
1082
  return result
1066
1083
  end
@@ -1262,7 +1279,7 @@ private
1262
1279
  result.push KwParamListStartToken.new(offset+str.length)
1263
1280
  #corresponding EndToken emitted by abort_noparens! on leaving rescue context
1264
1281
  @parsestack.push RescueSMContext.new(@linenum)
1265
- result.unshift(*abort_noparens!(str))
1282
+ # result.unshift(*abort_noparens!(str))
1266
1283
  end
1267
1284
  return result
1268
1285
  end
@@ -1386,16 +1403,17 @@ private
1386
1403
 
1387
1404
 
1388
1405
  #-----------------------------------
1389
- def block_param_list_lookahead
1406
+ def block_param_list_lookahead starter=?|, ctx_type=BlockParamListLhsContext
1390
1407
  safe_recurse{ |la|
1391
1408
  set_last_token KeywordToken.new( ';' )
1392
1409
  a=ignored_tokens
1393
1410
 
1394
- if eat_next_if(?|)
1395
- a<< KeywordToken.new("|", input_position-1)
1411
+ if eat_next_if(starter)
1412
+ mycontext=ctx_type.new(@linenum)
1413
+ a<< KeywordToken.new(mycontext.starter, input_position-1)
1396
1414
  if true
1397
- @parsestack.push mycontext=BlockParamListLhsContext.new(@linenum)
1398
- nextchar==?| and a.push NoWsToken.new(input_position)
1415
+ @parsestack.push mycontext
1416
+ nextchar==mycontext.ender[0] and a.push NoWsToken.new(input_position)
1399
1417
  else
1400
1418
  if eat_next_if(?|)
1401
1419
  a.concat [NoWsToken.new(input_position-1),
@@ -1430,8 +1448,11 @@ else
1430
1448
  a<< KeywordToken.new('|',tok.offset)
1431
1449
  @moretokens.empty? or
1432
1450
  fixme %#moretokens might be set from get1token call above...might be bad#
1433
- end
1434
1451
  end
1452
+ end
1453
+ elsif starter==?(
1454
+ ctx_type=UnparenedParamListLhsContext #hacky... should be a param?
1455
+ @parsestack.push ctx_type.new(@linenum)
1435
1456
  end
1436
1457
 
1437
1458
  set_last_token KeywordToken.new( ';' )
@@ -1594,7 +1615,8 @@ end
1594
1615
  when AssignmentRhsContext; result.tag= :rhs
1595
1616
  when ParamListContext,ParamListContextNoParen; #:call
1596
1617
  when ListImmedContext; #:array
1597
- when BlockParamListLhsContext; #:block
1618
+ when BlockParamListLhsContext,UnparenedParamListLhsContext; #:block or stabby proc
1619
+ when ParenedParamListLhsContext; #:stabby proc or method def'n?
1598
1620
  when KnownNestedLhsParenContext; #:nested
1599
1621
  else result.tag= :lhs if cill
1600
1622
  end
@@ -1647,7 +1669,8 @@ end
1647
1669
 
1648
1670
  s=tok.to_s
1649
1671
  case s
1650
- when /^[@$<]/; true
1672
+ when /^[@$]/; true
1673
+ when /^<</; HerePlaceholderToken===tok
1651
1674
  when /(?!#@@LETTER_DIGIT).$/o; false
1652
1675
  # when /^#@@LCLETTER/o; localvars===s or VARLIKE_KEYWORDS===s
1653
1676
  when /^#@@LETTER/o; VarNameToken===tok
@@ -1714,8 +1737,9 @@ end
1714
1737
  end
1715
1738
  when RescueSMContext
1716
1739
  tok.as=";"
1717
- end or
1740
+ end or
1718
1741
  fail ": not expected in #{@parsestack.last.class}->#{@parsestack.last.starter}"
1742
+
1719
1743
 
1720
1744
  #end ternary context, if any
1721
1745
  @parsestack.last.see self,:colon
@@ -1748,7 +1772,7 @@ end
1748
1772
  klass=(notbare ? SymbolToken : MethNameToken)
1749
1773
 
1750
1774
  #look for operators
1751
- opmatches=readahead(3)[RUBYSYMOPERATORREX]
1775
+ opmatches=readahead(3)[@method_operators]
1752
1776
  result= opmatches ? read(opmatches.size) :
1753
1777
  case nc=nextchar
1754
1778
  when ?" #"
@@ -1782,27 +1806,31 @@ end
1782
1806
  return result
1783
1807
  end
1784
1808
 
1809
+ #-----------------------------------
1785
1810
  def merge_assignment_op_in_setter_callsites?
1786
1811
  false
1787
1812
  end
1813
+
1788
1814
  #-----------------------------------
1789
1815
  def callsite_symbol(tok_to_errify)
1790
1816
  start= input_position
1791
1817
 
1792
1818
  #look for operators
1793
- opmatches=readahead(3)[RUBYSYMOPERATORREX]
1794
- return [opmatches ? read(opmatches.size) :
1795
- case nc=nextchar
1796
- when ?` then read(1) #`
1797
- when ?_,?a..?z,?A..?Z,NONASCII then
1819
+ opmatches=readahead(3)[@method_operators]
1820
+ return [read(opmatches.size), start] if opmatches
1821
+ case nc=nextchar
1822
+ when ?` #`
1823
+ return [read(1),start]
1824
+ when ?_,?a..?z,?A..?Z,NONASCII
1798
1825
  context=merge_assignment_op_in_setter_callsites? ? ?: : nc
1799
- identifier_as_string(context)
1800
- else
1801
- set_last_token KeywordToken.new(';')
1802
- lexerror(tok_to_errify,"unexpected char starting callsite symbol: #{nc.chr}, tok=#{tok_to_errify.inspect}")
1803
- nil
1804
- end, start
1805
- ]
1826
+ return [identifier_as_string(context), start]
1827
+ when ?(
1828
+ return [nil,start] if @enable_macro
1829
+ end
1830
+
1831
+ set_last_token KeywordToken.new(';')
1832
+ lexerror(tok_to_errify,"unexpected char starting callsite symbol: #{nc.chr}, tok=#{tok_to_errify.inspect}")
1833
+ return [nil, start]
1806
1834
  end
1807
1835
 
1808
1836
  #-----------------------------------
@@ -2112,13 +2140,15 @@ end
2112
2140
  pre=FileAndLineToken.new(@filename,@linenum+1,input_position)
2113
2141
  pre.allow_ooo_offset=true
2114
2142
 
2115
- if NewlineToken===@last_operative_token or #hack
2116
- (KeywordToken===@last_operative_token and
2117
- @last_operative_token.ident=="rescue" and
2118
- !@last_operative_token.infix?) or
2119
- #/^(;|begin|do|#{INNERBOUNDINGWORDS})$/ or #hack
2120
- !after_nonid_op?{false}
2121
- then #hack-o-rama: probly cases left out above
2143
+ hard=NewlineToken===@last_operative_token || #hack
2144
+ (KeywordToken===@last_operative_token and
2145
+ @last_operative_token.ident=="rescue" and
2146
+ !@last_operative_token.infix?) ||
2147
+ !after_nonid_op?{false}
2148
+
2149
+ hard=false if @rubyversion>=1.9 and @file.check /\A\n(?:#@@WSTOKS)?\.[^.]/o
2150
+
2151
+ if hard
2122
2152
  @offset_adjust=@min_offset_adjust
2123
2153
  a= abort_noparens!
2124
2154
  case @parsestack.last #these should be in the see:semi handler
@@ -2402,6 +2432,15 @@ end
2402
2432
  /^(return|break|next)$/===@last_operative_token.ident
2403
2433
  if (?0..?9)===readahead(2)[1]
2404
2434
  return number(ch)
2435
+ elsif @rubyversion>=1.9 and '->' == readahead(2) #stabby proc
2436
+ #push down block context
2437
+ localvars.start_block
2438
+ @parsestack.push ctx=BlockContext.new(@linenum)
2439
+ ctx.wanting_stabby_block_body=true
2440
+ #read optional proc params
2441
+ block_param_list_lookahead ?(, ParenedParamListLhsContext
2442
+
2443
+
2405
2444
  else #unary operator
2406
2445
  result=getchar
2407
2446
  WHSPLF[nextchar.chr] or
@@ -2575,8 +2614,13 @@ end
2575
2614
  when '('
2576
2615
  lasttok=last_token_maybe_implicit #last_operative_token
2577
2616
  #could be: lasttok===/^#@@LETTER/o
2578
- if (VarNameToken===lasttok or MethNameToken===lasttok or
2579
- lasttok===FUNCLIKE_KEYWORDS)
2617
+ method_params= (
2618
+ VarNameToken===lasttok or
2619
+ MethNameToken===lasttok or
2620
+ lasttok===FUNCLIKE_KEYWORDS or
2621
+ (@enable_macro and lasttok and lasttok.ident==')')
2622
+ )
2623
+ if method_params
2580
2624
  unless WHSPCHARS[lastchar]
2581
2625
  @moretokens << tokch
2582
2626
  tokch= NoWsToken.new(input_position-1)
@@ -2589,6 +2633,8 @@ end
2589
2633
  !(KeywordToken===lasttok && lasttok.ident=="def")
2590
2634
  if maybe_def or
2591
2635
  BlockParamListLhsContext===ctx or
2636
+ ParenedParamListLhsContext===ctx or
2637
+ UnparenedParamListLhsContext===ctx or
2592
2638
  ParenContext===ctx && ctx.lhs
2593
2639
  @parsestack.push KnownNestedLhsParenContext.new(@linenum)
2594
2640
  else
@@ -2614,9 +2660,13 @@ end
2614
2660
  end
2615
2661
  #=end
2616
2662
 
2617
- localvars.start_block
2618
- @parsestack.push BlockContext.new(@linenum)
2619
- block_param_list_lookahead
2663
+ if BlockContext===@parsestack.last and @parsestack.last.wanting_stabby_block_body
2664
+ @parsestack.last.wanting_stabby_block_body=false
2665
+ else
2666
+ localvars.start_block
2667
+ @parsestack.push BlockContext.new(@linenum)
2668
+ block_param_list_lookahead
2669
+ end
2620
2670
  end
2621
2671
  end
2622
2672
  return (tokch)
@@ -2645,6 +2695,10 @@ end
2645
2695
  assert ch==')'
2646
2696
  kw.set_callsite! #not needed?
2647
2697
  end
2698
+ if ParenedParamListLhsContext===ctx
2699
+ assert @parsestack.last.wanting_stabby_block_body
2700
+ assert ch==')'
2701
+ end
2648
2702
  return @moretokens.shift
2649
2703
  end
2650
2704
 
@@ -2705,7 +2759,8 @@ end
2705
2759
  when AssignmentRhsContext; token.tag=:rhs
2706
2760
  when ParamListContext,ParamListContextNoParen; #:call
2707
2761
  when ListImmedContext; #:array
2708
- when BlockParamListLhsContext; #:block
2762
+ when BlockParamListLhsContext,UnparenedParamListLhsContext; #:block or stabby proc
2763
+ when ParenedParamListLhsContext; #stabby proc or method def'n?
2709
2764
  when KnownNestedLhsParenContext; #:nested
2710
2765
  else
2711
2766
  token.tag=:lhs if comma_in_lvalue_list?
@@ -73,6 +73,7 @@ module NestedContexts
73
73
  def initialize(linenum)
74
74
  super('{','}',linenum)
75
75
  end
76
+ attr_accessor :wanting_stabby_block_body
76
77
  end
77
78
 
78
79
  class BeginEndContext < NestedContext
@@ -109,6 +110,16 @@ module NestedContexts
109
110
  def ender; '|' end
110
111
  end
111
112
 
113
+ class ParenedParamListLhsContext < ImplicitLhsContext
114
+ def starter; '(' end
115
+ def ender; ')' end
116
+ end
117
+
118
+ class UnparenedParamListLhsContext < ImplicitLhsContext
119
+ def starter; huh end #" " ???
120
+ def ender; huh end #; or \n when from method def, { or do when from stabby block
121
+ end
122
+
112
123
  class ImplicitContext < ListContext
113
124
  end
114
125
 
@@ -1,3 +1,3 @@
1
1
  class RubyLexer
2
- VERSION='0.7.5'
2
+ VERSION='0.7.6'
3
3
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: rubylexer
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.7.5
4
+ version: 0.7.6
5
5
  platform: ruby
6
6
  authors:
7
7
  - Caleb Clausen
@@ -9,7 +9,7 @@ autorequire:
9
9
  bindir: ""
10
10
  cert_chain: []
11
11
 
12
- date: 2009-05-23 00:00:00 -07:00
12
+ date: 2009-07-06 00:00:00 -07:00
13
13
  default_executable:
14
14
  dependencies:
15
15
  - !ruby/object:Gem::Dependency
@@ -166,7 +166,7 @@ has_rdoc: true
166
166
  homepage: http://github.com/coatl/rubylexer/
167
167
  post_install_message:
168
168
  rdoc_options:
169
- - -x lib/rubylexer/test
169
+ - -x lib/rubylexer/test/oneliners.rb
170
170
  require_paths:
171
171
  - lib
172
172
  required_ruby_version: !ruby/object:Gem::Requirement