rley 0.8.09 → 0.8.10

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 9734bc9875c8931a5cd5c5497df1a77a3c938076c86d2748557b8ec901d09de6
4
- data.tar.gz: 20a2a6a9b88752645cf731f6790b7f89729fb108f13701048a6cc8a2c65521f6
3
+ metadata.gz: 92dd793350853b0466c7d541d8e19bd5d03b661f6bc207836155968b8580584b
4
+ data.tar.gz: c1583a4668d945c55ab7b687748eec224adb16721a4fd108813abb61d4f3356a
5
5
  SHA512:
6
- metadata.gz: e203c3d6cf1b4f8b32a16af06ffe2a4548f8a53c37a2677fd13e8b3695a1e482f7d46177b3861d0b5c5b6139827133d09d1213821a7ec91b8dd805fcf3eac4dd
7
- data.tar.gz: 43be3973376040fed3a9db55207d68171a0a6084ad9a102abcd26f47d60600c8aa9165a134c67b0f443e73e4e0cdd2d5a65e5271f63081a21f83aea0550b4474
6
+ metadata.gz: 948a5292ff798277c50e9a2b1829e9c0afb47886d7b1e4887409a98bcb08e490d493d3c672563b965f074756ce553f3a00c4729270a983f25b9b4a0389ab3505
7
+ data.tar.gz: d372ccc4cac0643c535759db3f63e2f5fa604dbc80fd72a85a9fb3f867b26a0a31d4a7e70925da0f2845f6e89d764efc886f5895ef1592798a13c76923839466
@@ -5,7 +5,7 @@
5
5
 
6
6
  module Rley # Module used as a namespace
7
7
  # The version number of the gem.
8
- Version = '0.8.09'
8
+ Version = '0.8.10'
9
9
 
10
10
  # Brief description of the gem.
11
11
  Description = "Ruby implementation of the Earley's parsing algorithm"
@@ -14,6 +14,13 @@ module Rley
14
14
  # Delimiters: e.g. parentheses '(', ')'
15
15
  # Separators: e.g. comma
16
16
  class Tokenizer
17
+ PATT_KEY = /[a-zA-Z_][a-zA-Z_0-9]*:/.freeze
18
+ PATT_INTEGER = /\d+/.freeze
19
+ PATT_NEWLINE = /(?:\r\n)|\r|\n/.freeze
20
+ PATT_STRING_START = /"|'/.freeze
21
+ PATT_SYMBOL = /[^?*+,:(){}\s]+/.freeze
22
+ PATT_WHITESPACE = /[ \t\f]+/.freeze
23
+
17
24
  # @return [StringScanner] Low-level input scanner
18
25
  attr_reader(:scanner)
19
26
 
@@ -24,7 +31,7 @@ module Rley
24
31
  attr_reader(:line_start)
25
32
 
26
33
  # One or two special character tokens.
27
- @@lexeme2name = {
34
+ Lexeme2name = {
28
35
  '(' => 'LEFT_PAREN',
29
36
  ')' => 'RIGHT_PAREN',
30
37
  '{' => 'LEFT_BRACE',
@@ -44,16 +51,16 @@ module Rley
44
51
  # Constructor. Initialize a tokenizer for RGN input.
45
52
  # @param source [String] RGN text to tokenize.
46
53
  def initialize(source = nil)
47
- @scanner = StringScanner.new('')
48
- start_with(source) if source
54
+ reset
55
+ input = source || ''
56
+ @scanner = StringScanner.new(input)
49
57
  end
50
58
 
51
59
  # Reset the tokenizer and make the given text the current input.
52
60
  # @param source [String] RGN text to tokenize.
53
61
  def start_with(source)
62
+ reset
54
63
  @scanner.string = source
55
- @lineno = 1
56
- @line_start = 0
57
64
  end
58
65
 
59
66
  # Scan the source and return an array of tokens.
@@ -65,47 +72,67 @@ module Rley
65
72
  tok_sequence << token unless token.nil?
66
73
  end
67
74
 
68
- return tok_sequence
75
+ tok_sequence
69
76
  end
70
77
 
71
78
  private
72
79
 
73
- def _next_token
74
- pos_before = scanner.pos
75
- skip_intertoken_spaces
76
- ws_found = true if scanner.pos > pos_before
77
- curr_ch = scanner.peek(1)
78
- return nil if curr_ch.nil? || curr_ch.empty?
80
+ def reset
81
+ @lineno = 1
82
+ @line_start = 0
83
+ end
79
84
 
85
+ def _next_token
80
86
  token = nil
87
+ ws_found = false
81
88
 
82
- if '(){},'.include? curr_ch
83
- # Single delimiter, separator or character
84
- token = build_token(@@lexeme2name[curr_ch], scanner.getch)
85
- elsif '?*+,'.include? curr_ch # modifier character
86
- # modifiers without prefix text are symbols
87
- symb = ws_found ? 'SYMBOL' : @@lexeme2name[curr_ch]
88
- token = build_token(symb, scanner.getch)
89
- elsif (lexeme = scanner.scan(/\.\./))
90
- # One or two special character tokens
91
- token = build_token(@@lexeme2name[lexeme], lexeme)
92
- elsif scanner.check(/"|'/) # Start of string detected...
93
- token = build_string_token
94
- elsif (lexeme = scanner.scan(/\d+/))
95
- token = build_token('INT_LIT', lexeme)
96
- elsif (lexeme = scanner.scan(/[a-zA-Z_][a-zA-Z_0-9]*:/))
97
- keyw = @@keywords[lexeme.chop!]
98
- token = build_token('KEY', lexeme) if keyw
99
- # ... error case
100
- elsif (lexeme = scanner.scan(/[^?*+,:(){}\s]+/))
101
- token = build_token('SYMBOL', lexeme)
102
- else # Unknown token
103
- col = scanner.pos - @line_start + 1
104
- _erroneous = curr_ch.nil? ? '' : scanner.scan(/./)
105
- raise ScanError, "Error: [line #{lineno}:#{col}]: Unexpected character."
106
- end
89
+ # Loop until end of input reached or token found
90
+ until token || scanner.eos?
107
91
 
108
- return token
92
+ nl_found = scanner.skip(PATT_NEWLINE)
93
+ if nl_found
94
+ next_line_scanned
95
+ next
96
+ end
97
+ if scanner.skip(PATT_WHITESPACE) # Skip whitespaces
98
+ ws_found = true
99
+ next
100
+ end
101
+
102
+ curr_ch = scanner.peek(1)
103
+
104
+ if '(){},'.include? curr_ch
105
+ # Single delimiter, separator or character
106
+ token = build_token(Lexeme2name[curr_ch], scanner.getch)
107
+ elsif '?*+,'.include? curr_ch # modifier character
108
+ # modifiers without prefix text are symbols
109
+ symb = (ws_found || nl_found) ? 'SYMBOL' : Lexeme2name[curr_ch]
110
+ token = build_token(symb, scanner.getch)
111
+ elsif (lexeme = scanner.scan(/\.\./))
112
+ # One or two special character tokens
113
+ token = build_token(Lexeme2name[lexeme], lexeme)
114
+ elsif scanner.check(PATT_STRING_START) # Start of string detected...
115
+ token = build_string_token
116
+ elsif (lexeme = scanner.scan(PATT_INTEGER))
117
+ token = build_token('INT_LIT', lexeme)
118
+ elsif (lexeme = scanner.scan(PATT_KEY))
119
+ keyw = @@keywords[lexeme.chop!]
120
+ token = build_token('KEY', lexeme) if keyw
121
+ # ... error case
122
+ elsif (lexeme = scanner.scan(PATT_SYMBOL))
123
+ token = build_token('SYMBOL', lexeme)
124
+ else # Unknown token
125
+ col = scanner.pos - @line_start + 1
126
+ _erroneous = curr_ch.nil? ? '' : scanner.scan(/./)
127
+ raise ScanError, "Error: [line #{lineno}:#{col}]: Unexpected character."
128
+ end
129
+ ws_found = false
130
+ end # until
131
+
132
+ # unterminated(@string_start.line, @string_start.column) if state == :multiline
133
+ token
134
+
135
+ # return token
109
136
  end
110
137
 
111
138
  def build_token(aSymbolName, aLexeme)
@@ -154,24 +181,8 @@ module Rley
154
181
  Rley::Lexical::Token.new(literal, 'STR_LIT', pos)
155
182
  end
156
183
 
157
- # Skip non-significant whitespaces and comments.
158
- # Advance the scanner until something significant is found.
159
- def skip_intertoken_spaces
160
- loop do
161
- ws_found = scanner.skip(/[ \t\f]+/) ? true : false
162
- nl_found = scanner.skip(/(?:\r\n)|\r|\n/)
163
- if nl_found
164
- ws_found = true
165
- next_line
166
- end
167
-
168
- break unless ws_found
169
- end
170
-
171
- scanner.pos
172
- end
173
-
174
- def next_line
184
+ # Event: next line detected.
185
+ def next_line_scanned
175
186
  @lineno += 1
176
187
  @line_start = scanner.pos
177
188
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: rley
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.8.09
4
+ version: 0.8.10
5
5
  platform: ruby
6
6
  authors:
7
7
  - Dimitri Geshef
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2022-01-28 00:00:00.000000000 Z
11
+ date: 2022-04-08 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: rake