rley 0.8.09 → 0.8.10

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 9734bc9875c8931a5cd5c5497df1a77a3c938076c86d2748557b8ec901d09de6
4
- data.tar.gz: 20a2a6a9b88752645cf731f6790b7f89729fb108f13701048a6cc8a2c65521f6
3
+ metadata.gz: 92dd793350853b0466c7d541d8e19bd5d03b661f6bc207836155968b8580584b
4
+ data.tar.gz: c1583a4668d945c55ab7b687748eec224adb16721a4fd108813abb61d4f3356a
5
5
  SHA512:
6
- metadata.gz: e203c3d6cf1b4f8b32a16af06ffe2a4548f8a53c37a2677fd13e8b3695a1e482f7d46177b3861d0b5c5b6139827133d09d1213821a7ec91b8dd805fcf3eac4dd
7
- data.tar.gz: 43be3973376040fed3a9db55207d68171a0a6084ad9a102abcd26f47d60600c8aa9165a134c67b0f443e73e4e0cdd2d5a65e5271f63081a21f83aea0550b4474
6
+ metadata.gz: 948a5292ff798277c50e9a2b1829e9c0afb47886d7b1e4887409a98bcb08e490d493d3c672563b965f074756ce553f3a00c4729270a983f25b9b4a0389ab3505
7
+ data.tar.gz: d372ccc4cac0643c535759db3f63e2f5fa604dbc80fd72a85a9fb3f867b26a0a31d4a7e70925da0f2845f6e89d764efc886f5895ef1592798a13c76923839466
@@ -5,7 +5,7 @@
5
5
 
6
6
  module Rley # Module used as a namespace
7
7
  # The version number of the gem.
8
- Version = '0.8.09'
8
+ Version = '0.8.10'
9
9
 
10
10
  # Brief description of the gem.
11
11
  Description = "Ruby implementation of the Earley's parsing algorithm"
@@ -14,6 +14,13 @@ module Rley
14
14
  # Delimiters: e.g. parentheses '(', ')'
15
15
  # Separators: e.g. comma
16
16
  class Tokenizer
17
+ PATT_KEY = /[a-zA-Z_][a-zA-Z_0-9]*:/.freeze
18
+ PATT_INTEGER = /\d+/.freeze
19
+ PATT_NEWLINE = /(?:\r\n)|\r|\n/.freeze
20
+ PATT_STRING_START = /"|'/.freeze
21
+ PATT_SYMBOL = /[^?*+,:(){}\s]+/.freeze
22
+ PATT_WHITESPACE = /[ \t\f]+/.freeze
23
+
17
24
  # @return [StringScanner] Low-level input scanner
18
25
  attr_reader(:scanner)
19
26
 
@@ -24,7 +31,7 @@ module Rley
24
31
  attr_reader(:line_start)
25
32
 
26
33
  # One or two special character tokens.
27
- @@lexeme2name = {
34
+ Lexeme2name = {
28
35
  '(' => 'LEFT_PAREN',
29
36
  ')' => 'RIGHT_PAREN',
30
37
  '{' => 'LEFT_BRACE',
@@ -44,16 +51,16 @@ module Rley
44
51
  # Constructor. Initialize a tokenizer for RGN input.
45
52
  # @param source [String] RGN text to tokenize.
46
53
  def initialize(source = nil)
47
- @scanner = StringScanner.new('')
48
- start_with(source) if source
54
+ reset
55
+ input = source || ''
56
+ @scanner = StringScanner.new(input)
49
57
  end
50
58
 
51
59
  # Reset the tokenizer and make the given text, the current input.
52
60
  # @param source [String] RGN text to tokenize.
53
61
  def start_with(source)
62
+ reset
54
63
  @scanner.string = source
55
- @lineno = 1
56
- @line_start = 0
57
64
  end
58
65
 
59
66
  # Scan the source and return an array of tokens.
@@ -65,47 +72,67 @@ module Rley
65
72
  tok_sequence << token unless token.nil?
66
73
  end
67
74
 
68
- return tok_sequence
75
+ tok_sequence
69
76
  end
70
77
 
71
78
  private
72
79
 
73
- def _next_token
74
- pos_before = scanner.pos
75
- skip_intertoken_spaces
76
- ws_found = true if scanner.pos > pos_before
77
- curr_ch = scanner.peek(1)
78
- return nil if curr_ch.nil? || curr_ch.empty?
80
+ def reset
81
+ @lineno = 1
82
+ @line_start = 0
83
+ end
79
84
 
85
+ def _next_token
80
86
  token = nil
87
+ ws_found = false
81
88
 
82
- if '(){},'.include? curr_ch
83
- # Single delimiter, separator or character
84
- token = build_token(@@lexeme2name[curr_ch], scanner.getch)
85
- elsif '?*+,'.include? curr_ch # modifier character
86
- # modifiers without prefix text are symbols
87
- symb = ws_found ? 'SYMBOL' : @@lexeme2name[curr_ch]
88
- token = build_token(symb, scanner.getch)
89
- elsif (lexeme = scanner.scan(/\.\./))
90
- # One or two special character tokens
91
- token = build_token(@@lexeme2name[lexeme], lexeme)
92
- elsif scanner.check(/"|'/) # Start of string detected...
93
- token = build_string_token
94
- elsif (lexeme = scanner.scan(/\d+/))
95
- token = build_token('INT_LIT', lexeme)
96
- elsif (lexeme = scanner.scan(/[a-zA-Z_][a-zA-Z_0-9]*:/))
97
- keyw = @@keywords[lexeme.chop!]
98
- token = build_token('KEY', lexeme) if keyw
99
- # ... error case
100
- elsif (lexeme = scanner.scan(/[^?*+,:(){}\s]+/))
101
- token = build_token('SYMBOL', lexeme)
102
- else # Unknown token
103
- col = scanner.pos - @line_start + 1
104
- _erroneous = curr_ch.nil? ? '' : scanner.scan(/./)
105
- raise ScanError, "Error: [line #{lineno}:#{col}]: Unexpected character."
106
- end
89
+ # Loop until end of input reached or token found
90
+ until token || scanner.eos?
107
91
 
108
- return token
92
+ nl_found = scanner.skip(PATT_NEWLINE)
93
+ if nl_found
94
+ next_line_scanned
95
+ next
96
+ end
97
+ if scanner.skip(PATT_WHITESPACE) # Skip whitespaces
98
+ ws_found = true
99
+ next
100
+ end
101
+
102
+ curr_ch = scanner.peek(1)
103
+
104
+ if '(){},'.include? curr_ch
105
+ # Single delimiter, separator or character
106
+ token = build_token(Lexeme2name[curr_ch], scanner.getch)
107
+ elsif '?*+,'.include? curr_ch # modifier character
108
+ # modifiers without prefix text are symbols
109
+ symb = (ws_found || nl_found) ? 'SYMBOL' : Lexeme2name[curr_ch]
110
+ token = build_token(symb, scanner.getch)
111
+ elsif (lexeme = scanner.scan(/\.\./))
112
+ # One or two special character tokens
113
+ token = build_token(Lexeme2name[lexeme], lexeme)
114
+ elsif scanner.check(PATT_STRING_START) # Start of string detected...
115
+ token = build_string_token
116
+ elsif (lexeme = scanner.scan(PATT_INTEGER))
117
+ token = build_token('INT_LIT', lexeme)
118
+ elsif (lexeme = scanner.scan(PATT_KEY))
119
+ keyw = @@keywords[lexeme.chop!]
120
+ token = build_token('KEY', lexeme) if keyw
121
+ # ... error case
122
+ elsif (lexeme = scanner.scan(PATT_SYMBOL))
123
+ token = build_token('SYMBOL', lexeme)
124
+ else # Unknown token
125
+ col = scanner.pos - @line_start + 1
126
+ _erroneous = curr_ch.nil? ? '' : scanner.scan(/./)
127
+ raise ScanError, "Error: [line #{lineno}:#{col}]: Unexpected character."
128
+ end
129
+ ws_found = false
130
+ end # until
131
+
132
+ # unterminated(@string_start.line, @string_start.column) if state == :multiline
133
+ token
134
+
135
+ # return token
109
136
  end
110
137
 
111
138
  def build_token(aSymbolName, aLexeme)
@@ -154,24 +181,8 @@ module Rley
154
181
  Rley::Lexical::Token.new(literal, 'STR_LIT', pos)
155
182
  end
156
183
 
157
- # Skip non-significant whitespaces and comments.
158
- # Advance the scanner until something significant is found.
159
- def skip_intertoken_spaces
160
- loop do
161
- ws_found = scanner.skip(/[ \t\f]+/) ? true : false
162
- nl_found = scanner.skip(/(?:\r\n)|\r|\n/)
163
- if nl_found
164
- ws_found = true
165
- next_line
166
- end
167
-
168
- break unless ws_found
169
- end
170
-
171
- scanner.pos
172
- end
173
-
174
- def next_line
184
+ # Event: next line detected.
185
+ def next_line_scanned
175
186
  @lineno += 1
176
187
  @line_start = scanner.pos
177
188
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: rley
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.8.09
4
+ version: 0.8.10
5
5
  platform: ruby
6
6
  authors:
7
7
  - Dimitri Geshef
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2022-01-28 00:00:00.000000000 Z
11
+ date: 2022-04-08 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: rake