rley 0.8.09 → 0.8.10
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/lib/rley/constants.rb +1 -1
- data/lib/rley/rgn/tokenizer.rb +67 -56
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 92dd793350853b0466c7d541d8e19bd5d03b661f6bc207836155968b8580584b
|
4
|
+
data.tar.gz: c1583a4668d945c55ab7b687748eec224adb16721a4fd108813abb61d4f3356a
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 948a5292ff798277c50e9a2b1829e9c0afb47886d7b1e4887409a98bcb08e490d493d3c672563b965f074756ce553f3a00c4729270a983f25b9b4a0389ab3505
|
7
|
+
data.tar.gz: d372ccc4cac0643c535759db3f63e2f5fa604dbc80fd72a85a9fb3f867b26a0a31d4a7e70925da0f2845f6e89d764efc886f5895ef1592798a13c76923839466
|
data/lib/rley/constants.rb
CHANGED
data/lib/rley/rgn/tokenizer.rb
CHANGED
@@ -14,6 +14,13 @@ module Rley
|
|
14
14
|
# Delimiters: e.g. parentheses '(', ')'
|
15
15
|
# Separators: e.g. comma
|
16
16
|
class Tokenizer
|
17
|
+
PATT_KEY = /[a-zA-Z_][a-zA-Z_0-9]*:/.freeze
|
18
|
+
PATT_INTEGER = /\d+/.freeze
|
19
|
+
PATT_NEWLINE = /(?:\r\n)|\r|\n/.freeze
|
20
|
+
PATT_STRING_START = /"|'/.freeze
|
21
|
+
PATT_SYMBOL = /[^?*+,:(){}\s]+/.freeze
|
22
|
+
PATT_WHITESPACE = /[ \t\f]+/.freeze
|
23
|
+
|
17
24
|
# @return [StringScanner] Low-level input scanner
|
18
25
|
attr_reader(:scanner)
|
19
26
|
|
@@ -24,7 +31,7 @@ module Rley
|
|
24
31
|
attr_reader(:line_start)
|
25
32
|
|
26
33
|
# One or two special character tokens.
|
27
|
-
|
34
|
+
Lexeme2name = {
|
28
35
|
'(' => 'LEFT_PAREN',
|
29
36
|
')' => 'RIGHT_PAREN',
|
30
37
|
'{' => 'LEFT_BRACE',
|
@@ -44,16 +51,16 @@ module Rley
|
|
44
51
|
# Constructor. Initialize a tokenizer for RGN input.
|
45
52
|
# @param source [String] RGN text to tokenize.
|
46
53
|
def initialize(source = nil)
|
47
|
-
|
48
|
-
|
54
|
+
reset
|
55
|
+
input = source || ''
|
56
|
+
@scanner = StringScanner.new(input)
|
49
57
|
end
|
50
58
|
|
51
59
|
# Reset the tokenizer and make the given text, the current input.
|
52
60
|
# @param source [String] RGN text to tokenize.
|
53
61
|
def start_with(source)
|
62
|
+
reset
|
54
63
|
@scanner.string = source
|
55
|
-
@lineno = 1
|
56
|
-
@line_start = 0
|
57
64
|
end
|
58
65
|
|
59
66
|
# Scan the source and return an array of tokens.
|
@@ -65,47 +72,67 @@ module Rley
|
|
65
72
|
tok_sequence << token unless token.nil?
|
66
73
|
end
|
67
74
|
|
68
|
-
|
75
|
+
tok_sequence
|
69
76
|
end
|
70
77
|
|
71
78
|
private
|
72
79
|
|
73
|
-
def
|
74
|
-
|
75
|
-
|
76
|
-
|
77
|
-
curr_ch = scanner.peek(1)
|
78
|
-
return nil if curr_ch.nil? || curr_ch.empty?
|
80
|
+
def reset
|
81
|
+
@lineno = 1
|
82
|
+
@line_start = 0
|
83
|
+
end
|
79
84
|
|
85
|
+
def _next_token
|
80
86
|
token = nil
|
87
|
+
ws_found = false
|
81
88
|
|
82
|
-
|
83
|
-
|
84
|
-
token = build_token(@@lexeme2name[curr_ch], scanner.getch)
|
85
|
-
elsif '?*+,'.include? curr_ch # modifier character
|
86
|
-
# modifiers without prefix text are symbols
|
87
|
-
symb = ws_found ? 'SYMBOL' : @@lexeme2name[curr_ch]
|
88
|
-
token = build_token(symb, scanner.getch)
|
89
|
-
elsif (lexeme = scanner.scan(/\.\./))
|
90
|
-
# One or two special character tokens
|
91
|
-
token = build_token(@@lexeme2name[lexeme], lexeme)
|
92
|
-
elsif scanner.check(/"|'/) # Start of string detected...
|
93
|
-
token = build_string_token
|
94
|
-
elsif (lexeme = scanner.scan(/\d+/))
|
95
|
-
token = build_token('INT_LIT', lexeme)
|
96
|
-
elsif (lexeme = scanner.scan(/[a-zA-Z_][a-zA-Z_0-9]*:/))
|
97
|
-
keyw = @@keywords[lexeme.chop!]
|
98
|
-
token = build_token('KEY', lexeme) if keyw
|
99
|
-
# ... error case
|
100
|
-
elsif (lexeme = scanner.scan(/[^?*+,:(){}\s]+/))
|
101
|
-
token = build_token('SYMBOL', lexeme)
|
102
|
-
else # Unknown token
|
103
|
-
col = scanner.pos - @line_start + 1
|
104
|
-
_erroneous = curr_ch.nil? ? '' : scanner.scan(/./)
|
105
|
-
raise ScanError, "Error: [line #{lineno}:#{col}]: Unexpected character."
|
106
|
-
end
|
89
|
+
# Loop until end of input reached or token found
|
90
|
+
until token || scanner.eos?
|
107
91
|
|
108
|
-
|
92
|
+
nl_found = scanner.skip(PATT_NEWLINE)
|
93
|
+
if nl_found
|
94
|
+
next_line_scanned
|
95
|
+
next
|
96
|
+
end
|
97
|
+
if scanner.skip(PATT_WHITESPACE) # Skip whitespaces
|
98
|
+
ws_found = true
|
99
|
+
next
|
100
|
+
end
|
101
|
+
|
102
|
+
curr_ch = scanner.peek(1)
|
103
|
+
|
104
|
+
if '(){},'.include? curr_ch
|
105
|
+
# Single delimiter, separator or character
|
106
|
+
token = build_token(Lexeme2name[curr_ch], scanner.getch)
|
107
|
+
elsif '?*+,'.include? curr_ch # modifier character
|
108
|
+
# modifiers without prefix text are symbols
|
109
|
+
symb = (ws_found || nl_found) ? 'SYMBOL' : Lexeme2name[curr_ch]
|
110
|
+
token = build_token(symb, scanner.getch)
|
111
|
+
elsif (lexeme = scanner.scan(/\.\./))
|
112
|
+
# One or two special character tokens
|
113
|
+
token = build_token(Lexeme2name[lexeme], lexeme)
|
114
|
+
elsif scanner.check(PATT_STRING_START) # Start of string detected...
|
115
|
+
token = build_string_token
|
116
|
+
elsif (lexeme = scanner.scan(PATT_INTEGER))
|
117
|
+
token = build_token('INT_LIT', lexeme)
|
118
|
+
elsif (lexeme = scanner.scan(PATT_KEY))
|
119
|
+
keyw = @@keywords[lexeme.chop!]
|
120
|
+
token = build_token('KEY', lexeme) if keyw
|
121
|
+
# ... error case
|
122
|
+
elsif (lexeme = scanner.scan(PATT_SYMBOL))
|
123
|
+
token = build_token('SYMBOL', lexeme)
|
124
|
+
else # Unknown token
|
125
|
+
col = scanner.pos - @line_start + 1
|
126
|
+
_erroneous = curr_ch.nil? ? '' : scanner.scan(/./)
|
127
|
+
raise ScanError, "Error: [line #{lineno}:#{col}]: Unexpected character."
|
128
|
+
end
|
129
|
+
ws_found = false
|
130
|
+
end # until
|
131
|
+
|
132
|
+
# unterminated(@string_start.line, @string_start.column) if state == :multiline
|
133
|
+
token
|
134
|
+
|
135
|
+
# return token
|
109
136
|
end
|
110
137
|
|
111
138
|
def build_token(aSymbolName, aLexeme)
|
@@ -154,24 +181,8 @@ module Rley
|
|
154
181
|
Rley::Lexical::Token.new(literal, 'STR_LIT', pos)
|
155
182
|
end
|
156
183
|
|
157
|
-
#
|
158
|
-
|
159
|
-
def skip_intertoken_spaces
|
160
|
-
loop do
|
161
|
-
ws_found = scanner.skip(/[ \t\f]+/) ? true : false
|
162
|
-
nl_found = scanner.skip(/(?:\r\n)|\r|\n/)
|
163
|
-
if nl_found
|
164
|
-
ws_found = true
|
165
|
-
next_line
|
166
|
-
end
|
167
|
-
|
168
|
-
break unless ws_found
|
169
|
-
end
|
170
|
-
|
171
|
-
scanner.pos
|
172
|
-
end
|
173
|
-
|
174
|
-
def next_line
|
184
|
+
# Event: next line detected.
|
185
|
+
def next_line_scanned
|
175
186
|
@lineno += 1
|
176
187
|
@line_start = scanner.pos
|
177
188
|
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: rley
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.8.
|
4
|
+
version: 0.8.10
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Dimitri Geshef
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2022-
|
11
|
+
date: 2022-04-08 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: rake
|