rley 0.8.09 → 0.8.10
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/lib/rley/constants.rb +1 -1
- data/lib/rley/rgn/tokenizer.rb +67 -56
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 92dd793350853b0466c7d541d8e19bd5d03b661f6bc207836155968b8580584b
|
4
|
+
data.tar.gz: c1583a4668d945c55ab7b687748eec224adb16721a4fd108813abb61d4f3356a
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 948a5292ff798277c50e9a2b1829e9c0afb47886d7b1e4887409a98bcb08e490d493d3c672563b965f074756ce553f3a00c4729270a983f25b9b4a0389ab3505
|
7
|
+
data.tar.gz: d372ccc4cac0643c535759db3f63e2f5fa604dbc80fd72a85a9fb3f867b26a0a31d4a7e70925da0f2845f6e89d764efc886f5895ef1592798a13c76923839466
|
data/lib/rley/constants.rb
CHANGED
data/lib/rley/rgn/tokenizer.rb
CHANGED
@@ -14,6 +14,13 @@ module Rley
|
|
14
14
|
# Delimiters: e.g. parentheses '(', ')'
|
15
15
|
# Separators: e.g. comma
|
16
16
|
class Tokenizer
|
17
|
+
PATT_KEY = /[a-zA-Z_][a-zA-Z_0-9]*:/.freeze
|
18
|
+
PATT_INTEGER = /\d+/.freeze
|
19
|
+
PATT_NEWLINE = /(?:\r\n)|\r|\n/.freeze
|
20
|
+
PATT_STRING_START = /"|'/.freeze
|
21
|
+
PATT_SYMBOL = /[^?*+,:(){}\s]+/.freeze
|
22
|
+
PATT_WHITESPACE = /[ \t\f]+/.freeze
|
23
|
+
|
17
24
|
# @return [StringScanner] Low-level input scanner
|
18
25
|
attr_reader(:scanner)
|
19
26
|
|
@@ -24,7 +31,7 @@ module Rley
|
|
24
31
|
attr_reader(:line_start)
|
25
32
|
|
26
33
|
# One or two special character tokens.
|
27
|
-
|
34
|
+
Lexeme2name = {
|
28
35
|
'(' => 'LEFT_PAREN',
|
29
36
|
')' => 'RIGHT_PAREN',
|
30
37
|
'{' => 'LEFT_BRACE',
|
@@ -44,16 +51,16 @@ module Rley
|
|
44
51
|
# Constructor. Initialize a tokenizer for RGN input.
|
45
52
|
# @param source [String] RGN text to tokenize.
|
46
53
|
def initialize(source = nil)
|
47
|
-
|
48
|
-
|
54
|
+
reset
|
55
|
+
input = source || ''
|
56
|
+
@scanner = StringScanner.new(input)
|
49
57
|
end
|
50
58
|
|
51
59
|
# Reset the tokenizer and make the given text, the current input.
|
52
60
|
# @param source [String] RGN text to tokenize.
|
53
61
|
def start_with(source)
|
62
|
+
reset
|
54
63
|
@scanner.string = source
|
55
|
-
@lineno = 1
|
56
|
-
@line_start = 0
|
57
64
|
end
|
58
65
|
|
59
66
|
# Scan the source and return an array of tokens.
|
@@ -65,47 +72,67 @@ module Rley
|
|
65
72
|
tok_sequence << token unless token.nil?
|
66
73
|
end
|
67
74
|
|
68
|
-
|
75
|
+
tok_sequence
|
69
76
|
end
|
70
77
|
|
71
78
|
private
|
72
79
|
|
73
|
-
def _next_token
|
74
|
-
|
75
|
-
|
76
|
-
|
77
|
-
curr_ch = scanner.peek(1)
|
78
|
-
return nil if curr_ch.nil? || curr_ch.empty?
|
80
|
+
def reset
|
81
|
+
@lineno = 1
|
82
|
+
@line_start = 0
|
83
|
+
end
|
79
84
|
|
85
|
+
def _next_token
|
80
86
|
token = nil
|
87
|
+
ws_found = false
|
81
88
|
|
82
|
-
|
83
|
-
|
84
|
-
token = build_token(@@lexeme2name[curr_ch], scanner.getch)
|
85
|
-
elsif '?*+,'.include? curr_ch # modifier character
|
86
|
-
# modifiers without prefix text are symbols
|
87
|
-
symb = ws_found ? 'SYMBOL' : @@lexeme2name[curr_ch]
|
88
|
-
token = build_token(symb, scanner.getch)
|
89
|
-
elsif (lexeme = scanner.scan(/\.\./))
|
90
|
-
# One or two special character tokens
|
91
|
-
token = build_token(@@lexeme2name[lexeme], lexeme)
|
92
|
-
elsif scanner.check(/"|'/) # Start of string detected...
|
93
|
-
token = build_string_token
|
94
|
-
elsif (lexeme = scanner.scan(/\d+/))
|
95
|
-
token = build_token('INT_LIT', lexeme)
|
96
|
-
elsif (lexeme = scanner.scan(/[a-zA-Z_][a-zA-Z_0-9]*:/))
|
97
|
-
keyw = @@keywords[lexeme.chop!]
|
98
|
-
token = build_token('KEY', lexeme) if keyw
|
99
|
-
# ... error case
|
100
|
-
elsif (lexeme = scanner.scan(/[^?*+,:(){}\s]+/))
|
101
|
-
token = build_token('SYMBOL', lexeme)
|
102
|
-
else # Unknown token
|
103
|
-
col = scanner.pos - @line_start + 1
|
104
|
-
_erroneous = curr_ch.nil? ? '' : scanner.scan(/./)
|
105
|
-
raise ScanError, "Error: [line #{lineno}:#{col}]: Unexpected character."
|
106
|
-
end
|
89
|
+
# Loop until end of input reached or token found
|
90
|
+
until token || scanner.eos?
|
107
91
|
|
108
|
-
|
92
|
+
nl_found = scanner.skip(PATT_NEWLINE)
|
93
|
+
if nl_found
|
94
|
+
next_line_scanned
|
95
|
+
next
|
96
|
+
end
|
97
|
+
if scanner.skip(PATT_WHITESPACE) # Skip whitespaces
|
98
|
+
ws_found = true
|
99
|
+
next
|
100
|
+
end
|
101
|
+
|
102
|
+
curr_ch = scanner.peek(1)
|
103
|
+
|
104
|
+
if '(){},'.include? curr_ch
|
105
|
+
# Single delimiter, separator or character
|
106
|
+
token = build_token(Lexeme2name[curr_ch], scanner.getch)
|
107
|
+
elsif '?*+,'.include? curr_ch # modifier character
|
108
|
+
# modifiers without prefix text are symbols
|
109
|
+
symb = (ws_found || nl_found) ? 'SYMBOL' : Lexeme2name[curr_ch]
|
110
|
+
token = build_token(symb, scanner.getch)
|
111
|
+
elsif (lexeme = scanner.scan(/\.\./))
|
112
|
+
# One or two special character tokens
|
113
|
+
token = build_token(Lexeme2name[lexeme], lexeme)
|
114
|
+
elsif scanner.check(PATT_STRING_START) # Start of string detected...
|
115
|
+
token = build_string_token
|
116
|
+
elsif (lexeme = scanner.scan(PATT_INTEGER))
|
117
|
+
token = build_token('INT_LIT', lexeme)
|
118
|
+
elsif (lexeme = scanner.scan(PATT_KEY))
|
119
|
+
keyw = @@keywords[lexeme.chop!]
|
120
|
+
token = build_token('KEY', lexeme) if keyw
|
121
|
+
# ... error case
|
122
|
+
elsif (lexeme = scanner.scan(PATT_SYMBOL))
|
123
|
+
token = build_token('SYMBOL', lexeme)
|
124
|
+
else # Unknown token
|
125
|
+
col = scanner.pos - @line_start + 1
|
126
|
+
_erroneous = curr_ch.nil? ? '' : scanner.scan(/./)
|
127
|
+
raise ScanError, "Error: [line #{lineno}:#{col}]: Unexpected character."
|
128
|
+
end
|
129
|
+
ws_found = false
|
130
|
+
end # until
|
131
|
+
|
132
|
+
# unterminated(@string_start.line, @string_start.column) if state == :multiline
|
133
|
+
token
|
134
|
+
|
135
|
+
# return token
|
109
136
|
end
|
110
137
|
|
111
138
|
def build_token(aSymbolName, aLexeme)
|
@@ -154,24 +181,8 @@ module Rley
|
|
154
181
|
Rley::Lexical::Token.new(literal, 'STR_LIT', pos)
|
155
182
|
end
|
156
183
|
|
157
|
-
#
|
158
|
-
|
159
|
-
def skip_intertoken_spaces
|
160
|
-
loop do
|
161
|
-
ws_found = scanner.skip(/[ \t\f]+/) ? true : false
|
162
|
-
nl_found = scanner.skip(/(?:\r\n)|\r|\n/)
|
163
|
-
if nl_found
|
164
|
-
ws_found = true
|
165
|
-
next_line
|
166
|
-
end
|
167
|
-
|
168
|
-
break unless ws_found
|
169
|
-
end
|
170
|
-
|
171
|
-
scanner.pos
|
172
|
-
end
|
173
|
-
|
174
|
-
def next_line
|
184
|
+
# Event: next line detected.
|
185
|
+
def next_line_scanned
|
175
186
|
@lineno += 1
|
176
187
|
@line_start = scanner.pos
|
177
188
|
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: rley
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.8.09
|
4
|
+
version: 0.8.10
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Dimitri Geshef
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2022-
|
11
|
+
date: 2022-04-08 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: rake
|