srl_ruby 0.4.11 → 0.4.12
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.rubocop.yml +26 -17
- data/CHANGELOG.md +8 -0
- data/Rakefile +5 -6
- data/cucumber.yml +1 -1
- data/lib/regex/atomic_expression.rb +1 -1
- data/lib/regex/char_class.rb +1 -3
- data/lib/regex/char_range.rb +1 -1
- data/lib/regex/char_shorthand.rb +1 -1
- data/lib/regex/character.rb +5 -9
- data/lib/regex/concatenation.rb +1 -3
- data/lib/regex/expression.rb +2 -3
- data/lib/regex/lookaround.rb +1 -2
- data/lib/regex/match_option.rb +2 -2
- data/lib/regex/monadic_expression.rb +1 -3
- data/lib/regex/multiplicity.rb +4 -4
- data/lib/regex/non_capturing_group.rb +1 -2
- data/lib/regex/polyadic_expression.rb +2 -4
- data/lib/regex/quantifiable.rb +1 -1
- data/lib/regex/repetition.rb +1 -2
- data/lib/srl_ruby/ast_builder.rb +10 -12
- data/lib/srl_ruby/tokenizer.rb +58 -65
- data/lib/srl_ruby/version.rb +1 -1
- data/lib/srl_ruby.rb +2 -2
- data/spec/acceptance/support/rule_file_ast_builder.rb +1 -1
- data/spec/acceptance/support/rule_file_parser.rb +2 -3
- data/spec/acceptance/support/rule_file_tokenizer.rb +5 -7
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: d1bd1a0bcdf03aad4c52a4df83bc252cf3f99ec3d9156c1d49a073f020f4c59c
|
4
|
+
data.tar.gz: 38de02e4dc7a6d7cf99a0d62f76adb89642e7785871834303f2c5df754d50dc4
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 636462883d5d49e79bde56104fface3d68694a9e6e99ffd7329bb3f990707a4cf15373546ad93c7e0fb135605f50bce96b2cefe64936fbb690a57273f8103b7f
|
7
|
+
data.tar.gz: 561f4d2aed5fb6c425095ca666128fe00f15b72ddf84d6b27f5ab6778d0a6b0a5657e69b7c71651c11b7d346cbfc914b3c42bc491ae3626e04d9841828190ac4
|
data/.rubocop.yml
CHANGED
@@ -5,9 +5,9 @@ AllCops:
|
|
5
5
|
|
6
6
|
Gemspec/DateAssignment:
|
7
7
|
Enabled: true
|
8
|
-
|
8
|
+
|
9
9
|
Gemspec/RequireMFA: # new in 1.23
|
10
|
-
Enabled: false
|
10
|
+
Enabled: false
|
11
11
|
|
12
12
|
Layout/ArgumentAlignment:
|
13
13
|
Enabled: false
|
@@ -134,9 +134,9 @@ Lint/RaiseException:
|
|
134
134
|
|
135
135
|
Lint/RedundantDirGlobSort:
|
136
136
|
Enabled: true
|
137
|
-
|
137
|
+
|
138
138
|
Lint/RefinementImportMethods: # new in 1.27
|
139
|
-
Enabled: true
|
139
|
+
Enabled: true
|
140
140
|
|
141
141
|
Lint/RequireRelativeSelfPath: # new in 1.22
|
142
142
|
Enabled: true
|
@@ -167,9 +167,9 @@ Lint/UnusedMethodArgument:
|
|
167
167
|
|
168
168
|
Lint/UselessAccessModifier:
|
169
169
|
Enabled: true
|
170
|
-
|
170
|
+
|
171
171
|
Lint/UselessRuby2Keywords: # new in 1.23
|
172
|
-
Enabled: true
|
172
|
+
Enabled: true
|
173
173
|
|
174
174
|
Lint/Void:
|
175
175
|
Enabled: false
|
@@ -215,9 +215,9 @@ Naming/ConstantName:
|
|
215
215
|
|
216
216
|
Naming/ClassAndModuleCamelCase:
|
217
217
|
Enabled: false
|
218
|
-
|
218
|
+
|
219
219
|
Naming/BlockForwarding: # new in 1.24
|
220
|
-
Enabled: true
|
220
|
+
Enabled: true
|
221
221
|
|
222
222
|
Naming/BlockParameterName:
|
223
223
|
Enabled: true
|
@@ -234,6 +234,9 @@ Naming/MethodName:
|
|
234
234
|
Naming/VariableName:
|
235
235
|
Enabled: false
|
236
236
|
|
237
|
+
Security/CompoundHash: # new in 1.28
|
238
|
+
Enabled: true
|
239
|
+
|
237
240
|
Security/IoMethods: # new in 1.22
|
238
241
|
Enabled: true
|
239
242
|
|
@@ -290,9 +293,12 @@ Style/ExpandPathArguments:
|
|
290
293
|
|
291
294
|
Style/ExponentialNotation:
|
292
295
|
Enabled: true
|
293
|
-
|
296
|
+
|
297
|
+
Style/FetchEnvVar: # new in 1.28
|
298
|
+
Enabled: true
|
299
|
+
|
294
300
|
Style/FileRead: # new in 1.24
|
295
|
-
Enabled: true
|
301
|
+
Enabled: true
|
296
302
|
|
297
303
|
Style/FileWrite: # new in 1.24
|
298
304
|
Enabled: true
|
@@ -320,9 +326,9 @@ Style/InPatternThen:
|
|
320
326
|
|
321
327
|
Style/InverseMethods:
|
322
328
|
Enabled: false
|
323
|
-
|
329
|
+
|
324
330
|
Style/MapToHash: # new in 1.24
|
325
|
-
Enabled: true
|
331
|
+
Enabled: true
|
326
332
|
|
327
333
|
Style/MissingRespondToMissing:
|
328
334
|
Enabled: false
|
@@ -350,9 +356,12 @@ Style/NumberedParametersLimit: # new in 1.22
|
|
350
356
|
|
351
357
|
Style/NumericLiterals:
|
352
358
|
Enabled: false
|
353
|
-
|
359
|
+
|
360
|
+
Style/ObjectThen: # new in 1.28
|
361
|
+
Enabled: true
|
362
|
+
|
354
363
|
Style/OpenStructUse: # new in 1.23
|
355
|
-
Enabled: true
|
364
|
+
Enabled: true
|
356
365
|
|
357
366
|
Style/QuotedSymbols:
|
358
367
|
Enabled: true
|
@@ -362,12 +371,12 @@ Style/RaiseArgs:
|
|
362
371
|
|
363
372
|
Style/RedundantArgument:
|
364
373
|
Enabled: true
|
365
|
-
|
374
|
+
|
366
375
|
Style/RedundantInitialize: # new in 1.27
|
367
|
-
Enabled: true
|
376
|
+
Enabled: true
|
368
377
|
|
369
378
|
Style/RedundantReturn:
|
370
|
-
Enabled:
|
379
|
+
Enabled: true
|
371
380
|
|
372
381
|
Style/RedundantSelf:
|
373
382
|
Enabled: true
|
data/CHANGELOG.md
CHANGED
@@ -1,3 +1,11 @@
|
|
1
|
+
## [0.4.12] - 2022-04-22
|
2
|
+
- Code refactoring.
|
3
|
+
|
4
|
+
### Changed
|
5
|
+
- Refactoring class `SrlRuby::Tokenizer`: use of manifest constants, simplified newline and whitespace processing.
|
6
|
+
- Updated `.rubocop.yml` to integrate newer cops (from version 1.28)
|
7
|
+
- Code refactoring: removal of redundant `return`
|
8
|
+
|
1
9
|
## [0.4.11] - 2022-04-17
|
2
10
|
- Fixed code breaking change in Ruby 3.1+: prime library is no longer part of stdlib.
|
3
11
|
|
data/Rakefile
CHANGED
@@ -2,19 +2,18 @@
|
|
2
2
|
|
3
3
|
require 'bundler/gem_tasks'
|
4
4
|
require 'rspec/core/rake_task' # Rspec as testing tool
|
5
|
-
|
5
|
+
require 'cucumber/rake/task' # Cucumber as testing tool
|
6
6
|
|
7
7
|
# desc 'Run RSpec'
|
8
8
|
RSpec::Core::RakeTask.new(:spec)
|
9
9
|
|
10
|
-
|
11
|
-
#
|
12
|
-
|
10
|
+
Cucumber::Rake::Task.new do |_|
|
11
|
+
# Comment
|
12
|
+
end
|
13
13
|
|
14
14
|
# Combine RSpec and Cucumber tests
|
15
15
|
desc 'Run tests, with RSpec and Cucumber'
|
16
|
-
task test:
|
17
|
-
# task test: %i[spec cucumber]
|
16
|
+
task test: %i[spec cucumber]
|
18
17
|
|
19
18
|
|
20
19
|
# Default rake task
|
data/cucumber.yml
CHANGED
data/lib/regex/char_class.rb
CHANGED
data/lib/regex/char_range.rb
CHANGED
@@ -44,7 +44,7 @@ module Regex # This module is used as a namespace
|
|
44
44
|
msg = 'Character range error: lower bound is greater than upper bound.'
|
45
45
|
raise StandardError, msg if theLowerBound.codepoint > theUpperBound.codepoint
|
46
46
|
|
47
|
-
|
47
|
+
[theLowerBound, theUpperBound]
|
48
48
|
end
|
49
49
|
end # class
|
50
50
|
end # module
|
data/lib/regex/char_shorthand.rb
CHANGED
data/lib/regex/character.rb
CHANGED
@@ -107,9 +107,7 @@ module Regex # This module is used as a namespace
|
|
107
107
|
msg = "Escape sequence #{esc_seq} does not begin with a backslash (\)."
|
108
108
|
raise StandardError, msg unless esc_seq[0] == '\\'
|
109
109
|
|
110
|
-
|
111
|
-
|
112
|
-
return result
|
110
|
+
(esc_seq.length == 2) ? digram2codepoint(esc_seq) : esc_number2codepoint(esc_seq)
|
113
111
|
end
|
114
112
|
|
115
113
|
# Return the character as a String object
|
@@ -127,7 +125,7 @@ module Regex # This module is used as a namespace
|
|
127
125
|
# newOne == 0x03a3 # true. The Integer is compared to the codepoint value.
|
128
126
|
# Will test equality with any Object that knows the to_s method
|
129
127
|
def ==(other)
|
130
|
-
|
128
|
+
case other
|
131
129
|
when Character
|
132
130
|
to_str == other.to_str
|
133
131
|
|
@@ -141,8 +139,6 @@ module Regex # This module is used as a namespace
|
|
141
139
|
# Unknown type: try with a conversion
|
142
140
|
self == other.to_s # Recursive call
|
143
141
|
end
|
144
|
-
|
145
|
-
return result
|
146
142
|
end
|
147
143
|
|
148
144
|
# Return a plain English description of the character
|
@@ -160,7 +156,7 @@ module Regex # This module is used as a namespace
|
|
160
156
|
def text_repr
|
161
157
|
return char if lexeme.nil?
|
162
158
|
|
163
|
-
|
159
|
+
lexeme.dup
|
164
160
|
end
|
165
161
|
|
166
162
|
# Conversion method that returns a codepoint for the given two characters
|
@@ -178,7 +174,7 @@ module Regex # This module is used as a namespace
|
|
178
174
|
# If it is not a special sequence, then escaped character is
|
179
175
|
# considered literally (the backslash is 'dummy')
|
180
176
|
result = char2codepoint(aDigram[-1]) if result.nil?
|
181
|
-
|
177
|
+
result
|
182
178
|
end
|
183
179
|
|
184
180
|
private_class_method :digram2codepoint
|
@@ -199,7 +195,7 @@ module Regex # This module is used as a namespace
|
|
199
195
|
|
200
196
|
# Extract the hexadecimal number
|
201
197
|
hexliteral = hexa # shorterSeq.sub(/^[xXu]\{?([0-9a-fA-F]+)}?$/, '\1')
|
202
|
-
|
198
|
+
hexliteral.hex
|
203
199
|
end
|
204
200
|
end
|
205
201
|
|
data/lib/regex/concatenation.rb
CHANGED
@@ -19,11 +19,9 @@ module Regex # This module is used as a namespace
|
|
19
19
|
# Conversion method re-definition.
|
20
20
|
# Purpose: Return the String representation of the concatenated expressions.
|
21
21
|
def text_repr
|
22
|
-
|
22
|
+
children.inject(+'') do |result, child|
|
23
23
|
result << child.to_str
|
24
24
|
end
|
25
|
-
|
26
|
-
return outcome
|
27
25
|
end
|
28
26
|
end # class
|
29
27
|
end # module
|
data/lib/regex/expression.rb
CHANGED
@@ -26,8 +26,7 @@ module Regex # This module is used as a namespace
|
|
26
26
|
# @param theParentOptions [Hash] matching options. They are overridden
|
27
27
|
# by options with same name that are bound to this object.
|
28
28
|
def options(theParentOptions)
|
29
|
-
|
30
|
-
return resulting_options
|
29
|
+
theParentOptions.merge(@local_options)
|
31
30
|
end
|
32
31
|
|
33
32
|
# Template method.
|
@@ -38,7 +37,7 @@ module Regex # This module is used as a namespace
|
|
38
37
|
result << text_repr
|
39
38
|
result << suffix
|
40
39
|
|
41
|
-
|
40
|
+
result
|
42
41
|
end
|
43
42
|
|
44
43
|
protected
|
data/lib/regex/lookaround.rb
CHANGED
@@ -42,8 +42,7 @@ module Regex # This module is used as a namespace
|
|
42
42
|
def to_str
|
43
43
|
dir_syntax = (dir == :ahead) ? '' : '<'
|
44
44
|
kind_syntax = (kind == :positive) ? '=' : '!'
|
45
|
-
|
46
|
-
return result
|
45
|
+
"(?#{dir_syntax}#{kind_syntax}#{child.to_str})"
|
47
46
|
end
|
48
47
|
end # class
|
49
48
|
end # module
|
data/lib/regex/match_option.rb
CHANGED
@@ -26,7 +26,7 @@ module Regex # This module is used as a namespace
|
|
26
26
|
result = 0
|
27
27
|
flags.each { |f| result |= f }
|
28
28
|
|
29
|
-
|
29
|
+
result
|
30
30
|
end
|
31
31
|
|
32
32
|
# Equality operator
|
@@ -40,7 +40,7 @@ module Regex # This module is used as a namespace
|
|
40
40
|
isEqual = false
|
41
41
|
end
|
42
42
|
|
43
|
-
|
43
|
+
isEqual
|
44
44
|
end
|
45
45
|
|
46
46
|
protected
|
@@ -32,9 +32,7 @@ module Regex # This module is used as a namespace
|
|
32
32
|
|
33
33
|
# Return the text representation of the child (if any)
|
34
34
|
def all_child_text
|
35
|
-
|
36
|
-
|
37
|
-
return result
|
35
|
+
child.nil? ? '' : child.to_str
|
38
36
|
end
|
39
37
|
end # class
|
40
38
|
end # module
|
data/lib/regex/multiplicity.rb
CHANGED
@@ -56,7 +56,7 @@ module Regex # This module is used as a namespace
|
|
56
56
|
possessive: '+'
|
57
57
|
}
|
58
58
|
|
59
|
-
|
59
|
+
subresult + policy2suffix[policy]
|
60
60
|
end
|
61
61
|
|
62
62
|
private
|
@@ -66,7 +66,7 @@ module Regex # This module is used as a namespace
|
|
66
66
|
err_msg = "Invalid lower bound of repetition count #{aLowerBound}"
|
67
67
|
raise StandardError, err_msg unless aLowerBound.kind_of?(Integer)
|
68
68
|
|
69
|
-
|
69
|
+
aLowerBound
|
70
70
|
end
|
71
71
|
|
72
72
|
# Validation method. Return the validated upper bound value
|
@@ -76,7 +76,7 @@ module Regex # This module is used as a namespace
|
|
76
76
|
raise StandardError, err_msg
|
77
77
|
end
|
78
78
|
|
79
|
-
|
79
|
+
anUpperBound
|
80
80
|
end
|
81
81
|
|
82
82
|
# Validation method. Return the validated policy value.
|
@@ -85,7 +85,7 @@ module Regex # This module is used as a namespace
|
|
85
85
|
valid_policies = %i[greedy lazy possessive]
|
86
86
|
raise StandardError, err_msg unless valid_policies.include? aPolicy
|
87
87
|
|
88
|
-
|
88
|
+
aPolicy
|
89
89
|
end
|
90
90
|
end # class
|
91
91
|
end # module
|
@@ -22,8 +22,7 @@ module Regex # This module is used as a namespace
|
|
22
22
|
# Conversion method re-definition.
|
23
23
|
# Purpose: Return the String representation of the captured expression.
|
24
24
|
def text_repr
|
25
|
-
|
26
|
-
return result
|
25
|
+
"(?:#{all_child_text})"
|
27
26
|
end
|
28
27
|
end # class
|
29
28
|
end # module
|
@@ -24,7 +24,7 @@ module Regex # This module is used as a namespace
|
|
24
24
|
def <<(aChild)
|
25
25
|
@children << aChild
|
26
26
|
|
27
|
-
|
27
|
+
self
|
28
28
|
end
|
29
29
|
|
30
30
|
# Notification that the parse tree construction is complete.
|
@@ -52,7 +52,7 @@ module Regex # This module is used as a namespace
|
|
52
52
|
def df_visitor
|
53
53
|
root = children # The visit will start from the children of this object
|
54
54
|
|
55
|
-
|
55
|
+
Enumerator.new do |result| # result is a Yielder
|
56
56
|
# Initialization part: will run once
|
57
57
|
visit_stack = [root] # The LIFO queue of nodes to visit
|
58
58
|
|
@@ -78,8 +78,6 @@ module Regex # This module is used as a namespace
|
|
78
78
|
break if visit_stack.empty?
|
79
79
|
end
|
80
80
|
end
|
81
|
-
|
82
|
-
return visitor
|
83
81
|
end
|
84
82
|
end # class
|
85
83
|
end # module
|
data/lib/regex/quantifiable.rb
CHANGED
data/lib/regex/repetition.rb
CHANGED
@@ -30,8 +30,7 @@ module Regex # This module is used as a namespace
|
|
30
30
|
# Conversion method re-definition.
|
31
31
|
# @return [String] String representation of the concatenated expressions.
|
32
32
|
def text_repr
|
33
|
-
|
34
|
-
return result
|
33
|
+
all_child_text + multiplicity.to_str
|
35
34
|
end
|
36
35
|
end # class
|
37
36
|
end # module
|
data/lib/srl_ruby/ast_builder.rb
CHANGED
@@ -59,13 +59,11 @@ module SrlRuby
|
|
59
59
|
# @param aTokenPosition [Integer] Position of token in the input stream
|
60
60
|
# @param aToken [Rley::Lexical::Token] The input token
|
61
61
|
def new_leaf_node(_production, _terminal, aTokenPosition, aToken)
|
62
|
-
|
63
|
-
|
64
|
-
return node
|
62
|
+
Rley::PTree::TerminalNode.new(aToken, aTokenPosition)
|
65
63
|
end
|
66
64
|
|
67
65
|
def multiplicity(lowerBound, upperBound)
|
68
|
-
|
66
|
+
Regex::Multiplicity.new(lowerBound, upperBound, :greedy)
|
69
67
|
end
|
70
68
|
|
71
69
|
# rubocop: disable Style/OptionalBooleanParameter
|
@@ -88,14 +86,14 @@ module SrlRuby
|
|
88
86
|
result = Regex::Character.new(aString)
|
89
87
|
end
|
90
88
|
|
91
|
-
|
89
|
+
result
|
92
90
|
end
|
93
91
|
# rubocop: enable Style/OptionalBooleanParameter
|
94
92
|
|
95
93
|
def char_range(lowerBound, upperBound)
|
96
94
|
lower = Regex::Character.new(lowerBound)
|
97
95
|
upper = Regex::Character.new(upperBound)
|
98
|
-
|
96
|
+
Regex::CharRange.new(lower, upper)
|
99
97
|
end
|
100
98
|
|
101
99
|
def char_class(toNegate, *theChildren)
|
@@ -283,7 +281,7 @@ module SrlRuby
|
|
283
281
|
end
|
284
282
|
|
285
283
|
# TODO check other implementations
|
286
|
-
|
284
|
+
Regex::CharClass.new(false, *alternatives)
|
287
285
|
end
|
288
286
|
|
289
287
|
# rule('character_class' => %w[NONE OF STRING_LIT]).tag 'none_of'
|
@@ -337,13 +335,13 @@ module SrlRuby
|
|
337
335
|
# What if literal is empty?...
|
338
336
|
|
339
337
|
raw_literal = theChildren[-1].token.lexeme.dup
|
340
|
-
|
338
|
+
string_literal(raw_literal)
|
341
339
|
end
|
342
340
|
|
343
341
|
# rule('raw' => %w[RAW STRING_LIT]).tag 'raw_literal'
|
344
342
|
def reduce_raw_literal(_production, _range, _tokens, theChildren)
|
345
343
|
raw_literal = theChildren[-1].token.lexeme.dup
|
346
|
-
|
344
|
+
Regex::RawExpression.new(raw_literal)
|
347
345
|
end
|
348
346
|
|
349
347
|
# rule('alternation' => %w[ANY OF LPAREN alternatives RPAREN]).tag 'any_of'
|
@@ -358,12 +356,12 @@ module SrlRuby
|
|
358
356
|
result = Regex::Alternation.new(*theChildren[3])
|
359
357
|
end
|
360
358
|
|
361
|
-
|
359
|
+
result
|
362
360
|
end
|
363
361
|
|
364
362
|
# rule('alternatives' => %w[alternatives separator quantifiable]).tag 'alternative_list'
|
365
363
|
def reduce_alternative_list(_production, _range, _tokens, theChildren)
|
366
|
-
|
364
|
+
theChildren[0] << theChildren[-1]
|
367
365
|
end
|
368
366
|
|
369
367
|
# rule('alternatives' => 'quantifiable').tag 'simple_alternative'
|
@@ -418,7 +416,7 @@ module SrlRuby
|
|
418
416
|
make_last_repetition_lazy(theChildren[1])
|
419
417
|
group = Regex::CapturingGroup.new(theChildren[1], name)
|
420
418
|
(_, until_expr) = theChildren[4]
|
421
|
-
|
419
|
+
Regex::Concatenation.new(group, until_expr)
|
422
420
|
end
|
423
421
|
|
424
422
|
# rule('quantifier' => 'ONCE').tag 'once'
|
data/lib/srl_ruby/tokenizer.rb
CHANGED
@@ -17,6 +17,16 @@ module SrlRuby
|
|
17
17
|
# Delimiters: parentheses '(' and ')'
|
18
18
|
# Separators: comma (optional)
|
19
19
|
class Tokenizer
|
20
|
+
PATT_CHAR_CLASS = /[^,"\s]{2,}/.freeze
|
21
|
+
PATT_DIGIT_LIT = /[0-9]((?=\s|,|\))|$)/.freeze
|
22
|
+
PATT_IDENTIFIER = /[a-zA-Z_][a-zA-Z0-9_]+/.freeze
|
23
|
+
PATT_INTEGER = /[0-9]{2,}((?=\s|,|\))|$)/.freeze # An integer has 2..* digits
|
24
|
+
PATT_LETTER_LIT = /[a-zA-Z]((?=\s|,|\))|$)/.freeze
|
25
|
+
PATT_NEWLINE = /(?:\r\n)|\r|\n/.freeze
|
26
|
+
PATT_STR_DBL_QUOTE = /"(?:\\"|[^"])*"/.freeze # Double quotes literal?
|
27
|
+
PATT_STR_SNGL_QUOTE = /'(?:\\'|[^'])*'/.freeze # Single quotes literal?
|
28
|
+
PATT_WHITESPACE = /[ \t\f]+/.freeze
|
29
|
+
|
20
30
|
# @return [StringScanner]
|
21
31
|
attr_reader(:scanner)
|
22
32
|
|
@@ -26,14 +36,14 @@ module SrlRuby
|
|
26
36
|
# @return [Integer] offset of start of current line within input
|
27
37
|
attr_reader(:line_start)
|
28
38
|
|
29
|
-
|
39
|
+
Lexeme2name = {
|
30
40
|
'(' => 'LPAREN',
|
31
41
|
')' => 'RPAREN',
|
32
42
|
',' => 'COMMA'
|
33
43
|
}.freeze
|
34
44
|
|
35
45
|
# Here are all the SRL keywords (in uppercase)
|
36
|
-
|
46
|
+
Keywords = %w[
|
37
47
|
ALL
|
38
48
|
ALREADY
|
39
49
|
AND
|
@@ -109,47 +119,55 @@ module SrlRuby
|
|
109
119
|
tok_sequence << token unless token.nil?
|
110
120
|
end
|
111
121
|
|
112
|
-
|
122
|
+
tok_sequence
|
113
123
|
end
|
114
124
|
|
115
125
|
private
|
116
126
|
|
117
127
|
def _next_token
|
118
|
-
skip_whitespaces
|
119
|
-
curr_ch = scanner.peek(1)
|
120
|
-
return nil if curr_ch.nil? || curr_ch.empty?
|
121
|
-
|
122
128
|
token = nil
|
123
129
|
|
124
|
-
|
125
|
-
|
126
|
-
|
127
|
-
|
128
|
-
|
129
|
-
|
130
|
-
|
131
|
-
|
132
|
-
|
133
|
-
|
134
|
-
|
135
|
-
|
136
|
-
|
137
|
-
|
138
|
-
|
139
|
-
|
140
|
-
|
141
|
-
|
142
|
-
|
143
|
-
|
144
|
-
|
145
|
-
|
146
|
-
|
147
|
-
|
148
|
-
|
149
|
-
|
150
|
-
|
130
|
+
# Loop until end of input reached or token found
|
131
|
+
until token || scanner.eos?
|
132
|
+
|
133
|
+
if scanner.skip(PATT_NEWLINE)
|
134
|
+
next_line_scanned
|
135
|
+
next
|
136
|
+
end
|
137
|
+
next if scanner.skip(PATT_WHITESPACE) # Skip whitespaces
|
138
|
+
|
139
|
+
curr_ch = scanner.peek(1)
|
140
|
+
|
141
|
+
token = if '(),'.include? curr_ch
|
142
|
+
# Delimiters, separators => single character token
|
143
|
+
build_token(Lexeme2name[curr_ch], scanner.getch)
|
144
|
+
elsif (lexeme = scanner.scan(PATT_INTEGER))
|
145
|
+
build_token('INTEGER', lexeme)
|
146
|
+
elsif (lexeme = scanner.scan(PATT_DIGIT_LIT))
|
147
|
+
build_token('DIGIT_LIT', lexeme)
|
148
|
+
elsif (lexeme = scanner.scan(PATT_STR_DBL_QUOTE))
|
149
|
+
unquoted = lexeme.gsub(/(^")|("$)/, '')
|
150
|
+
build_token('STRING_LIT', unquoted)
|
151
|
+
elsif (lexeme = scanner.scan(PATT_STR_SNGL_QUOTE))
|
152
|
+
unquoted = lexeme.gsub(/(^')|('$)/, '')
|
153
|
+
build_token('STRING_LIT', unquoted)
|
154
|
+
elsif (lexeme = scanner.scan(PATT_LETTER_LIT))
|
155
|
+
build_token('LETTER_LIT', lexeme)
|
156
|
+
elsif (lexeme = scanner.scan(PATT_IDENTIFIER))
|
157
|
+
keyw = Keywords[lexeme.upcase]
|
158
|
+
tok_type = keyw || 'IDENTIFIER'
|
159
|
+
build_token(tok_type, lexeme)
|
160
|
+
elsif (lexeme = scanner.scan(PATT_CHAR_CLASS))
|
161
|
+
build_token('CHAR_CLASS', lexeme)
|
162
|
+
else # Unknown token
|
163
|
+
erroneous = curr_ch.nil? ? '' : scanner.scan(/./)
|
164
|
+
sequel = scanner.scan(/.{1,20}/)
|
165
|
+
erroneous += sequel unless sequel.nil?
|
166
|
+
raise ScanError, "Unknown token #{erroneous} on line #{lineno}"
|
167
|
+
end
|
168
|
+
end # until
|
151
169
|
|
152
|
-
|
170
|
+
token
|
153
171
|
end
|
154
172
|
|
155
173
|
def build_token(aSymbolName, aLexeme)
|
@@ -162,38 +180,13 @@ module SrlRuby
|
|
162
180
|
raise e
|
163
181
|
end
|
164
182
|
|
165
|
-
|
166
|
-
end
|
167
|
-
|
168
|
-
def skip_whitespaces
|
169
|
-
pre_pos = scanner.pos
|
170
|
-
|
171
|
-
loop do
|
172
|
-
ws_found = false
|
173
|
-
found = scanner.skip(/[ \t\f]+/)
|
174
|
-
ws_found = true if found
|
175
|
-
found = scanner.skip(/(?:\r\n)|\r|\n/)
|
176
|
-
if found
|
177
|
-
ws_found = true
|
178
|
-
@lineno += 1
|
179
|
-
@line_start = scanner.pos
|
180
|
-
end
|
181
|
-
break unless ws_found
|
182
|
-
end
|
183
|
-
|
184
|
-
curr_pos = scanner.pos
|
185
|
-
return if curr_pos == pre_pos
|
186
|
-
# skipped = scanner.string.slice(Range.new(pre_pos, curr_pos))
|
187
|
-
# triplet = skipped.rpartition(/\n|\r/)
|
188
|
-
# @column = 1 unless triplet[1].empty?
|
189
|
-
|
190
|
-
# Correction for the tabs
|
191
|
-
# tab_count = triplet[2].chars.count { |ch| ch =~ /\t/ }
|
192
|
-
# @column += triplet[2].size + tab_count * (tab_size - 1) - 1
|
183
|
+
token
|
193
184
|
end
|
194
185
|
|
195
|
-
|
196
|
-
|
186
|
+
# Event: next line detected.
|
187
|
+
def next_line_scanned
|
188
|
+
@lineno += 1
|
189
|
+
@line_start = scanner.pos
|
197
190
|
end
|
198
191
|
end # class
|
199
192
|
end # module
|
data/lib/srl_ruby/version.rb
CHANGED
data/lib/srl_ruby.rb
CHANGED
@@ -14,7 +14,7 @@ module SrlRuby
|
|
14
14
|
File.open(filename, 'r') { |f| source = f.read }
|
15
15
|
return source if source.nil? || source.empty?
|
16
16
|
|
17
|
-
|
17
|
+
parse(source)
|
18
18
|
end
|
19
19
|
|
20
20
|
# Compile the given SRL expression into its Regexp equivalent.
|
@@ -52,6 +52,6 @@ module SrlRuby
|
|
52
52
|
# Now output the regexp literal
|
53
53
|
root = ast_ptree.root
|
54
54
|
options = root.is_a?(Regex::MatchOption) ? root.combine_opts : nil
|
55
|
-
|
55
|
+
Regexp.new(root.to_str, options)
|
56
56
|
end
|
57
57
|
end # module
|
@@ -14,7 +14,7 @@ module Acceptance # This module is used as a namespace
|
|
14
14
|
File.open(filename, 'r') { |f| source = f.read }
|
15
15
|
return source if source.nil? || source.empty?
|
16
16
|
|
17
|
-
|
17
|
+
parse(source)
|
18
18
|
end
|
19
19
|
|
20
20
|
# Parse the rule file
|
@@ -41,8 +41,7 @@ module Acceptance # This module is used as a namespace
|
|
41
41
|
ast_ptree = engine.convert(result)
|
42
42
|
|
43
43
|
# Now output the regexp literal
|
44
|
-
|
45
|
-
return root
|
44
|
+
ast_ptree.root
|
46
45
|
end
|
47
46
|
end
|
48
47
|
end # module
|
@@ -58,7 +58,7 @@ module Acceptance
|
|
58
58
|
tok_sequence << token unless token.nil?
|
59
59
|
end
|
60
60
|
|
61
|
-
|
61
|
+
tok_sequence
|
62
62
|
end
|
63
63
|
|
64
64
|
private
|
@@ -68,13 +68,11 @@ module Acceptance
|
|
68
68
|
curr_ch = scanner.peek(1)
|
69
69
|
return nil if curr_ch.nil? || curr_ch.empty?
|
70
70
|
|
71
|
-
|
71
|
+
if state == :default
|
72
72
|
default_mode
|
73
73
|
else
|
74
74
|
expecting_srl
|
75
75
|
end
|
76
|
-
|
77
|
-
return token
|
78
76
|
end
|
79
77
|
|
80
78
|
def default_mode
|
@@ -103,7 +101,7 @@ module Acceptance
|
|
103
101
|
raise ScanError, "Unknown token #{erroneous}"
|
104
102
|
end
|
105
103
|
|
106
|
-
|
104
|
+
token
|
107
105
|
end
|
108
106
|
|
109
107
|
def expecting_srl
|
@@ -123,7 +121,7 @@ module Acceptance
|
|
123
121
|
raise e
|
124
122
|
end
|
125
123
|
|
126
|
-
|
124
|
+
token
|
127
125
|
end
|
128
126
|
|
129
127
|
def skip_noise
|
@@ -152,7 +150,7 @@ module Acceptance
|
|
152
150
|
end
|
153
151
|
|
154
152
|
curr_pos = scanner.pos
|
155
|
-
|
153
|
+
curr_pos != pre_pos
|
156
154
|
end
|
157
155
|
|
158
156
|
def skip_comment
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: srl_ruby
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.4.
|
4
|
+
version: 0.4.12
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Dimitri Geshef
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2022-04-
|
11
|
+
date: 2022-04-22 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: rley
|