srl_ruby 0.4.9 → 0.4.12
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.rubocop.yml +40 -1
- data/CHANGELOG.md +17 -0
- data/Rakefile +3 -4
- data/cucumber.yml +1 -1
- data/lib/regex/atomic_expression.rb +1 -1
- data/lib/regex/char_class.rb +1 -3
- data/lib/regex/char_range.rb +1 -1
- data/lib/regex/char_shorthand.rb +1 -1
- data/lib/regex/character.rb +5 -9
- data/lib/regex/concatenation.rb +1 -3
- data/lib/regex/expression.rb +2 -6
- data/lib/regex/lookaround.rb +1 -2
- data/lib/regex/match_option.rb +2 -2
- data/lib/regex/monadic_expression.rb +1 -3
- data/lib/regex/multiplicity.rb +4 -4
- data/lib/regex/non_capturing_group.rb +1 -2
- data/lib/regex/polyadic_expression.rb +2 -4
- data/lib/regex/quantifiable.rb +1 -1
- data/lib/regex/repetition.rb +1 -2
- data/lib/srl_ruby/ast_builder.rb +10 -12
- data/lib/srl_ruby/tokenizer.rb +58 -65
- data/lib/srl_ruby/version.rb +1 -1
- data/lib/srl_ruby.rb +2 -2
- data/spec/acceptance/support/rule_file_ast_builder.rb +2 -23
- data/spec/acceptance/support/rule_file_grammar.rb +14 -19
- data/spec/acceptance/support/rule_file_parser.rb +2 -3
- data/spec/acceptance/support/rule_file_tokenizer.rb +5 -7
- data/spec/regex/character_spec.rb +3 -3
- data/srl_ruby.gemspec +4 -4
- metadata +11 -11
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: d1bd1a0bcdf03aad4c52a4df83bc252cf3f99ec3d9156c1d49a073f020f4c59c
|
4
|
+
data.tar.gz: 38de02e4dc7a6d7cf99a0d62f76adb89642e7785871834303f2c5df754d50dc4
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 636462883d5d49e79bde56104fface3d68694a9e6e99ffd7329bb3f990707a4cf15373546ad93c7e0fb135605f50bce96b2cefe64936fbb690a57273f8103b7f
|
7
|
+
data.tar.gz: 561f4d2aed5fb6c425095ca666128fe00f15b72ddf84d6b27f5ab6778d0a6b0a5657e69b7c71651c11b7d346cbfc914b3c42bc491ae3626e04d9841828190ac4
|
data/.rubocop.yml
CHANGED
@@ -6,6 +6,9 @@ AllCops:
|
|
6
6
|
Gemspec/DateAssignment:
|
7
7
|
Enabled: true
|
8
8
|
|
9
|
+
Gemspec/RequireMFA: # new in 1.23
|
10
|
+
Enabled: false
|
11
|
+
|
9
12
|
Layout/ArgumentAlignment:
|
10
13
|
Enabled: false
|
11
14
|
|
@@ -132,6 +135,9 @@ Lint/RaiseException:
|
|
132
135
|
Lint/RedundantDirGlobSort:
|
133
136
|
Enabled: true
|
134
137
|
|
138
|
+
Lint/RefinementImportMethods: # new in 1.27
|
139
|
+
Enabled: true
|
140
|
+
|
135
141
|
Lint/RequireRelativeSelfPath: # new in 1.22
|
136
142
|
Enabled: true
|
137
143
|
|
@@ -162,6 +168,9 @@ Lint/UnusedMethodArgument:
|
|
162
168
|
Lint/UselessAccessModifier:
|
163
169
|
Enabled: true
|
164
170
|
|
171
|
+
Lint/UselessRuby2Keywords: # new in 1.23
|
172
|
+
Enabled: true
|
173
|
+
|
165
174
|
Lint/Void:
|
166
175
|
Enabled: false
|
167
176
|
|
@@ -207,6 +216,9 @@ Naming/ConstantName:
|
|
207
216
|
Naming/ClassAndModuleCamelCase:
|
208
217
|
Enabled: false
|
209
218
|
|
219
|
+
Naming/BlockForwarding: # new in 1.24
|
220
|
+
Enabled: true
|
221
|
+
|
210
222
|
Naming/BlockParameterName:
|
211
223
|
Enabled: true
|
212
224
|
|
@@ -222,6 +234,9 @@ Naming/MethodName:
|
|
222
234
|
Naming/VariableName:
|
223
235
|
Enabled: false
|
224
236
|
|
237
|
+
Security/CompoundHash: # new in 1.28
|
238
|
+
Enabled: true
|
239
|
+
|
225
240
|
Security/IoMethods: # new in 1.22
|
226
241
|
Enabled: true
|
227
242
|
|
@@ -279,6 +294,15 @@ Style/ExpandPathArguments:
|
|
279
294
|
Style/ExponentialNotation:
|
280
295
|
Enabled: true
|
281
296
|
|
297
|
+
Style/FetchEnvVar: # new in 1.28
|
298
|
+
Enabled: true
|
299
|
+
|
300
|
+
Style/FileRead: # new in 1.24
|
301
|
+
Enabled: true
|
302
|
+
|
303
|
+
Style/FileWrite: # new in 1.24
|
304
|
+
Enabled: true
|
305
|
+
|
282
306
|
Style/GuardClause:
|
283
307
|
Enabled: false
|
284
308
|
|
@@ -303,6 +327,9 @@ Style/InPatternThen:
|
|
303
327
|
Style/InverseMethods:
|
304
328
|
Enabled: false
|
305
329
|
|
330
|
+
Style/MapToHash: # new in 1.24
|
331
|
+
Enabled: true
|
332
|
+
|
306
333
|
Style/MissingRespondToMissing:
|
307
334
|
Enabled: false
|
308
335
|
|
@@ -312,6 +339,9 @@ Style/MultilineInPatternThen:
|
|
312
339
|
Style/NegatedIfElseCondition:
|
313
340
|
Enabled: true
|
314
341
|
|
342
|
+
Style/NestedFileDirname: # new in 1.26
|
343
|
+
Enabled: true
|
344
|
+
|
315
345
|
Style/Next:
|
316
346
|
Enabled: false
|
317
347
|
|
@@ -327,6 +357,12 @@ Style/NumberedParametersLimit: # new in 1.22
|
|
327
357
|
Style/NumericLiterals:
|
328
358
|
Enabled: false
|
329
359
|
|
360
|
+
Style/ObjectThen: # new in 1.28
|
361
|
+
Enabled: true
|
362
|
+
|
363
|
+
Style/OpenStructUse: # new in 1.23
|
364
|
+
Enabled: true
|
365
|
+
|
330
366
|
Style/QuotedSymbols:
|
331
367
|
Enabled: true
|
332
368
|
|
@@ -336,8 +372,11 @@ Style/RaiseArgs:
|
|
336
372
|
Style/RedundantArgument:
|
337
373
|
Enabled: true
|
338
374
|
|
375
|
+
Style/RedundantInitialize: # new in 1.27
|
376
|
+
Enabled: true
|
377
|
+
|
339
378
|
Style/RedundantReturn:
|
340
|
-
Enabled:
|
379
|
+
Enabled: true
|
341
380
|
|
342
381
|
Style/RedundantSelf:
|
343
382
|
Enabled: true
|
data/CHANGELOG.md
CHANGED
@@ -1,3 +1,20 @@
|
|
1
|
+
## [0.4.12] - 2022-04-22
|
2
|
+
- Code refactoring.
|
3
|
+
|
4
|
+
### Changed
|
5
|
+
- Refactoring class `SrlRuby::Tokenizer`: use of manifest constants, simplified newline and whitespace processing.
|
6
|
+
- Updated `.rubocop.yml` to integrate newer cops (from version 1.28)
|
7
|
+
- Code refactoring: removal of redundant `return`
|
8
|
+
|
9
|
+
## [0.4.11] - 2022-04-17
|
10
|
+
- Fixed code breaking change in Ruby 3.1+: prime library is no longer part of stdlib.
|
11
|
+
|
12
|
+
### Fixed
|
13
|
+
- File `srl_ruby.gemspec` now depends on `Rley 0.8.11`. This is necessary to cope with a change in CRuby 3.1: the `prime` library is no longer part of stdlib.
|
14
|
+
|
15
|
+
### Changed
|
16
|
+
- Minor style refactoring to please RuboCop 1.27
|
17
|
+
|
1
18
|
## [0.4.9] - 2021-11-01
|
2
19
|
- Code update to align with `Rley` v. 0.8.08
|
3
20
|
|
data/Rakefile
CHANGED
@@ -4,10 +4,8 @@ require 'bundler/gem_tasks'
|
|
4
4
|
require 'rspec/core/rake_task' # Rspec as testing tool
|
5
5
|
require 'cucumber/rake/task' # Cucumber as testing tool
|
6
6
|
|
7
|
-
desc 'Run RSpec'
|
8
|
-
RSpec::Core::RakeTask.new
|
9
|
-
spec.pattern = 'spec/**/*_spec.rb'
|
10
|
-
end
|
7
|
+
# desc 'Run RSpec'
|
8
|
+
RSpec::Core::RakeTask.new(:spec)
|
11
9
|
|
12
10
|
Cucumber::Rake::Task.new do |_|
|
13
11
|
# Comment
|
@@ -17,5 +15,6 @@ end
|
|
17
15
|
desc 'Run tests, with RSpec and Cucumber'
|
18
16
|
task test: %i[spec cucumber]
|
19
17
|
|
18
|
+
|
20
19
|
# Default rake task
|
21
20
|
task default: :test
|
data/cucumber.yml
CHANGED
data/lib/regex/char_class.rb
CHANGED
data/lib/regex/char_range.rb
CHANGED
@@ -44,7 +44,7 @@ module Regex # This module is used as a namespace
|
|
44
44
|
msg = 'Character range error: lower bound is greater than upper bound.'
|
45
45
|
raise StandardError, msg if theLowerBound.codepoint > theUpperBound.codepoint
|
46
46
|
|
47
|
-
|
47
|
+
[theLowerBound, theUpperBound]
|
48
48
|
end
|
49
49
|
end # class
|
50
50
|
end # module
|
data/lib/regex/char_shorthand.rb
CHANGED
data/lib/regex/character.rb
CHANGED
@@ -107,9 +107,7 @@ module Regex # This module is used as a namespace
|
|
107
107
|
msg = "Escape sequence #{esc_seq} does not begin with a backslash (\)."
|
108
108
|
raise StandardError, msg unless esc_seq[0] == '\\'
|
109
109
|
|
110
|
-
|
111
|
-
|
112
|
-
return result
|
110
|
+
(esc_seq.length == 2) ? digram2codepoint(esc_seq) : esc_number2codepoint(esc_seq)
|
113
111
|
end
|
114
112
|
|
115
113
|
# Return the character as a String object
|
@@ -127,7 +125,7 @@ module Regex # This module is used as a namespace
|
|
127
125
|
# newOne == 0x03a3 # true. The Integer is compared to the codepoint value.
|
128
126
|
# Will test equality with any Object that knows the to_s method
|
129
127
|
def ==(other)
|
130
|
-
|
128
|
+
case other
|
131
129
|
when Character
|
132
130
|
to_str == other.to_str
|
133
131
|
|
@@ -141,8 +139,6 @@ module Regex # This module is used as a namespace
|
|
141
139
|
# Unknown type: try with a conversion
|
142
140
|
self == other.to_s # Recursive call
|
143
141
|
end
|
144
|
-
|
145
|
-
return result
|
146
142
|
end
|
147
143
|
|
148
144
|
# Return a plain English description of the character
|
@@ -160,7 +156,7 @@ module Regex # This module is used as a namespace
|
|
160
156
|
def text_repr
|
161
157
|
return char if lexeme.nil?
|
162
158
|
|
163
|
-
|
159
|
+
lexeme.dup
|
164
160
|
end
|
165
161
|
|
166
162
|
# Conversion method that returns a codepoint for the given two characters
|
@@ -178,7 +174,7 @@ module Regex # This module is used as a namespace
|
|
178
174
|
# If it is not a special sequence, then the escaped character is
|
179
175
|
# considered literally (the backslash is 'dummy')
|
180
176
|
result = char2codepoint(aDigram[-1]) if result.nil?
|
181
|
-
|
177
|
+
result
|
182
178
|
end
|
183
179
|
|
184
180
|
private_class_method :digram2codepoint
|
@@ -199,7 +195,7 @@ module Regex # This module is used as a namespace
|
|
199
195
|
|
200
196
|
# Extract the hexadecimal number
|
201
197
|
hexliteral = hexa # shorterSeq.sub(/^[xXu]\{?([0-9a-fA-F]+)}?$/, '\1')
|
202
|
-
|
198
|
+
hexliteral.hex
|
203
199
|
end
|
204
200
|
end
|
205
201
|
|
data/lib/regex/concatenation.rb
CHANGED
@@ -19,11 +19,9 @@ module Regex # This module is used as a namespace
|
|
19
19
|
# Conversion method re-definition.
|
20
20
|
# Purpose: Return the String representation of the concatenated expressions.
|
21
21
|
def text_repr
|
22
|
-
|
22
|
+
children.inject(+'') do |result, child|
|
23
23
|
result << child.to_str
|
24
24
|
end
|
25
|
-
|
26
|
-
return outcome
|
27
25
|
end
|
28
26
|
end # class
|
29
27
|
end # module
|
data/lib/regex/expression.rb
CHANGED
@@ -13,9 +13,6 @@ module Regex # This module is used as a namespace
|
|
13
13
|
# @return [NilClass, Anchor]
|
14
14
|
attr_accessor :end_anchor
|
15
15
|
|
16
|
-
# Constructor
|
17
|
-
def initialize(); end
|
18
|
-
|
19
16
|
# Abstract method. Return true iff the expression is atomic
|
20
17
|
# (= doesn't have any child).
|
21
18
|
# @return [Boolean]
|
@@ -29,8 +26,7 @@ module Regex # This module is used as a namespace
|
|
29
26
|
# @param theParentOptions [Hash] matching options. They are overridden
|
30
27
|
# by options with same name that are bound to this object.
|
31
28
|
def options(theParentOptions)
|
32
|
-
|
33
|
-
return resulting_options
|
29
|
+
theParentOptions.merge(@local_options)
|
34
30
|
end
|
35
31
|
|
36
32
|
# Template method.
|
@@ -41,7 +37,7 @@ module Regex # This module is used as a namespace
|
|
41
37
|
result << text_repr
|
42
38
|
result << suffix
|
43
39
|
|
44
|
-
|
40
|
+
result
|
45
41
|
end
|
46
42
|
|
47
43
|
protected
|
data/lib/regex/lookaround.rb
CHANGED
@@ -42,8 +42,7 @@ module Regex # This module is used as a namespace
|
|
42
42
|
def to_str
|
43
43
|
dir_syntax = (dir == :ahead) ? '' : '<'
|
44
44
|
kind_syntax = (kind == :positive) ? '=' : '!'
|
45
|
-
|
46
|
-
return result
|
45
|
+
"(?#{dir_syntax}#{kind_syntax}#{child.to_str})"
|
47
46
|
end
|
48
47
|
end # class
|
49
48
|
end # module
|
data/lib/regex/match_option.rb
CHANGED
@@ -26,7 +26,7 @@ module Regex # This module is used as a namespace
|
|
26
26
|
result = 0
|
27
27
|
flags.each { |f| result |= f }
|
28
28
|
|
29
|
-
|
29
|
+
result
|
30
30
|
end
|
31
31
|
|
32
32
|
# Equality operator
|
@@ -40,7 +40,7 @@ module Regex # This module is used as a namespace
|
|
40
40
|
isEqual = false
|
41
41
|
end
|
42
42
|
|
43
|
-
|
43
|
+
isEqual
|
44
44
|
end
|
45
45
|
|
46
46
|
protected
|
@@ -32,9 +32,7 @@ module Regex # This module is used as a namespace
|
|
32
32
|
|
33
33
|
# Return the text representation of the child (if any)
|
34
34
|
def all_child_text
|
35
|
-
|
36
|
-
|
37
|
-
return result
|
35
|
+
child.nil? ? '' : child.to_str
|
38
36
|
end
|
39
37
|
end # class
|
40
38
|
end # module
|
data/lib/regex/multiplicity.rb
CHANGED
@@ -56,7 +56,7 @@ module Regex # This module is used as a namespace
|
|
56
56
|
possessive: '+'
|
57
57
|
}
|
58
58
|
|
59
|
-
|
59
|
+
subresult + policy2suffix[policy]
|
60
60
|
end
|
61
61
|
|
62
62
|
private
|
@@ -66,7 +66,7 @@ module Regex # This module is used as a namespace
|
|
66
66
|
err_msg = "Invalid lower bound of repetition count #{aLowerBound}"
|
67
67
|
raise StandardError, err_msg unless aLowerBound.kind_of?(Integer)
|
68
68
|
|
69
|
-
|
69
|
+
aLowerBound
|
70
70
|
end
|
71
71
|
|
72
72
|
# Validation method. Return the validated lower bound value
|
@@ -76,7 +76,7 @@ module Regex # This module is used as a namespace
|
|
76
76
|
raise StandardError, err_msg
|
77
77
|
end
|
78
78
|
|
79
|
-
|
79
|
+
anUpperBound
|
80
80
|
end
|
81
81
|
|
82
82
|
# Validation method. Return the validated policy value.
|
@@ -85,7 +85,7 @@ module Regex # This module is used as a namespace
|
|
85
85
|
valid_policies = %i[greedy lazy possessive]
|
86
86
|
raise StandardError, err_msg unless valid_policies.include? aPolicy
|
87
87
|
|
88
|
-
|
88
|
+
aPolicy
|
89
89
|
end
|
90
90
|
end # class
|
91
91
|
end # module
|
@@ -22,8 +22,7 @@ module Regex # This module is used as a namespace
|
|
22
22
|
# Conversion method re-definition.
|
23
23
|
# Purpose: Return the String representation of the captured expression.
|
24
24
|
def text_repr
|
25
|
-
|
26
|
-
return result
|
25
|
+
"(?:#{all_child_text})"
|
27
26
|
end
|
28
27
|
end # class
|
29
28
|
end # module
|
@@ -24,7 +24,7 @@ module Regex # This module is used as a namespace
|
|
24
24
|
def <<(aChild)
|
25
25
|
@children << aChild
|
26
26
|
|
27
|
-
|
27
|
+
self
|
28
28
|
end
|
29
29
|
|
30
30
|
# Notification that the parse tree construction is complete.
|
@@ -52,7 +52,7 @@ module Regex # This module is used as a namespace
|
|
52
52
|
def df_visitor
|
53
53
|
root = children # The visit will start from the children of this object
|
54
54
|
|
55
|
-
|
55
|
+
Enumerator.new do |result| # result is a Yielder
|
56
56
|
# Initialization part: will run once
|
57
57
|
visit_stack = [root] # The LIFO queue of nodes to visit
|
58
58
|
|
@@ -78,8 +78,6 @@ module Regex # This module is used as a namespace
|
|
78
78
|
break if visit_stack.empty?
|
79
79
|
end
|
80
80
|
end
|
81
|
-
|
82
|
-
return visitor
|
83
81
|
end
|
84
82
|
end # class
|
85
83
|
end # module
|
data/lib/regex/quantifiable.rb
CHANGED
data/lib/regex/repetition.rb
CHANGED
@@ -30,8 +30,7 @@ module Regex # This module is used as a namespace
|
|
30
30
|
# Conversion method re-definition.
|
31
31
|
# @return [String] String representation of the concatenated expressions.
|
32
32
|
def text_repr
|
33
|
-
|
34
|
-
return result
|
33
|
+
all_child_text + multiplicity.to_str
|
35
34
|
end
|
36
35
|
end # class
|
37
36
|
end # module
|
data/lib/srl_ruby/ast_builder.rb
CHANGED
@@ -59,13 +59,11 @@ module SrlRuby
|
|
59
59
|
# @param aTokenPosition [Integer] Position of token in the input stream
|
60
60
|
# @param aToken [Rley::Lexical::Token] The input token
|
61
61
|
def new_leaf_node(_production, _terminal, aTokenPosition, aToken)
|
62
|
-
|
63
|
-
|
64
|
-
return node
|
62
|
+
Rley::PTree::TerminalNode.new(aToken, aTokenPosition)
|
65
63
|
end
|
66
64
|
|
67
65
|
def multiplicity(lowerBound, upperBound)
|
68
|
-
|
66
|
+
Regex::Multiplicity.new(lowerBound, upperBound, :greedy)
|
69
67
|
end
|
70
68
|
|
71
69
|
# rubocop: disable Style/OptionalBooleanParameter
|
@@ -88,14 +86,14 @@ module SrlRuby
|
|
88
86
|
result = Regex::Character.new(aString)
|
89
87
|
end
|
90
88
|
|
91
|
-
|
89
|
+
result
|
92
90
|
end
|
93
91
|
# rubocop: enable Style/OptionalBooleanParameter
|
94
92
|
|
95
93
|
def char_range(lowerBound, upperBound)
|
96
94
|
lower = Regex::Character.new(lowerBound)
|
97
95
|
upper = Regex::Character.new(upperBound)
|
98
|
-
|
96
|
+
Regex::CharRange.new(lower, upper)
|
99
97
|
end
|
100
98
|
|
101
99
|
def char_class(toNegate, *theChildren)
|
@@ -283,7 +281,7 @@ module SrlRuby
|
|
283
281
|
end
|
284
282
|
|
285
283
|
# TODO check other implementations
|
286
|
-
|
284
|
+
Regex::CharClass.new(false, *alternatives)
|
287
285
|
end
|
288
286
|
|
289
287
|
# rule('character_class' => %w[NONE OF STRING_LIT]).tag 'none_of'
|
@@ -337,13 +335,13 @@ module SrlRuby
|
|
337
335
|
# What if literal is empty?...
|
338
336
|
|
339
337
|
raw_literal = theChildren[-1].token.lexeme.dup
|
340
|
-
|
338
|
+
string_literal(raw_literal)
|
341
339
|
end
|
342
340
|
|
343
341
|
# rule('raw' => %w[RAW STRING_LIT]).tag 'raw_literal'
|
344
342
|
def reduce_raw_literal(_production, _range, _tokens, theChildren)
|
345
343
|
raw_literal = theChildren[-1].token.lexeme.dup
|
346
|
-
|
344
|
+
Regex::RawExpression.new(raw_literal)
|
347
345
|
end
|
348
346
|
|
349
347
|
# rule('alternation' => %w[ANY OF LPAREN alternatives RPAREN]).tag 'any_of'
|
@@ -358,12 +356,12 @@ module SrlRuby
|
|
358
356
|
result = Regex::Alternation.new(*theChildren[3])
|
359
357
|
end
|
360
358
|
|
361
|
-
|
359
|
+
result
|
362
360
|
end
|
363
361
|
|
364
362
|
# rule('alternatives' => %w[alternatives separator quantifiable]).tag 'alternative_list'
|
365
363
|
def reduce_alternative_list(_production, _range, _tokens, theChildren)
|
366
|
-
|
364
|
+
theChildren[0] << theChildren[-1]
|
367
365
|
end
|
368
366
|
|
369
367
|
# rule('alternatives' => 'quantifiable').tag 'simple_alternative'
|
@@ -418,7 +416,7 @@ module SrlRuby
|
|
418
416
|
make_last_repetition_lazy(theChildren[1])
|
419
417
|
group = Regex::CapturingGroup.new(theChildren[1], name)
|
420
418
|
(_, until_expr) = theChildren[4]
|
421
|
-
|
419
|
+
Regex::Concatenation.new(group, until_expr)
|
422
420
|
end
|
423
421
|
|
424
422
|
# rule('quantifier' => 'ONCE').tag 'once'
|
data/lib/srl_ruby/tokenizer.rb
CHANGED
@@ -17,6 +17,16 @@ module SrlRuby
|
|
17
17
|
# Delimiters: parentheses '(' and ')'
|
18
18
|
# Separators: comma (optional)
|
19
19
|
class Tokenizer
|
20
|
+
PATT_CHAR_CLASS = /[^,"\s]{2,}/.freeze
|
21
|
+
PATT_DIGIT_LIT = /[0-9]((?=\s|,|\))|$)/.freeze
|
22
|
+
PATT_IDENTIFIER = /[a-zA-Z_][a-zA-Z0-9_]+/.freeze
|
23
|
+
PATT_INTEGER = /[0-9]{2,}((?=\s|,|\))|$)/.freeze # An integer has 2..* digits
|
24
|
+
PATT_LETTER_LIT = /[a-zA-Z]((?=\s|,|\))|$)/.freeze
|
25
|
+
PATT_NEWLINE = /(?:\r\n)|\r|\n/.freeze
|
26
|
+
PATT_STR_DBL_QUOTE = /"(?:\\"|[^"])*"/.freeze # Double quotes literal?
|
27
|
+
PATT_STR_SNGL_QUOTE = /'(?:\\'|[^'])*'/.freeze # Single quotes literal?
|
28
|
+
PATT_WHITESPACE = /[ \t\f]+/.freeze
|
29
|
+
|
20
30
|
# @return [StringScanner]
|
21
31
|
attr_reader(:scanner)
|
22
32
|
|
@@ -26,14 +36,14 @@ module SrlRuby
|
|
26
36
|
# @return [Integer] offset of start of current line within input
|
27
37
|
attr_reader(:line_start)
|
28
38
|
|
29
|
-
|
39
|
+
Lexeme2name = {
|
30
40
|
'(' => 'LPAREN',
|
31
41
|
')' => 'RPAREN',
|
32
42
|
',' => 'COMMA'
|
33
43
|
}.freeze
|
34
44
|
|
35
45
|
# Here are all the SRL keywords (in uppercase)
|
36
|
-
|
46
|
+
Keywords = %w[
|
37
47
|
ALL
|
38
48
|
ALREADY
|
39
49
|
AND
|
@@ -109,47 +119,55 @@ module SrlRuby
|
|
109
119
|
tok_sequence << token unless token.nil?
|
110
120
|
end
|
111
121
|
|
112
|
-
|
122
|
+
tok_sequence
|
113
123
|
end
|
114
124
|
|
115
125
|
private
|
116
126
|
|
117
127
|
def _next_token
|
118
|
-
skip_whitespaces
|
119
|
-
curr_ch = scanner.peek(1)
|
120
|
-
return nil if curr_ch.nil? || curr_ch.empty?
|
121
|
-
|
122
128
|
token = nil
|
123
129
|
|
124
|
-
|
125
|
-
|
126
|
-
|
127
|
-
|
128
|
-
|
129
|
-
|
130
|
-
|
131
|
-
|
132
|
-
|
133
|
-
|
134
|
-
|
135
|
-
|
136
|
-
|
137
|
-
|
138
|
-
|
139
|
-
|
140
|
-
|
141
|
-
|
142
|
-
|
143
|
-
|
144
|
-
|
145
|
-
|
146
|
-
|
147
|
-
|
148
|
-
|
149
|
-
|
150
|
-
|
130
|
+
# Loop until end of input reached or token found
|
131
|
+
until token || scanner.eos?
|
132
|
+
|
133
|
+
if scanner.skip(PATT_NEWLINE)
|
134
|
+
next_line_scanned
|
135
|
+
next
|
136
|
+
end
|
137
|
+
next if scanner.skip(PATT_WHITESPACE) # Skip whitespaces
|
138
|
+
|
139
|
+
curr_ch = scanner.peek(1)
|
140
|
+
|
141
|
+
token = if '(),'.include? curr_ch
|
142
|
+
# Delimiters, separators => single character token
|
143
|
+
build_token(Lexeme2name[curr_ch], scanner.getch)
|
144
|
+
elsif (lexeme = scanner.scan(PATT_INTEGER))
|
145
|
+
build_token('INTEGER', lexeme)
|
146
|
+
elsif (lexeme = scanner.scan(PATT_DIGIT_LIT))
|
147
|
+
build_token('DIGIT_LIT', lexeme)
|
148
|
+
elsif (lexeme = scanner.scan(PATT_STR_DBL_QUOTE))
|
149
|
+
unquoted = lexeme.gsub(/(^")|("$)/, '')
|
150
|
+
build_token('STRING_LIT', unquoted)
|
151
|
+
elsif (lexeme = scanner.scan(PATT_STR_SNGL_QUOTE))
|
152
|
+
unquoted = lexeme.gsub(/(^')|('$)/, '')
|
153
|
+
build_token('STRING_LIT', unquoted)
|
154
|
+
elsif (lexeme = scanner.scan(PATT_LETTER_LIT))
|
155
|
+
build_token('LETTER_LIT', lexeme)
|
156
|
+
elsif (lexeme = scanner.scan(PATT_IDENTIFIER))
|
157
|
+
keyw = Keywords[lexeme.upcase]
|
158
|
+
tok_type = keyw || 'IDENTIFIER'
|
159
|
+
build_token(tok_type, lexeme)
|
160
|
+
elsif (lexeme = scanner.scan(PATT_CHAR_CLASS))
|
161
|
+
build_token('CHAR_CLASS', lexeme)
|
162
|
+
else # Unknown token
|
163
|
+
erroneous = curr_ch.nil? ? '' : scanner.scan(/./)
|
164
|
+
sequel = scanner.scan(/.{1,20}/)
|
165
|
+
erroneous += sequel unless sequel.nil?
|
166
|
+
raise ScanError, "Unknown token #{erroneous} on line #{lineno}"
|
167
|
+
end
|
168
|
+
end # until
|
151
169
|
|
152
|
-
|
170
|
+
token
|
153
171
|
end
|
154
172
|
|
155
173
|
def build_token(aSymbolName, aLexeme)
|
@@ -162,38 +180,13 @@ module SrlRuby
|
|
162
180
|
raise e
|
163
181
|
end
|
164
182
|
|
165
|
-
|
166
|
-
end
|
167
|
-
|
168
|
-
def skip_whitespaces
|
169
|
-
pre_pos = scanner.pos
|
170
|
-
|
171
|
-
loop do
|
172
|
-
ws_found = false
|
173
|
-
found = scanner.skip(/[ \t\f]+/)
|
174
|
-
ws_found = true if found
|
175
|
-
found = scanner.skip(/(?:\r\n)|\r|\n/)
|
176
|
-
if found
|
177
|
-
ws_found = true
|
178
|
-
@lineno += 1
|
179
|
-
@line_start = scanner.pos
|
180
|
-
end
|
181
|
-
break unless ws_found
|
182
|
-
end
|
183
|
-
|
184
|
-
curr_pos = scanner.pos
|
185
|
-
return if curr_pos == pre_pos
|
186
|
-
# skipped = scanner.string.slice(Range.new(pre_pos, curr_pos))
|
187
|
-
# triplet = skipped.rpartition(/\n|\r/)
|
188
|
-
# @column = 1 unless triplet[1].empty?
|
189
|
-
|
190
|
-
# Correction for the tabs
|
191
|
-
# tab_count = triplet[2].chars.count { |ch| ch =~ /\t/ }
|
192
|
-
# @column += triplet[2].size + tab_count * (tab_size - 1) - 1
|
183
|
+
token
|
193
184
|
end
|
194
185
|
|
195
|
-
|
196
|
-
|
186
|
+
# Event: next line detected.
|
187
|
+
def next_line_scanned
|
188
|
+
@lineno += 1
|
189
|
+
@line_start = scanner.pos
|
197
190
|
end
|
198
191
|
end # class
|
199
192
|
end # module
|
data/lib/srl_ruby/version.rb
CHANGED
data/lib/srl_ruby.rb
CHANGED
@@ -14,7 +14,7 @@ module SrlRuby
|
|
14
14
|
File.open(filename, 'r') { |f| source = f.read }
|
15
15
|
return source if source.nil? || source.empty?
|
16
16
|
|
17
|
-
|
17
|
+
parse(source)
|
18
18
|
end
|
19
19
|
|
20
20
|
# Compile the given SRL expression into its Regexp equivalent.
|
@@ -52,6 +52,6 @@ module SrlRuby
|
|
52
52
|
# Now output the regexp literal
|
53
53
|
root = ast_ptree.root
|
54
54
|
options = root.is_a?(Regex::MatchOption) ? root.combine_opts : nil
|
55
|
-
|
55
|
+
Regexp.new(root.to_str, options)
|
56
56
|
end
|
57
57
|
end # module
|
@@ -32,7 +32,7 @@ module Acceptance
|
|
32
32
|
Terminal2NodeClass
|
33
33
|
end
|
34
34
|
|
35
|
-
# rule('rule_file' =>
|
35
|
+
# rule('rule_file' => 'srl_heading tests').as 'start_rule'
|
36
36
|
def reduce_start_rule(_production, _range, _tokens, theChildren)
|
37
37
|
rule_file = RuleFileTests.new(theChildren[0])
|
38
38
|
tests = theChildren.last.flatten
|
@@ -46,7 +46,7 @@ module Acceptance
|
|
46
46
|
end
|
47
47
|
end
|
48
48
|
|
49
|
-
|
49
|
+
rule_file
|
50
50
|
end
|
51
51
|
|
52
52
|
# rule('srl_heading' => %w[SRL: SRL_SOURCE]).as 'srl_source'
|
@@ -54,16 +54,6 @@ module Acceptance
|
|
54
54
|
theChildren.last
|
55
55
|
end
|
56
56
|
|
57
|
-
# rule('srl_tests' => %w[srl_tests single_test]).as 'test_list'
|
58
|
-
def reduce_test_list(_production, _range, _tokens, theChildren)
|
59
|
-
theChildren[0] << theChildren[1]
|
60
|
-
end
|
61
|
-
|
62
|
-
# rule('srl_tests' => 'single_test').as 'one_test'
|
63
|
-
def reduce_one_test(_production, _range, _tokens, theChildren)
|
64
|
-
[theChildren.last]
|
65
|
-
end
|
66
|
-
|
67
57
|
# rule('match_test' => %w[MATCH: STRING_LIT]).as 'match_string'
|
68
58
|
def reduce_match_string(_production, _range, _tokens, theChildren)
|
69
59
|
MatchTest.new(theChildren.last)
|
@@ -85,17 +75,6 @@ module Acceptance
|
|
85
75
|
theChildren[2]
|
86
76
|
end
|
87
77
|
|
88
|
-
# rule('capture_expectations' => %w[capture_expectations
|
89
|
-
# single_expectation]).as 'assertion_list'
|
90
|
-
def reduce_assertion_list(_production, _range, _tokens, theChildren)
|
91
|
-
theChildren[0] << theChildren[1]
|
92
|
-
end
|
93
|
-
|
94
|
-
# rule('capture_expectations' => 'single_expectation').as 'one_expectation'
|
95
|
-
def reduce_one_expectation(_production, _range, _tokens, theChildren)
|
96
|
-
[theChildren.last]
|
97
|
-
end
|
98
|
-
|
99
78
|
# rule('single_expectation' => %w[DASH INTEGER COLON capture_variable
|
100
79
|
# COLON STRING_LIT]).as 'capture_expectation'
|
101
80
|
def reduce_capture_expectation(_production, _range, _tokens, theChildren)
|
@@ -6,7 +6,6 @@ require 'rley' # Load the Rley gem
|
|
6
6
|
# Grammar for Test-Rule files
|
7
7
|
# [File format](https://github.com/SimpleRegex/Test-Rules/blob/master/README.md)
|
8
8
|
########################################
|
9
|
-
# Define a grammar for basic arithmetical expressions
|
10
9
|
builder = Rley::grammar_builder do
|
11
10
|
# Punctuation
|
12
11
|
add_terminals('COLON', 'DASH')
|
@@ -19,24 +18,20 @@ builder = Rley::grammar_builder do
|
|
19
18
|
add_terminals('INTEGER', 'STRING_LIT')
|
20
19
|
add_terminals('IDENTIFIER', 'SRL_SOURCE')
|
21
20
|
|
22
|
-
rule('rule_file' => 'srl_heading
|
23
|
-
rule('srl_heading' => 'SRL SRL_SOURCE').
|
24
|
-
rule('
|
25
|
-
rule('
|
26
|
-
rule('
|
27
|
-
rule('
|
28
|
-
rule('
|
29
|
-
rule('
|
30
|
-
rule('
|
31
|
-
rule('
|
32
|
-
rule('
|
33
|
-
rule('
|
34
|
-
rule('
|
35
|
-
rule('
|
36
|
-
rule('capture_expectations' => 'single_expectation').as 'one_expectation'
|
37
|
-
rule('single_expectation' => 'DASH INTEGER COLON capture_variable COLON STRING_LIT').as 'capture_expectation'
|
38
|
-
rule('capture_variable' => 'INTEGER').as 'var_integer'
|
39
|
-
rule('capture_variable' => 'IDENTIFIER').as 'var_identifier'
|
21
|
+
rule('rule_file' => 'srl_heading srl_test+').tag 'start_rule'
|
22
|
+
rule('srl_heading' => 'SRL SRL_SOURCE').tag 'srl_source'
|
23
|
+
rule('srl_test' => 'atomic_test').tag 'single_atomic_test'
|
24
|
+
rule('srl_test' => 'compound_test').tag 'single_compound_test'
|
25
|
+
rule('atomic_test' => 'match_test').tag 'atomic_match'
|
26
|
+
rule('atomic_test' => 'no_match_test').tag 'atomic_no_match'
|
27
|
+
rule('compound_test' => 'capture_test').tag 'compound_capture'
|
28
|
+
rule('match_test' => 'MATCH STRING_LIT').tag 'match_string'
|
29
|
+
rule('no_match_test' => 'NO MATCH STRING_LIT').tag 'no_match_string'
|
30
|
+
rule('capture_test' => 'capture_heading capture_expectation+').tag 'capture_test'
|
31
|
+
rule('capture_heading' => 'CAPTURE FOR STRING_LIT COLON').tag 'capture_string'
|
32
|
+
rule('capture_expectation' => 'DASH INTEGER COLON capture_variable COLON STRING_LIT').tag 'capture_expectation'
|
33
|
+
rule('capture_variable' => 'INTEGER').tag 'var_integer'
|
34
|
+
rule('capture_variable' => 'IDENTIFIER').tag 'var_identifier'
|
40
35
|
end
|
41
36
|
|
42
37
|
# And now build the grammar...
|
@@ -14,7 +14,7 @@ module Acceptance # This module is used as a namespace
|
|
14
14
|
File.open(filename, 'r') { |f| source = f.read }
|
15
15
|
return source if source.nil? || source.empty?
|
16
16
|
|
17
|
-
|
17
|
+
parse(source)
|
18
18
|
end
|
19
19
|
|
20
20
|
# Parse the rule file
|
@@ -41,8 +41,7 @@ module Acceptance # This module is used as a namespace
|
|
41
41
|
ast_ptree = engine.convert(result)
|
42
42
|
|
43
43
|
# Now output the regexp literal
|
44
|
-
|
45
|
-
return root
|
44
|
+
ast_ptree.root
|
46
45
|
end
|
47
46
|
end
|
48
47
|
end # module
|
@@ -58,7 +58,7 @@ module Acceptance
|
|
58
58
|
tok_sequence << token unless token.nil?
|
59
59
|
end
|
60
60
|
|
61
|
-
|
61
|
+
tok_sequence
|
62
62
|
end
|
63
63
|
|
64
64
|
private
|
@@ -68,13 +68,11 @@ module Acceptance
|
|
68
68
|
curr_ch = scanner.peek(1)
|
69
69
|
return nil if curr_ch.nil? || curr_ch.empty?
|
70
70
|
|
71
|
-
|
71
|
+
if state == :default
|
72
72
|
default_mode
|
73
73
|
else
|
74
74
|
expecting_srl
|
75
75
|
end
|
76
|
-
|
77
|
-
return token
|
78
76
|
end
|
79
77
|
|
80
78
|
def default_mode
|
@@ -103,7 +101,7 @@ module Acceptance
|
|
103
101
|
raise ScanError, "Unknown token #{erroneous}"
|
104
102
|
end
|
105
103
|
|
106
|
-
|
104
|
+
token
|
107
105
|
end
|
108
106
|
|
109
107
|
def expecting_srl
|
@@ -123,7 +121,7 @@ module Acceptance
|
|
123
121
|
raise e
|
124
122
|
end
|
125
123
|
|
126
|
-
|
124
|
+
token
|
127
125
|
end
|
128
126
|
|
129
127
|
def skip_noise
|
@@ -152,7 +150,7 @@ module Acceptance
|
|
152
150
|
end
|
153
151
|
|
154
152
|
curr_pos = scanner.pos
|
155
|
-
|
153
|
+
curr_pos != pre_pos
|
156
154
|
end
|
157
155
|
|
158
156
|
def skip_comment
|
@@ -53,6 +53,7 @@ module Regex # Open this namespace, to get rid of scope qualifiers
|
|
53
53
|
end
|
54
54
|
end # context
|
55
55
|
|
56
|
+
# rubocop: disable Style/DocumentDynamicEvalDefinition
|
56
57
|
context 'Provided services' do
|
57
58
|
it 'Should know its lexeme if created from a string' do
|
58
59
|
# Lexeme is defined when the character was initialised from a text
|
@@ -89,7 +90,6 @@ module Regex # Open this namespace, to get rid of scope qualifiers
|
|
89
90
|
|
90
91
|
# Try with our escape sequence samples
|
91
92
|
(SampleDigrams + SampleNumEscs).each do |escape_seq|
|
92
|
-
|
93
93
|
# Build a string from escape sequence literal
|
94
94
|
expectation = String.class_eval(%Q|"#{escape_seq}"|, __FILE__, __LINE__)
|
95
95
|
new_ch = Character.new(escape_seq).to_str
|
@@ -115,7 +115,6 @@ module Regex # Open this namespace, to get rid of scope qualifiers
|
|
115
115
|
|
116
116
|
# Try with our escape sequence samples
|
117
117
|
(SampleDigrams + SampleNumEscs).each do |escape_seq|
|
118
|
-
|
119
118
|
# Get ordinal value of given escape sequence
|
120
119
|
expectation = String.class_eval(%Q|"#{escape_seq}".ord()|, __FILE__, __LINE__)
|
121
120
|
expect(Character.new(escape_seq).codepoint).to eq(expectation)
|
@@ -151,7 +150,7 @@ module Regex # Open this namespace, to get rid of scope qualifiers
|
|
151
150
|
# Case 5: test fails with multiple character strings
|
152
151
|
expect(newOne).not_to eq('03a3')
|
153
152
|
|
154
|
-
# Case 6: equality testing with
|
153
|
+
# Case 6: equality testing with arbitrary object
|
155
154
|
expect(newOne).not_to eq(nil)
|
156
155
|
expect(newOne).not_to eq(Object.new)
|
157
156
|
|
@@ -173,6 +172,7 @@ module Regex # Open this namespace, to get rid of scope qualifiers
|
|
173
172
|
expect(ch2.explain).to eq("the character '\u03a3'")
|
174
173
|
end
|
175
174
|
end # context
|
175
|
+
# rubocop: enable Style/DocumentDynamicEvalDefinition
|
176
176
|
# rubocop: enable Lint/ConstantDefinitionInBlock
|
177
177
|
end # describe
|
178
178
|
end # module
|
data/srl_ruby.gemspec
CHANGED
@@ -68,11 +68,11 @@ SUMMARY
|
|
68
68
|
spec.required_ruby_version = '>= 2.5.0'
|
69
69
|
|
70
70
|
# Runtime dependencies
|
71
|
-
spec.add_dependency 'rley', '~> 0.8.
|
71
|
+
spec.add_dependency 'rley', '~> 0.8.11'
|
72
72
|
|
73
73
|
# Development dependencies
|
74
|
-
spec.add_development_dependency 'bundler', '
|
75
|
-
spec.add_development_dependency 'cucumber', '>=
|
76
|
-
spec.add_development_dependency 'rake', '
|
74
|
+
spec.add_development_dependency 'bundler', '>= 2.2.0'
|
75
|
+
spec.add_development_dependency 'cucumber', '>= 3.1.2'
|
76
|
+
spec.add_development_dependency 'rake', '>= 12.0'
|
77
77
|
spec.add_development_dependency 'rspec', '~> 3.0'
|
78
78
|
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: srl_ruby
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.4.
|
4
|
+
version: 0.4.12
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Dimitri Geshef
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date:
|
11
|
+
date: 2022-04-22 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: rley
|
@@ -16,26 +16,26 @@ dependencies:
|
|
16
16
|
requirements:
|
17
17
|
- - "~>"
|
18
18
|
- !ruby/object:Gem::Version
|
19
|
-
version: 0.8.
|
19
|
+
version: 0.8.11
|
20
20
|
type: :runtime
|
21
21
|
prerelease: false
|
22
22
|
version_requirements: !ruby/object:Gem::Requirement
|
23
23
|
requirements:
|
24
24
|
- - "~>"
|
25
25
|
- !ruby/object:Gem::Version
|
26
|
-
version: 0.8.
|
26
|
+
version: 0.8.11
|
27
27
|
- !ruby/object:Gem::Dependency
|
28
28
|
name: bundler
|
29
29
|
requirement: !ruby/object:Gem::Requirement
|
30
30
|
requirements:
|
31
|
-
- - "
|
31
|
+
- - ">="
|
32
32
|
- !ruby/object:Gem::Version
|
33
33
|
version: 2.2.0
|
34
34
|
type: :development
|
35
35
|
prerelease: false
|
36
36
|
version_requirements: !ruby/object:Gem::Requirement
|
37
37
|
requirements:
|
38
|
-
- - "
|
38
|
+
- - ">="
|
39
39
|
- !ruby/object:Gem::Version
|
40
40
|
version: 2.2.0
|
41
41
|
- !ruby/object:Gem::Dependency
|
@@ -44,26 +44,26 @@ dependencies:
|
|
44
44
|
requirements:
|
45
45
|
- - ">="
|
46
46
|
- !ruby/object:Gem::Version
|
47
|
-
version:
|
47
|
+
version: 3.1.2
|
48
48
|
type: :development
|
49
49
|
prerelease: false
|
50
50
|
version_requirements: !ruby/object:Gem::Requirement
|
51
51
|
requirements:
|
52
52
|
- - ">="
|
53
53
|
- !ruby/object:Gem::Version
|
54
|
-
version:
|
54
|
+
version: 3.1.2
|
55
55
|
- !ruby/object:Gem::Dependency
|
56
56
|
name: rake
|
57
57
|
requirement: !ruby/object:Gem::Requirement
|
58
58
|
requirements:
|
59
|
-
- - "
|
59
|
+
- - ">="
|
60
60
|
- !ruby/object:Gem::Version
|
61
61
|
version: '12.0'
|
62
62
|
type: :development
|
63
63
|
prerelease: false
|
64
64
|
version_requirements: !ruby/object:Gem::Requirement
|
65
65
|
requirements:
|
66
|
-
- - "
|
66
|
+
- - ">="
|
67
67
|
- !ruby/object:Gem::Version
|
68
68
|
version: '12.0'
|
69
69
|
- !ruby/object:Gem::Dependency
|
@@ -206,7 +206,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
206
206
|
- !ruby/object:Gem::Version
|
207
207
|
version: '0'
|
208
208
|
requirements: []
|
209
|
-
rubygems_version: 3.
|
209
|
+
rubygems_version: 3.3.7
|
210
210
|
signing_key:
|
211
211
|
specification_version: 4
|
212
212
|
summary: A parser for the [Simple Regex Language](https://simple-regex.com/). It translates
|