srl_ruby 0.2.5 → 0.2.6

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 13bb293008059e97968eab3b976e05f7fe9c353e
4
- data.tar.gz: 933e9531ae294fe4adb46e3c7ef87c58a3243388
3
+ metadata.gz: 475a892be14d441ff82714324f489707839eae57
4
+ data.tar.gz: 52ad8fffe6fb8abe17e99e690dc182bb0b3a48b0
5
5
  SHA512:
6
- metadata.gz: 1c585bfad3f6330b87f47ab7c1dae115bc953a50220bc3d87642524c523cfbe1ae19193cfc0173730d456ec6691611abd59964ea4722a42bdbb1571f6c06ce9c
7
- data.tar.gz: f835d81332961e020addee4ec8b5e4c46378f29ad12a7497fb03f5545eee1c884b1da5b8a621d4f76713b348b3b324fc96c4eb9f60b98037c1c7e9b98ab5d9c6
6
+ metadata.gz: 24b028b5becb5f2ac57c6fbf1aaa5eb45d7b3e3bde1e8ea2892b56f7d67fc643bf8d28cff0e431e7a3f97c996b0c44321bfd52eb17d818daeb51a334cb2d056b
7
+ data.tar.gz: b21b2fefc35eac997bd06a0a4a7bc586d80eba2074cfb365c7a22a4e6390e48a45960df3132b09abc2e53569c5c6ad4f925510191808212ba6a8b991dfc259a8
data/CHANGELOG.md CHANGED
@@ -1,5 +1,14 @@
1
+ ## [0.2.6] - 2018-04-03
2
+ SrlRuby passes 13 tests out of 15 from standard SRL test suite.
3
+ ### Changed
4
+ - Class `SrlRuby#Tokenizer` added CHAR_CLASS literal and keywords EITHER, NONE, RAW
5
+ - Grammar expanded to support 'NONE OF', 'EITHER OF', 'RAW' expressions and unquoted character classes
6
+ - Class `SrlRuby::ASTBuilder` updates to reflect changes in the grammar.
7
+ - File `acceptance/srl_test_suite_spec.rb`. 13 test files from official test suite are passing.
8
+
9
+
1
10
  ## [0.2.5] - 2018-04-02
2
- SrlRuby passes 12 tests out of 15 standard SRL tests in total.
11
+ SrlRuby passes 12 tests out of 15 from standard SRL test suite.
3
12
  ### Changed
4
13
  - Class `SrlRuby#Tokenizer` added keywords CARRIAGE, RETURN, VERTICAL, WORD
5
14
  - Grammar expanded to support 'CARRIAGE RETURN', 'VERTICAL TAB', 'WORD', 'NO WORD' expressions
@@ -8,7 +17,7 @@ SrlRuby passes 12 tests out of 15 standard SRL tests in total.
8
17
 
9
18
 
10
19
  ## [0.2.4] - 2018-04-02
11
- SrlRuby passes 10 tests out of 15 standard SRL tests in total.
20
+ SrlRuby passes 10 tests out of 15 from standard SRL test suite.
12
21
  ### Changed
13
22
  - File `lib/srl_ruby/grammar.rb` grammar refactoring. Added support for new 'no digit' SRL expression.
14
23
  - Class `SrlRuby::ASTBuilder` updates to reflect changes in the grammar.
@@ -27,7 +36,7 @@ SrlRuby passes 10 tests out of 15 standard SRL tests in total.
27
36
  - Fixed rley version dependency
28
37
 
29
38
  ## [0.2.1] - 2018-03-15
30
- SrlRuby passes 7 tests out of 15 standard SRL tests in total.
39
+ SrlRuby passes 7 tests out of 15 from standard SRL test suite.
31
40
  ### Changed
32
41
  - File `acceptance/srl_test_suite_spec.rb`. More examples in spec file.
33
42
  - File `ast_builder.rb` updates to reflect grammar changes.
@@ -39,7 +48,7 @@ SrlRuby passes 7 tests out of 15 standard SRL tests in total.
39
48
  - File `ast_builder.rb` fixed anchor implementation.
40
49
 
41
50
  ## [0.2.0] - 2018-03-14
42
- SrlRuby passes 3 standard out of 15 standard SRL tests in total.
51
+ SrlRuby passes 3 tests out of 15 from standard SRL test suite.
43
52
  ### Added
44
53
  - Added `spec/acceptance/support` directory. It contains test harness to use the .rule files from standard SRL test suite.
45
54
  - Added `acceptance/srl_test_suite_spec.rb` file. Spec file designed to run the standard SRL test suite. At this date, SrlRuby passes 3 tests out of 15 tests in total.
@@ -0,0 +1,23 @@
1
+ require_relative 'atomic_expression' # Access the superclass
2
+
3
+ module Regex # This module is used as a namespace
4
+ # A raw expression is a string that will be copied verbatim (as is)
5
+ # in the generated regular expression.
6
+ class RawExpression < AtomicExpression
7
+ attr_reader :raw
8
+
9
+ # Constructor
10
+ def initialize(rawLiteral)
11
+ super()
12
+ @raw = rawLiteral
13
+ end
14
+
15
+ protected
16
+
17
+ # Conversion method re-definition.
18
+ # Purpose: Return the String representation of the expression.
19
+ alias_method :text_repr, :raw
20
+ end # class
21
+ end # module
22
+
23
+ # End of file
@@ -272,15 +272,24 @@ module SrlRuby
272
272
  return Regex::CharClass.new(false, *alternatives)
273
273
  end
274
274
 
275
+ # rule('character_class' => %w[NONE OF cclass]).as 'none_of'
276
+ def reduce_none_of(_production, _range, _tokens, theChildren)
277
+ raw_literal = theChildren[-1].token.lexeme.dup
278
+ chars = raw_literal.chars.map do |ch|
279
+ Regex::Character.new(ch)
280
+ end
281
+ Regex::CharClass.new(true, *chars)
282
+ end
283
+
275
284
  # rule('special_char' => 'TAB').as 'tab'
276
285
  def reduce_tab(_production, _range, _tokens, _children)
277
286
  Regex::Character.new('\t')
278
287
  end
279
-
288
+
280
289
  # rule('special_char' => 'VERTICAL TAB').as 'vtab'
281
290
  def reduce_vtab(_production, _range, _tokens, _children)
282
291
  Regex::Character.new('\v')
283
- end
292
+ end
284
293
 
285
294
  # rule('special_char' => 'BACKSLASH').as 'backslash'
286
295
  def reduce_backslash(_production, _range, _tokens, _children)
@@ -293,7 +302,7 @@ module SrlRuby
293
302
  # TODO: control portability
294
303
  Regex::Character.new('\n')
295
304
  end
296
-
305
+
297
306
  # rule('special_char' => %w[CARRIAGE RETURN]).as 'carriage_return'
298
307
  def reduce_carriage_return(_production, _range, _tokens, _children)
299
308
  Regex::Character.new('\r')
@@ -303,11 +312,11 @@ module SrlRuby
303
312
  def reduce_word(_production, _range, _tokens, _children)
304
313
  Regex::Anchor.new('\b')
305
314
  end
306
-
315
+
307
316
  # rule('special_char' => %w[NO WORD]).as 'no_word'
308
317
  def reduce_no_word(_production, _range, _tokens, _children)
309
318
  Regex::Anchor.new('\B')
310
- end
319
+ end
311
320
 
312
321
  # rule('literal' => %w[LITERALLY STRING_LIT]).as 'literally'
313
322
  def reduce_literally(_production, _range, _tokens, theChildren)
@@ -317,6 +326,12 @@ module SrlRuby
317
326
  return string_literal(raw_literal)
318
327
  end
319
328
 
329
+ # rule('raw' => %w[RAW STRING_LIT]).as 'raw_literal'
330
+ def reduce_raw_literal(_production, _range, _tokens, theChildren)
331
+ raw_literal = theChildren[-1].token.lexeme.dup
332
+ return Regex::RawExpression.new(raw_literal)
333
+ end
334
+
320
335
  # rule('alternation' => %w[ANY OF LPAREN alternatives RPAREN]).as 'any_of'
321
336
  def reduce_any_of(_production, _range, _tokens, theChildren)
322
337
  first_alternative = theChildren[3].first
@@ -4,17 +4,22 @@ module SrlRuby
4
4
  ########################################
5
5
  # SRL grammar
6
6
  builder = Rley::Syntax::GrammarBuilder.new do
7
+ # Separators...
7
8
  add_terminals('LPAREN', 'RPAREN', 'COMMA')
8
- add_terminals('DIGIT_LIT', 'INTEGER', 'LETTER_LIT')
9
+
10
+ # Literal values...
11
+ add_terminals('DIGIT_LIT', 'INTEGER', 'LETTER_LIT', 'CHAR_CLASS')
9
12
  add_terminals('LITERALLY', 'STRING_LIT', 'IDENTIFIER')
13
+
14
+ # Keywords...
10
15
  add_terminals('BEGIN', 'STARTS', 'WITH')
11
- add_terminals('MUST', 'END')
16
+ add_terminals('MUST', 'END', 'RAW')
12
17
  add_terminals('UPPERCASE', 'LETTER', 'FROM', 'TO')
13
- add_terminals('DIGIT', 'NUMBER', 'ANY', 'NO')
18
+ add_terminals('DIGIT', 'NUMBER', 'ANY', 'EITHER', 'NO')
14
19
  add_terminals('CHARACTER', 'WHITESPACE', 'ANYTHING')
15
20
  add_terminals('TAB', 'BACKSLASH', 'NEW', 'LINE', 'WORD')
16
- add_terminals('CARRIAGE', 'RETURN', 'VERTICAL', 'OF', 'ONE')
17
- add_terminals('EXACTLY', 'TIMES', 'ONCE', 'TWICE')
21
+ add_terminals('CARRIAGE', 'RETURN', 'VERTICAL', 'OF', 'NONE')
22
+ add_terminals('ONE', 'EXACTLY', 'TIMES', 'ONCE', 'TWICE')
18
23
  add_terminals('BETWEEN', 'AND', 'OPTIONAL', 'OR')
19
24
  add_terminals('MORE', 'NEVER', 'AT', 'LEAST')
20
25
  add_terminals('IF', 'FOLLOWED', 'BY', 'NOT')
@@ -23,6 +28,7 @@ module SrlRuby
23
28
  add_terminals('CASE', 'INSENSITIVE', 'MULTI', 'ALL')
24
29
  add_terminals('LAZY')
25
30
 
31
+ # Grammar rules...
26
32
  rule('srl' => 'expression').as 'start_rule'
27
33
  rule('expression' => %w[pattern flags]).as 'flagged_expr'
28
34
  rule('expression' => 'pattern').as 'simple_expr'
@@ -60,6 +66,7 @@ module SrlRuby
60
66
  rule('atom' => 'character_class').as 'character_class_atom'
61
67
  rule('atom' => 'special_char').as 'special_char_atom'
62
68
  rule('atom' => 'literal').as 'literal_atom'
69
+ rule('atom' => 'raw').as 'raw_atom'
63
70
  rule('letter_range' => %w[LETTER FROM LETTER_LIT TO LETTER_LIT]).as 'lowercase_from_to'
64
71
  rule('letter_range' => %w[UPPERCASE LETTER FROM LETTER_LIT TO LETTER_LIT]).as 'uppercase_from_to'
65
72
  rule('letter_range' => 'LETTER').as 'any_lowercase'
@@ -72,7 +79,12 @@ module SrlRuby
72
79
  rule('character_class' => 'WHITESPACE').as 'whitespace'
73
80
  rule('character_class' => %w[NO WHITESPACE]).as 'no_whitespace'
74
81
  rule('character_class' => 'ANYTHING').as 'anything'
75
- rule('character_class' => %w[ONE OF STRING_LIT]).as 'one_of'
82
+ rule('character_class' => %w[ONE OF cclass]).as 'one_of'
83
+ rule('character_class' => %w[NONE OF cclass]).as 'none_of'
84
+ rule('cclass' => 'STRING_LIT').as 'quoted_cclass' # Preferred syntax
85
+ rule('cclass' => 'INTEGER').as 'digits_cclass'
86
+ rule('cclass' => 'IDENTIFIER').as 'identifier_cclass'
87
+ rule('cclass' => 'CHAR_CLASS').as 'unquoted_cclass'
76
88
  rule('special_char' => 'TAB').as 'tab'
77
89
  rule('special_char' => 'VERTICAL TAB').as 'vtab'
78
90
  rule('special_char' => 'BACKSLASH').as 'backslash'
@@ -81,9 +93,12 @@ module SrlRuby
81
93
  rule('special_char' => %w[WORD]).as 'word'
82
94
  rule('special_char' => %w[NO WORD]).as 'no_word'
83
95
  rule('literal' => %w[LITERALLY STRING_LIT]).as 'literally'
84
- rule('alternation' => %w[ANY OF LPAREN alternatives RPAREN]).as 'any_of'
96
+ rule('raw' => 'RAW STRING_LIT').as 'raw_literal'
97
+ rule('alternation' => %w[any_or_either OF LPAREN alternatives RPAREN]).as 'any_of'
85
98
  rule('alternatives' => %w[alternatives separator quantifiable]).as 'alternative_list'
86
99
  rule('alternatives' => 'quantifiable').as 'simple_alternative'
100
+ rule('any_or_either' => 'ANY').as 'any_keyword'
101
+ rule('any_or_either' => 'EITHER').as 'either_keyword'
87
102
  rule('grouping' => %w[LPAREN pattern RPAREN]).as 'grouping_parenthenses'
88
103
  rule('capturing_group' => %w[CAPTURE assertable]).as 'capture'
89
104
  rule('capturing_group' => %w[CAPTURE assertable UNTIL assertable]).as 'capture_until'
@@ -11,4 +11,5 @@ require_relative '../regex/non_capturing_group'
11
11
  require_relative '../regex/anchor'
12
12
  require_relative '../regex/lookaround'
13
13
  require_relative '../regex/capturing_group'
14
- require_relative '../regex/match_option'
14
+ require_relative '../regex/match_option'
15
+ require_relative '../regex/raw_expression'
@@ -42,6 +42,7 @@ module SrlRuby
42
42
  CASE
43
43
  CHARACTER
44
44
  DIGIT
45
+ EITHER
45
46
  END
46
47
  EXACTLY
47
48
  FOLLOWED
@@ -60,6 +61,7 @@ module SrlRuby
60
61
  NEVER
61
62
  NEW
62
63
  NO
64
+ NONE
63
65
  NOT
64
66
  NUMBER
65
67
  OF
@@ -67,6 +69,7 @@ module SrlRuby
67
69
  ONE
68
70
  OPTIONAL
69
71
  OR
72
+ RAW
70
73
  RETURN
71
74
  STARTS
72
75
  TAB
@@ -111,22 +114,24 @@ module SrlRuby
111
114
  if '(),'.include? curr_ch
112
115
  # Delimiters, separators => single character token
113
116
  token = build_token(@@lexeme2name[curr_ch], scanner.getch)
114
- elsif (lexeme = scanner.scan(/[0-9]{2,}/))
117
+ elsif (lexeme = scanner.scan(/[0-9]{2,}((?=\s)|$)/))
115
118
  token = build_token('INTEGER', lexeme) # An integer has 2..* digits
116
- elsif (lexeme = scanner.scan(/[0-9]/))
119
+ elsif (lexeme = scanner.scan(/[0-9]((?=\s)|$)/))
117
120
  token = build_token('DIGIT_LIT', lexeme)
118
- elsif (lexeme = scanner.scan(/[a-zA-Z_][a-zA-Z0-9_]+/))
119
- keyw = @@keywords[lexeme.upcase]
120
- tok_type = keyw ? keyw : 'IDENTIFIER'
121
- token = build_token(tok_type, lexeme)
122
- elsif (lexeme = scanner.scan(/[a-zA-Z]((?=\s)|$)/))
123
- token = build_token('LETTER_LIT', lexeme)
124
121
  elsif (lexeme = scanner.scan(/"(?:\\"|[^"])*"/)) # Double quotes literal?
125
122
  unquoted = lexeme.gsub(/(^")|("$)/, '')
126
123
  token = build_token('STRING_LIT', unquoted)
127
124
  elsif (lexeme = scanner.scan(/'(?:\\'|[^'])*'/)) # Single quotes literal?
128
125
  unquoted = lexeme.gsub(/(^')|('$)/, '')
129
126
  token = build_token('STRING_LIT', unquoted)
127
+ elsif (lexeme = scanner.scan(/[a-zA-Z]((?=\s)|$)/))
128
+ token = build_token('LETTER_LIT', lexeme)
129
+ elsif (lexeme = scanner.scan(/[a-zA-Z_][a-zA-Z0-9_]+/))
130
+ keyw = @@keywords[lexeme.upcase]
131
+ tok_type = keyw ? keyw : 'IDENTIFIER'
132
+ token = build_token(tok_type, lexeme)
133
+ elsif (lexeme = scanner.scan(/[^,"\s]{2,}/))
134
+ token = build_token('CHAR_CLASS', lexeme)
130
135
  else # Unknown token
131
136
  erroneous = curr_ch.nil? ? '' : curr_ch
132
137
  sequel = scanner.scan(/.{1,20}/)
@@ -1,3 +1,3 @@
1
1
  module SrlRuby
2
- VERSION = '0.2.5'.freeze
2
+ VERSION = '0.2.6'.freeze
3
3
  end
@@ -71,6 +71,11 @@ RSpec.describe Acceptance do
71
71
  rule_file_repr = load_file('nondigit.rule')
72
72
  test_rule_file(rule_file_repr)
73
73
  end
74
+
75
+ it 'should support negative character class' do
76
+ rule_file_repr = load_file('none_of.rule')
77
+ test_rule_file(rule_file_repr)
78
+ end
74
79
 
75
80
  it 'should match a tab' do
76
81
  rule_file_repr = load_file('tab.rule')
@@ -44,6 +44,11 @@ describe SrlRuby do
44
44
  regexp = SrlRuby.parse("literally '.'")
45
45
  expect(regexp.source).to eq('\.')
46
46
  end
47
+
48
+ it 'should parse single quotes literal string' do
49
+ regexp = SrlRuby.parse('literally "an", whitespace, raw "[a-zA-Z]"')
50
+ expect(regexp.source).to eq('an\s[a-zA-Z]')
51
+ end
47
52
  end # context
48
53
 
49
54
  context 'Parsing character classes:' do
@@ -93,6 +98,41 @@ describe SrlRuby do
93
98
  # (escapes more characters than required)
94
99
  expect(regexp.source).to eq('[._%+\-]')
95
100
  end
101
+
102
+ it "should parse 'one of' with unquoted character class syntax" do
103
+ # Case of digit sequence
104
+ regexp = SrlRuby.parse('one of 13579, must end')
105
+ expect(regexp.source).to eq('[13579]$')
106
+
107
+ # Case of identifier-like character class
108
+ regexp = SrlRuby.parse('one of abcd, must end')
109
+ expect(regexp.source).to eq('[abcd]$')
110
+
111
+ # Case of arbitrary character class
112
+ regexp = SrlRuby.parse('one of 12hms:, must end')
113
+ expect(regexp.source).to eq('[12hms:]$')
114
+ end
115
+
116
+ it "should parse 'none of' syntax" do
117
+ regexp = SrlRuby.parse('none of "._%+-"')
118
+ # Remark: reference implementation less readable
119
+ # (escapes more characters than required)
120
+ expect(regexp.source).to eq('[^._%+\-]')
121
+ end
122
+
123
+ it "should parse 'none of' with unquoted character class syntax" do
124
+ # Case of digit sequence
125
+ regexp = SrlRuby.parse('none of 13579, must end')
126
+ expect(regexp.source).to eq('[^13579]$')
127
+
128
+ # Case of identifier-like character class
129
+ regexp = SrlRuby.parse('none of abcd, must end')
130
+ expect(regexp.source).to eq('[^abcd]$')
131
+
132
+ # Case of arbitrary character class
133
+ regexp = SrlRuby.parse('none of 12hms:^, must end')
134
+ expect(regexp.source).to eq('[^12hms:\^]$')
135
+ end
96
136
  end # context
97
137
 
98
138
  context 'Parsing special character declarations:' do
@@ -137,6 +177,12 @@ describe SrlRuby do
137
177
  source = 'any of (any character, one of "._%-+")'
138
178
  regexp = SrlRuby.parse(source)
139
179
  expect(regexp.source).to eq('(?:\w|[._%\-+])')
180
+ end
181
+
182
+ it "should parse 'either of' syntax" do
183
+ source = 'either of (any character, one of "._%-+")'
184
+ regexp = SrlRuby.parse(source)
185
+ expect(regexp.source).to eq('(?:\w|[._%\-+])')
140
186
  end
141
187
 
142
188
  it 'should anchor as alternative' do
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: srl_ruby
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.2.5
4
+ version: 0.2.6
5
5
  platform: ruby
6
6
  authors:
7
7
  - Dimitri Geshef
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2018-04-02 00:00:00.000000000 Z
11
+ date: 2018-04-03 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: rley
@@ -104,6 +104,7 @@ files:
104
104
  - lib/regex/non_capturing_group.rb
105
105
  - lib/regex/polyadic_expression.rb
106
106
  - lib/regex/quantifiable.rb
107
+ - lib/regex/raw_expression.rb
107
108
  - lib/regex/repetition.rb
108
109
  - lib/regex/wildcard.rb
109
110
  - lib/srl_ruby.rb