srl_ruby 0.2.5 → 0.2.6

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 13bb293008059e97968eab3b976e05f7fe9c353e
4
- data.tar.gz: 933e9531ae294fe4adb46e3c7ef87c58a3243388
3
+ metadata.gz: 475a892be14d441ff82714324f489707839eae57
4
+ data.tar.gz: 52ad8fffe6fb8abe17e99e690dc182bb0b3a48b0
5
5
  SHA512:
6
- metadata.gz: 1c585bfad3f6330b87f47ab7c1dae115bc953a50220bc3d87642524c523cfbe1ae19193cfc0173730d456ec6691611abd59964ea4722a42bdbb1571f6c06ce9c
7
- data.tar.gz: f835d81332961e020addee4ec8b5e4c46378f29ad12a7497fb03f5545eee1c884b1da5b8a621d4f76713b348b3b324fc96c4eb9f60b98037c1c7e9b98ab5d9c6
6
+ metadata.gz: 24b028b5becb5f2ac57c6fbf1aaa5eb45d7b3e3bde1e8ea2892b56f7d67fc643bf8d28cff0e431e7a3f97c996b0c44321bfd52eb17d818daeb51a334cb2d056b
7
+ data.tar.gz: b21b2fefc35eac997bd06a0a4a7bc586d80eba2074cfb365c7a22a4e6390e48a45960df3132b09abc2e53569c5c6ad4f925510191808212ba6a8b991dfc259a8
data/CHANGELOG.md CHANGED
@@ -1,5 +1,14 @@
1
+ ## [0.2.6] - 2018-04-03
2
+ SrlRuby passes 13 tests out of 15 from standard SRL test suite.
3
+ ### Changed
4
+ - Class `SrlRuby#Tokenizer` added CHAR_CLASS literal and keywords EITHER, NONE, RAW
5
+ - Grammar expanded to support 'NONE OF', 'EITHER OF' and 'RAW' expressions, as well as unquoted character classes
6
+ - Class `SrlRuby::ASTBuilder` updates to reflect changes in the grammar.
7
+ - File `acceptance/srl_test_suite_spec.rb`. 13 test files from official test suite are passing.
8
+
9
+
1
10
  ## [0.2.5] - 2018-04-02
2
- SrlRuby passes 12 tests out of 15 standard SRL tests in total.
11
+ SrlRuby passes 12 tests out of 15 from standard SRL test suite.
3
12
  ### Changed
4
13
  - Class `SrlRuby#Tokenizer` added keywords CARRIAGE, RETURN, VERTICAL, WORD
5
14
  - Grammar expanded to support 'CARRIAGE RETURN', 'VERTICAL TAB', 'WORD', 'NO WORD' expressions
@@ -8,7 +17,7 @@ SrlRuby passes 12 tests out of 15 standard SRL tests in total.
8
17
 
9
18
 
10
19
  ## [0.2.4] - 2018-04-02
11
- SrlRuby passes 10 tests out of 15 standard SRL tests in total.
20
+ SrlRuby passes 10 tests out of 15 from standard SRL test suite.
12
21
  ### Changed
13
22
  - File `lib/srl_ruby/grammar.rb` grammar refactoring. Added support for new 'no digit' SRL expression.
14
23
  - Class `SrlRuby::ASTBuilder` updates to reflect changes in the grammar.
@@ -27,7 +36,7 @@ SrlRuby passes 10 tests out of 15 standard SRL tests in total.
27
36
  - Fixed rley version dependency
28
37
 
29
38
  ## [0.2.1] - 2018-03-15
30
- SrlRuby passes 7 tests out of 15 standard SRL tests in total.
39
+ SrlRuby passes 7 tests out of 15 from standard SRL test suite.
31
40
  ### Changed
32
41
  - File `acceptance/srl_test_suite_spec.rb`. More examples in spec file.
33
42
  - File `ast_builder.rb` updates to reflect grammar changes.
@@ -39,7 +48,7 @@ SrlRuby passes 7 tests out of 15 standard SRL tests in total.
39
48
  - File `ast_builder.rb` fixed anchor implementation.
40
49
 
41
50
  ## [0.2.0] - 2018-03-14
42
- SrlRuby passes 3 standard out of 15 standard SRL tests in total.
51
+ SrlRuby passes 3 tests out of 15 from standard SRL test suite.
43
52
  ### Added
44
53
  - Added `spec/acceptance/support` directory. It contains test harness to use the .rule files from standard SRL test suite.
45
54
  - Added `acceptance/srl_test_suite_spec.rb` file. Spec file designed to run the standard SRL test suite. At this date, SrlRuby passes 3 tests out of 15 tests in total.
@@ -0,0 +1,23 @@
1
+ require_relative 'atomic_expression' # Access the superclass
2
+
3
+ module Regex # This module is used as a namespace
4
+ # A raw expression is a string that will be copied verbatim (as is)
5
+ # in the generated regular expression.
6
+ class RawExpression < AtomicExpression
7
+ attr_reader :raw
8
+
9
+ # Constructor
10
+ def initialize(rawLiteral)
11
+ super()
12
+ @raw = rawLiteral
13
+ end
14
+
15
+ protected
16
+
17
+ # Conversion method re-definition.
18
+ # Purpose: Return the String representation of the expression.
19
+ alias_method :text_repr, :raw
20
+ end # class
21
+ end # module
22
+
23
+ # End of file
@@ -272,15 +272,24 @@ module SrlRuby
272
272
  return Regex::CharClass.new(false, *alternatives)
273
273
  end
274
274
 
275
+ # rule('character_class' => %w[NONE OF cclass]).as 'none_of'
276
+ def reduce_none_of(_production, _range, _tokens, theChildren)
277
+ raw_literal = theChildren[-1].token.lexeme.dup
278
+ chars = raw_literal.chars.map do |ch|
279
+ Regex::Character.new(ch)
280
+ end
281
+ Regex::CharClass.new(true, *chars)
282
+ end
283
+
275
284
  # rule('special_char' => 'TAB').as 'tab'
276
285
  def reduce_tab(_production, _range, _tokens, _children)
277
286
  Regex::Character.new('\t')
278
287
  end
279
-
288
+
280
289
  # rule('special_char' => 'VERTICAL TAB').as 'vtab'
281
290
  def reduce_vtab(_production, _range, _tokens, _children)
282
291
  Regex::Character.new('\v')
283
- end
292
+ end
284
293
 
285
294
  # rule('special_char' => 'BACKSLASH').as 'backslash'
286
295
  def reduce_backslash(_production, _range, _tokens, _children)
@@ -293,7 +302,7 @@ module SrlRuby
293
302
  # TODO: control portability
294
303
  Regex::Character.new('\n')
295
304
  end
296
-
305
+
297
306
  # rule('special_char' => %w[CARRIAGE RETURN]).as 'carriage_return'
298
307
  def reduce_carriage_return(_production, _range, _tokens, _children)
299
308
  Regex::Character.new('\r')
@@ -303,11 +312,11 @@ module SrlRuby
303
312
  def reduce_word(_production, _range, _tokens, _children)
304
313
  Regex::Anchor.new('\b')
305
314
  end
306
-
315
+
307
316
  # rule('special_char' => %w[NO WORD]).as 'no_word'
308
317
  def reduce_no_word(_production, _range, _tokens, _children)
309
318
  Regex::Anchor.new('\B')
310
- end
319
+ end
311
320
 
312
321
  # rule('literal' => %w[LITERALLY STRING_LIT]).as 'literally'
313
322
  def reduce_literally(_production, _range, _tokens, theChildren)
@@ -317,6 +326,12 @@ module SrlRuby
317
326
  return string_literal(raw_literal)
318
327
  end
319
328
 
329
+ # rule('raw' => %w[RAW STRING_LIT]).as 'raw_literal'
330
+ def reduce_raw_literal(_production, _range, _tokens, theChildren)
331
+ raw_literal = theChildren[-1].token.lexeme.dup
332
+ return Regex::RawExpression.new(raw_literal)
333
+ end
334
+
320
335
  # rule('alternation' => %w[ANY OF LPAREN alternatives RPAREN]).as 'any_of'
321
336
  def reduce_any_of(_production, _range, _tokens, theChildren)
322
337
  first_alternative = theChildren[3].first
@@ -4,17 +4,22 @@ module SrlRuby
4
4
  ########################################
5
5
  # SRL grammar
6
6
  builder = Rley::Syntax::GrammarBuilder.new do
7
+ # Separators...
7
8
  add_terminals('LPAREN', 'RPAREN', 'COMMA')
8
- add_terminals('DIGIT_LIT', 'INTEGER', 'LETTER_LIT')
9
+
10
+ # Literal values...
11
+ add_terminals('DIGIT_LIT', 'INTEGER', 'LETTER_LIT', 'CHAR_CLASS')
9
12
  add_terminals('LITERALLY', 'STRING_LIT', 'IDENTIFIER')
13
+
14
+ # Keywords...
10
15
  add_terminals('BEGIN', 'STARTS', 'WITH')
11
- add_terminals('MUST', 'END')
16
+ add_terminals('MUST', 'END', 'RAW')
12
17
  add_terminals('UPPERCASE', 'LETTER', 'FROM', 'TO')
13
- add_terminals('DIGIT', 'NUMBER', 'ANY', 'NO')
18
+ add_terminals('DIGIT', 'NUMBER', 'ANY', 'EITHER', 'NO')
14
19
  add_terminals('CHARACTER', 'WHITESPACE', 'ANYTHING')
15
20
  add_terminals('TAB', 'BACKSLASH', 'NEW', 'LINE', 'WORD')
16
- add_terminals('CARRIAGE', 'RETURN', 'VERTICAL', 'OF', 'ONE')
17
- add_terminals('EXACTLY', 'TIMES', 'ONCE', 'TWICE')
21
+ add_terminals('CARRIAGE', 'RETURN', 'VERTICAL', 'OF', 'NONE')
22
+ add_terminals('ONE', 'EXACTLY', 'TIMES', 'ONCE', 'TWICE')
18
23
  add_terminals('BETWEEN', 'AND', 'OPTIONAL', 'OR')
19
24
  add_terminals('MORE', 'NEVER', 'AT', 'LEAST')
20
25
  add_terminals('IF', 'FOLLOWED', 'BY', 'NOT')
@@ -23,6 +28,7 @@ module SrlRuby
23
28
  add_terminals('CASE', 'INSENSITIVE', 'MULTI', 'ALL')
24
29
  add_terminals('LAZY')
25
30
 
31
+ # Grammar rules...
26
32
  rule('srl' => 'expression').as 'start_rule'
27
33
  rule('expression' => %w[pattern flags]).as 'flagged_expr'
28
34
  rule('expression' => 'pattern').as 'simple_expr'
@@ -60,6 +66,7 @@ module SrlRuby
60
66
  rule('atom' => 'character_class').as 'character_class_atom'
61
67
  rule('atom' => 'special_char').as 'special_char_atom'
62
68
  rule('atom' => 'literal').as 'literal_atom'
69
+ rule('atom' => 'raw').as 'raw_atom'
63
70
  rule('letter_range' => %w[LETTER FROM LETTER_LIT TO LETTER_LIT]).as 'lowercase_from_to'
64
71
  rule('letter_range' => %w[UPPERCASE LETTER FROM LETTER_LIT TO LETTER_LIT]).as 'uppercase_from_to'
65
72
  rule('letter_range' => 'LETTER').as 'any_lowercase'
@@ -72,7 +79,12 @@ module SrlRuby
72
79
  rule('character_class' => 'WHITESPACE').as 'whitespace'
73
80
  rule('character_class' => %w[NO WHITESPACE]).as 'no_whitespace'
74
81
  rule('character_class' => 'ANYTHING').as 'anything'
75
- rule('character_class' => %w[ONE OF STRING_LIT]).as 'one_of'
82
+ rule('character_class' => %w[ONE OF cclass]).as 'one_of'
83
+ rule('character_class' => %w[NONE OF cclass]).as 'none_of'
84
+ rule('cclass' => 'STRING_LIT').as 'quoted_cclass' # Preferred syntax
85
+ rule('cclass' => 'INTEGER').as 'digits_cclass'
86
+ rule('cclass' => 'IDENTIFIER').as 'identifier_cclass'
87
+ rule('cclass' => 'CHAR_CLASS').as 'unquoted_cclass'
76
88
  rule('special_char' => 'TAB').as 'tab'
77
89
  rule('special_char' => 'VERTICAL TAB').as 'vtab'
78
90
  rule('special_char' => 'BACKSLASH').as 'backslash'
@@ -81,9 +93,12 @@ module SrlRuby
81
93
  rule('special_char' => %w[WORD]).as 'word'
82
94
  rule('special_char' => %w[NO WORD]).as 'no_word'
83
95
  rule('literal' => %w[LITERALLY STRING_LIT]).as 'literally'
84
- rule('alternation' => %w[ANY OF LPAREN alternatives RPAREN]).as 'any_of'
96
+ rule('raw' => 'RAW STRING_LIT').as 'raw_literal'
97
+ rule('alternation' => %w[any_or_either OF LPAREN alternatives RPAREN]).as 'any_of'
85
98
  rule('alternatives' => %w[alternatives separator quantifiable]).as 'alternative_list'
86
99
  rule('alternatives' => 'quantifiable').as 'simple_alternative'
100
+ rule('any_or_either' => 'ANY').as 'any_keyword'
101
+ rule('any_or_either' => 'EITHER').as 'either_keyword'
87
102
  rule('grouping' => %w[LPAREN pattern RPAREN]).as 'grouping_parenthenses'
88
103
  rule('capturing_group' => %w[CAPTURE assertable]).as 'capture'
89
104
  rule('capturing_group' => %w[CAPTURE assertable UNTIL assertable]).as 'capture_until'
@@ -11,4 +11,5 @@ require_relative '../regex/non_capturing_group'
11
11
  require_relative '../regex/anchor'
12
12
  require_relative '../regex/lookaround'
13
13
  require_relative '../regex/capturing_group'
14
- require_relative '../regex/match_option'
14
+ require_relative '../regex/match_option'
15
+ require_relative '../regex/raw_expression'
@@ -42,6 +42,7 @@ module SrlRuby
42
42
  CASE
43
43
  CHARACTER
44
44
  DIGIT
45
+ EITHER
45
46
  END
46
47
  EXACTLY
47
48
  FOLLOWED
@@ -60,6 +61,7 @@ module SrlRuby
60
61
  NEVER
61
62
  NEW
62
63
  NO
64
+ NONE
63
65
  NOT
64
66
  NUMBER
65
67
  OF
@@ -67,6 +69,7 @@ module SrlRuby
67
69
  ONE
68
70
  OPTIONAL
69
71
  OR
72
+ RAW
70
73
  RETURN
71
74
  STARTS
72
75
  TAB
@@ -111,22 +114,24 @@ module SrlRuby
111
114
  if '(),'.include? curr_ch
112
115
  # Delimiters, separators => single character token
113
116
  token = build_token(@@lexeme2name[curr_ch], scanner.getch)
114
- elsif (lexeme = scanner.scan(/[0-9]{2,}/))
117
+ elsif (lexeme = scanner.scan(/[0-9]{2,}((?=\s)|$)/))
115
118
  token = build_token('INTEGER', lexeme) # An integer has 2..* digits
116
- elsif (lexeme = scanner.scan(/[0-9]/))
119
+ elsif (lexeme = scanner.scan(/[0-9]((?=\s)|$)/))
117
120
  token = build_token('DIGIT_LIT', lexeme)
118
- elsif (lexeme = scanner.scan(/[a-zA-Z_][a-zA-Z0-9_]+/))
119
- keyw = @@keywords[lexeme.upcase]
120
- tok_type = keyw ? keyw : 'IDENTIFIER'
121
- token = build_token(tok_type, lexeme)
122
- elsif (lexeme = scanner.scan(/[a-zA-Z]((?=\s)|$)/))
123
- token = build_token('LETTER_LIT', lexeme)
124
121
  elsif (lexeme = scanner.scan(/"(?:\\"|[^"])*"/)) # Double quotes literal?
125
122
  unquoted = lexeme.gsub(/(^")|("$)/, '')
126
123
  token = build_token('STRING_LIT', unquoted)
127
124
  elsif (lexeme = scanner.scan(/'(?:\\'|[^'])*'/)) # Single quotes literal?
128
125
  unquoted = lexeme.gsub(/(^')|('$)/, '')
129
126
  token = build_token('STRING_LIT', unquoted)
127
+ elsif (lexeme = scanner.scan(/[a-zA-Z]((?=\s)|$)/))
128
+ token = build_token('LETTER_LIT', lexeme)
129
+ elsif (lexeme = scanner.scan(/[a-zA-Z_][a-zA-Z0-9_]+/))
130
+ keyw = @@keywords[lexeme.upcase]
131
+ tok_type = keyw ? keyw : 'IDENTIFIER'
132
+ token = build_token(tok_type, lexeme)
133
+ elsif (lexeme = scanner.scan(/[^,"\s]{2,}/))
134
+ token = build_token('CHAR_CLASS', lexeme)
130
135
  else # Unknown token
131
136
  erroneous = curr_ch.nil? ? '' : curr_ch
132
137
  sequel = scanner.scan(/.{1,20}/)
@@ -1,3 +1,3 @@
1
1
  module SrlRuby
2
- VERSION = '0.2.5'.freeze
2
+ VERSION = '0.2.6'.freeze
3
3
  end
@@ -71,6 +71,11 @@ RSpec.describe Acceptance do
71
71
  rule_file_repr = load_file('nondigit.rule')
72
72
  test_rule_file(rule_file_repr)
73
73
  end
74
+
75
+ it 'should support negative character class' do
76
+ rule_file_repr = load_file('none_of.rule')
77
+ test_rule_file(rule_file_repr)
78
+ end
74
79
 
75
80
  it 'should match a tab' do
76
81
  rule_file_repr = load_file('tab.rule')
@@ -44,6 +44,11 @@ describe SrlRuby do
44
44
  regexp = SrlRuby.parse("literally '.'")
45
45
  expect(regexp.source).to eq('\.')
46
46
  end
47
+
48
+ it 'should parse single quotes literal string' do
49
+ regexp = SrlRuby.parse('literally "an", whitespace, raw "[a-zA-Z]"')
50
+ expect(regexp.source).to eq('an\s[a-zA-Z]')
51
+ end
47
52
  end # context
48
53
 
49
54
  context 'Parsing character classes:' do
@@ -93,6 +98,41 @@ describe SrlRuby do
93
98
  # (escapes more characters than required)
94
99
  expect(regexp.source).to eq('[._%+\-]')
95
100
  end
101
+
102
+ it "should parse 'one of' with unquoted character class syntax" do
103
+ # Case of digit sequence
104
+ regexp = SrlRuby.parse('one of 13579, must end')
105
+ expect(regexp.source).to eq('[13579]$')
106
+
107
+ # Case of identifier-like character class
108
+ regexp = SrlRuby.parse('one of abcd, must end')
109
+ expect(regexp.source).to eq('[abcd]$')
110
+
111
+ # Case of arbitrary character class
112
+ regexp = SrlRuby.parse('one of 12hms:, must end')
113
+ expect(regexp.source).to eq('[12hms:]$')
114
+ end
115
+
116
+ it "should parse 'none of' syntax" do
117
+ regexp = SrlRuby.parse('none of "._%+-"')
118
+ # Remark: reference implementation less readable
119
+ # (escapes more characters than required)
120
+ expect(regexp.source).to eq('[^._%+\-]')
121
+ end
122
+
123
+ it "should parse 'none of' with unquoted character class syntax" do
124
+ # Case of digit sequence
125
+ regexp = SrlRuby.parse('none of 13579, must end')
126
+ expect(regexp.source).to eq('[^13579]$')
127
+
128
+ # Case of identifier-like character class
129
+ regexp = SrlRuby.parse('none of abcd, must end')
130
+ expect(regexp.source).to eq('[^abcd]$')
131
+
132
+ # Case of arbitrary character class
133
+ regexp = SrlRuby.parse('none of 12hms:^, must end')
134
+ expect(regexp.source).to eq('[^12hms:\^]$')
135
+ end
96
136
  end # context
97
137
 
98
138
  context 'Parsing special character declarations:' do
@@ -137,6 +177,12 @@ describe SrlRuby do
137
177
  source = 'any of (any character, one of "._%-+")'
138
178
  regexp = SrlRuby.parse(source)
139
179
  expect(regexp.source).to eq('(?:\w|[._%\-+])')
180
+ end
181
+
182
+ it "should parse 'either of' syntax" do
183
+ source = 'either of (any character, one of "._%-+")'
184
+ regexp = SrlRuby.parse(source)
185
+ expect(regexp.source).to eq('(?:\w|[._%\-+])')
140
186
  end
141
187
 
142
188
  it 'should anchor as alternative' do
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: srl_ruby
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.2.5
4
+ version: 0.2.6
5
5
  platform: ruby
6
6
  authors:
7
7
  - Dimitri Geshef
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2018-04-02 00:00:00.000000000 Z
11
+ date: 2018-04-03 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: rley
@@ -104,6 +104,7 @@ files:
104
104
  - lib/regex/non_capturing_group.rb
105
105
  - lib/regex/polyadic_expression.rb
106
106
  - lib/regex/quantifiable.rb
107
+ - lib/regex/raw_expression.rb
107
108
  - lib/regex/repetition.rb
108
109
  - lib/regex/wildcard.rb
109
110
  - lib/srl_ruby.rb