rley 0.6.01 → 0.6.02

Files changed (36)
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +5 -1
  3. data/examples/NLP/engtagger.rb +58 -60
  4. data/lib/rley/constants.rb +1 -1
  5. metadata +2 -33
  6. data/examples/general/SRL/lib/ast_builder.rb +0 -382
  7. data/examples/general/SRL/lib/grammar.rb +0 -106
  8. data/examples/general/SRL/lib/regex/abstract_method.rb +0 -35
  9. data/examples/general/SRL/lib/regex/alternation.rb +0 -27
  10. data/examples/general/SRL/lib/regex/anchor.rb +0 -45
  11. data/examples/general/SRL/lib/regex/atomic_expression.rb +0 -16
  12. data/examples/general/SRL/lib/regex/capturing_group.rb +0 -51
  13. data/examples/general/SRL/lib/regex/char_class.rb +0 -38
  14. data/examples/general/SRL/lib/regex/char_range.rb +0 -51
  15. data/examples/general/SRL/lib/regex/char_shorthand.rb +0 -50
  16. data/examples/general/SRL/lib/regex/character.rb +0 -204
  17. data/examples/general/SRL/lib/regex/compound_expression.rb +0 -57
  18. data/examples/general/SRL/lib/regex/concatenation.rb +0 -29
  19. data/examples/general/SRL/lib/regex/expression.rb +0 -60
  20. data/examples/general/SRL/lib/regex/lookaround.rb +0 -50
  21. data/examples/general/SRL/lib/regex/match_option.rb +0 -34
  22. data/examples/general/SRL/lib/regex/monadic_expression.rb +0 -28
  23. data/examples/general/SRL/lib/regex/multiplicity.rb +0 -91
  24. data/examples/general/SRL/lib/regex/non_capturing_group.rb +0 -27
  25. data/examples/general/SRL/lib/regex/polyadic_expression.rb +0 -60
  26. data/examples/general/SRL/lib/regex/quantifiable.rb +0 -22
  27. data/examples/general/SRL/lib/regex/repetition.rb +0 -29
  28. data/examples/general/SRL/lib/regex/wildcard.rb +0 -23
  29. data/examples/general/SRL/lib/regex_repr.rb +0 -13
  30. data/examples/general/SRL/lib/tokenizer.rb +0 -147
  31. data/examples/general/SRL/spec/integration_spec.rb +0 -448
  32. data/examples/general/SRL/spec/regex/character_spec.rb +0 -166
  33. data/examples/general/SRL/spec/regex/multiplicity_spec.rb +0 -79
  34. data/examples/general/SRL/spec/spec_helper.rb +0 -25
  35. data/examples/general/SRL/spec/tokenizer_spec.rb +0 -148
  36. data/examples/general/SRL/srl_demo.rb +0 -75
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
  ---
  SHA1:
- metadata.gz: e5431e3dddd23b1b29be7db8b2b5fe268301f16f
- data.tar.gz: 929f4f15a4d996cf0e14ddf903b15a5c5325fb93
+ metadata.gz: 9764b3e89d5395584b93ca2a06d80a3381d58560
+ data.tar.gz: 91eb805167c77da1d86cf23e04382d9c791f47c5
  SHA512:
- metadata.gz: 3cda86bffff68e3591e89cbd05a669a59a7ee111dc75cd29fa56b0968798a981210ba3d3565bf5ed8435c2070b6ac9cb1efd35290ea10b60eee1ad3bb7d3caa7
- data.tar.gz: c49cbc2d61484807e23cee6801057cf7618e9a0f18e85a80082acb8c08bad9012067c895ad581b7b0cdc3d3ac02699f45db42d525ce8b62e4803023d9a5cbca0
+ metadata.gz: 9dc181140698cab328974cbbca977ccbd0de1afc5eb0af628f2c7517fbb1ad93af058341e6146542540adcd7ea7268dd3a234131b3b046838c6a26e2cf05146e
+ data.tar.gz: 90e957d8e37e957b50c769de6661c3ce5f5ad9d6c48beb5c7a8e7487622568273f715c5bacdbbd7ed382f410c6cd9b420218ac447e92fc279fd901b6e0a2baea
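Editor's note: `checksums.yaml` records the digests RubyGems uses to verify the two artifacts inside a packaged gem, so both pairs change whenever the gem is rebuilt. As a rough sketch (the artifact file names are assumed from gem packaging convention, not taken from this diff), the digests can be recomputed with Ruby's standard `digest` library:

```ruby
require 'digest'

# Recompute the digests recorded in checksums.yaml for an unpacked gem.
# 'metadata.gz' and 'data.tar.gz' are the conventional artifact names.
%w[metadata.gz data.tar.gz].each do |artifact|
  bytes = File.binread(artifact)
  puts "#{artifact} SHA1:   #{Digest::SHA1.hexdigest(bytes)}"
  puts "#{artifact} SHA512: #{Digest::SHA512.hexdigest(bytes)}"
end
```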
data/CHANGELOG.md CHANGED
@@ -1,4 +1,8 @@
- ### 0.6.01 / 2018-02-25
+ ### 0.6.02 / 2018-03-03
+ * [FIX] File `examples/general/NLP/engtagger.rb`: code re-styling to remove most style offenses found by Rubocop 0.52.1
+ * [DELETE] All files under `examples/general/SRL` are removed; they will become a separate gem.
+
+ ### 0.6.01 / 2018-03-03
  * [FIX] Code re-styling to remove most style offenses found by Rubocop 0.52.1
 
  ### 0.6.00 / 2018-02-25
data/examples/NLP/engtagger.rb CHANGED
@@ -1,6 +1,5 @@
- require "rley"
- require "engtagger"
- require "pp"
+ require 'rley'
+ require 'engtagger' # Load POS (Part-Of-Speech) tagger EngTagger
 
  # REGEX to remove XML tags from Engtagger output
  GET_TAG = /<(.+?)>(.*?)<.+?>/
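EngTagger wraps each word in a lightweight XML tag (`<nnp>John</nnp>`, `<vbd>saw</vbd>`, ...), and `GET_TAG` captures each tag/word pair. A minimal sketch of how the script uses it further down (the sample string is invented for illustration):

```ruby
GET_TAG = /<(.+?)>(.*?)<.+?>/

tagged = '<nnp>John</nnp> <vbd>saw</vbd> <nnp>Mary</nnp>' # assumed EngTagger-style output
tokens = tagged.scan(GET_TAG).map { |tag, word| [word, tag.upcase] }
p tokens # => [["John", "NNP"], ["saw", "VBD"], ["Mary", "NNP"]]
```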
@@ -23,7 +22,7 @@ end
  def valid_text(text)
  if !text
  # there's nothing to parse
- "method call on uninitialized variable" if @conf[:debug]
+ puts 'method call on uninitialized variable'
  return false
  elsif /\A\s*\z/ =~ text
  # text is an empty string, nothing to parse
@@ -36,26 +35,26 @@ end
 
  def split_sentences(array)
  tokenized = array
- people = %w(jr mr ms mrs dr prof esq sr sen sens rep reps gov attys attys
- supt det mssrs rev)
- army = %w(col gen lt cmdr adm capt sgt cpl maj brig)
- inst = %w(dept univ assn bros ph.d)
- place = %w(arc al ave blvd bld cl ct cres exp expy dist mt mtn ft fy fwy
- hwy hway la pde pd plz pl rd st tce)
- comp = %w(mfg inc ltd co corp)
- state = %w(ala ariz ark cal calif colo col conn del fed fla ga ida id ill
+ people = %w[jr mr ms mrs dr prof esq sr sen sens rep reps gov attys attys
+ supt det mssrs rev]
+ army = %w[col gen lt cmdr adm capt sgt cpl maj brig]
+ inst = %w[dept univ assn bros ph.d]
+ place = %w[arc al ave blvd bld cl ct cres exp expy dist mt mtn ft fy fwy
+ hwy hway la pde pd plz pl rd st tce]
+ comp = %w[mfg inc ltd co corp]
+ state = %w[ala ariz ark cal calif colo col conn del fed fla ga ida id ill
  ind ia kans kan ken ky la me md is mass mich minn miss mo mont
  neb nebr nev mex okla ok ore penna penn pa dak tenn tex ut vt
- va wash wis wisc wy wyo usafa alta man ont que sask yuk)
- month = %w(jan feb mar apr may jun jul aug sep sept oct nov dec)
- misc = %w(vs etc no esp)
+ va wash wis wisc wy wyo usafa alta man ont que sask yuk]
+ month = %w[jan feb mar apr may jun jul aug sep sept oct nov dec]
+ misc = %w[vs etc no esp]
  abbr = Hash.new
  [people, army, inst, place, comp, state, month, misc].flatten.each do |i|
  abbr[i] = true
  end
  words = Array.new
- tokenized.each_with_index do |t, i|
- if tokenized[i + 1] and tokenized [i + 1] =~ /[A-Z\W]/ and tokenized[i] =~ /\A(.+)\.\z/
+ tokenized.each_with_index do |_t, i|
+ if tokenized[i + 1] && tokenized [i + 1] =~ /[A-Z\W]/ && tokenized[i] =~ /\A(.+)\.\z/
  w = $1
  # Don't separate the period off words that
  # meet any of the following conditions:
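The `%w(...)` → `%w[...]` churn above is purely cosmetic: Rubocop's default `Style/PercentLiteralDelimiters` rule prefers square brackets for `%w` word arrays. Both delimiters build the same array, as this quick check (not part of the diff) shows:

```ruby
# Both percent-literal delimiter styles produce identical arrays;
# Rubocop 0.52.1 simply prefers the bracket form by default.
paren   = %w(jr mr ms)
bracket = %w[jr mr ms]
p paren == bracket # => true
```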
@@ -63,7 +62,7 @@ def split_sentences(array)
  # 1. It is defined in one of the lists above
  # 2. It is only one letter long: Alfred E. Sloan
  # 3. It has a repeating letter-dot: U.S.A. or J.C. Penney
- unless abbr[w.downcase] or w =~ /\A[a-z]\z/i or w =~ /[a-z](?:\.[a-z])+\z/i
+ unless abbr[w.downcase] || w =~ /\A[a-z]\z/i || w =~ /[a-z](?:\.[a-z])+\z/i
  words << w
  words << '.'
  next
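Replacing `and`/`or` with `&&`/`||` satisfies Rubocop's `Style/AndOr` check, and in general it is more than cosmetic: the keyword forms bind more loosely than assignment, which can silently change meaning. A hypothetical illustration (not from the example code):

```ruby
# `||` binds tighter than `=`; the keyword `or` binds looser.
a = false || true # parsed as a = (false || true) => a is true
b = false or true # parsed as (b = false) or true => b is false
p [a, b] # => [true, false]
```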
@@ -72,7 +71,7 @@ def split_sentences(array)
  words << tokenized[i]
  end
  # If the final word ends in a period..
- if words[-1] and words[-1] =~ /\A(.*\w)\.\z/
+ if words[-1] && words[-1] =~ /\A(.*\w)\.\z/
  words[-1] = $1
  words.push '.'
  end
@@ -85,39 +84,42 @@ def split_punct(text)
  # If there's no punctuation, return immediately
  return [text] if /\A\w+\z/ =~ text
  # Sanity checks
- text = text.gsub(/\W{10,}/o, " ")
+ text = text.gsub(/\W{10,}/o, ' ')
 
  # Put quotes into a standard format
- text = text.gsub(/`(?!`)(?=.*\w)/o, "` ") # Shift left quotes off text
- text = text.gsub(/"(?=.*\w)/o, " `` ") # Convert left quotes to ``
- text = text.gsub(/(\W|^)'(?=.*\w)/o){$1 ? $1 + " ` " : " ` "} # Convert left quotes to `
+ text = text.gsub(/`(?!`)(?=.*\w)/o, '` ') # Shift left quotes off text
+ text = text.gsub(/"(?=.*\w)/o, ' `` ') # Convert left quotes to ``
+ text = text.gsub(/(\W|^)'(?=.*\w)/o) { $1 ? $1 + ' ` ' : ' ` ' } # Convert left quote to `
  text = text.gsub(/"/, " '' ") # Convert (remaining) quotes to ''
- text = text.gsub(/(\w)'(?!')(?=\W|$)/o){$1 + " ' "} # Separate right single quotes
+ text = text.gsub(/(\w)'(?!')(?=\W|$)/o, "\\1 ' ") # Separate right single quotes
 
  # Handle all other punctuation
- text = text.gsub(/--+/o, " - ") # Convert and separate dashes
- text = text.gsub(/,(?!\d)/o, " , ") # Shift commas off everything but numbers
- text = text.gsub(/:/o, " :") # Shift semicolons off
- text = text.gsub(/(\.\.\.+)/o){" " + $1 + " "} # Shift ellipses off
- text = text.gsub(/([\(\[\{\}\]\)])/o){" " + $1 + " "} # Shift off brackets
- text = text.gsub(/([\!\?#\$%;~|])/o){" " + $1 + " "} # Shift off other ``standard'' punctuation
+ text = text.gsub(/--+/o, ' - ') # Convert and separate dashes
+ text = text.gsub(/,(?!\d)/o, ' , ') # Shift comma if not followed by a digit
+ text = text.gsub(/:/o, ' :') # Shift colon off
+ text = text.gsub(/(\.\.\.+)/o, ' \1 ') # Shift ellipses off
+ text = text.gsub(/([\(\[\{\}\]\)])/o, ' \1 ') # Shift off brackets
+ text = text.gsub(/([\!\?#\$%;~|])/o, ' \1 ') # Shift off other ``standard'' punctuation
 
  # English-specific contractions
- text = text.gsub(/([A-Za-z])'([dms])\b/o){$1 + " '" + $2} # Separate off 'd 'm 's
- text = text.gsub(/n't\b/o, " n't") # Separate off n't
- text = text.gsub(/'(ve|ll|re)\b/o){" '" + $1} # Separate off 've, 'll, 're
+ text = text.gsub(/([A-Za-z])'([dms])\b/o, "\\1 '\\2") # Separate off 'd 'm 's
+ text = text.gsub(/n't\b/o, " n't") # Separate off n't
+ text = text.gsub(/'(ve|ll|re)\b/o, " '\\1") # Separate off 've, 'll, 're
  result = text.split(' ')
  return result
  end
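Several rewrites in `split_punct` swap block-form `gsub` for a plain replacement string, using `\1`-style backreferences in place of `$1`. The two forms are equivalent here, and the string form avoids a block invocation per match. A small sketch with an invented input:

```ruby
text = "it's John's"

# Block form: $1/$2 hold the capture groups inside the block.
a = text.gsub(/([A-Za-z])'([dms])\b/) { $1 + " '" + $2 }
# String form: \1/\2 reference the same captures (double-escaped in a "..." literal).
b = text.gsub(/([A-Za-z])'([dms])\b/, "\\1 '\\2")
p a == b # => true; both yield "it 's John 's"
```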
 
 
- # Instantiate a builder object that will build the grammar for us
- builder = Rley::Syntax::GrammarBuilder.new do
+ # Instantiate a facade object as our Rley interface
+ nlp_engine = Rley::Engine.new
 
- add_terminals('NN', 'NNP')
+ # Now build a very simplified English grammar...
+ nlp_engine.build_grammar do
+ # Terminals have same names as POS tags returned by Engtagger
+ add_terminals('NN', 'NNP')
  add_terminals('DET', 'IN', 'VBD')
 
- # Here we define the productions (= grammar rules)
+ # Here we define the productions (= grammar rules)
  rule 'S' => %w[NP VP]
  rule 'NP' => 'NNP'
  rule 'NP' => %w[DET NN]
@@ -125,52 +127,48 @@ builder = Rley::Syntax::GrammarBuilder.new do
  rule 'VP' => %w[VBD NP]
  rule 'VP' => %w[VBD NP PP]
  rule 'PP' => %w[IN NP]
- end
-
- # And now, let's build the grammar...
- grammar = builder.grammar
-
- parser = Rley::Parser::GFGEarleyParser.new(grammar)
+ end
 
  # text = "Yo I'm not done with you"
- text= "John saw Mary with a telescope"
- pp "Input text --> #{text}"
+ text = 'John saw Mary with a telescope'
+ puts "Input text --> #{text}"
 
  tgr = EngTagger.new
 
- # Generte POS
+ # Generate raw POS output
  tagged = tgr.add_tags(text)
 
  # Generte tokenied lexicon of input text
- # Instead of creating a lexicon dictionary, we would simply generate one each time on the fly for the current text only.
+ # Instead of creating a lexicon dictionary,
+ # we would simply generate one each time on the fly for the current text only.
  lexicon = clean_text(text)
 
- # Generte POS tokens in [[word, pos], ..] format
+ # Convert EngTagger POS tokens into [[word, pos], ..] format
  tokens = tagged.scan(GET_TAG).map { |tag, word| [word, tag.upcase] }
 
- def tokenizer(lexicon, grammar, tokens)
+ def tokenizer(lexicon, tokens)
  rley_tokens = []
- lexicon.each_with_index do |word, i|
+ lexicon.each_with_index do |word, i|
  term_name = tokens[i].last
- terminal = grammar.name2symbol[term_name]
- rley_tokens << Rley::Lexical::Token.new(word, terminal)
+ rley_tokens << Rley::Lexical::Token.new(word, term_name)
  end
  return rley_tokens
  end
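Note the API shift hiding in `tokenizer`: the 0.6.02 example passes the terminal's *name* (a plain String) straight to `Rley::Lexical::Token.new`, where the old code first looked up a terminal object via `grammar.name2symbol`. A condensed sketch of the new call shape, with invented sample data:

```ruby
# Sample data in the example's [[word, POS-tag], ...] shape (invented here).
lexicon = %w[John saw Mary]
tokens  = [%w[John NNP], %w[saw VBD], %w[Mary NNP]]

rley_tokens = lexicon.each_with_index.map do |word, i|
  Rley::Lexical::Token.new(word, tokens[i].last) # terminal name, no grammar lookup
end
```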
 
  # Convert input text into a sequence of rley token objects...
- rley_tokens = tokenizer(lexicon, grammar, tokens)
+ rley_tokens = tokenizer(lexicon, tokens)
 
- result = parser.parse(rley_tokens)
+ # Let Rley grok the tokens
+ result = nlp_engine.parse(rley_tokens)
 
- pp "Parsing successful? #{result.success?}" # => Parsing successful? true
- pp result.failure_reason.message unless result.success?
+ puts "Parsing successful? #{result.success?}" # => Parsing successful? true
+ puts result.failure_reason.message unless result.success?
 
- ptree = result.parse_tree
+ ptree = nlp_engine.convert(result)
 
- visitor = Rley::ParseTreeVisitor.new(ptree)
+ visitor = nlp_engine.ptree_visitor(ptree)
 
  renderer = Rley::Formatter::Asciitree.new($stdout)
 
- # Subscribe the formatter to the visitor's event and launch the visit
- pp renderer.render(visitor)
+ # Let's visualize the parse tree (in text format...)
+ puts renderer.render(visitor)
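Taken together, the engtagger.rb changes migrate the demo from hand-wired pre-0.6 objects (`Rley::Syntax::GrammarBuilder`, `Rley::Parser::GFGEarleyParser`, `Rley::ParseTreeVisitor`) to the `Rley::Engine` facade of the 0.6 line. A condensed, self-contained sketch of the new pipeline, using only calls that appear in this diff (the toy grammar is my own simplification):

```ruby
require 'rley'

engine = Rley::Engine.new
engine.build_grammar do
  add_terminals('NNP', 'VBD')
  rule 'S' => %w[NNP VBD NNP]
end

# One token per word, tagged with its terminal name.
words = [%w[John NNP], %w[saw VBD], %w[Mary NNP]]
rley_tokens = words.map { |w, pos| Rley::Lexical::Token.new(w, pos) }

result  = engine.parse(rley_tokens)      # run the Earley parser
ptree   = engine.convert(result)         # parse result -> parse tree
visitor = engine.ptree_visitor(ptree)    # visitor over the tree
puts Rley::Formatter::Asciitree.new($stdout).render(visitor)
```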
data/lib/rley/constants.rb CHANGED
@@ -3,7 +3,7 @@
 
  module Rley # Module used as a namespace
  # The version number of the gem.
- Version = '0.6.01'.freeze
+ Version = '0.6.02'.freeze
 
  # Brief description of the gem.
  Description = "Ruby implementation of the Earley's parsing algorithm".freeze
metadata CHANGED
@@ -1,14 +1,14 @@
  --- !ruby/object:Gem::Specification
  name: rley
  version: !ruby/object:Gem::Version
- version: 0.6.01
+ version: 0.6.02
  platform: ruby
  authors:
  - Dimitri Geshef
  autorequire:
  bindir: bin
  cert_chain: []
- date: 2018-03-02 00:00:00.000000000 Z
+ date: 2018-03-03 00:00:00.000000000 Z
  dependencies:
  - !ruby/object:Gem::Dependency
  name: coveralls
@@ -142,37 +142,6 @@ files:
  - examples/data_formats/JSON/json_grammar.rb
  - examples/data_formats/JSON/json_lexer.rb
  - examples/data_formats/JSON/json_minifier.rb
- - examples/general/SRL/lib/ast_builder.rb
- - examples/general/SRL/lib/grammar.rb
- - examples/general/SRL/lib/regex/abstract_method.rb
- - examples/general/SRL/lib/regex/alternation.rb
- - examples/general/SRL/lib/regex/anchor.rb
- - examples/general/SRL/lib/regex/atomic_expression.rb
- - examples/general/SRL/lib/regex/capturing_group.rb
- - examples/general/SRL/lib/regex/char_class.rb
- - examples/general/SRL/lib/regex/char_range.rb
- - examples/general/SRL/lib/regex/char_shorthand.rb
- - examples/general/SRL/lib/regex/character.rb
- - examples/general/SRL/lib/regex/compound_expression.rb
- - examples/general/SRL/lib/regex/concatenation.rb
- - examples/general/SRL/lib/regex/expression.rb
- - examples/general/SRL/lib/regex/lookaround.rb
- - examples/general/SRL/lib/regex/match_option.rb
- - examples/general/SRL/lib/regex/monadic_expression.rb
- - examples/general/SRL/lib/regex/multiplicity.rb
- - examples/general/SRL/lib/regex/non_capturing_group.rb
- - examples/general/SRL/lib/regex/polyadic_expression.rb
- - examples/general/SRL/lib/regex/quantifiable.rb
- - examples/general/SRL/lib/regex/repetition.rb
- - examples/general/SRL/lib/regex/wildcard.rb
- - examples/general/SRL/lib/regex_repr.rb
- - examples/general/SRL/lib/tokenizer.rb
- - examples/general/SRL/spec/integration_spec.rb
- - examples/general/SRL/spec/regex/character_spec.rb
- - examples/general/SRL/spec/regex/multiplicity_spec.rb
- - examples/general/SRL/spec/spec_helper.rb
- - examples/general/SRL/spec/tokenizer_spec.rb
- - examples/general/SRL/srl_demo.rb
  - examples/general/calc_iter1/calc_ast_builder.rb
  - examples/general/calc_iter1/calc_ast_nodes.rb
  - examples/general/calc_iter1/calc_demo.rb
data/examples/general/SRL/lib/ast_builder.rb DELETED
@@ -1,382 +0,0 @@
- require 'stringio'
- require_relative 'regex_repr'
-
- # The purpose of a ASTBuilder is to build piece by piece an AST
- # (Abstract Syntax Tree) from a sequence of input tokens and
- # visit events produced by walking over a GFGParsing object.
- # Uses the Builder GoF pattern.
- # The Builder pattern creates a complex object
- # (say, a parse tree) from simpler objects (terminal and non-terminal
- # nodes) and using a step by step approach.
- class ASTBuilder < Rley::ParseRep::ASTBaseBuilder
- Terminal2NodeClass = {}.freeze
-
- attr_reader :options
-
- protected
-
- def terminal2node()
- Terminal2NodeClass
- end
-
- # Overriding method.
- # Factory method for creating a node object for the given
- # input token.
- # @param aTerminal [Terminal] Terminal symbol associated with the token
- # @param aTokenPosition [Integer] Position of token in the input stream
- # @param aToken [Token] The input token
- def new_leaf_node(_production, _terminal, aTokenPosition, aToken)
- node = Rley::PTree::TerminalNode.new(aToken, aTokenPosition)
-
- return node
- end
-
- def multiplicity(lowerBound, upperBound)
- return SRL::Regex::Multiplicity.new(lowerBound, upperBound, :greedy)
- end
-
- def string_literal(aString, to_escape = true)
- if aString.size > 1
- chars = []
- aString.each_char do |ch|
- if to_escape && Regex::Character::MetaChars.include?(ch)
- chars << Regex::Character.new("\\")
- end
- chars << Regex::Character.new(ch)
- end
- result = Regex::Concatenation.new(*chars)
- elsif to_escape && Regex::Character::MetaChars.include?(aString)
- backslash = Regex::Character.new("\\")
- a_string = Regex::Character.new(aString)
- result = Regex::Concatenation.new(backslash, a_string)
- else
- result = Regex::Character.new(aString)
- end
-
- return result
- end
-
- def char_range(lowerBound, upperBound)
- # TODO fix module nesting
- lower = Regex::Character.new(lowerBound)
- upper = Regex::Character.new(upperBound)
- return Regex::CharRange.new(lower, upper)
- end
-
- def char_class(toNegate, *theChildren)
- Regex::CharClass.new(toNegate, *theChildren)
- end
-
- def char_shorthand(shortName)
- Regex::CharShorthand.new(shortName)
- end
-
- def wildcard()
- Regex::Wildcard.new
- end
-
- def repetition(expressionToRepeat, aMultiplicity)
- return Regex::Repetition.new(expressionToRepeat, aMultiplicity)
- end
-
- def begin_anchor
- return Regex::Anchor.new('^')
- end
-
- # rule('expression' => %w[pattern separator flags]).as 'flagged_expr'
- def reduce_flagged_expr(_production, aRange, theTokens, theChildren)
- @options = theChildren[2] if theChildren[2]
- return_first_child(aRange, theTokens, theChildren)
- end
-
- # rule('pattern' => %w[pattern separator quantifiable]).as 'pattern_sequence'
- def reduce_pattern_sequence(_production, _range, _tokens, theChildren)
- return Regex::Concatenation.new(theChildren[0], theChildren[2])
- end
-
- # rule('flags' => %[flags separator single_flag]).as 'flag_sequence'
- def reduce_flag_sequence(_production, _range, _tokens, theChildren)
- theChildren[0] << theChildren[2]
- end
-
- # rule('single_flag' => %w[CASE INSENSITIVE]).as 'case_insensitive'
- def reduce_case_insensitive(_production, _range, _tokens, _children)
- return [Regex::MatchOption.new(:IGNORECASE, true)]
- end
-
- # rule('single_flag' => %w[MULTI LINE]).as 'multi_line'
- def reduce_multi_line(_production, _range, _tokens, _children)
- return [Regex::MatchOption.new(:MULTILINE, true)]
- end
-
- # rule('single_flag' => %w[ALL LAZY]).as 'all_lazy'
- def reduce_all_lazy(_production, _range, _tokens, _children)
- return [Regex::MatchOption.new(:ALL_LAZY, true)]
- end
-
- # rule 'quantifiable' => %w[begin_anchor anchorable end_anchor]
- def reduce_pinned_quantifiable(_production, _range, _tokens, theChildren)
- theChildren[1].begin_anchor = theChildren[0]
- theChildren[1].end_anchor = theChildren[2]
- return theChildren[1]
- end
-
- # rule 'quantifiable' => %w[begin_anchor anchorable]
- def reduce_begin_anchor_quantifiable(_production, _range, _tokens, theChildren)
- theChildren[1].begin_anchor = theChildren[0]
- return theChildren[1]
- end
-
- # rule 'quantifiable' => %w[anchorable end_anchor]
- def reduce_end_anchor_quantifiable(_production, _range, _tokens, theChildren)
- theChildren[0].end_anchor = theChildren[1]
- return theChildren[0]
- end
-
- # rule 'begin_anchor' => %w[STARTS WITH]
- def reduce_starts_with(_production, _range, _tokens, _children)
- begin_anchor
- end
-
- # rule 'begin_anchor' => %w[BEGIN WITH]
- def reduce_begin_with(_production, _range, _tokens, _children)
- begin_anchor
- end
-
- # rule 'end_anchor' => %w[MUST END].as 'end_anchor'
- def reduce_end_anchor(_production, _range, _tokens, _children)
- return Regex::Anchor.new('$')
- end
-
- # rule('anchorable' => %w[assertable assertion]).as 'asserted_anchorable'
- def reduce_asserted_anchorable(_production, _range, _tokens, theChildren)
- assertion = theChildren.last
- assertion.children.unshift(theChildren[0])
- return assertion
- end
-
- # rule('assertion' => %w[IF FOLLOWED BY assertable]).as 'if_followed'
- def reduce_if_followed(_production, _range, _tokens, theChildren)
- return Regex::Lookaround.new(theChildren.last, :ahead, :positive)
- end
-
- # rule('assertion' => %w[IF NOT FOLLOWED BY assertable]).as 'if_not_followed'
- def reduce_if_not_followed(_production, _range, _tokens, theChildren)
- return Regex::Lookaround.new(theChildren.last, :ahead, :negative)
- end
-
- # rule('assertion' => %w[IF ALREADY HAD assertable]).as 'if_had'
- def reduce_if_had(_production, _range, _tokens, theChildren)
- return Regex::Lookaround.new(theChildren.last, :behind, :positive)
- end
-
- # rule('assertion' => %w[IF NOT ALREADY HAD assertable]).as 'if_not_had'
- def reduce_if_not_had(_production, _range, _tokens, theChildren)
- return Regex::Lookaround.new(theChildren.last, :behind, :negative)
- end
-
- # rule('assertable' => %w[term quantifier]).as 'quantified_assertable'
- def reduce_quantified_assertable(_production, _range, _tokens, theChildren)
- quantifier = theChildren[1]
- term = theChildren[0]
- repetition(term, quantifier)
- end
-
- # rule('letter_range' => %w[LETTER FROM LETTER_LIT TO LETTER_LIT]).as 'lowercase_from_to'
- def reduce_lowercase_from_to(_production, _range, _tokens, theChildren)
- lower = theChildren[2].token.lexeme
- upper = theChildren[4].token.lexeme
- ch_range = char_range(lower, upper)
- char_class(false, ch_range)
- end
-
- # rule('letter_range' => %w[UPPERCASE LETTER FROM LETTER_LIT TO LETTER_LIT]).as 'uppercase_from_to'
- def reduce_uppercase_from_to(_production, _range, _tokens, theChildren)
- lower = theChildren[3].token.lexeme
- upper = theChildren[5].token.lexeme
- ch_range = char_range(lower.upcase, upper.upcase)
- char_class(false, ch_range)
- end
-
- # rule('letter_range' => 'LETTER').as 'any_lowercase'
- def reduce_any_lowercase(_production, _range, _tokens, _children)
- ch_range = char_range('a', 'z')
- char_class(false, ch_range)
- end
-
- # rule('letter_range' => %w[UPPERCASE LETTER]).as 'any_uppercase'
- def reduce_any_uppercase(_production, _range, _tokens, _children)
- ch_range = char_range('A', 'Z')
- char_class(false, ch_range)
- end
-
- # rule('digit_range' => %w[digit_or_number FROM DIGIT_LIT TO DIGIT_LIT]).as 'digits_from_to'
- def reduce_digits_from_to(aProduction, aRange, theTokens, theChildren)
- reduce_lowercase_from_to(aProduction, aRange, theTokens, theChildren)
- end
-
- # rule('digit_range' => 'digit_or_number').as 'simple_digit_range'
- def reduce_simple_digit_range(_production, _range, _tokens, _children)
- char_shorthand('d')
- end
-
- # rule('character_class' => %w[ANY CHARACTER]).as 'any_character'
- def reduce_any_character(_production, _range, _tokens, _children)
- char_shorthand('w')
- end
-
- # rule('character_class' => %w[NO CHARACTER]).as 'no_character'
- def reduce_no_character(_production, _range, _tokens, _children)
- char_shorthand('W')
- end
-
- # rule('character_class' => 'WHITESPACE').as 'whitespace'
- def reduce_whitespace(_production, _range, _tokens, _children)
- char_shorthand('s')
- end
-
- # rule('character_class' => %w[NO WHITESPACE]).as 'no_whitespace'
- def reduce_no_whitespace(_production, _range, _tokens, _children)
- char_shorthand('S')
- end
-
- # rule('character_class' => 'ANYTHING').as 'anything'
- def reduce_anything(_production, _range, _tokens, _children)
- wildcard
- end
-
- # rule('alternation' => %w[ANY OF LPAREN alternatives RPAREN]).as 'any_of'
- def reduce_one_of(_production, _range, _tokens, theChildren)
- raw_literal = theChildren[-1].token.lexeme.dup
- alternatives = raw_literal.chars.map { |ch| Regex::Character.new(ch) }
- # TODO check other implementations
- return Regex::CharClass.new(false, *alternatives)
- end
-
- # rule('special_char' => 'TAB').as 'tab'
- def reduce_tab(_production, _range, _tokens, _children)
- Regex::Character.new('\t')
- end
-
- # rule('special_char' => 'BACKSLASH').as 'backslash'
- def reduce_backslash(_production, _range, _tokens, _children)
- Regex::Character.new('\\')
- end
-
- # rule('special_char' => %w[NEW LINE]).as 'new_line'
- def reduce_new_line(_production, _range, _tokens, _children)
- # TODO: control portability
- Regex::Character.new('\n')
- end
-
- # rule('literal' => %w[LITERALLY STRING_LIT]).as 'literally'
- def reduce_literally(_production, _range, _tokens, theChildren)
- # What if literal is empty?...
-
- raw_literal = theChildren[-1].token.lexeme.dup
- return string_literal(raw_literal)
- end
-
- # rule('alternation' => %w[ANY OF LPAREN alternatives RPAREN]).as 'any_of'
- def reduce_any_of(_production, _range, _tokens, theChildren)
- return Regex::Alternation.new(*theChildren[3])
- end
-
- # rule('alternatives' => %w[alternatives separator quantifiable]).as 'alternative_list'
- def reduce_alternative_list(_production, _range, _tokens, theChildren)
- return theChildren[0] << theChildren[-1]
- end
-
- # rule('alternatives' => 'quantifiable').as 'simple_alternative'
- def reduce_simple_alternative(_production, _range, _tokens, theChildren)
- return [theChildren.last]
- end
-
- # rule('grouping' => %w[LPAREN pattern RPAREN]).as 'grouping_parenthenses'
- def reduce_grouping_parenthenses(_production, _range, _tokens, theChildren)
- return Regex::NonCapturingGroup.new(theChildren[1])
- end
-
- # rule('capturing_group' => %w[CAPTURE assertable]).as 'capture'
- def reduce_capture(_production, _range, _tokens, theChildren)
- return Regex::CapturingGroup.new(theChildren[1])
- end
-
- # rule('capturing_group' => %w[CAPTURE assertable UNTIL assertable]).as
- # 'capture_until'
- def reduce_capture_until(_production, _range, _tokens, theChildren)
- group = Regex::CapturingGroup.new(theChildren[1])
- return Regex::Concatenation.new(group, theChildren[3])
- end
-
- # rule('capturing_group' => %w[CAPTURE assertable AS var_name]).as
- # 'named_capture'
- def reduce_named_capture(_production, _range, _tokens, theChildren)
- name = theChildren[3].token.lexeme.dup
- return Regex::CapturingGroup.new(theChildren[1], name)
- end
-
- # rule('capturing_group' => %w[CAPTURE assertable AS var_name
- # UNTIL assertable]).as 'named_capture_until'
- def reduce_named_capture_until(_production, _range, _tokens, theChildren)
- name = theChildren[3].token.lexeme.dup
- group = Regex::CapturingGroup.new(theChildren[1], name)
- return Regex::Concatenation.new(group, theChildren[5])
- end
-
- # rule('quantifier' => 'ONCE').as 'once'
- def reduce_once(_production, _range, _tokens, _children)
- multiplicity(1, 1)
- end
-
- # rule('quantifier' => 'TWICE').as 'twice'
- def reduce_twice(_production, _range, _tokens, _children)
- multiplicity(2, 2)
- end
-
- # rule('quantifier' => %w[EXACTLY count TIMES]).as 'exactly'
- def reduce_exactly(_production, _range, _tokens, theChildren)
- count = theChildren[1].token.lexeme.to_i
- multiplicity(count, count)
- end
-
- # rule('quantifier' => %w[BETWEEN count AND count times_suffix]).as
- # 'between_and'
- def reduce_between_and(_production, _range, _tokens, theChildren)
- lower = theChildren[1].token.lexeme.to_i
- upper = theChildren[3].token.lexeme.to_i
- multiplicity(lower, upper)
- end
-
- # rule('quantifier' => 'OPTIONAL').as 'optional'
- def reduce_optional(_production, _range, _tokens, _children)
- multiplicity(0, 1)
- end
-
- # rule('quantifier' => %w[ONCE OR MORE]).as 'once_or_more'
- def reduce_once_or_more(_production, _range, _tokens, _children)
- multiplicity(1, :more)
- end
-
- # rule('quantifier' => %w[NEVER OR MORE]).as 'never_or_more'
- def reduce_never_or_more(_production, _range, _tokens, _children)
- multiplicity(0, :more)
- end
-
- # rule('quantifier' => %w[AT LEAST count TIMES]).as 'at_least'
- def reduce_at_least(_production, _range, _tokens, theChildren)
- count = theChildren[2].token.lexeme.to_i
- multiplicity(count, :more)
- end
-
- # rule('times_suffix' => 'TIMES').as 'times_keyword'
- def reduce_times_keyword(_production, _range, _tokens, _children)
- return nil
- end
-
- # rule('times_suffix' => []).as 'times_dropped'
- def reduce_times_dropped(_production, _range, _tokens, _children)
- return nil
- end
- end # class
- # End of file