rley 0.5.10 → 0.5.11

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (51) hide show
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +15 -0
  3. data/LICENSE.txt +1 -1
  4. data/README.md +2 -1
  5. data/appveyor.yml +6 -5
  6. data/examples/NLP/engtagger.rb +176 -0
  7. data/examples/general/SRL/lib/ast_builder.rb +217 -21
  8. data/examples/general/SRL/lib/grammar.rb +33 -5
  9. data/examples/general/SRL/lib/regex/alternation.rb +30 -0
  10. data/examples/general/SRL/lib/regex/char_class.rb +28 -22
  11. data/examples/general/SRL/lib/regex/char_shorthand.rb +50 -0
  12. data/examples/general/SRL/lib/regex/character.rb +5 -3
  13. data/examples/general/SRL/lib/regex/concatenation.rb +32 -0
  14. data/examples/general/SRL/lib/regex/non_capturing_group.rb +29 -0
  15. data/examples/general/SRL/lib/regex/wildcard.rb +26 -0
  16. data/examples/general/SRL/lib/regex_repr.rb +5 -0
  17. data/examples/general/SRL/lib/tokenizer.rb +28 -3
  18. data/examples/general/SRL/spec/integration_spec.rb +151 -8
  19. data/examples/general/SRL/spec/tokenizer_spec.rb +12 -0
  20. data/examples/general/left.rb +36 -0
  21. data/examples/general/right.rb +36 -0
  22. data/lib/rley/constants.rb +1 -1
  23. data/lib/rley/gfg/edge.rb +12 -1
  24. data/lib/rley/gfg/grm_flow_graph.rb +21 -1
  25. data/lib/rley/gfg/item_vertex.rb +1 -1
  26. data/lib/rley/gfg/non_terminal_vertex.rb +1 -1
  27. data/lib/rley/gfg/start_vertex.rb +1 -0
  28. data/lib/rley/gfg/vertex.rb +27 -0
  29. data/lib/rley/lexical/token.rb +1 -0
  30. data/lib/rley/parser/error_reason.rb +2 -1
  31. data/lib/rley/parser/gfg_chart.rb +14 -0
  32. data/lib/rley/parser/gfg_earley_parser.rb +0 -1
  33. data/lib/rley/parser/gfg_parsing.rb +4 -3
  34. data/lib/rley/parser/parse_entry.rb +33 -3
  35. data/lib/rley/parser/parse_entry_set.rb +14 -2
  36. data/lib/rley/parser/parse_tree_builder.rb +1 -1
  37. data/lib/rley/parser/parse_walker_factory.rb +0 -1
  38. data/lib/rley/syntax/grm_symbol.rb +2 -0
  39. data/lib/rley/syntax/production.rb +15 -3
  40. data/lib/rley/syntax/symbol_seq.rb +16 -1
  41. data/spec/rley/gfg/end_vertex_spec.rb +9 -1
  42. data/spec/rley/gfg/grm_flow_graph_spec.rb +9 -0
  43. data/spec/rley/gfg/item_vertex_spec.rb +9 -0
  44. data/spec/rley/gfg/start_vertex_spec.rb +9 -1
  45. data/spec/rley/parser/gfg_parsing_spec.rb +0 -1
  46. data/spec/rley/parser/parse_entry_set_spec.rb +15 -0
  47. data/spec/rley/parser/parse_entry_spec.rb +24 -13
  48. data/spec/rley/parser/parse_tracer_spec.rb +1 -1
  49. data/spec/rley/syntax/production_spec.rb +10 -0
  50. data/spec/rley/syntax/symbol_seq_spec.rb +5 -0
  51. metadata +10 -2
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: ce33adee17693fccafcc29ce8340f57694c229fb
4
- data.tar.gz: 8acb3af15c3bd7c292209e3e1cc2e81b20ba3d9f
3
+ metadata.gz: 0ec06612f299302b861fbaeb04b75c0040a026cf
4
+ data.tar.gz: d68438efcbacceb2ae4319ac268492e93db35265
5
5
  SHA512:
6
- metadata.gz: dba9c01e5cb72954185ec5b2a973a7983202bc1733808637bd887feb6a473a57c6b3ef155090dec47f848b0e367ee558f04d0f7463d50b3494dad627ed7101fb
7
- data.tar.gz: ffb5e41e0325f51c1ae74cc2bb7739eb21b94dca87611f528067e70295a0cd03c956b7b3a75e32593b611abafd1007853917d32db9691d89ea4948df666ad7cc
6
+ metadata.gz: ec3be765a424028c986ea4812cf6f1485f04285beb2b9d8fffc774fc0b61108d4d6758a09a648132562752ab25904fb38f8ee57ecff90d0a70bca253150ed130
7
+ data.tar.gz: 2463def65eecbefed2bbfffc61e63e88dca2d0498078e83bc742811e540718e95e75f3896fa31b5bdc9068f5420906f389615470a86831dbcb5025824645775d
@@ -1,3 +1,18 @@
1
+ ### 0.5.11 / 2018-01-25
2
+ * [NEW] File `left.rb` added in `examples/general` folder for showing use of left-recursive rules.
3
+ * [NEW] File `right.rb` added in `examples/general` folder for showing use of right-recursive rules (less performant).
4
+ * [NEW] File `examples/general/SRL/lib/alternation.rb Added support for alternation in regular expressions (|).
5
+ * [NEW] File `examples/general/SRL/lib/character.rb Added support for single character in regular expressions.
6
+ * [NEW] File `examples/general/SRL/lib/char_class.rb Added support for character class in regular expressions.
7
+ * [NEW] File `examples/general/SRL/lib/shorthand.rb Added support for character class shorthand in regular expressions.
8
+ * [NEW] File `examples/general/SRL/lib/concatenation.rb Added support for concatenation in regular expressions.
9
+ * [NEW] File `examples/general/SRL/lib/non_capturing_group.rb Added support for non-capturing groups in regular expressions.
10
+ * [NEW] File `examples/general/SRL/lib/wildcard.rb Added support for wildcards in regular expressions.
11
+ * [CHANGE] File `examples/general/SRL/grammar.rb increased coverage of Simple Regex Language parser.
12
+ * [CHANGE] File `examples/general/SRL/ast_builder.rb Added transformation rules for constructing regular expressions.
13
+ * [CHANGE] File `examples/general/SRL/spec/integration_spec.rb Added tests for SRL expressions.
14
+ * [FIX] Added a custom `inspect` method to several core classes. This was necessary because default implementation from Ruby got lost with object graphs.
15
+
1
16
  ### 0.5.10 / 2017-12-02
2
17
  * [CHANGE] Dir `examples/general/SRL/ Added support for digit range to Simple Regex Language parser.
3
18
 
@@ -1,4 +1,4 @@
1
- Copyright (c) 2014-2017 Dimitri Geshef
1
+ Copyright (c) 2014-2018 Dimitri Geshef
2
2
 
3
3
  Permission is hereby granted, free of charge, to any person obtaining a copy
4
4
  of this software and associated documentation files (the "Software"), to deal
data/README.md CHANGED
@@ -440,6 +440,7 @@ actively curated by Andrei Beliankou (aka arbox).
440
440
 
441
441
  ## Thanks to:
442
442
  * Professor Keshav Pingali, one of the creators of the Grammar Flow Graph parsing approach for his encouraging e-mail exchange.
443
+ * [Arjun Menon](https://github.com/arjunmenon) for his NLP example that uses `engtagger` gem.
443
444
 
444
445
  ## Grammar Flow Graph
445
446
  Since the Grammar Flow Graph parsing approach is quite new, it has not yet taken a place in
@@ -452,5 +453,5 @@ standard parser textbooks. Here are a few references (and links) of papers on GF
452
453
 
453
454
  Copyright
454
455
  ---------
455
- Copyright (c) 2014-2017, Dimitri Geshef.
456
+ Copyright (c) 2014-2018, Dimitri Geshef.
456
457
  __Rley__ is released under the MIT License see [LICENSE.txt](https://github.com/famished-tiger/Rley/blob/master/LICENSE.txt) for details.
@@ -11,12 +11,13 @@ environment:
11
11
  - Ruby_version: 23
12
12
  - Ruby_version: 23-x64
13
13
  - Ruby_version: 24
14
- - Ruby_version: 24-x64
14
+ - Ruby_version: 24-x64
15
+
15
16
  install:
16
- - cmd: >-
17
- SET PATH=C:\Ruby%Ruby_version%\bin;%PATH%
17
+ - set PATH=C:\Ruby%Ruby_version%\bin;%PATH%
18
+ - bundle install --retry=3 --clean --force
18
19
 
19
- bundle install --retry=3 --clean
20
20
  build: off
21
+
21
22
  test_script:
22
- - cmd: bundle exec rake
23
+ - bundle exec rake
@@ -0,0 +1,176 @@
1
+ require "rley"
2
+ require "engtagger"
3
+ require "pp"
4
+
5
+ # REGEX to remove XML tags from Engtagger output
6
+ GET_TAG = /<(.+?)>(.*?)<.+?>/
7
+
8
+ # Text tokenizer
9
+ # Taken directly from Engtagger, will ensure uniform indexing while parsing
10
+ def clean_text(text)
11
+ return false unless valid_text(text)
12
+ text = text.toutf8
13
+ cleaned_text = text
14
+ tokenized = []
15
+ # Tokenize the text (splitting on punctuation as you go)
16
+ cleaned_text.split(/\s+/).each do |line|
17
+ tokenized += split_punct(line)
18
+ end
19
+ words = split_sentences(tokenized)
20
+ return words
21
+ end
22
+
23
+ def valid_text(text)
24
+ if !text
25
+ # there's nothing to parse
26
+ "method call on uninitialized variable" if @conf[:debug]
27
+ return false
28
+ elsif /\A\s*\z/ =~ text
29
+ # text is an empty string, nothing to parse
30
+ return false
31
+ else
32
+ # $text is valid
33
+ return true
34
+ end
35
+ end
36
+
37
+ def split_sentences(array)
38
+ tokenized = array
39
+ people = %w(jr mr ms mrs dr prof esq sr sen sens rep reps gov attys attys
40
+ supt det mssrs rev)
41
+ army = %w(col gen lt cmdr adm capt sgt cpl maj brig)
42
+ inst = %w(dept univ assn bros ph.d)
43
+ place = %w(arc al ave blvd bld cl ct cres exp expy dist mt mtn ft fy fwy
44
+ hwy hway la pde pd plz pl rd st tce)
45
+ comp = %w(mfg inc ltd co corp)
46
+ state = %w(ala ariz ark cal calif colo col conn del fed fla ga ida id ill
47
+ ind ia kans kan ken ky la me md is mass mich minn miss mo mont
48
+ neb nebr nev mex okla ok ore penna penn pa dak tenn tex ut vt
49
+ va wash wis wisc wy wyo usafa alta man ont que sask yuk)
50
+ month = %w(jan feb mar apr may jun jul aug sep sept oct nov dec)
51
+ misc = %w(vs etc no esp)
52
+ abbr = Hash.new
53
+ [people, army, inst, place, comp, state, month, misc].flatten.each do |i|
54
+ abbr[i] = true
55
+ end
56
+ words = Array.new
57
+ tokenized.each_with_index do |t, i|
58
+ if tokenized[i + 1] and tokenized [i + 1] =~ /[A-Z\W]/ and tokenized[i] =~ /\A(.+)\.\z/
59
+ w = $1
60
+ # Don't separate the period off words that
61
+ # meet any of the following conditions:
62
+ #
63
+ # 1. It is defined in one of the lists above
64
+ # 2. It is only one letter long: Alfred E. Sloan
65
+ # 3. It has a repeating letter-dot: U.S.A. or J.C. Penney
66
+ unless abbr[w.downcase] or w =~ /\A[a-z]\z/i or w =~ /[a-z](?:\.[a-z])+\z/i
67
+ words << w
68
+ words << '.'
69
+ next
70
+ end
71
+ end
72
+ words << tokenized[i]
73
+ end
74
+ # If the final word ends in a period..
75
+ if words[-1] and words[-1] =~ /\A(.*\w)\.\z/
76
+ words[-1] = $1
77
+ words.push '.'
78
+ end
79
+ return words
80
+ end
81
+
82
+ # Separate punctuation from words, where appropriate. This leaves trailing
83
+ # periods in place to be dealt with later. Called by the clean_text method.
84
+ def split_punct(text)
85
+ # If there's no punctuation, return immediately
86
+ return [text] if /\A\w+\z/ =~ text
87
+ # Sanity checks
88
+ text = text.gsub(/\W{10,}/o, " ")
89
+
90
+ # Put quotes into a standard format
91
+ text = text.gsub(/`(?!`)(?=.*\w)/o, "` ") # Shift left quotes off text
92
+ text = text.gsub(/"(?=.*\w)/o, " `` ") # Convert left quotes to ``
93
+ text = text.gsub(/(\W|^)'(?=.*\w)/o){$1 ? $1 + " ` " : " ` "} # Convert left quotes to `
94
+ text = text.gsub(/"/, " '' ") # Convert (remaining) quotes to ''
95
+ text = text.gsub(/(\w)'(?!')(?=\W|$)/o){$1 + " ' "} # Separate right single quotes
96
+
97
+ # Handle all other punctuation
98
+ text = text.gsub(/--+/o, " - ") # Convert and separate dashes
99
+ text = text.gsub(/,(?!\d)/o, " , ") # Shift commas off everything but numbers
100
+ text = text.gsub(/:/o, " :") # Shift semicolons off
101
+ text = text.gsub(/(\.\.\.+)/o){" " + $1 + " "} # Shift ellipses off
102
+ text = text.gsub(/([\(\[\{\}\]\)])/o){" " + $1 + " "} # Shift off brackets
103
+ text = text.gsub(/([\!\?#\$%;~|])/o){" " + $1 + " "} # Shift off other ``standard'' punctuation
104
+
105
+ # English-specific contractions
106
+ text = text.gsub(/([A-Za-z])'([dms])\b/o){$1 + " '" + $2} # Separate off 'd 'm 's
107
+ text = text.gsub(/n't\b/o, " n't") # Separate off n't
108
+ text = text.gsub(/'(ve|ll|re)\b/o){" '" + $1} # Separate off 've, 'll, 're
109
+ result = text.split(' ')
110
+ return result
111
+ end
112
+
113
+
114
+ # Instantiate a builder object that will build the grammar for us
115
+ builder = Rley::Syntax::GrammarBuilder.new do
116
+
117
+ add_terminals('NN', 'NNP')
118
+ add_terminals('DET', 'IN', 'VBD')
119
+
120
+ # Here we define the productions (= grammar rules)
121
+ rule 'S' => %w[NP VP]
122
+ rule 'NP' => 'NNP'
123
+ rule 'NP' => %w[DET NN]
124
+ rule 'NP' => %w[DET NN PP]
125
+ rule 'VP' => %w[VBD NP]
126
+ rule 'VP' => %w[VBD NP PP]
127
+ rule 'PP' => %w[IN NP]
128
+ end
129
+
130
+ # And now, let's build the grammar...
131
+ grammar = builder.grammar
132
+
133
+ parser = Rley::Parser::GFGEarleyParser.new(grammar)
134
+
135
+ # text = "Yo I'm not done with you"
136
+ text= "John saw Mary with a telescope"
137
+ pp "Input text --> #{text}"
138
+
139
+ tgr = EngTagger.new
140
+
141
+ # Generte POS
142
+ tagged = tgr.add_tags(text)
143
+
144
+ # Generte tokenied lexicon of input text
145
+ # Instead of creating a lexicon dictionary, we would simply generate one each time on the fly for the current text only.
146
+ lexicon = clean_text(text)
147
+
148
+ # Generte POS tokens in [[word, pos], ..] format
149
+ tokens = tagged.scan(GET_TAG).map { |tag, word| [word, tag.upcase] }
150
+
151
+ def tokenizer(lexicon, grammar, tokens)
152
+ rley_tokens = []
153
+ lexicon.each_with_index do |word, i|
154
+ term_name = tokens[i].last
155
+ terminal = grammar.name2symbol[term_name]
156
+ rley_tokens << Rley::Tokens::Token.new(word, terminal)
157
+ end
158
+ return rley_tokens
159
+ end
160
+
161
+ # Convert input text into a sequence of rley token objects...
162
+ rley_tokens = tokenizer(lexicon, grammar, tokens)
163
+
164
+ result = parser.parse(rley_tokens)
165
+
166
+ pp "Parsing successful? #{result.success?}" # => Parsing successful? true
167
+ pp result.failure_reason.message unless result.success?
168
+
169
+ ptree = result.parse_tree
170
+
171
+ visitor = Rley::ParseTreeVisitor.new(ptree)
172
+
173
+ renderer = Rley::Formatter::Asciitree.new($stdout)
174
+
175
+ # Subscribe the formatter to the visitor's event and launch the visit
176
+ pp renderer.render(visitor)
@@ -1,3 +1,4 @@
1
+ require 'stringio'
1
2
  require_relative 'ast_building'
2
3
  require_relative 'regex_repr'
3
4
 
@@ -35,27 +36,54 @@ class ASTBuilder < Rley::Parser::ParseTreeBuilder
35
36
  # @param theChildren [Array] Children nodes (one per rhs symbol)
36
37
  def new_parent_node(aProduction, aRange, theTokens, theChildren)
37
38
  node = case aProduction.name
38
- when 'srl_0' # rule 'srl' => 'term'
39
+ when 'srl_0' # rule 'srl' => 'pattern'
40
+ return_first_child(aRange, theTokens, theChildren)
41
+
42
+ when 'pattern_0' # rule 'pattern' => %w[pattern COMMA quantifiable]
43
+ reduce_pattern_0(aProduction, aRange, theTokens, theChildren)
44
+
45
+ when 'pattern_1' # rule 'pattern' => %w[pattern quantifiable]
46
+ reduce_pattern_1(aProduction, aRange, theTokens, theChildren)
47
+
48
+ when 'pattern_2' # rule 'pattern' => 'quantifiable'
49
+ return_first_child(aRange, theTokens, theChildren)
50
+
51
+ when 'quantifiable_0' # rule 'quantifiable' => 'term'
39
52
  return_first_child(aRange, theTokens, theChildren)
40
53
 
54
+ when 'quantifiable_1' # rule 'quantifiable' = %w[term quantifier]
55
+ reduce_quantifiable_1(aProduction, aRange, theTokens, theChildren)
56
+
41
57
  when 'term_0' # rule 'term' => 'atom'
42
58
  return_first_child(aRange, theTokens, theChildren)
43
59
 
44
- when 'term_1' # rule 'term' => %w[atom quantifier]
45
- reduce_term_1(aProduction, aRange, theTokens, theChildren)
60
+ when 'term_1' # rule 'term' => 'alternation'
61
+ return_first_child(aRange, theTokens, theChildren)
62
+
63
+ when 'term_2' # rule 'term' => 'grouping'
64
+ return_first_child(aRange, theTokens, theChildren)
46
65
 
47
66
  when 'atom_0' # rule 'atom' => 'letter_range'
48
67
  return_first_child(aRange, theTokens, theChildren)
49
-
68
+
50
69
  when 'atom_1' # rule 'atom' => 'digit_range'
51
70
  return_first_child(aRange, theTokens, theChildren)
52
-
71
+
72
+ when 'atom_2' # rule 'atom' => 'character_class'
73
+ return_first_child(aRange, theTokens, theChildren)
74
+
75
+ when 'atom_3' # rule 'atom' => 'special_char'
76
+ return_first_child(aRange, theTokens, theChildren)
77
+
78
+ when 'atom_4' # rule 'atom' => 'literal'
79
+ return_first_child(aRange, theTokens, theChildren)
80
+
53
81
  # rule 'letter_range' => %w[LETTER FROM LETTER_LIT TO LETTER_LIT]
54
- when 'letter_range_0'
82
+ when 'letter_range_0'
55
83
  reduce_letter_range_0(aProduction, aRange, theTokens, theChildren)
56
84
 
57
- #rule 'letter_range' => %w[UPPERCASE LETTER FROM LETTER_LIT TO LETTER_LIT]
58
- when 'letter_range_1'
85
+ #rule 'letter_range' => %w[UPPERCASE LETTER FROM LETTER_LIT TO LETTER_LIT]
86
+ when 'letter_range_1'
59
87
  reduce_letter_range_1(aProduction, aRange, theTokens, theChildren)
60
88
 
61
89
  when 'letter_range_2' # rule 'letter_range' => 'LETTER'
@@ -65,12 +93,60 @@ class ASTBuilder < Rley::Parser::ParseTreeBuilder
65
93
  reduce_letter_range_3(aProduction, aRange, theTokens, theChildren)
66
94
 
67
95
  # rule 'digit_range' => %w[digit_or_number FROM DIGIT_LIT TO DIGIT_LIT]
68
- when 'digit_range_0'
96
+ when 'digit_range_0'
69
97
  reduce_digit_range_0(aProduction, aRange, theTokens, theChildren)
70
98
 
71
- when 'digit_range_1' #rule 'digit_range' => 'digit_or_number'
99
+ when 'digit_range_1' # rule 'digit_range' => 'digit_or_number'
72
100
  reduce_digit_range_1(aProduction, aRange, theTokens, theChildren)
73
101
 
102
+ when 'character_class_0' # rule 'character_class' => %w[ANY CHARACTER]
103
+ reduce_character_class_0(aProduction, aRange, theTokens, theChildren)
104
+
105
+ when 'character_class_1' # rule 'character_class' => %w[NO CHARACTER]
106
+ reduce_character_class_1(aProduction, aRange, theTokens, theChildren)
107
+
108
+ when 'character_class_2' # rule 'character_class' => 'WHITESPACE'
109
+ reduce_character_class_2(aProduction, aRange, theTokens, theChildren)
110
+
111
+ when 'character_class_3' # rule 'character_class' => %w[NO WHITESPACE]
112
+ reduce_character_class_3(aProduction, aRange, theTokens, theChildren)
113
+
114
+ when 'character_class_4' # rule 'character_class' => 'ANYTHING'
115
+ reduce_character_class_4(aProduction, aRange, theTokens, theChildren)
116
+
117
+ when 'character_class_5' # rule 'character_class' => %w[ONE OF STRING_LIT]
118
+ reduce_character_class_5(aProduction, aRange, theTokens, theChildren)
119
+
120
+ when 'special_char_0' # rule 'special_char' => 'TAB'
121
+ reduce_special_char_0(aProduction, aRange, theTokens, theChildren)
122
+
123
+ when 'special_char_1' # rule 'special_char' => 'BACKSLASH'
124
+ reduce_special_char_1(aProduction, aRange, theTokens, theChildren)
125
+
126
+ when 'special_char_2' # rule 'special_char' => %w[NEW LINE]
127
+ reduce_special_char_2(aProduction, aRange, theTokens, theChildren)
128
+
129
+ when 'literal_0' # rule 'literal' => %[LITERALLY STRING_LIT]
130
+ reduce_literal_0(aProduction, aRange, theTokens, theChildren)
131
+
132
+ # rule 'alternation' => %w[ANY OF LPAREN alternatives RPAREN]
133
+ when 'alternation_0'
134
+ reduce_alternation_0(aProduction, aRange, theTokens, theChildren)
135
+
136
+ # rule 'alternatives' => %w[alternatives COMMA quantifiable]
137
+ when 'alternatives_0'
138
+ reduce_alternatives_0(aProduction, aRange, theTokens, theChildren)
139
+
140
+ # rule 'alternatives' => %w[alternatives quantifiable]
141
+ when 'alternatives_1'
142
+ reduce_alternatives_1(aProduction, aRange, theTokens, theChildren)
143
+
144
+ when 'alternatives_2' # rule 'alternatives' => 'quantifiable'
145
+ reduce_alternatives_2(aProduction, aRange, theTokens, theChildren)
146
+
147
+ when 'grouping' # rule 'grouping' => %w[LPAREN pattern RPAREN]
148
+ reduce_grouping_0(aProduction, aRange, theTokens, theChildren)
149
+
74
150
  when 'quantifier_0' # rule 'quantifier' => 'ONCE'
75
151
  multiplicity(1, 1)
76
152
 
@@ -81,7 +157,7 @@ class ASTBuilder < Rley::Parser::ParseTreeBuilder
81
157
  reduce_quantifier_2(aProduction, aRange, theTokens, theChildren)
82
158
 
83
159
  # rule 'quantifier' => %w[BETWEEN count AND count times_suffix]
84
- when 'quantifier_3'
160
+ when 'quantifier_3'
85
161
  reduce_quantifier_3(aProduction, aRange, theTokens, theChildren)
86
162
 
87
163
  when 'quantifier_4' # rule 'quantifier' => 'OPTIONAL'
@@ -95,10 +171,10 @@ class ASTBuilder < Rley::Parser::ParseTreeBuilder
95
171
 
96
172
  when 'quantifier_7' # rule 'quantifier' => %w[AT LEAST count TIMES]
97
173
  reduce_quantifier_7(aProduction, aRange, theTokens, theChildren)
98
-
174
+
99
175
  # rule 'digit_or_number' => 'DIGIT'
100
176
  # rule 'digit_or_number' => 'NUMER'
101
- when 'digit_or_number_0', 'digit_or_number_1'
177
+ when 'digit_or_number_0', 'digit_or_number_1'
102
178
  return_first_child(aRange, theTokens, theChildren)
103
179
 
104
180
  when 'count_0', 'count_1'
@@ -117,6 +193,28 @@ class ASTBuilder < Rley::Parser::ParseTreeBuilder
117
193
  return SRL::Regex::Multiplicity.new(lowerBound, upperBound, :greedy)
118
194
  end
119
195
 
196
+ def string_literal(aString, to_escape = true)
197
+ if aString.size > 1
198
+ chars = []
199
+ aString.each_char do |ch|
200
+ if to_escape && Regex::Character::MetaChars.include?(ch)
201
+ chars << Regex::Character.new("\\")
202
+ end
203
+ chars << Regex::Character.new(ch)
204
+ end
205
+ result = Regex::Concatenation.new(*chars)
206
+ else
207
+ if to_escape && Regex::Character::MetaChars.include?(aString)
208
+ result = Regex::Concatenation.new(Regex::Character.new("\\"),
209
+ Regex::Character.new(aString))
210
+ else
211
+ result = Regex::Character.new(aString)
212
+ end
213
+ end
214
+
215
+ return result
216
+ end
217
+
120
218
  def char_range(lowerBound, upperBound)
121
219
  # TODO fix module nesting
122
220
  lower = Regex::Character.new(lowerBound)
@@ -128,15 +226,33 @@ class ASTBuilder < Rley::Parser::ParseTreeBuilder
128
226
  Regex::CharClass.new(toNegate, *theChildren)
129
227
  end
130
228
 
229
+ def char_shorthand(shortName)
230
+ Regex::CharShorthand.new(shortName)
231
+ end
232
+
233
+ def wildcard()
234
+ Regex::Wildcard.new
235
+ end
236
+
131
237
  def repetition(expressionToRepeat, aMultiplicity)
132
238
  return Regex::Repetition.new(expressionToRepeat, aMultiplicity)
133
239
  end
240
+
241
+ # rule 'pattern' => %w[pattern COMMA quantifiable]
242
+ def reduce_pattern_0(aProduction, aRange, theTokens, theChildren)
243
+ return Regex::Concatenation.new(theChildren[0], theChildren[2])
244
+ end
245
+
246
+ # rule 'pattern' => %w[pattern quantifiable]
247
+ def reduce_pattern_1(aProduction, aRange, theTokens, theChildren)
248
+ return Regex::Concatenation.new(theChildren[0], theChildren[1])
249
+ end
134
250
 
135
- # rule 'term' => %w[atom quantifier]
136
- def reduce_term_1(aProduction, aRange, theTokens, theChildren)
251
+ # rule 'quantifiable' => %w[term quantifier]
252
+ def reduce_quantifiable_1(aProduction, aRange, theTokens, theChildren)
137
253
  quantifier = theChildren.last
138
- atom = theChildren.first
139
- repetition(atom, quantifier)
254
+ term = theChildren.first
255
+ repetition(term, quantifier)
140
256
  end
141
257
 
142
258
  # rule 'letter_range' => %w[LETTER FROM LETTER_LIT TO LETTER_LIT]
@@ -166,7 +282,7 @@ class ASTBuilder < Rley::Parser::ParseTreeBuilder
166
282
  ch_range = char_range('A', 'Z')
167
283
  char_class(false, ch_range)
168
284
  end
169
-
285
+
170
286
  # rule 'digit_range' => %w[digit_or_number FROM DIGIT_LIT TO DIGIT_LIT]
171
287
  def reduce_digit_range_0(aProduction, aRange, theTokens, theChildren)
172
288
  reduce_letter_range_0(aProduction, aRange, theTokens, theChildren)
@@ -174,15 +290,95 @@ class ASTBuilder < Rley::Parser::ParseTreeBuilder
174
290
 
175
291
  # rule 'digit_range' => 'digit_or_number'
176
292
  def reduce_digit_range_1(aProduction, aRange, theTokens, theChildren)
177
- ch_range = char_range('0', '9')
178
- char_class(false, ch_range)
293
+ char_shorthand('d')
294
+ end
295
+
296
+ # rule 'character_class' => %w[ANY CHARACTER]
297
+ def reduce_character_class_0(aProduction, aRange, theTokens, theChildren)
298
+ char_shorthand('w')
299
+ end
300
+
301
+ # rule 'character_class' => %w[NO CHARACTER]
302
+ def reduce_character_class_1(aProduction, aRange, theTokens, theChildren)
303
+ char_shorthand('W')
304
+ end
305
+
306
+ # rule 'character_class' => 'WHITESPACE'
307
+ def reduce_character_class_2(aProduction, aRange, theTokens, theChildren)
308
+ char_shorthand('s')
309
+ end
310
+
311
+ # rule 'character_class' => %w[NO WHITESPACE]
312
+ def reduce_character_class_3(aProduction, aRange, theTokens, theChildren)
313
+ char_shorthand('S')
314
+ end
315
+
316
+ # rule 'character_class' => 'ANYTHING'
317
+ def reduce_character_class_4(aProduction, aRange, theTokens, theChildren)
318
+ wildcard
319
+ end
320
+
321
+ # rule 'character_class' => %w[ONE OF STRING_LIT]
322
+ def reduce_character_class_5(aProduction, aRange, theTokens, theChildren)
323
+ raw_literal = theChildren[-1].token.lexeme.dup
324
+ alternatives = raw_literal.chars.map { |ch| Regex::Character.new(ch) }
325
+ return Regex::CharClass.new(false, *alternatives) # TODO check other implementations
179
326
  end
180
327
 
328
+ # rule 'special_char' => 'TAB'
329
+ def reduce_special_char_0(aProduction, aRange, theTokens, theChildren)
330
+ Regex::Character.new('\t')
331
+ end
332
+
333
+ # rule 'special_char' => 'BACKSLASH'
334
+ def reduce_special_char_1(aProduction, aRange, theTokens, theChildren)
335
+ Regex::Character.new('\\')
336
+ end
337
+
338
+ # rule 'special_char' => %w[NEW LINE]
339
+ def reduce_special_char_2(aProduction, aRange, theTokens, theChildren)
340
+ # TODO: control portability
341
+ Regex::Character.new('\n')
342
+ end
343
+
344
+ # rule 'literal' => %[LITERALLY STRING_LIT]
345
+ def reduce_literal_0(aProduction, aRange, theTokens, theChildren)
346
+ # What if literal is empty?...
347
+
348
+ raw_literal = theChildren[-1].token.lexeme.dup
349
+ return string_literal(raw_literal)
350
+ end
351
+
352
+ # rule 'alternation' => %w[ANY OF LPAREN alternatives RPAREN]
353
+ def reduce_alternation_0(aProduction, aRange, theTokens, theChildren)
354
+ return Regex::Alternation.new(*theChildren[3])
355
+ end
356
+
357
+ # rule 'alternatives' => %w[alternatives COMMA quantifiable]
358
+ def reduce_alternatives_0(aProduction, aRange, theTokens, theChildren)
359
+ return theChildren[0] << theChildren[-1]
360
+ end
361
+
362
+ # rule 'alternatives' => %w[alternatives quantifiable]
363
+ def reduce_alternatives_1(aProduction, aRange, theTokens, theChildren)
364
+ return theChildren[0] << theChildren[-1]
365
+ end
366
+
367
+ # rule 'alternatives' => 'quantifiable'
368
+ def reduce_alternatives_2(aProduction, aRange, theTokens, theChildren)
369
+ return [theChildren.last]
370
+ end
371
+
372
+ # rule 'grouping' => %w[LPAREN pattern RPAREN]
373
+ def reduce_grouping_0(aProduction, aRange, theTokens, theChildren)
374
+ return Regex::NonCapturingGroup.new(theChildren[1])
375
+ end
376
+
181
377
  # rule 'quantifier' => %w[EXACTLY count TIMES]
182
378
  def reduce_quantifier_2(aProduction, aRange, theTokens, theChildren)
183
379
  count = theChildren[1].token.lexeme.to_i
184
380
  multiplicity(count, count)
185
- end
381
+ end
186
382
 
187
383
  # rule 'quantifier' => %w[BETWEEN count AND count times_suffix]
188
384
  def reduce_quantifier_3(aProduction, aRange, theTokens, theChildren)