fabulator-grammar 0.0.1 → 0.0.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (37) hide show
  1. data/History.txt +22 -0
  2. data/Rakefile +3 -1
  3. data/VERSION +1 -1
  4. data/features/grammar.feature +116 -12
  5. data/features/step_definitions/expression_steps.rb +2 -2
  6. data/features/step_definitions/grammar_steps.rb +46 -2
  7. data/features/step_definitions/xml_steps.rb +5 -16
  8. data/features/support/env.rb +1 -0
  9. data/lib/fabulator-grammar.rb +1 -0
  10. data/lib/fabulator/grammar.rb +12 -3
  11. data/lib/fabulator/grammar/actions.rb +17 -7
  12. data/lib/fabulator/grammar/actions/context.rb +18 -0
  13. data/lib/fabulator/grammar/actions/grammar.rb +76 -0
  14. data/lib/fabulator/grammar/actions/rule.rb +51 -0
  15. data/lib/fabulator/grammar/actions/token.rb +27 -0
  16. data/lib/fabulator/grammar/actions/when.rb +35 -0
  17. data/lib/fabulator/grammar/cursor.rb +118 -0
  18. data/lib/fabulator/grammar/expr/anchor.rb +28 -0
  19. data/lib/fabulator/grammar/expr/char_set.rb +67 -18
  20. data/lib/fabulator/grammar/expr/look_ahead.rb +44 -0
  21. data/lib/fabulator/grammar/expr/rule.rb +33 -28
  22. data/lib/fabulator/grammar/expr/rule_alternative.rb +45 -0
  23. data/lib/fabulator/grammar/expr/rule_mode.rb +16 -0
  24. data/lib/fabulator/grammar/expr/rule_ref.rb +15 -4
  25. data/lib/fabulator/grammar/expr/rule_sequence.rb +59 -0
  26. data/lib/fabulator/grammar/expr/sequence.rb +7 -1
  27. data/lib/fabulator/grammar/expr/set_skip.rb +16 -0
  28. data/lib/fabulator/grammar/expr/text.rb +8 -0
  29. data/lib/fabulator/grammar/expr/{rules.rb → token.rb} +12 -1
  30. data/lib/fabulator/grammar/expr/token_alternative.rb +42 -0
  31. data/lib/fabulator/grammar/rule_parser.rb +667 -0
  32. data/lib/fabulator/grammar/token_parser.rb +733 -0
  33. data/rules.racc +249 -0
  34. data/tokens.racc +257 -0
  35. metadata +29 -12
  36. data/lib/fabulator/grammar/parser.rb +0 -548
  37. data/regex.racc +0 -183
data/History.txt ADDED
@@ -0,0 +1,22 @@
1
+ === 0.0.3 2010-09-07
2
+
3
+ * 5 major enhancements:
4
+ * structural elements in place for writing grammars
5
+ * limited rule matching support
6
+ * use [,] for repetition counts
7
+ * character set algebra supported
8
+ * pre-defined character sets
9
+ * requires Fabulator 0.0.7 or higher
10
+
11
+ === 0.0.2 2010-08-19
12
+
13
+ * 3 minor enhancements:
14
+ * Create file that keeps config.gem from complaining
15
+ * Change character sets to look a little like Perl 6
16
+ * Use <,> for repetition counts
17
+
18
+ === 0.0.1 2010-08-11
19
+
20
+ * 1 major enhancement:
21
+ * Initial release
22
+
data/Rakefile CHANGED
@@ -7,8 +7,10 @@ begin
7
7
  gem.email = "jgsmith@tamu.edu"
8
8
  gem.homepage = "http://github.com/jgsmith/ruby-fabulator-grammar"
9
9
  gem.authors = ["James Smith"]
10
- gem.add_dependency(%q<fabulator>, [">= 0.0.1"])
10
+ gem.add_dependency(%q<fabulator>, [">= 0.0.7"])
11
11
  # gem is a Gem::Specification... see http://www.rubygems.org/read/chapter/20 for additional settings
12
+ # not sure how to add dependency of a library that's not a gem
13
+ gem.requirements << 'bitset, 1.0 or greater'
12
14
  end
13
15
  rescue LoadError
14
16
  puts "Jeweler (or a dependency) not available. This is only required if you plan to package fabulator-exhibit as a gem."
data/VERSION CHANGED
@@ -1 +1 @@
1
- 0.0.1
1
+ 0.0.3
@@ -23,7 +23,7 @@ Feature: Basic regex parsing
23
23
  Scenario: Parsing a simple text string
24
24
  Given a context
25
25
  And the prefix g as "http://dh.tamu.edu/ns/fabulator/grammar/1.0#"
26
- When I parse the regex (foo{1,4}$)
26
+ When I parse the regex (foo[1,4]$)
27
27
  Then it should match "foo"
28
28
  Then it should match "fooooo"
29
29
  Then it should not match "foooooo"
@@ -33,7 +33,7 @@ Feature: Basic regex parsing
33
33
  Scenario: Parsing a simple text string
34
34
  Given a context
35
35
  And the prefix g as "http://dh.tamu.edu/ns/fabulator/grammar/1.0#"
36
- When I parse the regex (^[Ff]o[-a-zA-F01234-9]+?)
36
+ When I parse the regex (^[ [Ff] ]o[ [-a-zA-F01234-9] ]+?)
37
37
  Then it should match "foo"
38
38
  And it should match "Foo"
39
39
  And it should match "FoF03z-"
@@ -43,33 +43,45 @@ Feature: Basic regex parsing
43
43
  Scenario: Parsing a simple text string
44
44
  Given a context
45
45
  And the prefix g as "http://dh.tamu.edu/ns/fabulator/grammar/1.0#"
46
- When I parse the regex (^[^0-9][a-z][0-9]$)
47
- Then it should match "fo0"
48
- And it should not match "0l0"
46
+ When I parse the regex (^[ [Ff] ]o[ [-a-z] + [A-F] + [0-9] - [5] ]+?)
47
+ Then it should match "foo"
48
+ And it should match "Foo"
49
+ And it should match "FoF03z-"
50
+ And it should not match "hellofoo"
51
+ And it should not match "hellof5o"
49
52
 
53
+ @chars
50
54
  Scenario: Parsing a simple text string
51
55
  Given a context
52
56
  And the prefix g as "http://dh.tamu.edu/ns/fabulator/grammar/1.0#"
53
- When I parse the regex (^[^0-9].[0-9]$)
54
- Then it should match "fo0"
55
- And it should not match "0l0"
56
- And it should not match "00"
57
+ When I parse the regex (^[ [Ff] ]o[ [-] + :lower: + :xdigit: - [5] ]+?)
58
+ Then it should match "foo"
59
+ And it should match "Foo"
60
+ And it should match "FoF03z-"
61
+ And it should not match "hellofoo"
62
+ And it should not match "hellof5o"
57
63
 
58
64
  @chars
59
65
  Scenario: Parsing a simple text string
60
66
  Given a context
61
67
  And the prefix g as "http://dh.tamu.edu/ns/fabulator/grammar/1.0#"
62
- When I parse the regex ([\[-\]]o[a-z\]A-F01234-9]+?)
68
+ When I parse the regex (^[ -[0-9] ][ :lower: ][ [0-9] ]$)
69
+ Then it should match "fo0"
70
+ And it should not match "0l0"
63
71
 
64
72
  Scenario: Parsing a simple text string
65
73
  Given a context
66
74
  And the prefix g as "http://dh.tamu.edu/ns/fabulator/grammar/1.0#"
67
- When I parse the regex (<foo><g:bar>*)
75
+ When I parse the regex (^[-:digit:].[:digit:]$)
76
+ Then it should match "fo0"
77
+ And it should not match "0l0"
78
+ And it should not match "00"
68
79
 
80
+ @chars
69
81
  Scenario: Parsing a simple text string
70
82
  Given a context
71
83
  And the prefix g as "http://dh.tamu.edu/ns/fabulator/grammar/1.0#"
72
- When I parse the regex (bar ( <foo> <g:bar> ) *)
84
+ When I parse the regex ([[\[-\]]]o[[a-z\]A-F01234-9]]+?)
73
85
 
74
86
  Scenario: Adding two numbers together as a union
75
87
  Given a context
@@ -86,3 +98,95 @@ Feature: Basic regex parsing
86
98
  When I run the expression (g:match('^foo', 'bfooo'))
87
99
  Then I should get 1 item
88
100
  And item 0 should be false
101
+
102
+ @rules
103
+ Scenario: Parsing a simple text string
104
+ Given a context
105
+ Then I can parse the rule (foo bar)
106
+ And I can parse the rule (foo(s) bar(?))
107
+ And I can parse the rule (foo(s?) bar(3))
108
+ And I can parse the rule (foo(s NL) bar(3..4))
109
+ And I can parse the rule (foo ';'(s) bar(3..4 ','))
110
+ And I can parse the rule ((foo bar)(s))
111
+ And I can not parse the rule (foo(s NL(s)))
112
+ And I can parse the rule (foo(s (NL NL(s))))
113
+ And I can not parse the rule (foo(s NL NL(s)))
114
+
115
+ @grammar
116
+ Scenario: Parsing a grammar xml definition
117
+ Given a context
118
+ Given the grammar
119
+ """
120
+ <g:grammar xmlns:g="http://dh.tamu.edu/ns/fabulator/grammar/1.0#">
121
+ <g:token g:name="LETTER" g:matches="[:alpha:]" />
122
+ <g:token g:name="NUMBER" g:matches="[:digit:]" />
123
+ <g:rule g:name="something">
124
+ <g:when g:matches="LETTER NUMBER LETTER" />
125
+ </g:rule>
126
+ <g:rule g:name="other">
127
+ <g:when g:matches="a := LETTER b := NUMBER c := LETTER" />
128
+ </g:rule>
129
+ <g:rule g:name="or">
130
+ <g:when g:matches="other(s) d := LETTER(s)" />
131
+ </g:rule>
132
+ <g:rule g:name="ooor">
133
+ <g:when g:matches="other(s ',') d := LETTER(s)" />
134
+ </g:rule>
135
+ </g:grammar>
136
+ """
137
+ Then "something" should match "a0a"
138
+ And "something" should not match "abc"
139
+ And "something" should match "ab0c"
140
+ And "other" should match "a0a"
141
+ And "other" should match "ab1c"
142
+ And "or" should match "a1bcde"
143
+ And "or" should match "a1bb2ccde"
144
+ And "or" should match "a1bwb2ccde"
145
+ And "or" should not parse "a1bwb2ccde"
146
+ And "or" should match "acbwb2ccde"
147
+ And "or" should not parse "acbwb2ccde"
148
+ And "or" should not match "acbwb2d"
149
+ And "ooor" should parse "a1c,b2df"
150
+ And the expression (d) should equal ['f']
151
+ And the expression (other[1]/b) should equal ['1']
152
+ And the expression (other[2]/b) should equal ['2']
153
+ And "ooor" should not parse "a1c,b2d"
154
+
155
+ @mode
156
+ Scenario: Parsing a grammar xml definition
157
+ Given a context
158
+ Given the grammar
159
+ """
160
+ <g:grammar xmlns:g="http://dh.tamu.edu/ns/fabulator/grammar/1.0#">
161
+ <g:token g:name="LETTER" g:matches="[:alpha:]" g:mode="normal"/>
162
+ <g:token g:name="NUMBER" g:matches="[:digit:]" />
163
+ <g:token g:name="LETTER" g:matches="[:upper:]" g:mode="upper"/>
164
+ <g:token g:name="LETTER" g:matches="[:lower:]" g:mode="lower"/>
165
+ <g:rule g:name="something">
166
+ <g:when g:matches="[mode normal] LETTER NUMBER [mode upper] LETTER" />
167
+ </g:rule>
168
+ </g:grammar>
169
+ """
170
+ Then "something" should parse "a0A"
171
+ And "something" should not parse "a0a"
172
+
173
+ @context
174
+ Scenario: Parsing a grammar xml definition
175
+ Given a context
176
+ Given the grammar
177
+ """
178
+ <g:grammar xmlns:g="http://dh.tamu.edu/ns/fabulator/grammar/1.0#">
179
+ <g:context g:mode="normal">
180
+ <g:token g:name="LETTER" g:matches="[:alpha:]" />
181
+ </g:context>
182
+ <g:token g:name="NUMBER" g:matches="[:digit:]" />
183
+ <g:token g:name="LETTER" g:matches="[:upper:]" g:mode="upper"/>
184
+ <g:token g:name="LETTER" g:matches="[:lower:]" g:mode="lower"/>
185
+ <g:rule g:name="something">
186
+ <g:when g:matches="^^ [mode normal] LETTER NUMBER [mode upper] LETTER" />
187
+ </g:rule>
188
+ </g:grammar>
189
+ """
190
+ Then "something" should match "a0A"
191
+ And "something" should not match "a0a"
192
+ And "something" should not match "aa0A"
@@ -19,7 +19,6 @@ end
19
19
  Given 'a context' do
20
20
  @context ||= Fabulator::Expr::Context.new
21
21
  @parser ||= Fabulator::Expr::Parser.new
22
- @grammar_parser ||= Fabulator::Grammar::Parser.new
23
22
  end
24
23
 
25
24
  Given /the prefix (\S+) as "([^"]+)"/ do |p,h|
@@ -72,7 +71,8 @@ Then /I should get the type (.*)/ do |t|
72
71
  end
73
72
 
74
73
  Then /I should get (\d+) items?/ do |count|
75
- @result.length.should == count
74
+ #puts "result types: #{@result.collect{|r| r.class.name}.join(', ')}"
75
+ @result.size.should == count
76
76
  end
77
77
 
78
78
  Then /item (\d+) should be (\[.*\])/ do |i,t|
@@ -2,13 +2,30 @@ require 'yaml'
2
2
 
3
3
  When /^I parse the regex \((.*)\)$/ do |regex|
4
4
  @context ||= Fabulator::Expr::Context.new
5
- @grammar_parser ||= Fabulator::Grammar::Parser.new
6
- @regex = @grammar_parser.parse(regex, @context)
5
+ @grammar_parser ||= Fabulator::Grammar::TokenParser.new
6
+ @rule_parser ||= Fabulator::Grammar::RuleParser.new
7
+ @regex = @grammar_parser.parse(regex)
7
8
  # puts YAML::dump(r)
8
9
  # puts @regex.to_regex
9
10
  # pending # express the regexp above with the code you wish you had
10
11
  end
11
12
 
13
+ Then /^I can parse the rule \((.*)\)$/ do |rule|
14
+ @context ||= Fabulator::Expr::Context.new
15
+ @grammar_parser ||= Fabulator::Grammar::TokenParser.new
16
+ @rule_parser ||= Fabulator::Grammar::RuleParser.new
17
+ @rule = @rule_parser.parse(rule)
18
+ @rule.should_not == nil
19
+ end
20
+
21
+ Then /^I can not parse the rule \((.*)\)$/ do |rule|
22
+ @context ||= Fabulator::Expr::Context.new
23
+ @grammar_parser ||= Fabulator::Grammar::TokenParser.new
24
+ @rule_parser ||= Fabulator::Grammar::RuleParser.new
25
+ @rule = (@rule_parser.parse(rule) rescue nil)
26
+ @rule.should == nil
27
+ end
28
+
12
29
  Then /^it should match "(.*)"$/ do |str|
13
30
  str.should =~ @regex.to_regex
14
31
  end
@@ -16,3 +33,30 @@ end
16
33
  Then /^it should not match "(.*)"$/ do |str|
17
34
  str.should_not =~ @regex.to_regex
18
35
  end
36
+
37
+ Then /^"(.*)" should match "(.*)"$/ do |nom, str|
38
+ ret = @grammar.match(@context, nom, str)
39
+ # puts YAML::dump(ret)
40
+ ret.should == true
41
+ end
42
+
43
+ Then /^"(.*)" should parse "(.*)"$/ do |nom, str|
44
+ ret = @grammar.parse(@context, nom, str)
45
+ #puts YAML::dump(ret)
46
+ ret.should_not == nil
47
+
48
+ @context.root = @context.root.anon_node(nil)
49
+ if !ret.nil?
50
+ @context.merge_data(ret)
51
+ @context.root.roots['data'] = @context.root
52
+ #puts YAML::dump(@context.root.to_h)
53
+ end
54
+ end
55
+
56
+ Then /^"(.*)" should not parse "(.*)"$/ do |nom, str|
57
+ @grammar.parse(@context, nom, str).should == nil
58
+ end
59
+
60
+ Then /^"(.*)" should not match "(.*)"$/ do |nom, str|
61
+ @grammar.match(@context, nom, str).should == false
62
+ end
@@ -1,23 +1,12 @@
1
- Given /the statemachine/ do |doc_xml|
1
+ Given /the grammar/ do |doc_xml|
2
2
  @context ||= Fabulator::Expr::Context.new
3
3
 
4
- if @sm.nil?
5
- @sm = Fabulator::Core::StateMachine.new.compile_xml(doc_xml)
4
+ if @grammar.nil?
5
+ @grammar = Fabulator::Grammar::Actions::Grammar.new.compile_xml(doc_xml, @context)
6
6
  else
7
- @sm.compile_xml(doc_xml)
7
+ @grammar.compile_xml(doc_xml, @context)
8
8
  end
9
- @sm.init_context(@context)
10
- end
11
9
 
12
- When /I run it with the following params:/ do |param_table|
13
- params = { }
14
- param_table.hashes.each do |hash|
15
- params[hash['key']] = hash['value']
16
- end
17
- @sm.run(params)
18
- #puts YAML::dump(@sm)
10
+ # puts YAML::dump(@grammar)
19
11
  end
20
12
 
21
- Then /it should be in the '(.*)' state/ do |s|
22
- @sm.state.should == s
23
- end
@@ -1,6 +1,7 @@
1
1
  # Test-support setup: adds the local lib directories to the load path and requires fabulator and fabulator/grammar.
2
2
 
3
3
  $: << File.expand_path(File.dirname(__FILE__))+'/../../lib'
4
+ $: << File.expand_path(File.dirname(__FILE__))+'/../../../fabulator/lib'
4
5
 
5
6
  require 'fabulator'
6
7
  require 'fabulator/grammar'
@@ -0,0 +1 @@
1
+ require 'fabulator/grammar'
@@ -1,12 +1,21 @@
1
- require 'fabulator/grammar/parser'
1
+ require 'fabulator/grammar/rule_parser'
2
+ require 'fabulator/grammar/token_parser'
2
3
  require 'fabulator/grammar/actions'
3
- require 'fabulator/grammar/expr/rules'
4
- require 'fabulator/grammar/expr/rule'
4
+ require 'fabulator/grammar/expr/token'
5
+ require 'fabulator/grammar/expr/token_alternative'
5
6
  require 'fabulator/grammar/expr/rule_ref'
6
7
  require 'fabulator/grammar/expr/text'
7
8
  require 'fabulator/grammar/expr/sequence'
9
+ require 'fabulator/grammar/expr/set_skip'
8
10
  require 'fabulator/grammar/expr/char_set'
9
11
  require 'fabulator/grammar/expr/any'
12
+ require 'fabulator/grammar/expr/look_ahead'
13
+ require 'fabulator/grammar/cursor'
14
+ require 'fabulator/grammar/expr/rule'
15
+ require 'fabulator/grammar/expr/rule_mode'
16
+ require 'fabulator/grammar/expr/rule_alternative'
17
+ require 'fabulator/grammar/expr/rule_sequence'
18
+ require 'fabulator/grammar/expr/anchor'
10
19
 
11
20
  module Fabulator
12
21
  module Grammar
@@ -1,22 +1,32 @@
1
- #require 'fabulator/grammar/actions/grammar'
2
1
 
3
2
  module Fabulator
4
3
  GRAMMAR_NS = "http://dh.tamu.edu/ns/fabulator/grammar/1.0#"
4
+
5
+ require 'fabulator/grammar/actions/grammar'
6
+ require 'fabulator/grammar/actions/context'
7
+ require 'fabulator/grammar/actions/rule'
8
+ require 'fabulator/grammar/actions/token'
9
+ require 'fabulator/grammar/actions/when'
10
+
5
11
  module Grammar
6
12
  module Actions
7
- class Lib
8
- include Fabulator::ActionLib
9
-
13
+ class Lib < Fabulator::TagLib
10
14
  register_namespace GRAMMAR_NS
11
15
 
12
- #action 'grammar', Grammar
16
+ structural 'grammar', Grammar
17
+ structural 'context', Context
18
+ structural 'rule', Rule
19
+ structural 'token', Token
20
+ structural 'when', When
21
+
22
+ # action 'result', Result
13
23
 
14
24
  ## reference a grammar name
15
25
  function 'match' do |ctx, args|
16
26
  # first arg is the regex or <rule name>
17
27
  regex = args[0].to_s
18
- parser = Fabulator::Grammar::Parser.new
19
- compiled = parser.parse(regex, ctx).to_regex
28
+ parser = Fabulator::Grammar::TokenParser.new
29
+ compiled = parser.parse(regex).to_regex
20
30
  if args[1].is_a?(Array)
21
31
  args[1].collect{|a|
22
32
  if a.to_s =~ compiled
@@ -0,0 +1,18 @@
1
+ module Fabulator
2
+ module Grammar
3
+ module Actions
4
+ class Context < Fabulator::Structural
5
+
6
+ namespace GRAMMAR_NS
7
+
8
+ attribute :mode, :default => :default, :static => true
9
+
10
+ attr_accessor :mode, :tokens, :rules
11
+
12
+ contains :rule
13
+ contains :token
14
+
15
+ end
16
+ end
17
+ end
18
+ end
@@ -0,0 +1,76 @@
1
+ module Fabulator
2
+ module Grammar
3
+ module Actions
4
+ class Grammar < Fabulator::Structural
5
+
6
+ namespace GRAMMAR_NS
7
+
8
+ contains :rule
9
+ contains :token
10
+ contains :context
11
+
12
+ has_actions
13
+
14
+ def compile_xml(xml, context = nil)
15
+ super
16
+
17
+ @modes = { :default => { } }
18
+
19
+ @contexts.each do |c|
20
+ c.tokens.each do |token|
21
+ self.add_rule(token, c.mode)
22
+ end
23
+ c.rules.each do |rule|
24
+ self.add_rule(rule, c.mode)
25
+ end
26
+ end
27
+ @tokens.each do |token|
28
+ self.add_rule(token)
29
+ end
30
+ @rules.each do |rule|
31
+ self.add_rule(rule)
32
+ end
33
+
34
+ @tokens = nil
35
+ @rules = nil
36
+
37
+ self
38
+ end
39
+
40
+ def add_rule(r, m = :default)
41
+ return if r.nil?
42
+ mode = ((r.mode.nil? || r.mode.to_sym == :default) ? m : r.mode).to_sym
43
+ @modes[mode] ||= { }
44
+ @modes[mode][r.name.to_sym] = r
45
+ end
46
+
47
+ def get_rule(m, nom)
48
+ @modes[m.to_sym].nil? ? nil : @modes[m.to_sym][nom.to_sym]
49
+ end
50
+
51
+ def parse(ctx, nom, s)
52
+ cursor = Fabulator::Grammar::Cursor.new(self, ctx, s)
53
+ cursor.anchored = true
54
+ ret = do_parse(nom, cursor)
55
+ cursor.do_skip
56
+ cursor.eof ? ret : nil
57
+ end
58
+
59
+ def match(ctx, nom, s)
60
+ cursor = Fabulator::Grammar::Cursor.new(self, ctx, s)
61
+ !do_parse(nom, cursor).nil?
62
+ end
63
+
64
+ protected
65
+
66
+ def do_parse(nom, cursor)
67
+ obj = get_rule(:default, nom)
68
+ return nil if obj.nil?
69
+
70
+ obj.parse(cursor)
71
+ end
72
+
73
+ end
74
+ end
75
+ end
76
+ end