fabulator-grammar 0.0.1 → 0.0.3

Sign up to get free protection for your applications and to get access to all the features.
Files changed (37) hide show
  1. data/History.txt +22 -0
  2. data/Rakefile +3 -1
  3. data/VERSION +1 -1
  4. data/features/grammar.feature +116 -12
  5. data/features/step_definitions/expression_steps.rb +2 -2
  6. data/features/step_definitions/grammar_steps.rb +46 -2
  7. data/features/step_definitions/xml_steps.rb +5 -16
  8. data/features/support/env.rb +1 -0
  9. data/lib/fabulator-grammar.rb +1 -0
  10. data/lib/fabulator/grammar.rb +12 -3
  11. data/lib/fabulator/grammar/actions.rb +17 -7
  12. data/lib/fabulator/grammar/actions/context.rb +18 -0
  13. data/lib/fabulator/grammar/actions/grammar.rb +76 -0
  14. data/lib/fabulator/grammar/actions/rule.rb +51 -0
  15. data/lib/fabulator/grammar/actions/token.rb +27 -0
  16. data/lib/fabulator/grammar/actions/when.rb +35 -0
  17. data/lib/fabulator/grammar/cursor.rb +118 -0
  18. data/lib/fabulator/grammar/expr/anchor.rb +28 -0
  19. data/lib/fabulator/grammar/expr/char_set.rb +67 -18
  20. data/lib/fabulator/grammar/expr/look_ahead.rb +44 -0
  21. data/lib/fabulator/grammar/expr/rule.rb +33 -28
  22. data/lib/fabulator/grammar/expr/rule_alternative.rb +45 -0
  23. data/lib/fabulator/grammar/expr/rule_mode.rb +16 -0
  24. data/lib/fabulator/grammar/expr/rule_ref.rb +15 -4
  25. data/lib/fabulator/grammar/expr/rule_sequence.rb +59 -0
  26. data/lib/fabulator/grammar/expr/sequence.rb +7 -1
  27. data/lib/fabulator/grammar/expr/set_skip.rb +16 -0
  28. data/lib/fabulator/grammar/expr/text.rb +8 -0
  29. data/lib/fabulator/grammar/expr/{rules.rb → token.rb} +12 -1
  30. data/lib/fabulator/grammar/expr/token_alternative.rb +42 -0
  31. data/lib/fabulator/grammar/rule_parser.rb +667 -0
  32. data/lib/fabulator/grammar/token_parser.rb +733 -0
  33. data/rules.racc +249 -0
  34. data/tokens.racc +257 -0
  35. metadata +29 -12
  36. data/lib/fabulator/grammar/parser.rb +0 -548
  37. data/regex.racc +0 -183
data/History.txt ADDED
@@ -0,0 +1,22 @@
1
+ === 0.0.3 2010-09-07
2
+
3
+ * 5 major enhancements:
4
+ * structural elements in place for writing grammars
5
+ * limited rule matching support
6
+ * use [,] for repetition counts
7
+ * character set algebra supported
8
+ * pre-defined character sets
9
+ * requires Fabulator 0.0.7 or higher
10
+
11
+ === 0.0.2 2010-08-19
12
+
13
+ * 3 minor enhancements:
14
+ * Create file that keeps config.gem from complaining
15
+ * Change character sets to look a little like Perl 6
16
+ * Use <,> for repetition counts
17
+
18
+ === 0.0.1 2010-08-11
19
+
20
+ * 1 major enhancement:
21
+ * Initial release
22
+
data/Rakefile CHANGED
@@ -7,8 +7,10 @@ begin
7
7
  gem.email = "jgsmith@tamu.edu"
8
8
  gem.homepage = "http://github.com/jgsmith/ruby-fabulator-grammar"
9
9
  gem.authors = ["James Smith"]
10
- gem.add_dependency(%q<fabulator>, [">= 0.0.1"])
10
+ gem.add_dependency(%q<fabulator>, [">= 0.0.7"])
11
11
  # gem is a Gem::Specification... see http://www.rubygems.org/read/chapter/20 for additional settings
12
+ # not sure how to add dependency of a library that's not a gem
13
+ gem.requirements << 'bitset, 1.0 or greater'
12
14
  end
13
15
  rescue LoadError
14
16
  puts "Jeweler (or a dependency) not available. This is only required if you plan to package fabulator-exhibit as a gem."
data/VERSION CHANGED
@@ -1 +1 @@
1
- 0.0.1
1
+ 0.0.3
@@ -23,7 +23,7 @@ Feature: Basic regex parsing
23
23
  Scenario: Parsing a simple text string
24
24
  Given a context
25
25
  And the prefix g as "http://dh.tamu.edu/ns/fabulator/grammar/1.0#"
26
- When I parse the regex (foo{1,4}$)
26
+ When I parse the regex (foo[1,4]$)
27
27
  Then it should match "foo"
28
28
  Then it should match "fooooo"
29
29
  Then it should not match "foooooo"
@@ -33,7 +33,7 @@ Feature: Basic regex parsing
33
33
  Scenario: Parsing a simple text string
34
34
  Given a context
35
35
  And the prefix g as "http://dh.tamu.edu/ns/fabulator/grammar/1.0#"
36
- When I parse the regex (^[Ff]o[-a-zA-F01234-9]+?)
36
+ When I parse the regex (^[ [Ff] ]o[ [-a-zA-F01234-9] ]+?)
37
37
  Then it should match "foo"
38
38
  And it should match "Foo"
39
39
  And it should match "FoF03z-"
@@ -43,33 +43,45 @@ Feature: Basic regex parsing
43
43
  Scenario: Parsing a simple text string
44
44
  Given a context
45
45
  And the prefix g as "http://dh.tamu.edu/ns/fabulator/grammar/1.0#"
46
- When I parse the regex (^[^0-9][a-z][0-9]$)
47
- Then it should match "fo0"
48
- And it should not match "0l0"
46
+ When I parse the regex (^[ [Ff] ]o[ [-a-z] + [A-F] + [0-9] - [5] ]+?)
47
+ Then it should match "foo"
48
+ And it should match "Foo"
49
+ And it should match "FoF03z-"
50
+ And it should not match "hellofoo"
51
+ And it should not match "hellof5o"
49
52
 
53
+ @chars
50
54
  Scenario: Parsing a simple text string
51
55
  Given a context
52
56
  And the prefix g as "http://dh.tamu.edu/ns/fabulator/grammar/1.0#"
53
- When I parse the regex (^[^0-9].[0-9]$)
54
- Then it should match "fo0"
55
- And it should not match "0l0"
56
- And it should not match "00"
57
+ When I parse the regex (^[ [Ff] ]o[ [-] + :lower: + :xdigit: - [5] ]+?)
58
+ Then it should match "foo"
59
+ And it should match "Foo"
60
+ And it should match "FoF03z-"
61
+ And it should not match "hellofoo"
62
+ And it should not match "hellof5o"
57
63
 
58
64
  @chars
59
65
  Scenario: Parsing a simple text string
60
66
  Given a context
61
67
  And the prefix g as "http://dh.tamu.edu/ns/fabulator/grammar/1.0#"
62
- When I parse the regex ([\[-\]]o[a-z\]A-F01234-9]+?)
68
+ When I parse the regex (^[ -[0-9] ][ :lower: ][ [0-9] ]$)
69
+ Then it should match "fo0"
70
+ And it should not match "0l0"
63
71
 
64
72
  Scenario: Parsing a simple text string
65
73
  Given a context
66
74
  And the prefix g as "http://dh.tamu.edu/ns/fabulator/grammar/1.0#"
67
- When I parse the regex (<foo><g:bar>*)
75
+ When I parse the regex (^[-:digit:].[:digit:]$)
76
+ Then it should match "fo0"
77
+ And it should not match "0l0"
78
+ And it should not match "00"
68
79
 
80
+ @chars
69
81
  Scenario: Parsing a simple text string
70
82
  Given a context
71
83
  And the prefix g as "http://dh.tamu.edu/ns/fabulator/grammar/1.0#"
72
- When I parse the regex (bar ( <foo> <g:bar> ) *)
84
+ When I parse the regex ([[\[-\]]]o[[a-z\]A-F01234-9]]+?)
73
85
 
74
86
  Scenario: Adding two numbers together as a union
75
87
  Given a context
@@ -86,3 +98,95 @@ Feature: Basic regex parsing
86
98
  When I run the expression (g:match('^foo', 'bfooo'))
87
99
  Then I should get 1 item
88
100
  And item 0 should be false
101
+
102
+ @rules
103
+ Scenario: Parsing a simple text string
104
+ Given a context
105
+ Then I can parse the rule (foo bar)
106
+ And I can parse the rule (foo(s) bar(?))
107
+ And I can parse the rule (foo(s?) bar(3))
108
+ And I can parse the rule (foo(s NL) bar(3..4))
109
+ And I can parse the rule (foo ';'(s) bar(3..4 ','))
110
+ And I can parse the rule ((foo bar)(s))
111
+ And I can not parse the rule (foo(s NL(s)))
112
+ And I can parse the rule (foo(s (NL NL(s))))
113
+ And I can not parse the rule (foo(s NL NL(s)))
114
+
115
+ @grammar
116
+ Scenario: Parsing a grammar xml definition
117
+ Given a context
118
+ Given the grammar
119
+ """
120
+ <g:grammar xmlns:g="http://dh.tamu.edu/ns/fabulator/grammar/1.0#">
121
+ <g:token g:name="LETTER" g:matches="[:alpha:]" />
122
+ <g:token g:name="NUMBER" g:matches="[:digit:]" />
123
+ <g:rule g:name="something">
124
+ <g:when g:matches="LETTER NUMBER LETTER" />
125
+ </g:rule>
126
+ <g:rule g:name="other">
127
+ <g:when g:matches="a := LETTER b := NUMBER c := LETTER" />
128
+ </g:rule>
129
+ <g:rule g:name="or">
130
+ <g:when g:matches="other(s) d := LETTER(s)" />
131
+ </g:rule>
132
+ <g:rule g:name="ooor">
133
+ <g:when g:matches="other(s ',') d := LETTER(s)" />
134
+ </g:rule>
135
+ </g:grammar>
136
+ """
137
+ Then "something" should match "a0a"
138
+ And "something" should not match "abc"
139
+ And "something" should match "ab0c"
140
+ And "other" should match "a0a"
141
+ And "other" should match "ab1c"
142
+ And "or" should match "a1bcde"
143
+ And "or" should match "a1bb2ccde"
144
+ And "or" should match "a1bwb2ccde"
145
+ And "or" should not parse "a1bwb2ccde"
146
+ And "or" should match "acbwb2ccde"
147
+ And "or" should not parse "acbwb2ccde"
148
+ And "or" should not match "acbwb2d"
149
+ And "ooor" should parse "a1c,b2df"
150
+ And the expression (d) should equal ['f']
151
+ And the expression (other[1]/b) should equal ['1']
152
+ And the expression (other[2]/b) should equal ['2']
153
+ And "ooor" should not parse "a1c,b2d"
154
+
155
+ @mode
156
+ Scenario: Parsing a grammar xml definition
157
+ Given a context
158
+ Given the grammar
159
+ """
160
+ <g:grammar xmlns:g="http://dh.tamu.edu/ns/fabulator/grammar/1.0#">
161
+ <g:token g:name="LETTER" g:matches="[:alpha:]" g:mode="normal"/>
162
+ <g:token g:name="NUMBER" g:matches="[:digit:]" />
163
+ <g:token g:name="LETTER" g:matches="[:upper:]" g:mode="upper"/>
164
+ <g:token g:name="LETTER" g:matches="[:lower:]" g:mode="lower"/>
165
+ <g:rule g:name="something">
166
+ <g:when g:matches="[mode normal] LETTER NUMBER [mode upper] LETTER" />
167
+ </g:rule>
168
+ </g:grammar>
169
+ """
170
+ Then "something" should parse "a0A"
171
+ And "something" should not parse "a0a"
172
+
173
+ @context
174
+ Scenario: Parsing a grammar xml definition
175
+ Given a context
176
+ Given the grammar
177
+ """
178
+ <g:grammar xmlns:g="http://dh.tamu.edu/ns/fabulator/grammar/1.0#">
179
+ <g:context g:mode="normal">
180
+ <g:token g:name="LETTER" g:matches="[:alpha:]" />
181
+ </g:context>
182
+ <g:token g:name="NUMBER" g:matches="[:digit:]" />
183
+ <g:token g:name="LETTER" g:matches="[:upper:]" g:mode="upper"/>
184
+ <g:token g:name="LETTER" g:matches="[:lower:]" g:mode="lower"/>
185
+ <g:rule g:name="something">
186
+ <g:when g:matches="^^ [mode normal] LETTER NUMBER [mode upper] LETTER" />
187
+ </g:rule>
188
+ </g:grammar>
189
+ """
190
+ Then "something" should match "a0A"
191
+ And "something" should not match "a0a"
192
+ And "something" should not match "aa0A"
@@ -19,7 +19,6 @@ end
19
19
  Given 'a context' do
20
20
  @context ||= Fabulator::Expr::Context.new
21
21
  @parser ||= Fabulator::Expr::Parser.new
22
- @grammar_parser ||= Fabulator::Grammar::Parser.new
23
22
  end
24
23
 
25
24
  Given /the prefix (\S+) as "([^"]+)"/ do |p,h|
@@ -72,7 +71,8 @@ Then /I should get the type (.*)/ do |t|
72
71
  end
73
72
 
74
73
  Then /I should get (\d+) items?/ do |count|
75
- @result.length.should == count
74
+ #puts "result types: #{@result.collect{|r| r.class.name}.join(', ')}"
75
+ @result.size.should == count
76
76
  end
77
77
 
78
78
  Then /item (\d+) should be (\[.*\])/ do |i,t|
@@ -2,13 +2,30 @@ require 'yaml'
2
2
 
3
3
  When /^I parse the regex \((.*)\)$/ do |regex|
4
4
  @context ||= Fabulator::Expr::Context.new
5
- @grammar_parser ||= Fabulator::Grammar::Parser.new
6
- @regex = @grammar_parser.parse(regex, @context)
5
+ @grammar_parser ||= Fabulator::Grammar::TokenParser.new
6
+ @rule_parser ||= Fabulator::Grammar::RuleParser.new
7
+ @regex = @grammar_parser.parse(regex)
7
8
  # puts YAML::dump(r)
8
9
  # puts @regex.to_regex
9
10
  # pending # express the regexp above with the code you wish you had
10
11
  end
11
12
 
13
+ Then /^I can parse the rule \((.*)\)$/ do |rule|
14
+ @context ||= Fabulator::Expr::Context.new
15
+ @grammar_parser ||= Fabulator::Grammar::TokenParser.new
16
+ @rule_parser ||= Fabulator::Grammar::RuleParser.new
17
+ @rule = @rule_parser.parse(rule)
18
+ @rule.should_not == nil
19
+ end
20
+
21
+ Then /^I can not parse the rule \((.*)\)$/ do |rule|
22
+ @context ||= Fabulator::Expr::Context.new
23
+ @grammar_parser ||= Fabulator::Grammar::TokenParser.new
24
+ @rule_parser ||= Fabulator::Grammar::RuleParser.new
25
+ @rule = (@rule_parser.parse(rule) rescue nil)
26
+ @rule.should == nil
27
+ end
28
+
12
29
  Then /^it should match "(.*)"$/ do |str|
13
30
  str.should =~ @regex.to_regex
14
31
  end
@@ -16,3 +33,30 @@ end
16
33
  Then /^it should not match "(.*)"$/ do |str|
17
34
  str.should_not =~ @regex.to_regex
18
35
  end
36
+
37
+ Then /^"(.*)" should match "(.*)"$/ do |nom, str|
38
+ ret = @grammar.match(@context, nom, str)
39
+ # puts YAML::dump(ret)
40
+ ret.should == true
41
+ end
42
+
43
+ Then /^"(.*)" should parse "(.*)"$/ do |nom, str|
44
+ ret = @grammar.parse(@context, nom, str)
45
+ #puts YAML::dump(ret)
46
+ ret.should_not == nil
47
+
48
+ @context.root = @context.root.anon_node(nil)
49
+ if !ret.nil?
50
+ @context.merge_data(ret)
51
+ @context.root.roots['data'] = @context.root
52
+ #puts YAML::dump(@context.root.to_h)
53
+ end
54
+ end
55
+
56
+ Then /^"(.*)" should not parse "(.*)"$/ do |nom, str|
57
+ @grammar.parse(@context, nom, str).should == nil
58
+ end
59
+
60
+ Then /^"(.*)" should not match "(.*)"$/ do |nom, str|
61
+ @grammar.match(@context, nom, str).should == false
62
+ end
@@ -1,23 +1,12 @@
1
- Given /the statemachine/ do |doc_xml|
1
+ Given /the grammar/ do |doc_xml|
2
2
  @context ||= Fabulator::Expr::Context.new
3
3
 
4
- if @sm.nil?
5
- @sm = Fabulator::Core::StateMachine.new.compile_xml(doc_xml)
4
+ if @grammar.nil?
5
+ @grammar = Fabulator::Grammar::Actions::Grammar.new.compile_xml(doc_xml, @context)
6
6
  else
7
- @sm.compile_xml(doc_xml)
7
+ @grammar.compile_xml(doc_xml, @context)
8
8
  end
9
- @sm.init_context(@context)
10
- end
11
9
 
12
- When /I run it with the following params:/ do |param_table|
13
- params = { }
14
- param_table.hashes.each do |hash|
15
- params[hash['key']] = hash['value']
16
- end
17
- @sm.run(params)
18
- #puts YAML::dump(@sm)
10
+ # puts YAML::dump(@grammar)
19
11
  end
20
12
 
21
- Then /it should be in the '(.*)' state/ do |s|
22
- @sm.state.should == s
23
- end
@@ -1,6 +1,7 @@
1
1
  # This file makes it possible to install RubyCAS-Client as a Rails plugin.
2
2
 
3
3
  $: << File.expand_path(File.dirname(__FILE__))+'/../../lib'
4
+ $: << File.expand_path(File.dirname(__FILE__))+'/../../../fabulator/lib'
4
5
 
5
6
  require 'fabulator'
6
7
  require 'fabulator/grammar'
@@ -0,0 +1 @@
1
+ require 'fabulator/grammar'
@@ -1,12 +1,21 @@
1
- require 'fabulator/grammar/parser'
1
+ require 'fabulator/grammar/rule_parser'
2
+ require 'fabulator/grammar/token_parser'
2
3
  require 'fabulator/grammar/actions'
3
- require 'fabulator/grammar/expr/rules'
4
- require 'fabulator/grammar/expr/rule'
4
+ require 'fabulator/grammar/expr/token'
5
+ require 'fabulator/grammar/expr/token_alternative'
5
6
  require 'fabulator/grammar/expr/rule_ref'
6
7
  require 'fabulator/grammar/expr/text'
7
8
  require 'fabulator/grammar/expr/sequence'
9
+ require 'fabulator/grammar/expr/set_skip'
8
10
  require 'fabulator/grammar/expr/char_set'
9
11
  require 'fabulator/grammar/expr/any'
12
+ require 'fabulator/grammar/expr/look_ahead'
13
+ require 'fabulator/grammar/cursor'
14
+ require 'fabulator/grammar/expr/rule'
15
+ require 'fabulator/grammar/expr/rule_mode'
16
+ require 'fabulator/grammar/expr/rule_alternative'
17
+ require 'fabulator/grammar/expr/rule_sequence'
18
+ require 'fabulator/grammar/expr/anchor'
10
19
 
11
20
  module Fabulator
12
21
  module Grammar
@@ -1,22 +1,32 @@
1
- #require 'fabulator/grammar/actions/grammar'
2
1
 
3
2
  module Fabulator
4
3
  GRAMMAR_NS = "http://dh.tamu.edu/ns/fabulator/grammar/1.0#"
4
+
5
+ require 'fabulator/grammar/actions/grammar'
6
+ require 'fabulator/grammar/actions/context'
7
+ require 'fabulator/grammar/actions/rule'
8
+ require 'fabulator/grammar/actions/token'
9
+ require 'fabulator/grammar/actions/when'
10
+
5
11
  module Grammar
6
12
  module Actions
7
- class Lib
8
- include Fabulator::ActionLib
9
-
13
+ class Lib < Fabulator::TagLib
10
14
  register_namespace GRAMMAR_NS
11
15
 
12
- #action 'grammar', Grammar
16
+ structural 'grammar', Grammar
17
+ structural 'context', Context
18
+ structural 'rule', Rule
19
+ structural 'token', Token
20
+ structural 'when', When
21
+
22
+ # action 'result', Result
13
23
 
14
24
  ## reference a grammar name
15
25
  function 'match' do |ctx, args|
16
26
  # first arg is the regex or <rule name>
17
27
  regex = args[0].to_s
18
- parser = Fabulator::Grammar::Parser.new
19
- compiled = parser.parse(regex, ctx).to_regex
28
+ parser = Fabulator::Grammar::TokenParser.new
29
+ compiled = parser.parse(regex).to_regex
20
30
  if args[1].is_a?(Array)
21
31
  args[1].collect{|a|
22
32
  if a.to_s =~ compiled
@@ -0,0 +1,18 @@
1
+ module Fabulator
2
+ module Grammar
3
+ module Actions
4
+ class Context < Fabulator::Structural
5
+
6
+ namespace GRAMMAR_NS
7
+
8
+ attribute :mode, :default => :default, :static => true
9
+
10
+ attr_accessor :mode, :tokens, :rules
11
+
12
+ contains :rule
13
+ contains :token
14
+
15
+ end
16
+ end
17
+ end
18
+ end
@@ -0,0 +1,76 @@
1
+ module Fabulator
2
+ module Grammar
3
+ module Actions
4
+ class Grammar < Fabulator::Structural
5
+
6
+ namespace GRAMMAR_NS
7
+
8
+ contains :rule
9
+ contains :token
10
+ contains :context
11
+
12
+ has_actions
13
+
14
+ def compile_xml(xml, context = nil)
15
+ super
16
+
17
+ @modes = { :default => { } }
18
+
19
+ @contexts.each do |c|
20
+ c.tokens.each do |token|
21
+ self.add_rule(token, c.mode)
22
+ end
23
+ c.rules.each do |rule|
24
+ self.add_rule(rule, c.mode)
25
+ end
26
+ end
27
+ @tokens.each do |token|
28
+ self.add_rule(token)
29
+ end
30
+ @rules.each do |rule|
31
+ self.add_rule(rule)
32
+ end
33
+
34
+ @tokens = nil
35
+ @rules = nil
36
+
37
+ self
38
+ end
39
+
40
+ def add_rule(r, m = :default)
41
+ return if r.nil?
42
+ mode = ((r.mode.nil? || r.mode.to_sym == :default) ? m : r.mode).to_sym
43
+ @modes[mode] ||= { }
44
+ @modes[mode][r.name.to_sym] = r
45
+ end
46
+
47
+ def get_rule(m, nom)
48
+ @modes[m.to_sym].nil? ? nil : @modes[m.to_sym][nom.to_sym]
49
+ end
50
+
51
+ def parse(ctx, nom, s)
52
+ cursor = Fabulator::Grammar::Cursor.new(self, ctx, s)
53
+ cursor.anchored = true
54
+ ret = do_parse(nom, cursor)
55
+ cursor.do_skip
56
+ cursor.eof ? ret : nil
57
+ end
58
+
59
+ def match(ctx, nom, s)
60
+ cursor = Fabulator::Grammar::Cursor.new(self, ctx, s)
61
+ !do_parse(nom, cursor).nil?
62
+ end
63
+
64
+ protected
65
+
66
+ def do_parse(nom, cursor)
67
+ obj = get_rule(:default, nom)
68
+ return nil if obj.nil?
69
+
70
+ obj.parse(cursor)
71
+ end
72
+
73
+ end
74
+ end
75
+ end
76
+ end