fabulator-grammar 0.0.1 → 0.0.3

Sign up to get free protection for your applications and to get access to all the features.
Files changed (37) hide show
  1. data/History.txt +22 -0
  2. data/Rakefile +3 -1
  3. data/VERSION +1 -1
  4. data/features/grammar.feature +116 -12
  5. data/features/step_definitions/expression_steps.rb +2 -2
  6. data/features/step_definitions/grammar_steps.rb +46 -2
  7. data/features/step_definitions/xml_steps.rb +5 -16
  8. data/features/support/env.rb +1 -0
  9. data/lib/fabulator-grammar.rb +1 -0
  10. data/lib/fabulator/grammar.rb +12 -3
  11. data/lib/fabulator/grammar/actions.rb +17 -7
  12. data/lib/fabulator/grammar/actions/context.rb +18 -0
  13. data/lib/fabulator/grammar/actions/grammar.rb +76 -0
  14. data/lib/fabulator/grammar/actions/rule.rb +51 -0
  15. data/lib/fabulator/grammar/actions/token.rb +27 -0
  16. data/lib/fabulator/grammar/actions/when.rb +35 -0
  17. data/lib/fabulator/grammar/cursor.rb +118 -0
  18. data/lib/fabulator/grammar/expr/anchor.rb +28 -0
  19. data/lib/fabulator/grammar/expr/char_set.rb +67 -18
  20. data/lib/fabulator/grammar/expr/look_ahead.rb +44 -0
  21. data/lib/fabulator/grammar/expr/rule.rb +33 -28
  22. data/lib/fabulator/grammar/expr/rule_alternative.rb +45 -0
  23. data/lib/fabulator/grammar/expr/rule_mode.rb +16 -0
  24. data/lib/fabulator/grammar/expr/rule_ref.rb +15 -4
  25. data/lib/fabulator/grammar/expr/rule_sequence.rb +59 -0
  26. data/lib/fabulator/grammar/expr/sequence.rb +7 -1
  27. data/lib/fabulator/grammar/expr/set_skip.rb +16 -0
  28. data/lib/fabulator/grammar/expr/text.rb +8 -0
  29. data/lib/fabulator/grammar/expr/{rules.rb → token.rb} +12 -1
  30. data/lib/fabulator/grammar/expr/token_alternative.rb +42 -0
  31. data/lib/fabulator/grammar/rule_parser.rb +667 -0
  32. data/lib/fabulator/grammar/token_parser.rb +733 -0
  33. data/rules.racc +249 -0
  34. data/tokens.racc +257 -0
  35. metadata +29 -12
  36. data/lib/fabulator/grammar/parser.rb +0 -548
  37. data/regex.racc +0 -183
data/regex.racc DELETED
@@ -1,183 +0,0 @@
1
- class Fabulator::Grammar::Parser
2
-
3
- start rules
4
-
5
- rule
6
- rules: anchored_rule { result = Fabulator::Grammar::Expr::Rules.new; result.add_alternative(val[0]) }
7
- | rules PIPE anchored_rule { result = val[0]; result.add_alternative(val[2]) }
8
-
9
- anchored_rule: rule { result = val[0] }
10
- | CARET rule { result = val[1]; result.anchor_start }
11
- | rule DOLLAR { result = val[0]; result.anchor_end }
12
- | CARET rule DOLLAR { result = val[1]; result.anchor_start; result.anchor_end }
13
-
14
- rule: { result = Fabulator::Grammar::Expr::Rule.new; }
15
- | rule sequence { result = val[0]; result.add_sequence(val[1]); }
16
-
17
- sequence: sub_sequence sequence_qualifiers { result = Fabulator::Grammar::Expr::Sequence.new(val[0], val[1]) }
18
- | sub_sequence { result = Fabulator::Grammar::Expr::Sequence.new(val[0]) }
19
-
20
- sub_sequence: LT qname GT { result = Fabulator::Grammar::Expr::RuleRef.new(val[1]) }
21
- | text { result = Fabulator::Grammar::Expr::Text.new(val[0]) }
22
- | DOT { result = Fabulator::Grammar::Expr::Any.new }
23
- | LP rules RP { result = val[1] }
24
- | LB text RB { result = Fabulator::Grammar::Expr::CharSet.new(val[1]) }
25
- | LB CARET text RB { result = Fabulator::Grammar::Expr::CharSet.new(val[2]); result.inverted }
26
-
27
- text: qname { result = val[0] }
28
- | TEXT { result = val[0] }
29
- | INTEGER { result = val[0] }
30
-
31
- qname: NCNAME { result = val[0] }
32
- | NCNAME COLON NCNAME { result = val[0] + ':' + val[2] }
33
-
34
- sequence_qualifiers: STAR { result = [ :zero_or_more ] }
35
- | STAR QUESTION { result = [ :zero_or_more, :min ] }
36
- | PLUS { result = [ :one_or_more ] }
37
- | PLUS QUESTION { result = [ :one_or_more, :min ] }
38
- | QUESTION { result = [ :zero_or_one ] }
39
- | QUESTION QUESTION { result = [ :zero_or_one, :min ] }
40
- | LC INTEGER RC { result = [ :exact, val[1].to_i ] }
41
- | LC INTEGER COMMA INTEGER RC { result = [ :range, val[1].to_i, val[3].to_i ] }
42
- | LC INTEGER COMMA RC { result = [ :range, val[1], '' ] }
43
- | LC INTEGER COMMA RC QUESTION { result = [ :min, :range, val[1], '' ] }
44
- | LC INTEGER COMMA INTEGER RC QUESTION { result = [ :min, :range, val[1].to_i, val[3].to_i ] }
45
-
46
-
47
- ---- inner
48
- require 'fabulator/grammar'
49
-
50
- def parse(t, ctx)
51
- @source = t
52
- @curpos = 0
53
- @context = ctx
54
- @line = 0
55
-
56
- @yydebug = true
57
-
58
- @last_token = nil
59
-
60
- do_parse
61
- end
62
-
63
- def on_error(*args)
64
- raise Fabulator::Grammar::ParserError.new("unable to parse '#{args[1]}' near line #{@line + 1}, column #{@col}")
65
- end
66
-
67
- @@regex = {
68
- :ncname => %r{(?:[a-zA-Z_][-a-zA-Z0-9_.]*)}
69
- }
70
-
71
- @@regex[:qname] = %r{((?:#{@@regex[:ncname]}:)?#{@@regex[:ncname]})}
72
-
73
- def next_token
74
- @token = nil
75
- white_space = 0
76
- new_line = 0
77
- @col = 0
78
- while @curpos < @source.length && @source[@curpos..@curpos] =~ /\s/ do
79
- if @source[@curpos..@curpos] =~ /\n/
80
- new_line = new_line + 1
81
- @line = @line + 1
82
- @col = 0
83
- else
84
- @col = @col + 1
85
- end
86
- @curpos = @curpos + 1
87
- white_space = white_space + 1
88
- end
89
-
90
- # skip comments delimited by (: :)
91
- # comments can be nested
92
- # these are XPath 2.0 comments
93
- #
94
- if @curpos < @source.length && @source[@curpos..@curpos+1] == '(:'
95
- comment_depth = 1
96
- @curpos = @curpos + 2
97
- @col = @col + 2
98
- while comment_depth > 0 && @curpos < @source.length
99
- if @source[@curpos..@curpos+1] == '(:'
100
- comment_depth = comment_depth + 1
101
- @curpos = @curpos + 1
102
- @col = @col + 1
103
- end
104
- if @source[@curpos..@curpos+1] == ':)'
105
- comment_depth = comment_depth - 1
106
- @curpos = @curpos + 1
107
- @col = @col + 1
108
- end
109
- @curpos = @curpos + 1
110
- @col = @col + 1
111
- end
112
- white_space = white_space + 1
113
- end
114
-
115
- while @curpos < @source.length && @source[@curpos..@curpos] =~ /\s/ do
116
- if @source[@curpos..@curpos] =~ /\n/
117
- new_line = new_line + 1
118
- @line = @line + 1
119
- @col = 0
120
- else
121
- @col = @col + 1
122
- end
123
- @curpos = @curpos + 1
124
- white_space = white_space + 1
125
- end
126
-
127
- if @curpos >= @source.length
128
- @last_token = nil
129
- return [ false, false ]
130
- end
131
-
132
- case @source[@curpos..@curpos]
133
- when '<': @token = [ :LT, '<' ]
134
- when '>': @token = [ :GT, '>' ]
135
- when '[': @token = [ :LB, '[' ]
136
- when ']': @token = [ :RB, ']' ]
137
- when '(': @token = [ :LP, '(' ]
138
- when ')': @token = [ :RP, ')' ]
139
- when '{': @token = [ :LC, '{' ]
140
- when '}': @token = [ :RC, '}' ]
141
- when ':': @token = [ :COLON, ':' ]
142
- when ',': @token = [ :COMMA, ',' ]
143
- when '|': @token = [ :PIPE, '|' ]
144
- when '*': @token = [ :STAR, '*' ]
145
- when '+': @token = [ :PLUS, '+' ]
146
- when '.': @token = [ :DOT, '.' ]
147
- when '?': @token = [ :QUESTION, '?' ]
148
- when '$': @token = [ :DOLLAR, '$' ]
149
- when '^': @token = [ :CARET, '^' ]
150
- end
151
-
152
- if @token.nil?
153
- # get longest sequence of non-special characters
154
- # if it's all digits, report INTEGER
155
- # if it's a qname, report QNAME
156
- # otherwise, report TEXT
157
- @source[@curpos..@source.length-1] =~ /^(((\\.)|[^ \$\^\[\]<>\{\}\(\):,|*+.?])+)*/
158
- text = $1
159
- bits = text.split(/\\/)
160
- text = bits.join('')
161
- @curpos += bits.size - 1
162
- if text.length > 0
163
- if @source[@curpos+text.length .. @curpos+text.length] =~ /[*?+\{]/
164
- text = text[0..text.length-2]
165
- @token = [ :TEXT, text ]
166
- else
167
- case text
168
- when /^\d+$/: @token = [ :INTEGER, text ]
169
- when /^#{@@regex[:ncname]}$/: @token = [ :NCNAME, text ]
170
- else @token = [ :TEXT, text ]
171
- end
172
- end
173
- end
174
- end
175
-
176
- if @token.nil?
177
- puts "Uh oh... we don't know what to do: #{@source[@curpos .. @source.length-1]}"
178
- else
179
- @curpos += @token[1].length
180
- end
181
-
182
- return @token
183
- end