fabulator-grammar 0.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/README.markdown +73 -0
- data/Rakefile +63 -0
- data/VERSION +1 -0
- data/features/grammar.feature +88 -0
- data/features/step_definitions/expression_steps.rb +103 -0
- data/features/step_definitions/grammar_steps.rb +18 -0
- data/features/step_definitions/template_steps.rb +25 -0
- data/features/step_definitions/xml_steps.rb +23 -0
- data/features/support/env.rb +7 -0
- data/lib/fabulator/grammar.rb +16 -0
- data/lib/fabulator/grammar/actions.rb +65 -0
- data/lib/fabulator/grammar/expr/any.rb +7 -0
- data/lib/fabulator/grammar/expr/char_set.rb +45 -0
- data/lib/fabulator/grammar/expr/rule.rb +36 -0
- data/lib/fabulator/grammar/expr/rule_ref.rb +13 -0
- data/lib/fabulator/grammar/expr/rules.rb +15 -0
- data/lib/fabulator/grammar/expr/sequence.rb +26 -0
- data/lib/fabulator/grammar/expr/text.rb +11 -0
- data/lib/fabulator/grammar/parser.rb +548 -0
- data/regex.racc +183 -0
- metadata +101 -0
data/regex.racc
ADDED
|
@@ -0,0 +1,183 @@
|
|
|
1
|
+
# Racc grammar for the regular-expression-like pattern language.
# Terminals (LT, GT, PIPE, STAR, NCNAME, INTEGER, TEXT, ...) are produced by
# next_token in the inner section; each action builds a node from
# Fabulator::Grammar::Expr.
class Fabulator::Grammar::Parser

  start rules

rule
  # A pattern is one or more alternatives separated by PIPE ('|'), collected
  # into a single Rules object.
  rules
    : anchored_rule
        { result = Fabulator::Grammar::Expr::Rules.new; result.add_alternative(val[0]) }
    | rules PIPE anchored_rule
        { result = val[0]; result.add_alternative(val[2]) }

  # An alternative may be preceded by CARET and/or followed by DOLLAR; the
  # Rule is told via anchor_start / anchor_end.
  anchored_rule
    : rule
        { result = val[0] }
    | CARET rule
        { result = val[1]; result.anchor_start }
    | rule DOLLAR
        { result = val[0]; result.anchor_end }
    | CARET rule DOLLAR
        { result = val[1]; result.anchor_start; result.anchor_end }

  # A rule is a (possibly empty) run of sequences. The first alternative is
  # the empty production and creates the Rule that later sequences are added to.
  rule
    :
        { result = Fabulator::Grammar::Expr::Rule.new; }
    | rule sequence
        { result = val[0]; result.add_sequence(val[1]); }

  # One matchable item, optionally followed by a repetition qualifier.
  sequence
    : sub_sequence sequence_qualifiers
        { result = Fabulator::Grammar::Expr::Sequence.new(val[0], val[1]) }
    | sub_sequence
        { result = Fabulator::Grammar::Expr::Sequence.new(val[0]) }

  # The matchable items: <name> rule reference, literal text, '.', a
  # parenthesised group, and [..] / [^..] character sets.
  sub_sequence
    : LT qname GT
        { result = Fabulator::Grammar::Expr::RuleRef.new(val[1]) }
    | text
        { result = Fabulator::Grammar::Expr::Text.new(val[0]) }
    | DOT
        { result = Fabulator::Grammar::Expr::Any.new }
    | LP rules RP
        { result = val[1] }
    | LB text RB
        { result = Fabulator::Grammar::Expr::CharSet.new(val[1]) }
    | LB CARET text RB
        { result = Fabulator::Grammar::Expr::CharSet.new(val[2]); result.inverted }

  # Literal text is whatever word-like token the tokenizer produced.
  text
    : qname
        { result = val[0] }
    | TEXT
        { result = val[0] }
    | INTEGER
        { result = val[0] }

  # A plain name, or two names joined by a colon (rebuilt as "a:b").
  qname
    : NCNAME
        { result = val[0] }
    | NCNAME COLON NCNAME
        { result = val[0] + ':' + val[2] }

  # Repetition qualifiers, encoded as arrays of symbols and bounds. A trailing
  # QUESTION adds :min (presumably "match as little as possible" -- confirm
  # against Expr::Sequence).
  # NOTE(review): the open-ended "{n,}" forms pass the lower bound through as
  # the raw INTEGER token value (no to_i) with '' as the upper bound, and the
  # two range forms followed by QUESTION put :min first whereas the other
  # forms put it last. Confirm Expr::Sequence expects exactly these shapes.
  sequence_qualifiers
    : STAR
        { result = [ :zero_or_more ] }
    | STAR QUESTION
        { result = [ :zero_or_more, :min ] }
    | PLUS
        { result = [ :one_or_more ] }
    | PLUS QUESTION
        { result = [ :one_or_more, :min ] }
    | QUESTION
        { result = [ :zero_or_one ] }
    | QUESTION QUESTION
        { result = [ :zero_or_one, :min ] }
    | LC INTEGER RC
        { result = [ :exact, val[1].to_i ] }
    | LC INTEGER COMMA INTEGER RC
        { result = [ :range, val[1].to_i, val[3].to_i ] }
    | LC INTEGER COMMA RC
        { result = [ :range, val[1], '' ] }
    | LC INTEGER COMMA RC QUESTION
        { result = [ :min, :range, val[1], '' ] }
    | LC INTEGER COMMA INTEGER RC QUESTION
        { result = [ :min, :range, val[1].to_i, val[3].to_i ] }
|
|
45
|
+
|
|
46
|
+
|
|
47
|
+
---- inner
|
|
48
|
+
require 'fabulator/grammar'
|
|
49
|
+
|
|
50
|
+
# Parses a pattern string and returns whatever do_parse returns.
#
# t   - the pattern source; nil is treated as an empty pattern.
# ctx - caller-supplied context, stored in @context.
#
# Resets all tokenizer state (@curpos, @line, @col, @last_token) so the same
# parser instance can be reused for several patterns.
def parse(t, ctx)
  # to_s keeps next_token from failing on nil.length when no pattern is given.
  @source = t.to_s
  @curpos = 0
  @context = ctx
  @line = 0
  # on_error reports @col, so give it a defined value from the start.
  @col = 0

  # Debug tracing used to be forced on here. Leave it off unless something
  # has already switched it on for this instance.
  @yydebug ||= false

  @last_token = nil

  do_parse
end
|
|
62
|
+
|
|
63
|
+
# Error hook: turns a parse failure into a Fabulator::Grammar::ParserError.
#
# args - the arguments supplied by the caller; only the second one is used,
#        and it is quoted in the message as the text that could not be parsed.
#
# The message reports @line as a 1-based line number together with @col.
def on_error(*args)
  offending = args[1]
  message = "unable to parse '#{offending}' near line #{@line + 1}, column #{@col}"
  raise Fabulator::Grammar::ParserError.new(message)
end
|
|
66
|
+
|
|
67
|
+
# Shared patterns for names.
#   :ncname - a letter or underscore followed by letters, digits, '-', '_' or '.'
#   :qname  - an ncname with an optional "ncname:" prefix, captured as group 1
# next_token uses :ncname to decide whether a bare word is an NCNAME token.
ncname_pattern = %r{(?:[a-zA-Z_][-a-zA-Z0-9_.]*)}

@@regex = {
  :ncname => ncname_pattern,
  :qname => %r{((?:#{ncname_pattern}:)?#{ncname_pattern})}
}
|
|
72
|
+
|
|
73
|
+
# Single-character operators and the terminal symbol each one maps to.
PUNCTUATION_TOKENS = {
  '<' => :LT,    '>' => :GT,
  '[' => :LB,    ']' => :RB,
  '(' => :LP,    ')' => :RP,
  '{' => :LC,    '}' => :RC,
  ':' => :COLON, ',' => :COMMA, '|' => :PIPE,
  '*' => :STAR,  '+' => :PLUS,  '.' => :DOT, '?' => :QUESTION,
  '$' => :DOLLAR, '^' => :CARET
}.freeze

# Tokenizer: returns the next token of @source starting at @curpos.
#
# Returns a two-element array [symbol, text], or [false, false] once the
# input is exhausted. The token is also stored in @token.
#
# Whitespace and (: ... :) comments in front of the token are skipped.
# @col is reset on every call, so it only counts the characters skipped
# since the last newline in front of the current token.
#
# Raises Fabulator::Grammar::ParserError when the input ends with a
# backslash that has nothing left to escape.
def next_token
  @token = nil
  @col = 0
  skip_ignorable

  if @curpos >= @source.length
    @last_token = nil
    return [ false, false ]
  end

  char = @source[@curpos, 1]
  if PUNCTUATION_TOKENS.key?(char)
    @token = [ PUNCTUATION_TOKENS[char], char ]
    @curpos += 1
  else
    @token = scan_text_token
  end

  @token
end

# Skips any mix of whitespace and comments in front of the next token.
def skip_ignorable
  loop do
    skip_whitespace
    break unless @source[@curpos, 2] == '(:'
    skip_comment
  end
end

# Advances over whitespace, keeping @line and @col up to date.
def skip_whitespace
  while @curpos < @source.length && @source[@curpos, 1] =~ /\s/
    if @source[@curpos, 1] == "\n"
      @line += 1
      @col = 0
    else
      @col += 1
    end
    @curpos += 1
  end
end

# Advances past one (: ... :) comment; @curpos must be on its opening "(:".
# Comments nest. An unterminated comment swallows the rest of the input.
def skip_comment
  depth = 1
  @curpos += 2
  @col += 2
  while depth > 0 && @curpos < @source.length
    if @source[@curpos, 2] == '(:'
      depth += 1
      @curpos += 1
      @col += 1
    end
    # Checked after the step above on purpose: the cursor movement is kept
    # identical to the historical scanner, including for overlapping "(:)".
    if @source[@curpos, 2] == ':)'
      depth -= 1
      @curpos += 1
      @col += 1
    end
    # Count lines inside comments too, so later error messages point at
    # the right line.
    if @source[@curpos, 1] == "\n"
      @line += 1
      @col = 0
    else
      @col += 1
    end
    @curpos += 1
  end
end

# Scans the longest run of ordinary and backslash-escaped characters at
# @curpos, advances @curpos past what it consumed and returns the token:
# INTEGER for all digits, NCNAME for a name, TEXT for anything else.
#
# When the run is directly followed by a quantifier (* ? + {), the
# quantifier must bind to the last character only. In that case everything
# except the last character is returned as TEXT and the last character is
# left for the next call.
def scan_text_token
  raw = @source[@curpos..-1][/\A(?:\\.|[^ \$\^\[\]<>\{\}\(\):,|*+.?])+/]

  # Split into units of one literal character each: either "\x" or "x".
  # Working on units keeps an escape and its character together, so the
  # cursor can never stop between a backslash and the character it escapes.
  units = raw.scan(/\\.|./m)

  if units.last == '\\'
    raise Fabulator::Grammar::ParserError.new(
      "unable to parse '\\' near line #{@line + 1}, column #{@col}")
  end

  if @source[@curpos + raw.length, 1] =~ /[*?+\{]/
    units.pop if units.size > 1
    token = [ :TEXT, unescape_units(units) ]
    @curpos += units.join.length
  else
    text = unescape_units(units)
    token = case text
            when /^\d+$/ then [ :INTEGER, text ]
            when /^#{@@regex[:ncname]}$/ then [ :NCNAME, text ]
            else [ :TEXT, text ]
            end
    @curpos += raw.length
  end

  token
end

# Joins scanned units into literal text, dropping each escaping backslash.
def unescape_units(units)
  units.map { |unit| unit.length > 1 ? unit[1..-1] : unit }.join
end

private :skip_ignorable, :skip_whitespace, :skip_comment,
        :scan_text_token, :unescape_units
|
metadata
ADDED
|
@@ -0,0 +1,101 @@
|
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
|
2
|
+
name: fabulator-grammar
|
|
3
|
+
version: !ruby/object:Gem::Version
|
|
4
|
+
hash: 29
|
|
5
|
+
prerelease: false
|
|
6
|
+
segments:
|
|
7
|
+
- 0
|
|
8
|
+
- 0
|
|
9
|
+
- 1
|
|
10
|
+
version: 0.0.1
|
|
11
|
+
platform: ruby
|
|
12
|
+
authors:
|
|
13
|
+
- James Smith
|
|
14
|
+
autorequire:
|
|
15
|
+
bindir: bin
|
|
16
|
+
cert_chain: []
|
|
17
|
+
|
|
18
|
+
date: 2010-08-11 00:00:00 +00:00
|
|
19
|
+
default_executable:
|
|
20
|
+
dependencies:
|
|
21
|
+
- !ruby/object:Gem::Dependency
|
|
22
|
+
name: fabulator
|
|
23
|
+
prerelease: false
|
|
24
|
+
requirement: &id001 !ruby/object:Gem::Requirement
|
|
25
|
+
none: false
|
|
26
|
+
requirements:
|
|
27
|
+
- - ">="
|
|
28
|
+
- !ruby/object:Gem::Version
|
|
29
|
+
hash: 29
|
|
30
|
+
segments:
|
|
31
|
+
- 0
|
|
32
|
+
- 0
|
|
33
|
+
- 1
|
|
34
|
+
version: 0.0.1
|
|
35
|
+
type: :runtime
|
|
36
|
+
version_requirements: *id001
|
|
37
|
+
description: The grammar Fabulator extension provides regular expression support.
|
|
38
|
+
email: jgsmith@tamu.edu
|
|
39
|
+
executables: []
|
|
40
|
+
|
|
41
|
+
extensions: []
|
|
42
|
+
|
|
43
|
+
extra_rdoc_files:
|
|
44
|
+
- README.markdown
|
|
45
|
+
files:
|
|
46
|
+
- README.markdown
|
|
47
|
+
- Rakefile
|
|
48
|
+
- VERSION
|
|
49
|
+
- features/grammar.feature
|
|
50
|
+
- features/step_definitions/expression_steps.rb
|
|
51
|
+
- features/step_definitions/grammar_steps.rb
|
|
52
|
+
- features/step_definitions/template_steps.rb
|
|
53
|
+
- features/step_definitions/xml_steps.rb
|
|
54
|
+
- features/support/env.rb
|
|
55
|
+
- lib/fabulator/grammar.rb
|
|
56
|
+
- lib/fabulator/grammar/actions.rb
|
|
57
|
+
- lib/fabulator/grammar/expr/any.rb
|
|
58
|
+
- lib/fabulator/grammar/expr/char_set.rb
|
|
59
|
+
- lib/fabulator/grammar/expr/rule.rb
|
|
60
|
+
- lib/fabulator/grammar/expr/rule_ref.rb
|
|
61
|
+
- lib/fabulator/grammar/expr/rules.rb
|
|
62
|
+
- lib/fabulator/grammar/expr/sequence.rb
|
|
63
|
+
- lib/fabulator/grammar/expr/text.rb
|
|
64
|
+
- lib/fabulator/grammar/parser.rb
|
|
65
|
+
- regex.racc
|
|
66
|
+
has_rdoc: true
|
|
67
|
+
homepage: http://github.com/jgsmith/ruby-fabulator-grammar
|
|
68
|
+
licenses: []
|
|
69
|
+
|
|
70
|
+
post_install_message:
|
|
71
|
+
rdoc_options:
|
|
72
|
+
- --charset=UTF-8
|
|
73
|
+
require_paths:
|
|
74
|
+
- lib
|
|
75
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
|
76
|
+
none: false
|
|
77
|
+
requirements:
|
|
78
|
+
- - ">="
|
|
79
|
+
- !ruby/object:Gem::Version
|
|
80
|
+
hash: 3
|
|
81
|
+
segments:
|
|
82
|
+
- 0
|
|
83
|
+
version: "0"
|
|
84
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
|
85
|
+
none: false
|
|
86
|
+
requirements:
|
|
87
|
+
- - ">="
|
|
88
|
+
- !ruby/object:Gem::Version
|
|
89
|
+
hash: 3
|
|
90
|
+
segments:
|
|
91
|
+
- 0
|
|
92
|
+
version: "0"
|
|
93
|
+
requirements: []
|
|
94
|
+
|
|
95
|
+
rubyforge_project:
|
|
96
|
+
rubygems_version: 1.3.7
|
|
97
|
+
signing_key:
|
|
98
|
+
specification_version: 3
|
|
99
|
+
summary: Grammar extension to Fabulator.
|
|
100
|
+
test_files: []
|
|
101
|
+
|