tdp4r 1.4.0 → 1.4.1
Sign up to get free protection for your applications and to get access to all the features.
- data/doc/guide.txt +1 -1
- data/lib/tdp.rb +44 -10
- data/lib/{tdputils.rb → tdp/utils.rb} +0 -0
- data/lib/tdp/xml.rb +184 -0
- data/samples/sample2.rb +1 -1
- data/samples/sample3.rb +1 -1
- data/samples/sample4.rb +1 -1
- data/samples/sample5.rb +1 -1
- data/samples/sample_expr.rb +1 -1
- data/samples/sample_optimize.rb +4 -3
- data/samples/sample_xml.rb +33 -104
- data/test/test_tdp.rb +29 -1
- metadata +5 -3
data/doc/guide.txt
CHANGED
data/lib/tdp.rb
CHANGED
@@ -88,7 +88,6 @@ module TDParser
|
|
88
88
|
|
89
89
|
def recover(buff, ts)
|
90
90
|
buff.each{|b| ts.unshift(b)}
|
91
|
-
buff.clear()
|
92
91
|
end
|
93
92
|
end
|
94
93
|
include BufferUtils
|
@@ -169,7 +168,8 @@ module TDParser
|
|
169
168
|
NegativeParser.new(self)
|
170
169
|
end
|
171
170
|
|
172
|
-
def parse(tokens=nil, &blk)
|
171
|
+
def parse(tokens=nil, buff=nil, &blk)
|
172
|
+
buff ||= TokenBuffer.new()
|
173
173
|
if( blk.nil? )
|
174
174
|
if( tokens.respond_to?(:shift) && tokens.respond_to?(:unshift) )
|
175
175
|
@tokens = tokens
|
@@ -181,7 +181,7 @@ module TDParser
|
|
181
181
|
else
|
182
182
|
@tokens = TokenGenerator.new(&blk)
|
183
183
|
end
|
184
|
-
r = call(@tokens,
|
184
|
+
r = call(@tokens, buff)
|
185
185
|
if( r.nil? )
|
186
186
|
nil
|
187
187
|
else
|
@@ -685,15 +685,15 @@ module TDParser
|
|
685
685
|
end
|
686
686
|
|
687
687
|
class BackrefParser < ReferenceParser
|
688
|
-
attr_reader :
|
688
|
+
attr_reader :label, :equality
|
689
689
|
|
690
|
-
def initialize(
|
691
|
-
@
|
690
|
+
def initialize(label, eqsym)
|
691
|
+
@label = label
|
692
692
|
@equality = eqsym
|
693
693
|
end
|
694
694
|
|
695
695
|
def call(tokens, buff)
|
696
|
-
ys = buff.map[@
|
696
|
+
ys = buff.map[@label]
|
697
697
|
if (ys.nil? || ys.empty?)
|
698
698
|
nil
|
699
699
|
else
|
@@ -702,12 +702,12 @@ module TDParser
|
|
702
702
|
end
|
703
703
|
|
704
704
|
def to_s()
|
705
|
-
"<backref:#{@
|
705
|
+
"<backref:#{@label}>"
|
706
706
|
end
|
707
707
|
|
708
708
|
def ==(r)
|
709
709
|
super(r) &&
|
710
|
-
(@
|
710
|
+
(@label == r.label) &&
|
711
711
|
(@equality == r.equality)
|
712
712
|
end
|
713
713
|
end
|
@@ -735,11 +735,40 @@ module TDParser
|
|
735
735
|
|
736
736
|
def ==(r)
|
737
737
|
super(r) &&
|
738
|
-
(@stack == r.stack) &&
|
738
|
+
(@stack.object_id == r.stack.object_id) &&
|
739
739
|
(@equality == r.equality)
|
740
740
|
end
|
741
741
|
end
|
742
742
|
|
743
|
+
# Parser whose success depends only on a user-supplied block evaluated
# against the buffer's map; it does not read from the token stream itself.
class ConditionParser < Parser
  attr_reader :condition

  # Stores the given block as the condition to evaluate on each call.
  def initialize(&condition)
    @condition = condition
  end

  # Calls the condition with +buff.map+. A truthy result is wrapped as
  # +Sequence[result]+; a falsy result yields nil. +tokens+ is not used here.
  def call(tokens, buff)
    outcome = @condition.call(buff.map)
    return nil unless outcome

    Sequence[outcome]
  end

  # String form embedding the condition object.
  def to_s
    "<condition:#{@condition}>"
  end

  # Equal when the superclass comparison holds and the conditions are equal.
  def ==(r)
    super(r) && @condition == r.condition
  end

  # Always answers false.
  def same?(r)
    false
  end
end
|
771
|
+
|
743
772
|
class StateParser < Parser
|
744
773
|
attr_reader :state
|
745
774
|
|
@@ -809,6 +838,11 @@ module TDParser
|
|
809
838
|
end
|
810
839
|
alias fail fail_rule
|
811
840
|
|
841
|
+
# Builds a ConditionParser from the given block.
# Also available under the shorter name +condition+.
def condition_rule(&b)
  ConditionParser.new(&b)
end
alias condition condition_rule
|
845
|
+
|
812
846
|
def leftrec(*rules, &act)
|
813
847
|
f = Proc.new{|x|
|
814
848
|
x[1].inject(x[0]){|acc,y|
|
File without changes
|
data/lib/tdp/xml.rb
ADDED
@@ -0,0 +1,184 @@
|
|
1
|
+
require 'tdp'
|
2
|
+
require 'rexml/parsers/pullparser'
|
3
|
+
require 'rexml/document'
|
4
|
+
|
5
|
+
module TDPXML
|
6
|
+
module XMLParser
|
7
|
+
# Token generator that yields every event pulled from a REXML base parser.
class XMLTokenGenerator < TDParser::TokenGenerator
  # src is handed unchanged to REXML::Parsers::BaseParser.new.
  def initialize(src)
    @xparser = REXML::Parsers::BaseParser.new(src)
    super() do |g|
      # Forward events one at a time until the underlying parser is exhausted.
      g.yield(@xparser.pull()) while @xparser.has_next?
    end
  end
end
|
18
|
+
|
19
|
+
# Array subclass used as a token pattern: its +===+ compares element-wise
# using each pattern element's own +===+.
class XArray < Array
  # Case-equality against a candidate value.
  #
  # Returns true when the inherited +===+ already accepts +ary+. Otherwise
  # +ary+ must be an Array, and every element of this pattern must +===+
  # the element of +ary+ at the same index. Only the pattern's own indices
  # are inspected, so extra trailing elements of +ary+ are ignored.
  def ===(ary)
    return true if super(ary)
    return false unless ary.is_a?(Array)

    each_with_index.all? { |pattern, idx| pattern === ary[idx] }
  end
end
|
37
|
+
|
38
|
+
# Hash subclass used as a pattern: its +===+ compares value-wise using each
# pattern value's own +===+.
class XHash < Hash
  # Case-equality against a candidate value.
  #
  # Returns true when the inherited +===+ already accepts +h+. Otherwise
  # +h+ must be a Hash, and for every key of this pattern the pattern value
  # must +===+ the value stored in +h+ under the same key. Keys present only
  # in +h+ are ignored.
  def ===(h)
    return true if super(h)
    return false unless h.is_a?(Hash)

    all? { |key, pattern| pattern === h[key] }
  end
end
|
56
|
+
|
57
|
+
# Rule for a token matching [:start_element, name, Hash].
# With the default +name+ any String element name matches.
def start_element(name=String)
  pattern = XArray[:start_element, name, Hash]
  token(pattern)
end
|
60
|
+
|
61
|
+
# Rule for a token matching [:end_element, name].
# With the default +name+ any String element name matches.
def end_element(name=String)
  pattern = XArray[:end_element, name]
  token(pattern)
end
|
64
|
+
|
65
|
+
# Rule for a start tag, a content rule, and the matching end tag.
#
# When a block is given, the content rule is the block's result or empty();
# otherwise it is just empty(). The attached action builds a REXML::Element
# from the start-tag token (name at index 1, attributes at index 2) and
# returns it paired with the second item of the match result.
def element(elem=String, &inner)
  crule = inner ? (inner.call() | empty()) : empty()
  build_node = Proc.new{|x|
    stag = x[0]
    node = REXML::Element.new()
    node.name = stag[1]
    node.attributes.merge!(stag[2])
    [node, x[1]]
  }
  start_element(elem) - crule - end_element(elem) >> build_node
end
|
80
|
+
|
81
|
+
# Rule for a [:text, match] token; the action wraps the token's second
# item in a REXML::Text.
def text(match=String)
  to_text = Proc.new{|x| REXML::Text.new(x[0][1]) }
  token(XArray[:text, match]) >> to_text
end
|
86
|
+
|
87
|
+
# Rule for a [:processing_instruction, String, String] token; the action
# builds a REXML::Instruction from the token's second and third items.
def pi()
  to_instruction = Proc.new{|x|
    event = x[0]
    REXML::Instruction.new(event[1], event[2])
  }
  token(XArray[:processing_instruction, String, String]) >> to_instruction
end
|
92
|
+
|
93
|
+
# Rule for a [:cdata, match] token; the action wraps the token's second
# item in a REXML::CData.
def cdata(match=String)
  to_cdata = Proc.new{|x| REXML::CData.new(x[0][1]) }
  token(XArray[:cdata, match]) >> to_cdata
end
|
98
|
+
|
99
|
+
# Rule for a [:comment, match] token; the action wraps the token's second
# item in a REXML::Comment.
def comment(match=String)
  to_comment = Proc.new{|x| REXML::Comment.new(x[0][1]) }
  token(XArray[:comment, match]) >> to_comment
end
|
104
|
+
|
105
|
+
# Rule for any token whose first item is :xmldecl; the action passes the
# token's items at indices 1, 2 and 3 to REXML::XMLDecl.new.
def xmldecl()
  to_decl = Proc.new{|x|
    event = x[0]
    REXML::XMLDecl.new(event[1], event[2], event[3])
  }
  token(XArray[:xmldecl]) >> to_decl
end
|
110
|
+
|
111
|
+
# Rule for a token matching [:start_doctype, name].
def start_doctype(name=String)
  pattern = XArray[:start_doctype, name]
  token(pattern)
end
|
114
|
+
|
115
|
+
# Rule for a token whose first item is :end_doctype.
def end_doctype()
  pattern = XArray[:end_doctype]
  token(pattern)
end
|
118
|
+
|
119
|
+
# Rule for a doctype start token, a content rule, and a doctype end token.
#
# When a block is given, the content rule is the block's result or empty();
# otherwise it is just empty(). The action builds a REXML::DocType from the
# start token's items after the event symbol and pairs it with the second
# item of the match result.
def doctype(name=String, &inner)
  crule = inner ? (inner.call() | empty()) : empty()
  build_node = Proc.new{|x|
    [REXML::DocType.new(x[0][1..-1]), x[1]]
  }
  start_doctype(name) - crule - end_doctype() >> build_node
end
|
130
|
+
|
131
|
+
# Rule for an [:externalentity, entity] token; the action wraps the token's
# second item in a REXML::ExternalEntity.
def externalentity(entity=String)
  to_entity = Proc.new{|x| REXML::ExternalEntity.new(x[0][1]) }
  token(XArray[:externalentity, entity]) >> to_entity
end
|
136
|
+
|
137
|
+
# Rule for an [:elementdecl, elem] token; the action wraps the token's
# second item in a REXML::ElementDecl.
def elementdecl(elem=String)
  to_decl = Proc.new{|x| REXML::ElementDecl.new(x[0][1]) }
  token(XArray[:elementdecl, elem]) >> to_decl
end
|
142
|
+
|
143
|
+
# Rule for an [:entitydecl, entity] token; the action passes the whole
# matched token to REXML::Entity.new.
#
# The pattern uses the +entity+ parameter. The previous code referenced
# +elem+, which is not defined in this method, so the caller's argument
# never reached the pattern.
def entitydecl(entity=String)
  token(XArray[:entitydecl, entity]) >> Proc.new{|x|
    REXML::Entity.new(x[0])
  }
end
|
148
|
+
|
149
|
+
# Rule for any token whose first item is :attlistdecl; the action passes the
# token's remaining items, as one array, to REXML::AttlistDecl.new.
# NOTE(review): +decl+ is accepted but not used in the pattern.
def attlistdecl(decl=String)
  to_decl = Proc.new{|x| REXML::AttlistDecl.new(x[0][1..-1]) }
  token(XArray[:attlistdecl]) >> to_decl
end
|
154
|
+
|
155
|
+
# Rule for any token whose first item is :notationdecl; the action splats
# the token's remaining items into REXML::NotationDecl.new.
# NOTE(review): +decl+ is accepted but not used in the pattern.
def notationdecl(decl=String)
  to_decl = Proc.new{|x| REXML::NotationDecl.new(*x[0][1..-1]) }
  token(XArray[:notationdecl]) >> to_decl
end
|
160
|
+
|
161
|
+
# Choice over every node rule defined in this module. The block, if any, is
# forwarded to element and doctype. The attached action returns x[2].
def any_node(&b)
  alternatives =
    element(&b) | doctype(&b) | text() | pi() | cdata() |
    comment() | xmldecl() | externalentity() | elementdecl() |
    entitydecl() | attlistdecl() | notationdecl()
  alternatives >> Proc.new{|x| x[2]}
end
|
166
|
+
|
167
|
+
# Returns an action Proc that attaches children to a node.
#
# The action reads the node from x[0][0] and the child data from x[0][1].
# An Array of children is added element by element; any other value is
# added as a single child. The result is the block's return value for the
# node when a block was given, otherwise the node itself.
def dom_constructor(&act)
  Proc.new{|x|
    node = x[0][0]
    child = x[0][1]
    children = child.is_a?(Array) ? child : [child]
    children.each{|c| node.add(c) }
    act ? act[node] : node
  }
end
|
183
|
+
end
|
184
|
+
end
|
data/samples/sample2.rb
CHANGED
data/samples/sample3.rb
CHANGED
data/samples/sample4.rb
CHANGED
data/samples/sample5.rb
CHANGED
data/samples/sample_expr.rb
CHANGED
data/samples/sample_optimize.rb
CHANGED
@@ -33,13 +33,14 @@ puts(parser.rule1.to_s)
|
|
33
33
|
puts(parser.rule2.to_s)
|
34
34
|
puts(parser.rule3.to_s)
|
35
35
|
|
36
|
+
N = 10
|
36
37
|
Benchmark.bm{|x|
|
37
38
|
buff = ["1","2"]
|
38
39
|
b = ["b"]
|
39
40
|
for i in [5,10,15]
|
40
41
|
puts("--")
|
41
|
-
x.report{ $r1 = parser.rule1.parse(buff*i + b*i) }
|
42
|
-
x.report{ $r2 = parser.rule2.parse(buff*i + b*i) }
|
43
|
-
x.report{ $r3 = parser.rule3.parse(buff*i + b*i) }
|
42
|
+
x.report{ N.times{ $r1 = parser.rule1.parse(buff*i + b*i) } }
|
43
|
+
x.report{ N.times{ $r2 = parser.rule2.parse(buff*i + b*i) } }
|
44
|
+
x.report{ N.times{ $r3 = parser.rule3.parse(buff*i + b*i) } }
|
44
45
|
end
|
45
46
|
}
|
data/samples/sample_xml.rb
CHANGED
@@ -1,123 +1,52 @@
|
|
1
1
|
require 'tdp'
|
2
|
-
require '
|
3
|
-
require '
|
4
|
-
require 'rexml/document'
|
5
|
-
|
6
|
-
class Array
|
7
|
-
def ===(ary)
|
8
|
-
if super(ary)
|
9
|
-
return true
|
10
|
-
end
|
11
|
-
if !ary.is_a?(Array)
|
12
|
-
return false
|
13
|
-
end
|
14
|
-
each_with_index{|v,idx|
|
15
|
-
case ary[idx]
|
16
|
-
when v
|
17
|
-
else
|
18
|
-
return false
|
19
|
-
end
|
20
|
-
}
|
21
|
-
true
|
22
|
-
end
|
23
|
-
end
|
24
|
-
|
25
|
-
class Hash
|
26
|
-
def ===(h)
|
27
|
-
if super(h)
|
28
|
-
return true
|
29
|
-
end
|
30
|
-
if !h.is_a?(Hash)
|
31
|
-
return false
|
32
|
-
end
|
33
|
-
each{|k,v|
|
34
|
-
case h[k]
|
35
|
-
when v
|
36
|
-
else
|
37
|
-
return false
|
38
|
-
end
|
39
|
-
}
|
40
|
-
true
|
41
|
-
end
|
42
|
-
end
|
43
|
-
|
44
|
-
module XMLParser
|
45
|
-
def xml_stag(name)
|
46
|
-
token([:start_element, name, Hash])
|
47
|
-
end
|
48
|
-
alias stag xml_stag
|
49
|
-
|
50
|
-
def xml_etag(name)
|
51
|
-
token([:end_element, name])
|
52
|
-
end
|
53
|
-
alias etag xml_etag
|
54
|
-
|
55
|
-
def dom_element(elem, &inner)
|
56
|
-
stag(elem) - (inner.call()|empty()) - etag(elem)
|
57
|
-
end
|
58
|
-
alias element dom_element
|
59
|
-
|
60
|
-
def dom_filter(&act)
|
61
|
-
Proc.new{|x|
|
62
|
-
name = x[0][1]
|
63
|
-
attrs = x[0][2]
|
64
|
-
node = REXML::Element.new()
|
65
|
-
node.name = name
|
66
|
-
node.attributes.merge!(attrs)
|
67
|
-
act[node,x[1]]
|
68
|
-
}
|
69
|
-
end
|
70
|
-
alias filter dom_filter
|
71
|
-
|
72
|
-
def dom_construct(&act)
|
73
|
-
dom_filter{|node,child|
|
74
|
-
if (child.is_a?(Array))
|
75
|
-
child.each{|c| node.add(c) }
|
76
|
-
else
|
77
|
-
node.add(child)
|
78
|
-
end
|
79
|
-
if (act)
|
80
|
-
act[node]
|
81
|
-
else
|
82
|
-
node
|
83
|
-
end
|
84
|
-
}
|
85
|
-
end
|
86
|
-
alias construct dom_construct
|
87
|
-
end
|
2
|
+
require 'tdp/utils'
|
3
|
+
require 'tdp/xml'
|
88
4
|
|
89
5
|
translator = TDParser.define{|g|
|
90
|
-
extend XMLParser
|
6
|
+
extend TDPXML::XMLParser
|
91
7
|
|
92
8
|
g.xml =
|
93
9
|
element("a"){
|
94
10
|
element("b"){
|
95
|
-
g.
|
96
|
-
} >>
|
97
|
-
} >>
|
11
|
+
g.xmlseq
|
12
|
+
} >> dom_constructor{|node| node.children() }
|
13
|
+
} >> dom_constructor{|node| node.name = "AB"; node } |
|
98
14
|
element(String){
|
99
|
-
g.
|
100
|
-
} >>
|
101
|
-
|
102
|
-
|
103
|
-
|
104
|
-
|
15
|
+
g.xmlseq
|
16
|
+
} >> dom_constructor{|node|
|
17
|
+
node.name = node.name.upcase()
|
18
|
+
node
|
19
|
+
} |
|
20
|
+
doctype{
|
21
|
+
g.xmlseq
|
22
|
+
} >> dom_constructor{|node| node} |
|
23
|
+
text >> Proc.new{|x| x[0]} |
|
24
|
+
elementdecl >> Proc.new{|x| x[0]} |
|
25
|
+
xmldecl >> Proc.new{|x| x[0]} |
|
26
|
+
comment >> Proc.new{|x| x[0]} |
|
27
|
+
any_node() >> Proc.new{|x| x[0] }
|
28
|
+
|
29
|
+
g.xmlseq =
|
30
|
+
g.xml()*0 >> Proc.new{|x| x[0].collect{|y| y[0]}} |
|
105
31
|
|
106
32
|
def translate(src)
|
107
|
-
|
108
|
-
|
109
|
-
while(xparser.has_next?)
|
110
|
-
g.yield(xparser.pull())
|
111
|
-
end
|
112
|
-
}
|
33
|
+
gen = TDPXML::XMLParser::XMLTokenGenerator.new(src)
|
34
|
+
xmlseq.parse(gen)
|
113
35
|
end
|
114
36
|
}
|
115
37
|
|
116
|
-
|
38
|
+
seq = translator.translate(<<EOS)
|
117
39
|
<?xml version="1.0" ?>
|
40
|
+
<!DOCTYPE body [
|
41
|
+
<!ELEMENT body (#PCDATA, strong*)>
|
42
|
+
<!ELEMENT strong (#PCDATA)>
|
43
|
+
]>
|
118
44
|
<list>
|
45
|
+
<!-- comment -->
|
119
46
|
<a><b><c>hoge</c></b></a>
|
120
47
|
<b>b?</b>
|
121
48
|
</list>
|
122
49
|
EOS
|
123
|
-
|
50
|
+
doc = REXML::Document.new()
|
51
|
+
seq.each{|x| doc.add(x) }
|
52
|
+
puts(doc)
|
data/test/test_tdp.rb
CHANGED
@@ -1,6 +1,7 @@
|
|
1
1
|
require 'test/unit'
|
2
2
|
require 'tdp'
|
3
|
-
require 'tdputils'
|
3
|
+
require 'tdp/utils'
|
4
|
+
require 'tdp/xml'
|
4
5
|
|
5
6
|
class Tokens
|
6
7
|
include Enumerable
|
@@ -367,6 +368,14 @@ class TestTDParser < Test::Unit::TestCase
|
|
367
368
|
assert_equal(["a","b",[["a","b"],["a","b"]]], rule.parse(buff))
|
368
369
|
end
|
369
370
|
|
371
|
+
# A labelled pair followed by zero or more items, each either a "-" token
# or a back-reference to the labelled pair.
def test_backref4()
  labelled = (token(/\w/) - token(/\w/))/:x
  repeated = (token("-")|backref(:x))*0
  rule = labelled - repeated >> proc{|x| x}

  only_backrefs = rule.parse(["a","b","a","b","a","b"])
  assert_equal(["a","b",[["a","b"],["a","b"]]], only_backrefs)

  with_dash = rule.parse(["a","b","-","a","b"])
  assert_equal(["a","b",[["-"],["a","b"]]], with_dash)
end
|
378
|
+
|
370
379
|
def test_stackref1()
|
371
380
|
buff = ["a","b","a"]
|
372
381
|
stack = []
|
@@ -456,6 +465,25 @@ class TestTDParser < Test::Unit::TestCase
|
|
456
465
|
assert_equal(0, rule.parse(buff))
|
457
466
|
end
|
458
467
|
|
468
|
+
# The first condition stores a value in the map; the second reads it back.
def test_condition1()
  store = condition{|m| m["n"]=20 }
  fetch = condition{|m| m["n"] }
  rule = store - fetch >> Proc.new{|x| x}
  assert_equal([20,20], rule.parse([]))
end
|
472
|
+
|
473
|
+
# The second condition evaluates to false, so the parse is expected to
# return nil.
def test_condition2()
  rule = condition{|m|m["n"]=20} - condition{|m|m["n"]>20} >> Proc.new{|x| x}
  # assert_nil is the dedicated assertion for a nil expectation.
  assert_nil(rule.parse([]))
end
|
477
|
+
|
478
|
+
# Conditions inside alternatives: the "a" branch needs n > 20 and the "b"
# branch needs n > 10, with n set to 20 by the leading condition.
def test_condition3()
  rule =
    condition{|m|m["n"]=20} -
    (token("a") - condition{|m|m["n"]>20} |
     token("b") - condition{|m|m["n"]>10}) >> Proc.new{|x| x}
  # assert_nil is the dedicated assertion for a nil expectation.
  assert_nil(rule.parse(["a"]))
  assert_equal([20,"b",true], rule.parse(["b"]))
end
|
486
|
+
|
459
487
|
def test_rule1()
|
460
488
|
expr = "1 + 2"
|
461
489
|
assert_equal(3, @calc.parse(expr))
|
metadata
CHANGED
@@ -3,8 +3,8 @@ rubygems_version: 0.9.0
|
|
3
3
|
specification_version: 1
|
4
4
|
name: tdp4r
|
5
5
|
version: !ruby/object:Gem::Version
|
6
|
-
version: 1.4.0
|
7
|
-
date: 2006-07-
|
6
|
+
version: 1.4.1
|
7
|
+
date: 2006-07-23 00:00:00 +09:00
|
8
8
|
summary: TDP4R is a top-down parser library that consists of parser combinators and utility functions.
|
9
9
|
require_paths:
|
10
10
|
- lib
|
@@ -29,8 +29,10 @@ post_install_message:
|
|
29
29
|
authors:
|
30
30
|
- Takaaki Tateishi
|
31
31
|
files:
|
32
|
+
- lib/tdp
|
32
33
|
- lib/tdp.rb
|
33
|
-
- lib/tdputils.rb
|
34
|
+
- lib/tdp/utils.rb
|
35
|
+
- lib/tdp/xml.rb
|
34
36
|
- samples/sample1.rb
|
35
37
|
- samples/sample2.rb
|
36
38
|
- samples/sample3.rb
|