tdp4r 1.4.0 → 1.4.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/doc/guide.txt +1 -1
- data/lib/tdp.rb +44 -10
- data/lib/{tdputils.rb → tdp/utils.rb} +0 -0
- data/lib/tdp/xml.rb +184 -0
- data/samples/sample2.rb +1 -1
- data/samples/sample3.rb +1 -1
- data/samples/sample4.rb +1 -1
- data/samples/sample5.rb +1 -1
- data/samples/sample_expr.rb +1 -1
- data/samples/sample_optimize.rb +4 -3
- data/samples/sample_xml.rb +33 -104
- data/test/test_tdp.rb +29 -1
- metadata +5 -3
data/doc/guide.txt
CHANGED
data/lib/tdp.rb
CHANGED
@@ -88,7 +88,6 @@ module TDParser
|
|
88
88
|
|
89
89
|
def recover(buff, ts)
|
90
90
|
buff.each{|b| ts.unshift(b)}
|
91
|
-
buff.clear()
|
92
91
|
end
|
93
92
|
end
|
94
93
|
include BufferUtils
|
@@ -169,7 +168,8 @@ module TDParser
|
|
169
168
|
NegativeParser.new(self)
|
170
169
|
end
|
171
170
|
|
172
|
-
def parse(tokens=nil, &blk)
|
171
|
+
def parse(tokens=nil, buff=nil, &blk)
|
172
|
+
buff ||= TokenBuffer.new()
|
173
173
|
if( blk.nil? )
|
174
174
|
if( tokens.respond_to?(:shift) && tokens.respond_to?(:unshift) )
|
175
175
|
@tokens = tokens
|
@@ -181,7 +181,7 @@ module TDParser
|
|
181
181
|
else
|
182
182
|
@tokens = TokenGenerator.new(&blk)
|
183
183
|
end
|
184
|
-
r = call(@tokens,
|
184
|
+
r = call(@tokens, buff)
|
185
185
|
if( r.nil? )
|
186
186
|
nil
|
187
187
|
else
|
@@ -685,15 +685,15 @@ module TDParser
|
|
685
685
|
end
|
686
686
|
|
687
687
|
class BackrefParser < ReferenceParser
|
688
|
-
attr_reader :
|
688
|
+
attr_reader :label, :equality
|
689
689
|
|
690
|
-
def initialize(
|
691
|
-
@
|
690
|
+
def initialize(label, eqsym)
|
691
|
+
@label = label
|
692
692
|
@equality = eqsym
|
693
693
|
end
|
694
694
|
|
695
695
|
def call(tokens, buff)
|
696
|
-
ys = buff.map[@
|
696
|
+
ys = buff.map[@label]
|
697
697
|
if (ys.nil? || ys.empty?)
|
698
698
|
nil
|
699
699
|
else
|
@@ -702,12 +702,12 @@ module TDParser
|
|
702
702
|
end
|
703
703
|
|
704
704
|
def to_s()
|
705
|
-
"<backref:#{@
|
705
|
+
"<backref:#{@label}>"
|
706
706
|
end
|
707
707
|
|
708
708
|
def ==(r)
|
709
709
|
super(r) &&
|
710
|
-
(@
|
710
|
+
(@label == r.label) &&
|
711
711
|
(@equality == r.equality)
|
712
712
|
end
|
713
713
|
end
|
@@ -735,11 +735,40 @@ module TDParser
|
|
735
735
|
|
736
736
|
def ==(r)
|
737
737
|
super(r) &&
|
738
|
-
(@stack == r.stack) &&
|
738
|
+
(@stack.object_id == r.stack.object_id) &&
|
739
739
|
(@equality == r.equality)
|
740
740
|
end
|
741
741
|
end
|
742
742
|
|
743
|
+
class ConditionParser < Parser
|
744
|
+
attr_reader :condition
|
745
|
+
|
746
|
+
def initialize(&condition)
|
747
|
+
@condition = condition
|
748
|
+
end
|
749
|
+
|
750
|
+
def call(tokens, buff)
|
751
|
+
if (res = @condition.call(buff.map))
|
752
|
+
Sequence[res]
|
753
|
+
else
|
754
|
+
nil
|
755
|
+
end
|
756
|
+
end
|
757
|
+
|
758
|
+
def to_s()
|
759
|
+
"<condition:#{@condition}>"
|
760
|
+
end
|
761
|
+
|
762
|
+
def ==(r)
|
763
|
+
super(r) &&
|
764
|
+
(@condition == r.condition)
|
765
|
+
end
|
766
|
+
|
767
|
+
def same?(r)
|
768
|
+
false
|
769
|
+
end
|
770
|
+
end
|
771
|
+
|
743
772
|
class StateParser < Parser
|
744
773
|
attr_reader :state
|
745
774
|
|
@@ -809,6 +838,11 @@ module TDParser
|
|
809
838
|
end
|
810
839
|
alias fail fail_rule
|
811
840
|
|
841
|
+
def condition_rule(&b)
|
842
|
+
ConditionParser.new(&b)
|
843
|
+
end
|
844
|
+
alias condition condition_rule
|
845
|
+
|
812
846
|
def leftrec(*rules, &act)
|
813
847
|
f = Proc.new{|x|
|
814
848
|
x[1].inject(x[0]){|acc,y|
|
data/lib/{tdputils.rb → tdp/utils.rb}
File without changes
|
data/lib/tdp/xml.rb
ADDED
@@ -0,0 +1,184 @@
|
|
1
|
+
require 'tdp'
|
2
|
+
require 'rexml/parsers/pullparser'
|
3
|
+
require 'rexml/document'
|
4
|
+
|
5
|
+
module TDPXML
|
6
|
+
module XMLParser
|
7
|
+
class XMLTokenGenerator < TDParser::TokenGenerator
|
8
|
+
def initialize(src)
|
9
|
+
@xparser = REXML::Parsers::BaseParser.new(src)
|
10
|
+
super(){|g|
|
11
|
+
while(@xparser.has_next?)
|
12
|
+
e = @xparser.pull()
|
13
|
+
g.yield(e)
|
14
|
+
end
|
15
|
+
}
|
16
|
+
end
|
17
|
+
end
|
18
|
+
|
19
|
+
class XArray < Array
|
20
|
+
def ===(ary)
|
21
|
+
if super(ary)
|
22
|
+
return true
|
23
|
+
end
|
24
|
+
if !ary.is_a?(Array)
|
25
|
+
return false
|
26
|
+
end
|
27
|
+
each_with_index{|v,idx|
|
28
|
+
case ary[idx]
|
29
|
+
when v
|
30
|
+
else
|
31
|
+
return false
|
32
|
+
end
|
33
|
+
}
|
34
|
+
true
|
35
|
+
end
|
36
|
+
end
|
37
|
+
|
38
|
+
class XHash < Hash
|
39
|
+
def ===(h)
|
40
|
+
if super(h)
|
41
|
+
return true
|
42
|
+
end
|
43
|
+
if !h.is_a?(Hash)
|
44
|
+
return false
|
45
|
+
end
|
46
|
+
each{|k,v|
|
47
|
+
case h[k]
|
48
|
+
when v
|
49
|
+
else
|
50
|
+
return false
|
51
|
+
end
|
52
|
+
}
|
53
|
+
true
|
54
|
+
end
|
55
|
+
end
|
56
|
+
|
57
|
+
def start_element(name=String)
|
58
|
+
token(XArray[:start_element, name, Hash])
|
59
|
+
end
|
60
|
+
|
61
|
+
def end_element(name=String)
|
62
|
+
token(XArray[:end_element, name])
|
63
|
+
end
|
64
|
+
|
65
|
+
def element(elem=String, &inner)
|
66
|
+
if inner
|
67
|
+
crule = inner.call()|empty()
|
68
|
+
else
|
69
|
+
crule = empty()
|
70
|
+
end
|
71
|
+
start_element(elem) - crule - end_element(elem) >> Proc.new{|x|
|
72
|
+
name = x[0][1]
|
73
|
+
attrs = x[0][2]
|
74
|
+
node = REXML::Element.new()
|
75
|
+
node.name = name
|
76
|
+
node.attributes.merge!(attrs)
|
77
|
+
[node,x[1]]
|
78
|
+
}
|
79
|
+
end
|
80
|
+
|
81
|
+
def text(match=String)
|
82
|
+
token(XArray[:text, match]) >> Proc.new{|x|
|
83
|
+
REXML::Text.new(x[0][1])
|
84
|
+
}
|
85
|
+
end
|
86
|
+
|
87
|
+
def pi()
|
88
|
+
token(XArray[:processing_instruction, String, String]) >> Proc.new{|x|
|
89
|
+
REXML::Instruction.new(x[0][1],x[0][2])
|
90
|
+
}
|
91
|
+
end
|
92
|
+
|
93
|
+
def cdata(match=String)
|
94
|
+
token(XArray[:cdata, match]) >> Proc.new{|x|
|
95
|
+
REXML::CData.new(x[0][1])
|
96
|
+
}
|
97
|
+
end
|
98
|
+
|
99
|
+
def comment(match=String)
|
100
|
+
token(XArray[:comment, match]) >> Proc.new{|x|
|
101
|
+
REXML::Comment.new(x[0][1])
|
102
|
+
}
|
103
|
+
end
|
104
|
+
|
105
|
+
def xmldecl()
|
106
|
+
token(XArray[:xmldecl]) >> Proc.new{|x|
|
107
|
+
REXML::XMLDecl.new(x[0][1],x[0][2], x[0][3])
|
108
|
+
}
|
109
|
+
end
|
110
|
+
|
111
|
+
def start_doctype(name=String)
|
112
|
+
token(XArray[:start_doctype, name])
|
113
|
+
end
|
114
|
+
|
115
|
+
def end_doctype()
|
116
|
+
token(XArray[:end_doctype])
|
117
|
+
end
|
118
|
+
|
119
|
+
def doctype(name=String, &inner)
|
120
|
+
if (inner)
|
121
|
+
crule = inner.call()|empty()
|
122
|
+
else
|
123
|
+
crule = empty()
|
124
|
+
end
|
125
|
+
start_doctype(name) - crule - end_doctype() >> Proc.new{|x|
|
126
|
+
node = REXML::DocType.new(x[0][1..-1])
|
127
|
+
[node, x[1]]
|
128
|
+
}
|
129
|
+
end
|
130
|
+
|
131
|
+
def externalentity(entity=String)
|
132
|
+
token(XArray[:externalentity, entity]) >> Proc.new{|x|
|
133
|
+
REXML::ExternalEntity.new(x[0][1])
|
134
|
+
}
|
135
|
+
end
|
136
|
+
|
137
|
+
def elementdecl(elem=String)
|
138
|
+
token(XArray[:elementdecl, elem]) >> Proc.new{|x|
|
139
|
+
REXML::ElementDecl.new(x[0][1])
|
140
|
+
}
|
141
|
+
end
|
142
|
+
|
143
|
+
def entitydecl(entity=String)
|
144
|
+
token(XArray[:entitydecl, elem]) >> Proc.new{|x|
|
145
|
+
REXML::Entity.new(x[0])
|
146
|
+
}
|
147
|
+
end
|
148
|
+
|
149
|
+
def attlistdecl(decl=String)
|
150
|
+
token(XArray[:attlistdecl]) >> Proc.new{|x|
|
151
|
+
REXML::AttlistDecl.new(x[0][1..-1])
|
152
|
+
}
|
153
|
+
end
|
154
|
+
|
155
|
+
def notationdecl(decl=String)
|
156
|
+
token(XArray[:notationdecl]) >> Proc.new{|x|
|
157
|
+
REXML::NotationDecl.new(*x[0][1..-1])
|
158
|
+
}
|
159
|
+
end
|
160
|
+
|
161
|
+
def any_node(&b)
|
162
|
+
(element(&b) | doctype(&b) | text() | pi() | cdata() |
|
163
|
+
comment() | xmldecl() | externalentity() | elementdecl() |
|
164
|
+
entitydecl() | attlistdecl() | notationdecl()) >> Proc.new{|x| x[2]}
|
165
|
+
end
|
166
|
+
|
167
|
+
def dom_constructor(&act)
|
168
|
+
Proc.new{|x|
|
169
|
+
node = x[0][0]
|
170
|
+
child = x[0][1]
|
171
|
+
if (child.is_a?(Array))
|
172
|
+
child.each{|c| node.add(c) }
|
173
|
+
else
|
174
|
+
node.add(child)
|
175
|
+
end
|
176
|
+
if (act)
|
177
|
+
act[node]
|
178
|
+
else
|
179
|
+
node
|
180
|
+
end
|
181
|
+
}
|
182
|
+
end
|
183
|
+
end
|
184
|
+
end
|
data/samples/sample2.rb
CHANGED
data/samples/sample3.rb
CHANGED
data/samples/sample4.rb
CHANGED
data/samples/sample5.rb
CHANGED
data/samples/sample_expr.rb
CHANGED
data/samples/sample_optimize.rb
CHANGED
@@ -33,13 +33,14 @@ puts(parser.rule1.to_s)
|
|
33
33
|
puts(parser.rule2.to_s)
|
34
34
|
puts(parser.rule3.to_s)
|
35
35
|
|
36
|
+
N = 10
|
36
37
|
Benchmark.bm{|x|
|
37
38
|
buff = ["1","2"]
|
38
39
|
b = ["b"]
|
39
40
|
for i in [5,10,15]
|
40
41
|
puts("--")
|
41
|
-
x.report{ $r1 = parser.rule1.parse(buff*i + b*i) }
|
42
|
-
x.report{ $r2 = parser.rule2.parse(buff*i + b*i) }
|
43
|
-
x.report{ $r3 = parser.rule3.parse(buff*i + b*i) }
|
42
|
+
x.report{ N.times{ $r1 = parser.rule1.parse(buff*i + b*i) } }
|
43
|
+
x.report{ N.times{ $r2 = parser.rule2.parse(buff*i + b*i) } }
|
44
|
+
x.report{ N.times{ $r3 = parser.rule3.parse(buff*i + b*i) } }
|
44
45
|
end
|
45
46
|
}
|
data/samples/sample_xml.rb
CHANGED
@@ -1,123 +1,52 @@
|
|
1
1
|
require 'tdp'
|
2
|
-
require '
|
3
|
-
require '
|
4
|
-
require 'rexml/document'
|
5
|
-
|
6
|
-
class Array
|
7
|
-
def ===(ary)
|
8
|
-
if super(ary)
|
9
|
-
return true
|
10
|
-
end
|
11
|
-
if !ary.is_a?(Array)
|
12
|
-
return false
|
13
|
-
end
|
14
|
-
each_with_index{|v,idx|
|
15
|
-
case ary[idx]
|
16
|
-
when v
|
17
|
-
else
|
18
|
-
return false
|
19
|
-
end
|
20
|
-
}
|
21
|
-
true
|
22
|
-
end
|
23
|
-
end
|
24
|
-
|
25
|
-
class Hash
|
26
|
-
def ===(h)
|
27
|
-
if super(h)
|
28
|
-
return true
|
29
|
-
end
|
30
|
-
if !h.is_a?(Hash)
|
31
|
-
return false
|
32
|
-
end
|
33
|
-
each{|k,v|
|
34
|
-
case h[k]
|
35
|
-
when v
|
36
|
-
else
|
37
|
-
return false
|
38
|
-
end
|
39
|
-
}
|
40
|
-
true
|
41
|
-
end
|
42
|
-
end
|
43
|
-
|
44
|
-
module XMLParser
|
45
|
-
def xml_stag(name)
|
46
|
-
token([:start_element, name, Hash])
|
47
|
-
end
|
48
|
-
alias stag xml_stag
|
49
|
-
|
50
|
-
def xml_etag(name)
|
51
|
-
token([:end_element, name])
|
52
|
-
end
|
53
|
-
alias etag xml_etag
|
54
|
-
|
55
|
-
def dom_element(elem, &inner)
|
56
|
-
stag(elem) - (inner.call()|empty()) - etag(elem)
|
57
|
-
end
|
58
|
-
alias element dom_element
|
59
|
-
|
60
|
-
def dom_filter(&act)
|
61
|
-
Proc.new{|x|
|
62
|
-
name = x[0][1]
|
63
|
-
attrs = x[0][2]
|
64
|
-
node = REXML::Element.new()
|
65
|
-
node.name = name
|
66
|
-
node.attributes.merge!(attrs)
|
67
|
-
act[node,x[1]]
|
68
|
-
}
|
69
|
-
end
|
70
|
-
alias filter dom_filter
|
71
|
-
|
72
|
-
def dom_construct(&act)
|
73
|
-
dom_filter{|node,child|
|
74
|
-
if (child.is_a?(Array))
|
75
|
-
child.each{|c| node.add(c) }
|
76
|
-
else
|
77
|
-
node.add(child)
|
78
|
-
end
|
79
|
-
if (act)
|
80
|
-
act[node]
|
81
|
-
else
|
82
|
-
node
|
83
|
-
end
|
84
|
-
}
|
85
|
-
end
|
86
|
-
alias construct dom_construct
|
87
|
-
end
|
2
|
+
require 'tdp/utils'
|
3
|
+
require 'tdp/xml'
|
88
4
|
|
89
5
|
translator = TDParser.define{|g|
|
90
|
-
extend XMLParser
|
6
|
+
extend TDPXML::XMLParser
|
91
7
|
|
92
8
|
g.xml =
|
93
9
|
element("a"){
|
94
10
|
element("b"){
|
95
|
-
g.
|
96
|
-
} >>
|
97
|
-
} >>
|
11
|
+
g.xmlseq
|
12
|
+
} >> dom_constructor{|node| node.children() }
|
13
|
+
} >> dom_constructor{|node| node.name = "AB"; node } |
|
98
14
|
element(String){
|
99
|
-
g.
|
100
|
-
} >>
|
101
|
-
|
102
|
-
|
103
|
-
|
104
|
-
|
15
|
+
g.xmlseq
|
16
|
+
} >> dom_constructor{|node|
|
17
|
+
node.name = node.name.upcase()
|
18
|
+
node
|
19
|
+
} |
|
20
|
+
doctype{
|
21
|
+
g.xmlseq
|
22
|
+
} >> dom_constructor{|node| node} |
|
23
|
+
text >> Proc.new{|x| x[0]} |
|
24
|
+
elementdecl >> Proc.new{|x| x[0]} |
|
25
|
+
xmldecl >> Proc.new{|x| x[0]} |
|
26
|
+
comment >> Proc.new{|x| x[0]} |
|
27
|
+
any_node() >> Proc.new{|x| x[0] }
|
28
|
+
|
29
|
+
g.xmlseq =
|
30
|
+
g.xml()*0 >> Proc.new{|x| x[0].collect{|y| y[0]}} |
|
105
31
|
|
106
32
|
def translate(src)
|
107
|
-
|
108
|
-
|
109
|
-
while(xparser.has_next?)
|
110
|
-
g.yield(xparser.pull())
|
111
|
-
end
|
112
|
-
}
|
33
|
+
gen = TDPXML::XMLParser::XMLTokenGenerator.new(src)
|
34
|
+
xmlseq.parse(gen)
|
113
35
|
end
|
114
36
|
}
|
115
37
|
|
116
|
-
|
38
|
+
seq = translator.translate(<<EOS)
|
117
39
|
<?xml version="1.0" ?>
|
40
|
+
<!DOCTYPE body [
|
41
|
+
<!ELEMENT body (#PCDATA, strong*)>
|
42
|
+
<!ELEMENT strong (#PCDATA)>
|
43
|
+
]>
|
118
44
|
<list>
|
45
|
+
<!-- comment -->
|
119
46
|
<a><b><c>hoge</c></b></a>
|
120
47
|
<b>b?</b>
|
121
48
|
</list>
|
122
49
|
EOS
|
123
|
-
|
50
|
+
doc = REXML::Document.new()
|
51
|
+
seq.each{|x| doc.add(x) }
|
52
|
+
puts(doc)
|
data/test/test_tdp.rb
CHANGED
@@ -1,6 +1,7 @@
|
|
1
1
|
require 'test/unit'
|
2
2
|
require 'tdp'
|
3
|
-
require 'tdputils'
|
3
|
+
require 'tdp/utils'
|
4
|
+
require 'tdp/xml'
|
4
5
|
|
5
6
|
class Tokens
|
6
7
|
include Enumerable
|
@@ -367,6 +368,14 @@ class TestTDParser < Test::Unit::TestCase
|
|
367
368
|
assert_equal(["a","b",[["a","b"],["a","b"]]], rule.parse(buff))
|
368
369
|
end
|
369
370
|
|
371
|
+
def test_backref4()
|
372
|
+
rule = (token(/\w/) - token(/\w/))/:x - (token("-")|backref(:x))*0 >> proc{|x| x}
|
373
|
+
assert_equal(["a","b",[["a","b"],["a","b"]]],
|
374
|
+
rule.parse(["a","b","a","b","a","b"]))
|
375
|
+
assert_equal(["a","b",[["-"],["a","b"]]],
|
376
|
+
rule.parse(["a","b","-","a","b"]))
|
377
|
+
end
|
378
|
+
|
370
379
|
def test_stackref1()
|
371
380
|
buff = ["a","b","a"]
|
372
381
|
stack = []
|
@@ -456,6 +465,25 @@ class TestTDParser < Test::Unit::TestCase
|
|
456
465
|
assert_equal(0, rule.parse(buff))
|
457
466
|
end
|
458
467
|
|
468
|
+
def test_condition1()
|
469
|
+
rule = condition{|m|m["n"]=20} - condition{|m|m["n"]} >> Proc.new{|x| x}
|
470
|
+
assert_equal([20,20], rule.parse([]))
|
471
|
+
end
|
472
|
+
|
473
|
+
def test_condition2()
|
474
|
+
rule = condition{|m|m["n"]=20} - condition{|m|m["n"]>20} >> Proc.new{|x| x}
|
475
|
+
assert_equal(nil, rule.parse([]))
|
476
|
+
end
|
477
|
+
|
478
|
+
def test_condition3()
|
479
|
+
rule =
|
480
|
+
condition{|m|m["n"]=20} -
|
481
|
+
(token("a") - condition{|m|m["n"]>20} |
|
482
|
+
token("b") - condition{|m|m["n"]>10}) >> Proc.new{|x| x}
|
483
|
+
assert_equal(nil, rule.parse(["a"]))
|
484
|
+
assert_equal([20,"b",true], rule.parse(["b"]))
|
485
|
+
end
|
486
|
+
|
459
487
|
def test_rule1()
|
460
488
|
expr = "1 + 2"
|
461
489
|
assert_equal(3, @calc.parse(expr))
|
metadata
CHANGED
@@ -3,8 +3,8 @@ rubygems_version: 0.9.0
|
|
3
3
|
specification_version: 1
|
4
4
|
name: tdp4r
|
5
5
|
version: !ruby/object:Gem::Version
|
6
|
-
version: 1.4.0
|
7
|
-
date: 2006-07-
|
6
|
+
version: 1.4.1
|
7
|
+
date: 2006-07-23 00:00:00 +09:00
|
8
8
|
summary: TDP4R is a top-down parser library that consists of parser combinators and utility functions.
|
9
9
|
require_paths:
|
10
10
|
- lib
|
@@ -29,8 +29,10 @@ post_install_message:
|
|
29
29
|
authors:
|
30
30
|
- Takaaki Tateishi
|
31
31
|
files:
|
32
|
+
- lib/tdp
|
32
33
|
- lib/tdp.rb
|
33
|
-
- lib/tdputils.rb
|
34
|
+
- lib/tdp/utils.rb
|
35
|
+
- lib/tdp/xml.rb
|
34
36
|
- samples/sample1.rb
|
35
37
|
- samples/sample2.rb
|
36
38
|
- samples/sample3.rb
|