tdp4r 1.4.0 → 1.4.1

Sign up to get free protection for your applications and to get access to all the features.
data/doc/guide.txt CHANGED
@@ -146,5 +146,5 @@ Parser Combinators
146
146
  StringTokenizer
147
147
  -----------------
148
148
  There is a simple tokenizer called TDPUtils::StringTokenizer in the library
149
- "tdputils".
149
+ "tdp/utils".
150
150
  (See MyParser#parse in sample2.rb)
data/lib/tdp.rb CHANGED
@@ -88,7 +88,6 @@ module TDParser
88
88
 
89
89
  def recover(buff, ts)
90
90
  buff.each{|b| ts.unshift(b)}
91
- buff.clear()
92
91
  end
93
92
  end
94
93
  include BufferUtils
@@ -169,7 +168,8 @@ module TDParser
169
168
  NegativeParser.new(self)
170
169
  end
171
170
 
172
- def parse(tokens=nil, &blk)
171
+ def parse(tokens=nil, buff=nil, &blk)
172
+ buff ||= TokenBuffer.new()
173
173
  if( blk.nil? )
174
174
  if( tokens.respond_to?(:shift) && tokens.respond_to?(:unshift) )
175
175
  @tokens = tokens
@@ -181,7 +181,7 @@ module TDParser
181
181
  else
182
182
  @tokens = TokenGenerator.new(&blk)
183
183
  end
184
- r = call(@tokens, TokenBuffer.new())
184
+ r = call(@tokens, buff)
185
185
  if( r.nil? )
186
186
  nil
187
187
  else
@@ -685,15 +685,15 @@ module TDParser
685
685
  end
686
686
 
687
687
  class BackrefParser < ReferenceParser
688
- attr_reader :reference, :equality
688
+ attr_reader :label, :equality
689
689
 
690
- def initialize(ref, eqsym)
691
- @reference = ref
690
+ def initialize(label, eqsym)
691
+ @label = label
692
692
  @equality = eqsym
693
693
  end
694
694
 
695
695
  def call(tokens, buff)
696
- ys = buff.map[@reference]
696
+ ys = buff.map[@label]
697
697
  if (ys.nil? || ys.empty?)
698
698
  nil
699
699
  else
@@ -702,12 +702,12 @@ module TDParser
702
702
  end
703
703
 
704
704
  def to_s()
705
- "<backref:#{@reference}>"
705
+ "<backref:#{@label}>"
706
706
  end
707
707
 
708
708
  def ==(r)
709
709
  super(r) &&
710
- (@reference == r.reference) &&
710
+ (@label == r.label) &&
711
711
  (@equality == r.equality)
712
712
  end
713
713
  end
@@ -735,11 +735,40 @@ module TDParser
735
735
 
736
736
  def ==(r)
737
737
  super(r) &&
738
- (@stack == r.stack) &&
738
+ (@stack.object_id == r.stack.object_id) &&
739
739
  (@equality == r.equality)
740
740
  end
741
741
  end
742
742
 
743
+ class ConditionParser < Parser
744
+ attr_reader :condition
745
+
746
+ def initialize(&condition)
747
+ @condition = condition
748
+ end
749
+
750
+ def call(tokens, buff)
751
+ if (res = @condition.call(buff.map))
752
+ Sequence[res]
753
+ else
754
+ nil
755
+ end
756
+ end
757
+
758
+ def to_s()
759
+ "<condition:#{@condition}>"
760
+ end
761
+
762
+ def ==(r)
763
+ super(r) &&
764
+ (@condition == r.condition)
765
+ end
766
+
767
+ def same?(r)
768
+ false
769
+ end
770
+ end
771
+
743
772
  class StateParser < Parser
744
773
  attr_reader :state
745
774
 
@@ -809,6 +838,11 @@ module TDParser
809
838
  end
810
839
  alias fail fail_rule
811
840
 
841
+ def condition_rule(&b)
842
+ ConditionParser.new(&b)
843
+ end
844
+ alias condition condition_rule
845
+
812
846
  def leftrec(*rules, &act)
813
847
  f = Proc.new{|x|
814
848
  x[1].inject(x[0]){|acc,y|
File without changes
data/lib/tdp/xml.rb ADDED
@@ -0,0 +1,184 @@
1
+ require 'tdp'
2
+ require 'rexml/parsers/pullparser'
3
+ require 'rexml/document'
4
+
5
+ module TDPXML
6
+ module XMLParser
7
+ class XMLTokenGenerator < TDParser::TokenGenerator
8
+ def initialize(src)
9
+ @xparser = REXML::Parsers::BaseParser.new(src)
10
+ super(){|g|
11
+ while(@xparser.has_next?)
12
+ e = @xparser.pull()
13
+ g.yield(e)
14
+ end
15
+ }
16
+ end
17
+ end
18
+
19
+ class XArray < Array
20
+ def ===(ary)
21
+ if super(ary)
22
+ return true
23
+ end
24
+ if !ary.is_a?(Array)
25
+ return false
26
+ end
27
+ each_with_index{|v,idx|
28
+ case ary[idx]
29
+ when v
30
+ else
31
+ return false
32
+ end
33
+ }
34
+ true
35
+ end
36
+ end
37
+
38
+ class XHash < Hash
39
+ def ===(h)
40
+ if super(h)
41
+ return true
42
+ end
43
+ if !h.is_a?(Hash)
44
+ return false
45
+ end
46
+ each{|k,v|
47
+ case h[k]
48
+ when v
49
+ else
50
+ return false
51
+ end
52
+ }
53
+ true
54
+ end
55
+ end
56
+
57
+ def start_element(name=String)
58
+ token(XArray[:start_element, name, Hash])
59
+ end
60
+
61
+ def end_element(name=String)
62
+ token(XArray[:end_element, name])
63
+ end
64
+
65
+ def element(elem=String, &inner)
66
+ if inner
67
+ crule = inner.call()|empty()
68
+ else
69
+ crule = empty()
70
+ end
71
+ start_element(elem) - crule - end_element(elem) >> Proc.new{|x|
72
+ name = x[0][1]
73
+ attrs = x[0][2]
74
+ node = REXML::Element.new()
75
+ node.name = name
76
+ node.attributes.merge!(attrs)
77
+ [node,x[1]]
78
+ }
79
+ end
80
+
81
+ def text(match=String)
82
+ token(XArray[:text, match]) >> Proc.new{|x|
83
+ REXML::Text.new(x[0][1])
84
+ }
85
+ end
86
+
87
+ def pi()
88
+ token(XArray[:processing_instruction, String, String]) >> Proc.new{|x|
89
+ REXML::Instruction.new(x[0][1],x[0][2])
90
+ }
91
+ end
92
+
93
+ def cdata(match=String)
94
+ token(XArray[:cdata, match]) >> Proc.new{|x|
95
+ REXML::CData.new(x[0][1])
96
+ }
97
+ end
98
+
99
+ def comment(match=String)
100
+ token(XArray[:comment, match]) >> Proc.new{|x|
101
+ REXML::Comment.new(x[0][1])
102
+ }
103
+ end
104
+
105
+ def xmldecl()
106
+ token(XArray[:xmldecl]) >> Proc.new{|x|
107
+ REXML::XMLDecl.new(x[0][1],x[0][2], x[0][3])
108
+ }
109
+ end
110
+
111
+ def start_doctype(name=String)
112
+ token(XArray[:start_doctype, name])
113
+ end
114
+
115
+ def end_doctype()
116
+ token(XArray[:end_doctype])
117
+ end
118
+
119
+ def doctype(name=String, &inner)
120
+ if (inner)
121
+ crule = inner.call()|empty()
122
+ else
123
+ crule = empty()
124
+ end
125
+ start_doctype(name) - crule - end_doctype() >> Proc.new{|x|
126
+ node = REXML::DocType.new(x[0][1..-1])
127
+ [node, x[1]]
128
+ }
129
+ end
130
+
131
+ def externalentity(entity=String)
132
+ token(XArray[:externalentity, entity]) >> Proc.new{|x|
133
+ REXML::ExternalEntity.new(x[0][1])
134
+ }
135
+ end
136
+
137
+ def elementdecl(elem=String)
138
+ token(XArray[:elementdecl, elem]) >> Proc.new{|x|
139
+ REXML::ElementDecl.new(x[0][1])
140
+ }
141
+ end
142
+
143
+ def entitydecl(entity=String)
144
+ token(XArray[:entitydecl, elem]) >> Proc.new{|x|
145
+ REXML::Entity.new(x[0])
146
+ }
147
+ end
148
+
149
+ def attlistdecl(decl=String)
150
+ token(XArray[:attlistdecl]) >> Proc.new{|x|
151
+ REXML::AttlistDecl.new(x[0][1..-1])
152
+ }
153
+ end
154
+
155
+ def notationdecl(decl=String)
156
+ token(XArray[:notationdecl]) >> Proc.new{|x|
157
+ REXML::NotationDecl.new(*x[0][1..-1])
158
+ }
159
+ end
160
+
161
+ def any_node(&b)
162
+ (element(&b) | doctype(&b) | text() | pi() | cdata() |
163
+ comment() | xmldecl() | externalentity() | elementdecl() |
164
+ entitydecl() | attlistdecl() | notationdecl()) >> Proc.new{|x| x[2]}
165
+ end
166
+
167
+ def dom_constructor(&act)
168
+ Proc.new{|x|
169
+ node = x[0][0]
170
+ child = x[0][1]
171
+ if (child.is_a?(Array))
172
+ child.each{|c| node.add(c) }
173
+ else
174
+ node.add(child)
175
+ end
176
+ if (act)
177
+ act[node]
178
+ else
179
+ node
180
+ end
181
+ }
182
+ end
183
+ end
184
+ end
data/samples/sample2.rb CHANGED
@@ -2,7 +2,7 @@
2
2
  # parsing four arithmetic expressions with tdputils.
3
3
 
4
4
  require 'tdp'
5
- require 'tdputils'
5
+ require 'tdp/utils'
6
6
 
7
7
  class MyParser
8
8
  include TDParser
data/samples/sample3.rb CHANGED
@@ -2,7 +2,7 @@
2
2
  # parsing four arithmetic expressions with tdputils.
3
3
 
4
4
  require 'tdp'
5
- require 'tdputils'
5
+ require 'tdp/utils'
6
6
 
7
7
  class MyParser
8
8
  include TDParser
data/samples/sample4.rb CHANGED
@@ -2,7 +2,7 @@
2
2
  # caching constructed grammars
3
3
 
4
4
  require 'tdp'
5
- require 'tdputils'
5
+ require 'tdp/utils'
6
6
 
7
7
  class MyParser
8
8
  include TDParser
data/samples/sample5.rb CHANGED
@@ -2,7 +2,7 @@
2
2
  # writing grammars in the substitution style.
3
3
 
4
4
  require 'tdp'
5
- require 'tdputils'
5
+ require 'tdp/utils'
6
6
 
7
7
  parser = TDParser.define{|g|
8
8
  g.plus = "+"
@@ -2,7 +2,7 @@
2
2
  # writing grammars using chainl().
3
3
 
4
4
  require 'tdp'
5
- require 'tdputils'
5
+ require 'tdp/utils'
6
6
 
7
7
  parser = TDParser.define{|g|
8
8
  g.plus = "+"
@@ -33,13 +33,14 @@ puts(parser.rule1.to_s)
33
33
  puts(parser.rule2.to_s)
34
34
  puts(parser.rule3.to_s)
35
35
 
36
+ N = 10
36
37
  Benchmark.bm{|x|
37
38
  buff = ["1","2"]
38
39
  b = ["b"]
39
40
  for i in [5,10,15]
40
41
  puts("--")
41
- x.report{ $r1 = parser.rule1.parse(buff*i + b*i) }
42
- x.report{ $r2 = parser.rule2.parse(buff*i + b*i) }
43
- x.report{ $r3 = parser.rule3.parse(buff*i + b*i) }
42
+ x.report{ N.times{ $r1 = parser.rule1.parse(buff*i + b*i) } }
43
+ x.report{ N.times{ $r2 = parser.rule2.parse(buff*i + b*i) } }
44
+ x.report{ N.times{ $r3 = parser.rule3.parse(buff*i + b*i) } }
44
45
  end
45
46
  }
@@ -1,123 +1,52 @@
1
1
  require 'tdp'
2
- require 'tdputils'
3
- require 'rexml/parsers/pullparser'
4
- require 'rexml/document'
5
-
6
- class Array
7
- def ===(ary)
8
- if super(ary)
9
- return true
10
- end
11
- if !ary.is_a?(Array)
12
- return false
13
- end
14
- each_with_index{|v,idx|
15
- case ary[idx]
16
- when v
17
- else
18
- return false
19
- end
20
- }
21
- true
22
- end
23
- end
24
-
25
- class Hash
26
- def ===(h)
27
- if super(h)
28
- return true
29
- end
30
- if !h.is_a?(Hash)
31
- return false
32
- end
33
- each{|k,v|
34
- case h[k]
35
- when v
36
- else
37
- return false
38
- end
39
- }
40
- true
41
- end
42
- end
43
-
44
- module XMLParser
45
- def xml_stag(name)
46
- token([:start_element, name, Hash])
47
- end
48
- alias stag xml_stag
49
-
50
- def xml_etag(name)
51
- token([:end_element, name])
52
- end
53
- alias etag xml_etag
54
-
55
- def dom_element(elem, &inner)
56
- stag(elem) - (inner.call()|empty()) - etag(elem)
57
- end
58
- alias element dom_element
59
-
60
- def dom_filter(&act)
61
- Proc.new{|x|
62
- name = x[0][1]
63
- attrs = x[0][2]
64
- node = REXML::Element.new()
65
- node.name = name
66
- node.attributes.merge!(attrs)
67
- act[node,x[1]]
68
- }
69
- end
70
- alias filter dom_filter
71
-
72
- def dom_construct(&act)
73
- dom_filter{|node,child|
74
- if (child.is_a?(Array))
75
- child.each{|c| node.add(c) }
76
- else
77
- node.add(child)
78
- end
79
- if (act)
80
- act[node]
81
- else
82
- node
83
- end
84
- }
85
- end
86
- alias construct dom_construct
87
- end
2
+ require 'tdp/utils'
3
+ require 'tdp/xml'
88
4
 
89
5
  translator = TDParser.define{|g|
90
- extend XMLParser
6
+ extend TDPXML::XMLParser
91
7
 
92
8
  g.xml =
93
9
  element("a"){
94
10
  element("b"){
95
- g.xml*0 >> Proc.new{|x| x[0].collect{|y| y[0]} }
96
- } >> construct{|node| node.name = "bar"; node }
97
- } >> construct{|node| node.name = "foo"; node } |
11
+ g.xmlseq
12
+ } >> dom_constructor{|node| node.children() }
13
+ } >> dom_constructor{|node| node.name = "AB"; node } |
98
14
  element(String){
99
- g.xml*0 >> Proc.new{|x| x[0].collect{|y| y[0]} }
100
- } >> construct{|node|
101
- node.name = node.name.upcase()
102
- node
103
- } |
104
- ~etag(String) - any() - g.xml >> Proc.new{|x| x[2]}
15
+ g.xmlseq
16
+ } >> dom_constructor{|node|
17
+ node.name = node.name.upcase()
18
+ node
19
+ } |
20
+ doctype{
21
+ g.xmlseq
22
+ } >> dom_constructor{|node| node} |
23
+ text >> Proc.new{|x| x[0]} |
24
+ elementdecl >> Proc.new{|x| x[0]} |
25
+ xmldecl >> Proc.new{|x| x[0]} |
26
+ comment >> Proc.new{|x| x[0]} |
27
+ any_node() >> Proc.new{|x| x[0] }
28
+
29
+ g.xmlseq =
30
+ g.xml()*0 >> Proc.new{|x| x[0].collect{|y| y[0]}} |
105
31
 
106
32
  def translate(src)
107
- xparser = REXML::Parsers::BaseParser.new(src)
108
- xml.parse{|g|
109
- while(xparser.has_next?)
110
- g.yield(xparser.pull())
111
- end
112
- }
33
+ gen = TDPXML::XMLParser::XMLTokenGenerator.new(src)
34
+ xmlseq.parse(gen)
113
35
  end
114
36
  }
115
37
 
116
- puts(translator.translate(<<EOS))
38
+ seq = translator.translate(<<EOS)
117
39
  <?xml version="1.0" ?>
40
+ <!DOCTYPE body [
41
+ <!ELEMENT body (#PCDATA, strong*)>
42
+ <!ELEMENT strong (#PCDATA)>
43
+ ]>
118
44
  <list>
45
+ <!-- comment -->
119
46
  <a><b><c>hoge</c></b></a>
120
47
  <b>b?</b>
121
48
  </list>
122
49
  EOS
123
- # => "<LIST><foo><bar><C></C></bar></foo><B></B></LIST>"
50
+ doc = REXML::Document.new()
51
+ seq.each{|x| doc.add(x) }
52
+ puts(doc)
data/test/test_tdp.rb CHANGED
@@ -1,6 +1,7 @@
1
1
  require 'test/unit'
2
2
  require 'tdp'
3
- require 'tdputils'
3
+ require 'tdp/utils'
4
+ require 'tdp/xml'
4
5
 
5
6
  class Tokens
6
7
  include Enumerable
@@ -367,6 +368,14 @@ class TestTDParser < Test::Unit::TestCase
367
368
  assert_equal(["a","b",[["a","b"],["a","b"]]], rule.parse(buff))
368
369
  end
369
370
 
371
+ def test_backref4()
372
+ rule = (token(/\w/) - token(/\w/))/:x - (token("-")|backref(:x))*0 >> proc{|x| x}
373
+ assert_equal(["a","b",[["a","b"],["a","b"]]],
374
+ rule.parse(["a","b","a","b","a","b"]))
375
+ assert_equal(["a","b",[["-"],["a","b"]]],
376
+ rule.parse(["a","b","-","a","b"]))
377
+ end
378
+
370
379
  def test_stackref1()
371
380
  buff = ["a","b","a"]
372
381
  stack = []
@@ -456,6 +465,25 @@ class TestTDParser < Test::Unit::TestCase
456
465
  assert_equal(0, rule.parse(buff))
457
466
  end
458
467
 
468
+ def test_condition1()
469
+ rule = condition{|m|m["n"]=20} - condition{|m|m["n"]} >> Proc.new{|x| x}
470
+ assert_equal([20,20], rule.parse([]))
471
+ end
472
+
473
+ def test_condition2()
474
+ rule = condition{|m|m["n"]=20} - condition{|m|m["n"]>20} >> Proc.new{|x| x}
475
+ assert_equal(nil, rule.parse([]))
476
+ end
477
+
478
+ def test_condition3()
479
+ rule =
480
+ condition{|m|m["n"]=20} -
481
+ (token("a") - condition{|m|m["n"]>20} |
482
+ token("b") - condition{|m|m["n"]>10}) >> Proc.new{|x| x}
483
+ assert_equal(nil, rule.parse(["a"]))
484
+ assert_equal([20,"b",true], rule.parse(["b"]))
485
+ end
486
+
459
487
  def test_rule1()
460
488
  expr = "1 + 2"
461
489
  assert_equal(3, @calc.parse(expr))
metadata CHANGED
@@ -3,8 +3,8 @@ rubygems_version: 0.9.0
3
3
  specification_version: 1
4
4
  name: tdp4r
5
5
  version: !ruby/object:Gem::Version
6
- version: 1.4.0
7
- date: 2006-07-22 00:00:00 +09:00
6
+ version: 1.4.1
7
+ date: 2006-07-23 00:00:00 +09:00
8
8
  summary: TDP4R is a top-down parser library that consists of parser combinators and utility functions.
9
9
  require_paths:
10
10
  - lib
@@ -29,8 +29,10 @@ post_install_message:
29
29
  authors:
30
30
  - Takaaki Tateishi
31
31
  files:
32
+ - lib/tdp
32
33
  - lib/tdp.rb
33
- - lib/tdputils.rb
34
+ - lib/tdp/utils.rb
35
+ - lib/tdp/xml.rb
34
36
  - samples/sample1.rb
35
37
  - samples/sample2.rb
36
38
  - samples/sample3.rb