tdp4r 1.4.0 → 1.4.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/doc/guide.txt CHANGED
@@ -146,5 +146,5 @@ Parser Combinators
146
146
  StringTokenizer
147
147
  -----------------
148
148
  There is a simple tokenizer called TDPUtils::StringTokenizer in the library
149
- "tdputils".
149
+ "tdp/utils".
150
150
  (See MyParser#parse in sample2.rb)
data/lib/tdp.rb CHANGED
@@ -88,7 +88,6 @@ module TDParser
88
88
 
89
89
  def recover(buff, ts)
90
90
  buff.each{|b| ts.unshift(b)}
91
- buff.clear()
92
91
  end
93
92
  end
94
93
  include BufferUtils
@@ -169,7 +168,8 @@ module TDParser
169
168
  NegativeParser.new(self)
170
169
  end
171
170
 
172
- def parse(tokens=nil, &blk)
171
+ def parse(tokens=nil, buff=nil, &blk)
172
+ buff ||= TokenBuffer.new()
173
173
  if( blk.nil? )
174
174
  if( tokens.respond_to?(:shift) && tokens.respond_to?(:unshift) )
175
175
  @tokens = tokens
@@ -181,7 +181,7 @@ module TDParser
181
181
  else
182
182
  @tokens = TokenGenerator.new(&blk)
183
183
  end
184
- r = call(@tokens, TokenBuffer.new())
184
+ r = call(@tokens, buff)
185
185
  if( r.nil? )
186
186
  nil
187
187
  else
@@ -685,15 +685,15 @@ module TDParser
685
685
  end
686
686
 
687
687
  class BackrefParser < ReferenceParser
688
- attr_reader :reference, :equality
688
+ attr_reader :label, :equality
689
689
 
690
- def initialize(ref, eqsym)
691
- @reference = ref
690
+ def initialize(label, eqsym)
691
+ @label = label
692
692
  @equality = eqsym
693
693
  end
694
694
 
695
695
  def call(tokens, buff)
696
- ys = buff.map[@reference]
696
+ ys = buff.map[@label]
697
697
  if (ys.nil? || ys.empty?)
698
698
  nil
699
699
  else
@@ -702,12 +702,12 @@ module TDParser
702
702
  end
703
703
 
704
704
  def to_s()
705
- "<backref:#{@reference}>"
705
+ "<backref:#{@label}>"
706
706
  end
707
707
 
708
708
  def ==(r)
709
709
  super(r) &&
710
- (@reference == r.reference) &&
710
+ (@label == r.label) &&
711
711
  (@equality == r.equality)
712
712
  end
713
713
  end
@@ -735,11 +735,40 @@ module TDParser
735
735
 
736
736
  def ==(r)
737
737
  super(r) &&
738
- (@stack == r.stack) &&
738
+ (@stack.object_id == r.stack.object_id) &&
739
739
  (@equality == r.equality)
740
740
  end
741
741
  end
742
742
 
743
+ class ConditionParser < Parser  # Parser whose outcome depends only on a stored block; call() never reads tokens.
744
+ attr_reader :condition
745
+
746
+ def initialize(&condition)  # condition: block later invoked with buff.map
747
+ @condition = condition
748
+ end
749
+
750
+ def call(tokens, buff)  # Returns Sequence[res] when the block's result res is truthy, nil otherwise.
751
+ if (res = @condition.call(buff.map))
752
+ Sequence[res]
753
+ else
754
+ nil
755
+ end
756
+ end
757
+
758
+ def to_s()
759
+ "<condition:#{@condition}>"
760
+ end
761
+
762
+ def ==(r)  # Equal when super(r) holds and both wrap equal condition objects.
763
+ super(r) &&
764
+ (@condition == r.condition)
765
+ end
766
+
767
+ def same?(r)  # Always false, whatever r is.
768
+ false
769
+ end
770
+ end
771
+
743
772
  class StateParser < Parser
744
773
  attr_reader :state
745
774
 
@@ -809,6 +838,11 @@ module TDParser
809
838
  end
810
839
  alias fail fail_rule
811
840
 
841
+ def condition_rule(&b)  # Wrap the given block in a new ConditionParser.
842
+ ConditionParser.new(&b)
843
+ end
844
+ alias condition condition_rule  # short name used by the tests: condition{|m| ... }
845
+
812
846
  def leftrec(*rules, &act)
813
847
  f = Proc.new{|x|
814
848
  x[1].inject(x[0]){|acc,y|
File without changes
data/lib/tdp/xml.rb ADDED
@@ -0,0 +1,184 @@
1
+ require 'tdp'
2
+ require 'rexml/parsers/pullparser'
3
+ require 'rexml/document'
4
+
5
+ module TDPXML
6
+ module XMLParser
7
+ class XMLTokenGenerator < TDParser::TokenGenerator
8
+ def initialize(src)
9
+ @xparser = REXML::Parsers::BaseParser.new(src)
10
+ super(){|g|
11
+ while(@xparser.has_next?)
12
+ e = @xparser.pull()
13
+ g.yield(e)
14
+ end
15
+ }
16
+ end
17
+ end
18
+
19
+ class XArray < Array
20
+ def ===(ary)
21
+ if super(ary)
22
+ return true
23
+ end
24
+ if !ary.is_a?(Array)
25
+ return false
26
+ end
27
+ each_with_index{|v,idx|
28
+ case ary[idx]
29
+ when v
30
+ else
31
+ return false
32
+ end
33
+ }
34
+ true
35
+ end
36
+ end
37
+
38
+ class XHash < Hash
39
+ def ===(h)
40
+ if super(h)
41
+ return true
42
+ end
43
+ if !h.is_a?(Hash)
44
+ return false
45
+ end
46
+ each{|k,v|
47
+ case h[k]
48
+ when v
49
+ else
50
+ return false
51
+ end
52
+ }
53
+ true
54
+ end
55
+ end
56
+
57
+ def start_element(name=String)
58
+ token(XArray[:start_element, name, Hash])
59
+ end
60
+
61
+ def end_element(name=String)
62
+ token(XArray[:end_element, name])
63
+ end
64
+
65
+ def element(elem=String, &inner)
66
+ if inner
67
+ crule = inner.call()|empty()
68
+ else
69
+ crule = empty()
70
+ end
71
+ start_element(elem) - crule - end_element(elem) >> Proc.new{|x|
72
+ name = x[0][1]
73
+ attrs = x[0][2]
74
+ node = REXML::Element.new()
75
+ node.name = name
76
+ node.attributes.merge!(attrs)
77
+ [node,x[1]]
78
+ }
79
+ end
80
+
81
+ def text(match=String)
82
+ token(XArray[:text, match]) >> Proc.new{|x|
83
+ REXML::Text.new(x[0][1])
84
+ }
85
+ end
86
+
87
+ def pi()
88
+ token(XArray[:processing_instruction, String, String]) >> Proc.new{|x|
89
+ REXML::Instruction.new(x[0][1],x[0][2])
90
+ }
91
+ end
92
+
93
+ def cdata(match=String)
94
+ token(XArray[:cdata, match]) >> Proc.new{|x|
95
+ REXML::CData.new(x[0][1])
96
+ }
97
+ end
98
+
99
+ def comment(match=String)
100
+ token(XArray[:comment, match]) >> Proc.new{|x|
101
+ REXML::Comment.new(x[0][1])
102
+ }
103
+ end
104
+
105
+ def xmldecl()
106
+ token(XArray[:xmldecl]) >> Proc.new{|x|
107
+ REXML::XMLDecl.new(x[0][1],x[0][2], x[0][3])
108
+ }
109
+ end
110
+
111
+ def start_doctype(name=String)
112
+ token(XArray[:start_doctype, name])
113
+ end
114
+
115
+ def end_doctype()
116
+ token(XArray[:end_doctype])
117
+ end
118
+
119
+ def doctype(name=String, &inner)
120
+ if (inner)
121
+ crule = inner.call()|empty()
122
+ else
123
+ crule = empty()
124
+ end
125
+ start_doctype(name) - crule - end_doctype() >> Proc.new{|x|
126
+ node = REXML::DocType.new(x[0][1..-1])
127
+ [node, x[1]]
128
+ }
129
+ end
130
+
131
+ def externalentity(entity=String)
132
+ token(XArray[:externalentity, entity]) >> Proc.new{|x|
133
+ REXML::ExternalEntity.new(x[0][1])
134
+ }
135
+ end
136
+
137
+ def elementdecl(elem=String)
138
+ token(XArray[:elementdecl, elem]) >> Proc.new{|x|
139
+ REXML::ElementDecl.new(x[0][1])
140
+ }
141
+ end
142
+
143
+ def entitydecl(entity=String)
144
+ token(XArray[:entitydecl, elem]) >> Proc.new{|x|
145
+ REXML::Entity.new(x[0])
146
+ }
147
+ end
148
+
149
+ def attlistdecl(decl=String)
150
+ token(XArray[:attlistdecl]) >> Proc.new{|x|
151
+ REXML::AttlistDecl.new(x[0][1..-1])
152
+ }
153
+ end
154
+
155
+ def notationdecl(decl=String)
156
+ token(XArray[:notationdecl]) >> Proc.new{|x|
157
+ REXML::NotationDecl.new(*x[0][1..-1])
158
+ }
159
+ end
160
+
161
+ def any_node(&b)
162
+ (element(&b) | doctype(&b) | text() | pi() | cdata() |
163
+ comment() | xmldecl() | externalentity() | elementdecl() |
164
+ entitydecl() | attlistdecl() | notationdecl()) >> Proc.new{|x| x[2]}
165
+ end
166
+
167
+ def dom_constructor(&act)
168
+ Proc.new{|x|
169
+ node = x[0][0]
170
+ child = x[0][1]
171
+ if (child.is_a?(Array))
172
+ child.each{|c| node.add(c) }
173
+ else
174
+ node.add(child)
175
+ end
176
+ if (act)
177
+ act[node]
178
+ else
179
+ node
180
+ end
181
+ }
182
+ end
183
+ end
184
+ end
data/samples/sample2.rb CHANGED
@@ -2,7 +2,7 @@
2
2
  # parsing four arithmetic expressions with tdputils.
3
3
 
4
4
  require 'tdp'
5
- require 'tdputils'
5
+ require 'tdp/utils'
6
6
 
7
7
  class MyParser
8
8
  include TDParser
data/samples/sample3.rb CHANGED
@@ -2,7 +2,7 @@
2
2
  # parsing four arithmetic expressions with tdputils.
3
3
 
4
4
  require 'tdp'
5
- require 'tdputils'
5
+ require 'tdp/utils'
6
6
 
7
7
  class MyParser
8
8
  include TDParser
data/samples/sample4.rb CHANGED
@@ -2,7 +2,7 @@
2
2
  # caching constructed grammars
3
3
 
4
4
  require 'tdp'
5
- require 'tdputils'
5
+ require 'tdp/utils'
6
6
 
7
7
  class MyParser
8
8
  include TDParser
data/samples/sample5.rb CHANGED
@@ -2,7 +2,7 @@
2
2
  # writing grammars in the substitution style.
3
3
 
4
4
  require 'tdp'
5
- require 'tdputils'
5
+ require 'tdp/utils'
6
6
 
7
7
  parser = TDParser.define{|g|
8
8
  g.plus = "+"
@@ -2,7 +2,7 @@
2
2
  # writing grammars using chainl().
3
3
 
4
4
  require 'tdp'
5
- require 'tdputils'
5
+ require 'tdp/utils'
6
6
 
7
7
  parser = TDParser.define{|g|
8
8
  g.plus = "+"
@@ -33,13 +33,14 @@ puts(parser.rule1.to_s)
33
33
  puts(parser.rule2.to_s)
34
34
  puts(parser.rule3.to_s)
35
35
 
36
+ N = 10
36
37
  Benchmark.bm{|x|
37
38
  buff = ["1","2"]
38
39
  b = ["b"]
39
40
  for i in [5,10,15]
40
41
  puts("--")
41
- x.report{ $r1 = parser.rule1.parse(buff*i + b*i) }
42
- x.report{ $r2 = parser.rule2.parse(buff*i + b*i) }
43
- x.report{ $r3 = parser.rule3.parse(buff*i + b*i) }
42
+ x.report{ N.times{ $r1 = parser.rule1.parse(buff*i + b*i) } }
43
+ x.report{ N.times{ $r2 = parser.rule2.parse(buff*i + b*i) } }
44
+ x.report{ N.times{ $r3 = parser.rule3.parse(buff*i + b*i) } }
44
45
  end
45
46
  }
@@ -1,123 +1,52 @@
1
1
  require 'tdp'
2
- require 'tdputils'
3
- require 'rexml/parsers/pullparser'
4
- require 'rexml/document'
5
-
6
- class Array
7
- def ===(ary)
8
- if super(ary)
9
- return true
10
- end
11
- if !ary.is_a?(Array)
12
- return false
13
- end
14
- each_with_index{|v,idx|
15
- case ary[idx]
16
- when v
17
- else
18
- return false
19
- end
20
- }
21
- true
22
- end
23
- end
24
-
25
- class Hash
26
- def ===(h)
27
- if super(h)
28
- return true
29
- end
30
- if !h.is_a?(Hash)
31
- return false
32
- end
33
- each{|k,v|
34
- case h[k]
35
- when v
36
- else
37
- return false
38
- end
39
- }
40
- true
41
- end
42
- end
43
-
44
- module XMLParser
45
- def xml_stag(name)
46
- token([:start_element, name, Hash])
47
- end
48
- alias stag xml_stag
49
-
50
- def xml_etag(name)
51
- token([:end_element, name])
52
- end
53
- alias etag xml_etag
54
-
55
- def dom_element(elem, &inner)
56
- stag(elem) - (inner.call()|empty()) - etag(elem)
57
- end
58
- alias element dom_element
59
-
60
- def dom_filter(&act)
61
- Proc.new{|x|
62
- name = x[0][1]
63
- attrs = x[0][2]
64
- node = REXML::Element.new()
65
- node.name = name
66
- node.attributes.merge!(attrs)
67
- act[node,x[1]]
68
- }
69
- end
70
- alias filter dom_filter
71
-
72
- def dom_construct(&act)
73
- dom_filter{|node,child|
74
- if (child.is_a?(Array))
75
- child.each{|c| node.add(c) }
76
- else
77
- node.add(child)
78
- end
79
- if (act)
80
- act[node]
81
- else
82
- node
83
- end
84
- }
85
- end
86
- alias construct dom_construct
87
- end
2
+ require 'tdp/utils'
3
+ require 'tdp/xml'
88
4
 
89
5
  translator = TDParser.define{|g|
90
- extend XMLParser
6
+ extend TDPXML::XMLParser
91
7
 
92
8
  g.xml =
93
9
  element("a"){
94
10
  element("b"){
95
- g.xml*0 >> Proc.new{|x| x[0].collect{|y| y[0]} }
96
- } >> construct{|node| node.name = "bar"; node }
97
- } >> construct{|node| node.name = "foo"; node } |
11
+ g.xmlseq
12
+ } >> dom_constructor{|node| node.children() }
13
+ } >> dom_constructor{|node| node.name = "AB"; node } |
98
14
  element(String){
99
- g.xml*0 >> Proc.new{|x| x[0].collect{|y| y[0]} }
100
- } >> construct{|node|
101
- node.name = node.name.upcase()
102
- node
103
- } |
104
- ~etag(String) - any() - g.xml >> Proc.new{|x| x[2]}
15
+ g.xmlseq
16
+ } >> dom_constructor{|node|
17
+ node.name = node.name.upcase()
18
+ node
19
+ } |
20
+ doctype{
21
+ g.xmlseq
22
+ } >> dom_constructor{|node| node} |
23
+ text >> Proc.new{|x| x[0]} |
24
+ elementdecl >> Proc.new{|x| x[0]} |
25
+ xmldecl >> Proc.new{|x| x[0]} |
26
+ comment >> Proc.new{|x| x[0]} |
27
+ any_node() >> Proc.new{|x| x[0] }
28
+
29
+ g.xmlseq =  # zero or more g.xml matches, keeping the first item of each match
30
+ g.xml()*0 >> Proc.new{|x| x[0].collect{|y| y[0]}}
105
31
 
106
32
  def translate(src)
107
- xparser = REXML::Parsers::BaseParser.new(src)
108
- xml.parse{|g|
109
- while(xparser.has_next?)
110
- g.yield(xparser.pull())
111
- end
112
- }
33
+ gen = TDPXML::XMLParser::XMLTokenGenerator.new(src)
34
+ xmlseq.parse(gen)
113
35
  end
114
36
  }
115
37
 
116
- puts(translator.translate(<<EOS))
38
+ seq = translator.translate(<<EOS)
117
39
  <?xml version="1.0" ?>
40
+ <!DOCTYPE body [
41
+ <!ELEMENT body (#PCDATA, strong*)>
42
+ <!ELEMENT strong (#PCDATA)>
43
+ ]>
118
44
  <list>
45
+ <!-- comment -->
119
46
  <a><b><c>hoge</c></b></a>
120
47
  <b>b?</b>
121
48
  </list>
122
49
  EOS
123
- # => "<LIST><foo><bar><C></C></bar></foo><B></B></LIST>"
50
+ doc = REXML::Document.new()
51
+ seq.each{|x| doc.add(x) }
52
+ puts(doc)
data/test/test_tdp.rb CHANGED
@@ -1,6 +1,7 @@
1
1
  require 'test/unit'
2
2
  require 'tdp'
3
- require 'tdputils'
3
+ require 'tdp/utils'
4
+ require 'tdp/xml'
4
5
 
5
6
  class Tokens
6
7
  include Enumerable
@@ -367,6 +368,14 @@ class TestTDParser < Test::Unit::TestCase
367
368
  assert_equal(["a","b",[["a","b"],["a","b"]]], rule.parse(buff))
368
369
  end
369
370
 
371
+ def test_backref4()  # backref(:x) inside a repeated choice: each repetition is either "-" or a repeat of the pair labelled :x
372
+ rule = (token(/\w/) - token(/\w/))/:x - (token("-")|backref(:x))*0 >> proc{|x| x}
373
+ assert_equal(["a","b",[["a","b"],["a","b"]]],
374
+ rule.parse(["a","b","a","b","a","b"]))
375
+ assert_equal(["a","b",[["-"],["a","b"]]],
376
+ rule.parse(["a","b","-","a","b"]))
377
+ end
378
+
370
379
  def test_stackref1()
371
380
  buff = ["a","b","a"]
372
381
  stack = []
@@ -456,6 +465,25 @@ class TestTDParser < Test::Unit::TestCase
456
465
  assert_equal(0, rule.parse(buff))
457
466
  end
458
467
 
468
+ def test_condition1()  # the first condition stores 20 under "n"; the second reads it back, so both yield 20
469
+ rule = condition{|m|m["n"]=20} - condition{|m|m["n"]} >> Proc.new{|x| x}
470
+ assert_equal([20,20], rule.parse([]))
471
+ end
472
+
473
+ def test_condition2()  # a false condition result (20 > 20) makes the whole rule fail, so parse returns nil
474
+ rule = condition{|m|m["n"]=20} - condition{|m|m["n"]>20} >> Proc.new{|x| x}
475
+ assert_equal(nil, rule.parse([]))
476
+ end
477
+
478
+ def test_condition3()  # conditions inside a choice: the "a" branch needs n > 20 (false), the "b" branch needs n > 10 (true)
479
+ rule =
480
+ condition{|m|m["n"]=20} -
481
+ (token("a") - condition{|m|m["n"]>20} |
482
+ token("b") - condition{|m|m["n"]>10}) >> Proc.new{|x| x}
483
+ assert_equal(nil, rule.parse(["a"]))
484
+ assert_equal([20,"b",true], rule.parse(["b"]))
485
+ end
486
+
459
487
  def test_rule1()
460
488
  expr = "1 + 2"
461
489
  assert_equal(3, @calc.parse(expr))
metadata CHANGED
@@ -3,8 +3,8 @@ rubygems_version: 0.9.0
3
3
  specification_version: 1
4
4
  name: tdp4r
5
5
  version: !ruby/object:Gem::Version
6
- version: 1.4.0
7
- date: 2006-07-22 00:00:00 +09:00
6
+ version: 1.4.1
7
+ date: 2006-07-23 00:00:00 +09:00
8
8
  summary: TDP4R is a top-down parser library that consists of parser combinators and utility functions.
9
9
  require_paths:
10
10
  - lib
@@ -29,8 +29,10 @@ post_install_message:
29
29
  authors:
30
30
  - Takaaki Tateishi
31
31
  files:
32
+ - lib/tdp
32
33
  - lib/tdp.rb
33
- - lib/tdputils.rb
34
+ - lib/tdp/utils.rb
35
+ - lib/tdp/xml.rb
34
36
  - samples/sample1.rb
35
37
  - samples/sample2.rb
36
38
  - samples/sample3.rb