tdp4r 1.3.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/doc/faq.txt +37 -0
- data/doc/guide.txt +150 -0
- data/lib/tdp.rb +463 -0
- data/lib/tdputils.rb +89 -0
- data/samples/sample1.rb +32 -0
- data/samples/sample2.rb +55 -0
- data/samples/sample3.rb +57 -0
- data/samples/sample4.rb +75 -0
- data/samples/sample5.rb +53 -0
- data/samples/sample6.rb +44 -0
- data/samples/sample7.rb +79 -0
- data/samples/sample_list.rb +21 -0
- data/test/test_tdp.rb +472 -0
- metadata +58 -0
data/lib/tdputils.rb
ADDED
@@ -0,0 +1,89 @@
|
|
1
|
+
require 'tdp'
|
2
|
+
|
3
|
+
module TDPUtils
|
4
|
+
class Token
|
5
|
+
attr_accessor :kind, :value
|
6
|
+
def initialize(kind, value)
|
7
|
+
@kind = kind
|
8
|
+
@value = value
|
9
|
+
end
|
10
|
+
|
11
|
+
def ==(other)
|
12
|
+
(other.class == self.class) &&
|
13
|
+
(@kind == other.kind) &&
|
14
|
+
(@value == other.value)
|
15
|
+
end
|
16
|
+
|
17
|
+
def ===(other)
|
18
|
+
super(other) || (@kind == other)
|
19
|
+
end
|
20
|
+
|
21
|
+
def =~(other)
|
22
|
+
@kind == other
|
23
|
+
end
|
24
|
+
end
|
25
|
+
|
26
|
+
class BasicStringTokenizer
|
27
|
+
def self.[](rule, ignore=nil)
|
28
|
+
self.new(rule, ignore)
|
29
|
+
end
|
30
|
+
|
31
|
+
def initialize(rule, ignore=nil)
|
32
|
+
require("strscan")
|
33
|
+
@rule = rule
|
34
|
+
@scan_pattern = Regexp.new(@rule.keys.join("|"))
|
35
|
+
@ignore_pattern = ignore
|
36
|
+
end
|
37
|
+
|
38
|
+
def generate(str)
|
39
|
+
scanner = StringScanner.new(str)
|
40
|
+
TDParser::TokenGenerator.new{|x|
|
41
|
+
while(!scanner.empty?)
|
42
|
+
if (@ignore_pattern)
|
43
|
+
while(scanner.scan(@ignore_pattern))
|
44
|
+
end
|
45
|
+
end
|
46
|
+
sstr = scanner.scan(@scan_pattern)
|
47
|
+
if (sstr)
|
48
|
+
@rule.each{|reg,kind|
|
49
|
+
if (reg =~ sstr)
|
50
|
+
x.yield(Token.new(kind, sstr))
|
51
|
+
yielded = true
|
52
|
+
break
|
53
|
+
end
|
54
|
+
}
|
55
|
+
else
|
56
|
+
c = scanner.scan(/./)
|
57
|
+
x.yield(c)
|
58
|
+
end
|
59
|
+
end
|
60
|
+
}
|
61
|
+
end
|
62
|
+
end
|
63
|
+
|
64
|
+
class StringTokenizer < BasicStringTokenizer
|
65
|
+
def initialize(rule, ignore=nil)
|
66
|
+
super(rule, ignore || /\s+/)
|
67
|
+
end
|
68
|
+
end
|
69
|
+
|
70
|
+
class WaitingTokenGenerator < TDParser::TokenGenerator
|
71
|
+
def initialize(*args)
|
72
|
+
super(*args)
|
73
|
+
@terminated = false
|
74
|
+
end
|
75
|
+
|
76
|
+
def terminate()
|
77
|
+
@terminated = true
|
78
|
+
end
|
79
|
+
|
80
|
+
def shift()
|
81
|
+
if (@terminated)
|
82
|
+
return nil
|
83
|
+
end
|
84
|
+
while(empty?())
|
85
|
+
end
|
86
|
+
super()
|
87
|
+
end
|
88
|
+
end
|
89
|
+
end
|
data/samples/sample1.rb
ADDED
@@ -0,0 +1,32 @@
|
|
1
|
+
# -*- ruby -*-
# adder-substractor

require 'tdp'

# Minimal demo parser: evaluates chains of "+" and "-" over integers.
class MyParser
  include TDParser

  # expr ::= NUM (("+" | "-") NUM)*
  def expr
    token(/\d+/) - ((token("+")|token("-")) - token(/\d+/))*0 >> proc{|seq|
      total = seq[0].to_i
      # Fold each (operator, operand) pair into the running total.
      seq[1].inject(total){|sum, pair|
        case pair[0]
        when "+"
          sum + pair[1].to_i
        when "-"
          sum - pair[1].to_i
        end
      }
    }
  end

  # Splits on whitespace and the four operators, then parses.
  def parse(str)
    tokens = str.split(/(?:\s+)|([\+\-\*\/])/).select{|x| x != ""}
    expr.parse(tokens)
  end
end

parser = MyParser.new
puts("1+10 = " + parser.parse("1+10").to_s())
puts("2-1-20 = " + parser.parse("2 - 1 - 20").to_s())
puts("1+2-3 = " + parser.parse("1 + 2 - 3").to_s())
|
data/samples/sample2.rb
ADDED
@@ -0,0 +1,55 @@
|
|
1
|
+
# -*- ruby -*-
# parsing four arithmetic expressions with tdputils.

require 'tdp'
require 'tdputils'

# Evaluates "+", "-", "*", "/" expressions with the usual precedence,
# using TDPUtils::StringTokenizer to lex the input.
class MyParser
  include TDParser
  include TDPUtils

  # expr1 ::= expr2 (("+" | "-") expr2)*   -- lowest precedence
  def expr1
    rule(:expr2) - ((token("+")|token("-")) - rule(:expr2))*0 >> proc{|seq|
      # Fold each (operator, operand) pair into the running value.
      seq[1].inject(seq[0]){|acc, pair|
        case pair[0]
        when "+"
          acc + pair[1]
        when "-"
          acc - pair[1]
        end
      }
    }
  end

  # expr2 ::= prim (("*" | "/") prim)*   -- binds tighter than expr1
  def expr2
    rule(:prim) - ((token("*")|token("/")) - rule(:prim))*0 >> proc{|seq|
      seq[1].inject(seq[0]){|acc, pair|
        case pair[0]
        when "*"
          acc * pair[1]
        when "/"
          acc / pair[1]
        end
      }
    }
  end

  # prim ::= INT | "(" expr1 ")"
  def prim
    token(:int) >> proc{|x| x[0].value.to_i } |
    token("(") - rule(:expr1) - token(")") >> proc{|x| x[1] }
  end

  # Lexes +str+ (integers tagged :int, decimals :real) and parses it.
  def parse(str)
    tokenizer = StringTokenizer[
      /\d+(?!\.\d)/ => :int,
      /\d+\.\d+/ => :real,
    ]
    expr1.parse(tokenizer.generate(str))
  end
end

parser = MyParser.new
puts("1+10 = " + parser.parse("1+10").to_s())
puts("2-1*20+18 = " + parser.parse("2 - 1 * 20 + 18").to_s())
puts("2-(1-20) = " + parser.parse("2 - (1 - 20)").to_s())
puts("1+2-3 = " + parser.parse("1 + 2 - 3").to_s())
|
data/samples/sample3.rb
ADDED
@@ -0,0 +1,57 @@
|
|
1
|
+
# -*- ruby -*-
# parsing four arithmetic expressions with tdputils.

require 'tdp'
require 'tdputils'

# Variant of the expression parser that evaluates through side effects:
# each repetition's semantic action updates the captured local +n+ in
# place, instead of folding the collected results afterwards.
class MyParser
  include TDParser
  include TDPUtils

  # expr1 ::= expr2 (("+" | "-") expr2)*
  def expr1
    n = nil;
    # The first operand initializes the accumulator shared by closures.
    (rule(:expr2) >> proc{|x| n = x[0] }) -
    ((token("+")|token("-")) - rule(:expr2) >> proc{|x|
      case x[0]
      when "+"
        n += x[1]
      when "-"
        n -= x[1]
      end
      n
    })*0 >> proc{ n }
  end

  # expr2 ::= prim (("*" | "/") prim)*  -- same side-effect scheme.
  def expr2
    n = nil;
    (rule(:prim) >> proc{|x| n = x[0] }) -
    ((token("*")|token("/")) - rule(:prim) >> proc{|x|
      case x[0]
      when "*"
        n *= x[1]
      when "/"
        n /= x[1]
      end
      n
    })*0 >> proc{ n }
  end

  # prim ::= INT | "(" expr1 ")"
  def prim
    token(:int) >> proc{|x| x[0].value.to_i } |
    token("(") - rule(:expr1) - token(")") >> proc{|x| x[1] }
  end

  # Lexes +str+ (integers tagged :int, decimals :real) and parses it.
  def parse(str)
    tokenizer = StringTokenizer[
      /\d+(?!\.\d)/ => :int,
      /\d+\.\d+/ => :real,
    ]
    expr1.parse(tokenizer.generate(str))
  end
end

parser = MyParser.new
puts("1+10 = " + parser.parse("1+10").to_s())
puts("2-1*20+18 = " + parser.parse("2 - 1 * 20 + 18").to_s())
puts("2-(1-20) = " + parser.parse("2 - (1 - 20)").to_s())
puts("1+2-3 = " + parser.parse("1 + 2 - 3").to_s())
|
data/samples/sample4.rb
ADDED
@@ -0,0 +1,75 @@
|
|
1
|
+
# -*- ruby -*-
# caching constructed grammars

require 'tdp'
require 'tdputils'

# Baseline parser: every call to expr1/expr2/prim rebuilds the rule.
class MyParser
  include TDParser
  include TDPUtils

  # expr1 ::= expr2 (("+" | "-") expr2)*
  def expr1
    rule(:expr2) - ((token("+")|token("-")) - rule(:expr2))*0 >> proc{|x|
      x[1].inject(x[0]){|n,y|
        case y[0]
        when "+"
          n + y[1]
        when "-"
          n - y[1]
        end
      }
    }
  end

  # expr2 ::= prim (("*" | "/") prim)*
  def expr2
    rule(:prim) - ((token("*")|token("/")) - rule(:prim))*0 >> proc{|x|
      # Fix: removed a dead "n = x[0]" that was immediately shadowed
      # by the inject block parameter.
      x[1].inject(x[0]){|n,y|
        case y[0]
        when "*"
          n * y[1]
        when "/"
          n / y[1]
        end
      }
    }
  end

  # prim ::= INT | "(" expr1 ")"
  def prim
    token(:int) >> proc{|x| x[0].value.to_i } |
    token("(") - rule(:expr1) - token(")") >> proc{|x| x[1] }
  end

  def parse(str)
    tokenizer = StringTokenizer[
      /\d+(?!\.\d)/ => :int,
      /\d+\.\d+/ => :real,
    ]
    expr1.parse(tokenizer.generate(str))
  end
end

# FastParser memoizes each grammar rule so repeated parse() calls reuse
# the constructed rule objects instead of rebuilding them.
class FastParser < MyParser
  def expr1
    @expr1 ||= super()
  end

  def expr2
    @expr2 ||= super()
  end

  def prim
    @prim ||= super()
  end

  # Splits the input itself (raw string tokens) instead of using the
  # tokenizer.
  def parse(str)
    tokens = str.split(/(?:\s+)|([\(\)\+\-\*\/])/).select{|x| x != ""}
    expr1.parse(tokens)
  end
end

parser = MyParser.new
puts("1+10 = " + parser.parse("1+10").to_s())
puts("2-1*20+18 = " + parser.parse("2 - 1 * 20 + 18").to_s())
puts("2-(1-20) = " + parser.parse("2 - (1 - 20)").to_s())
puts("1+2-3 = " + parser.parse("1 + 2 - 3").to_s())
|
data/samples/sample5.rb
ADDED
@@ -0,0 +1,53 @@
|
|
1
|
+
# -*- ruby -*-
# writing grammars in the substitution style.

require 'tdp'
require 'tdputils'

# The grammar is assembled by assigning rules to attributes of +g+;
# plain strings assigned this way become token rules.
parser = TDParser.define{|g|
  g.plus = "+"
  g.minus = "-"
  g.mult = "*"
  g.div = "/"

  # expr1 ::= expr2 ((plus | minus) expr2)*
  g.expr1 =
    g.expr2 - ((g.plus|g.minus) - g.expr2)*0 >> proc{|seq|
      seq[1].inject(seq[0]){|acc, pair|
        case pair[0]
        when "+"
          acc + pair[1]
        when "-"
          acc - pair[1]
        end
      }
    }

  # expr2 ::= prim ((mult | div) prim)*
  g.expr2 =
    g.prim - ((g.mult|g.div) - g.prim)*0 >> proc{|seq|
      seq[1].inject(seq[0]){|acc, pair|
        case pair[0]
        when "*"
          acc * pair[1]
        when "/"
          acc / pair[1]
        end
      }
    }

  # prim ::= INT | "(" expr1 ")"
  g.prim =
    g.token(:int) >> proc{|x| x[0].value.to_i } |
    g.token("(") - g.expr1 - g.token(")") >> proc{|x| x[1] }

  # Lexes +str+ and parses it with the top-level rule.
  def parse(str)
    tokenizer = TDPUtils::StringTokenizer[
      /\d+(?!\.\d)/ => :int,
      /\d+\.\d+/ => :real,
    ]
    expr1.parse(tokenizer.generate(str))
  end
}

puts("1+10 = " + parser.parse("1+10").to_s())
puts("2-1*20+18 = " + parser.parse("2 - 1 * 20 + 18").to_s())
puts("2-(1-20) = " + parser.parse("2 - (1 - 20)").to_s())
puts("1+2-3 = " + parser.parse("1 + 2 - 3").to_s())
|
data/samples/sample6.rb
ADDED
@@ -0,0 +1,44 @@
|
|
1
|
+
# -*- ruby -*-
# writing grammars using chainl().

require 'tdp'
require 'tdputils'

parser = TDParser.define{|g|
  g.plus = "+"
  g.minus = "-"
  g.mult = "*"
  g.div = "/"

  # chainl(operand, ops, ...) builds left-associative operator chains;
  # operator sets are listed from highest to lowest precedence, so
  # mult/div bind tighter than plus/minus here.
  g.expr1 =
    chainl(prim, mult|div, plus|minus){|x|
      # x is one reduction step: [left, operator, right].
      case x[1]
      when "+"
        x[0] + x[2]
      when "-"
        x[0] - x[2]
      when "*"
        x[0] * x[2]
      when "/"
        x[0] / x[2]
      end
    }

  # prim ::= INT | "(" expr1 ")"
  g.prim =
    token(:int) >> proc{|x| x[0].value.to_i } |
    token("(") - expr1 - token(")") >> proc{|x| x[1] }

  # Lexes +str+ (integers tagged :int, decimals :real) and parses it.
  def parse(str)
    tokenizer = TDPUtils::StringTokenizer[
      /\d+(?!\.\d)/ => :int,
      /\d+\.\d+/ => :real,
    ]
    expr1.parse(tokenizer.generate(str))
  end
}

puts("1 = " + parser.parse("1").to_s())
puts("1+10 = " + parser.parse("1+10").to_s())
puts("2-1*20+18 = " + parser.parse("2 - 1 * 20 + 18").to_s())
puts("2-(1-20) = " + parser.parse("2 - (1 - 20)").to_s())
puts("1+2-3 = " + parser.parse("1 + 2 - 3").to_s())
|
data/samples/sample7.rb
ADDED
@@ -0,0 +1,79 @@
|
|
1
|
+
require 'tdp'
|
2
|
+
require 'tdputils'
|
3
|
+
require 'rexml/parsers/pullparser'
|
4
|
+
|
5
|
+
# Extend Array's case-equality so an array pattern matches any array
# whose elements each case-match (===) the pattern's elements.  Extra
# trailing elements of the candidate are ignored (prefix match).
class Array
  def ===(other)
    # Plain equality still matches, as before.
    return true if super(other)
    return false unless other.is_a?(Array)
    each_with_index.all? { |pattern, idx|
      case other[idx]
      when pattern then true
      else false
      end
    }
  end
end
|
23
|
+
|
24
|
+
# Extend Hash's case-equality so a hash pattern matches any hash whose
# value for each pattern key case-matches (===) the pattern's value.
# Extra keys in the candidate are ignored.
class Hash
  def ===(other)
    # Plain equality still matches, as before.
    return true if super(other)
    return false unless other.is_a?(Hash)
    all? { |key, pattern|
      case other[key]
      when pattern then true
      else false
      end
    }
  end
end
|
42
|
+
|
43
|
+
# A demo translator built on TDParser: rewrites <a><b>...</b></a> pairs
# into <foo><bar>...</bar></foo>, uppercases every other element's tag
# names, and skips non-element events (text, declarations, ...).
# Relies on the Array/Hash === extensions above so token([...]) patterns
# match REXML pull-parser event arrays.
translator = TDParser.define{|g|
  g.xml =
    # <a><b> ... </b></a>  =>  <foo><bar>...</bar></foo>
    token([:start_element, "a", Hash]) -
    token([:start_element, "b", Hash]) -
    g.xml*0 -
    token([:end_element, "b"]) -
    token([:end_element, "a"]) >> Proc.new{|x|
      "<foo><bar>#{x[2]}</bar></foo>"
    } |
    # Any other element: uppercase its start and end tag names.
    token([:start_element, String, Hash]) -
    g.xml*0 -
    token([:end_element, String]) >> Proc.new{|x|
      stag = x[0][1].upcase()
      etag = x[2][1].upcase()
      "<#{stag}>#{x[1]}</#{etag}>"
    } |
    # Skip any event that is not an end_element, keeping what follows.
    ~token([:end_element, String]) -
    any() - g.xml >> Proc.new{|x| x[2]}

  # Pulls events from REXML's pull parser and feeds them to the grammar.
  def translate(src)
    xparser = REXML::Parsers::BaseParser.new(src)
    xml.parse{|g|
      while(xparser.has_next?)
        g.yield(xparser.pull())
      end
    }
  end
}

puts(translator.translate(<<EOS))
<?xml version="1.0" ?>
<list>
<a><b><c>hoge</c></b></a>
<b>b?</b>
</list>
EOS
# => "<LIST><foo><bar><C></C></bar></foo><B></B></LIST>"
|
@@ -0,0 +1,21 @@
|
|
1
|
+
require 'tdp'

# Parses nested parenthesized lists of words into nested Ruby arrays.
parser = TDParser.define{|g|
  g.lp = "("
  g.rp = ")"
  g.str = /\w+/

  # Note that "g.elem*1" is an iteration of a sequence consisting of
  # only "g.elem"; it is not an iteration of "g.elem" itself.
  g.list = g.lp - g.elem*1 - g.rp >> proc{|x| x[1].collect{|y| y[0]} }
  g.elem = (g.str | g.list) >> proc{|x| x[0]}

  # Tokenizes on whitespace and parentheses, then parses a list.
  def parse(str)
    tokens = str.split(/\s+|([\(\)])/).select{|s| s.size() > 0}
    list.parse(tokens)
  end
}

list = "(a (b c d) (e f g))"
r = parser.parse(list)
p r
|