tdparser 1.5.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/manifest.scm ADDED
@@ -0,0 +1 @@
1
+ (specifications->manifest (list "ruby@3.1" "ruby-rubocop"))
@@ -0,0 +1,36 @@
1
+ # frozen_string_literal: true
2
+
3
+ # -*- ruby -*-
4
+ # adder-subtractor
5
+
6
+ require 'tdparser'
7
+
8
+ class MyParser
9
+ include TDParser
10
+
11
+ def expr
12
+ (token(/\d+/) - (((token('+') | token('-')) - token(/\d+/)) * 0)) >> proc { |x|
13
+ n = x[0].to_i
14
+ x[1].inject(n) do |acc, i|
15
+ case i[0]
16
+ when '-'
17
+ acc - i[1].to_i
18
+ when '+'
19
+ acc + i[1].to_i
20
+ end
21
+ end
22
+ }
23
+ end
24
+
25
+ def parse(str)
26
+ tokens = str.split(%r{(?:\s+)|([+\-*/])}).reject { |x| x == '' }
27
+ expr.parse(tokens)
28
+ end
29
+ end
30
+
31
+ ENV.fetch('TEST', nil) and return
32
+
33
+ parser = MyParser.new
34
+ puts("1+10 = #{parser.parse('1+10')}")
35
+ puts("2-1-20 = #{parser.parse('2 - 1 - 20')}")
36
+ puts("1+2-3 = #{parser.parse('1 + 2 - 3')}")
@@ -0,0 +1,58 @@
1
+ # frozen_string_literal: true
2
+
3
+ # -*- ruby -*-
4
+ # parsing the four arithmetic operations with tdparser/utils.
5
+
6
+ require 'tdparser'
7
+ require 'tdparser/utils'
8
+
9
+ class Sample2Parser
10
+ include TDParser
11
+
12
+ def expr1
13
+ (rule(:expr2) - (((token('+') | token('-')) - rule(:expr2)) * 0)) >> proc { |x|
14
+ x[1].inject(x[0]) do |n, y|
15
+ case y[0]
16
+ when '+'
17
+ n + y[1]
18
+ when '-'
19
+ n - y[1]
20
+ end
21
+ end
22
+ }
23
+ end
24
+
25
+ def expr2
26
+ (rule(:prim) - (((token('*') | token('/')) - rule(:prim)) * 0)) >> proc { |x|
27
+ x[1].inject(x[0]) do |n, y|
28
+ case y[0]
29
+ when '*'
30
+ n * y[1]
31
+ when '/'
32
+ n / y[1]
33
+ end
34
+ end
35
+ }
36
+ end
37
+
38
+ def prim
39
+ (token(:int) >> proc { |x| x[0].value.to_i }) |
40
+ ((token('(') - rule(:expr1) - token(')')) >> proc { |x| x[1] })
41
+ end
42
+
43
+ def parse(str)
44
+ tokenizer = StringTokenizer[
45
+ /\d+(?!\.\d)/ => :int,
46
+ /\d+\.\d+/ => :real,
47
+ ]
48
+ expr1.parse(tokenizer.generate(str))
49
+ end
50
+ end
51
+
52
+ ENV.fetch('TEST', nil) and return
53
+
54
+ parser = Sample2Parser.new
55
+ puts("1+10 = #{parser.parse('1+10')}")
56
+ puts("2-1*20+18 = #{parser.parse('2 - 1 * 20 + 18')}")
57
+ puts("2-(1-20) = #{parser.parse('2 - (1 - 20)')}")
58
+ puts("1+2-3 = #{parser.parse('1 + 2 - 3')}")
@@ -0,0 +1,60 @@
1
+ # frozen_string_literal: true
2
+
3
+ # -*- ruby -*-
4
+ # parsing the four arithmetic operations with tdparser/utils.
5
+
6
+ require 'tdparser'
7
+ require 'tdparser/utils'
8
+
9
+ class Sample3Parser
10
+ include TDParser
11
+ # expr1 accumulates into the closure variable n: the first expr2 value seeds it, each ('+' | '-') expr2 pair updates it, and the final action returns n.
12
+ def expr1
13
+ n = nil
14
+ ((rule(:expr2) >> proc { |x| n = x[0] }) -
15
+ ((((token('+') | token('-')) - rule(:expr2)) >> proc { |x|
16
+ case x[0]
17
+ when '+'
18
+ n += x[1]
19
+ when '-'
20
+ n -= x[1]
21
+ end
22
+ n
23
+ }) * 0)) >> proc { n }
24
+ end
25
+ # expr2 uses the same closure-accumulator scheme as expr1, for ('*' | '/') pairs over prim.
26
+ def expr2
27
+ n = nil
28
+ ((rule(:prim) >> proc { |x| n = x[0] }) -
29
+ ((((token('*') | token('/')) - rule(:prim)) >> proc { |x|
30
+ case x[0]
31
+ when '*'
32
+ n *= x[1]
33
+ when '/'
34
+ n /= x[1]
35
+ end
36
+ n
37
+ }) * 0)) >> proc { n }
38
+ end
39
+ # prim yields the integer value of an :int token, or the middle element (the expr1 value) of a parenthesised group.
40
+ def prim
41
+ (token(:int) >> proc { |x| x[0].value.to_i }) |
42
+ ((token('(') - rule(:expr1) - token(')')) >> proc { |x| x[1] })
43
+ end
44
+ # Tokenizes str with a StringTokenizer (patterns labelled :int and :real) and parses the generated tokens with expr1.
45
+ def parse(str)
46
+ tokenizer = StringTokenizer[
47
+ /\d+(?!\.\d)/ => :int,
48
+ /\d+\.\d+/ => :real,
49
+ ]
50
+ expr1.parse(tokenizer.generate(str))
51
+ end
52
+ end
53
+
54
+ ENV.fetch('TEST', nil) and return
55
+
56
+ parser = Sample3Parser.new
57
+ puts("1+10 = #{parser.parse('1+10')}")
58
+ puts("2-1*20+18 = #{parser.parse('2 - 1 * 20 + 18')}")
59
+ puts("2-(1-20) = #{parser.parse('2 - (1 - 20)')}")
60
+ puts("1+2-3 = #{parser.parse('1 + 2 - 3')}")
@@ -0,0 +1,78 @@
1
+ # frozen_string_literal: true
2
+
3
+ # -*- ruby -*-
4
+ # caching constructed grammars
5
+
6
+ require 'tdparser'
7
+ require 'tdparser/utils'
8
+
9
+ class Sample4Parser
10
+ include TDParser
11
+
12
+ def expr1
13
+ (rule(:expr2) - (((token('+') | token('-')) - rule(:expr2)) * 0)) >> proc { |x|
14
+ x[1].inject(x[0]) do |n, y|
15
+ case y[0]
16
+ when '+'
17
+ n + y[1]
18
+ when '-'
19
+ n - y[1]
20
+ end
21
+ end
22
+ }
23
+ end
24
+
25
+ def expr2
26
+ (rule(:prim) - (((token('*') | token('/')) - rule(:prim)) * 0)) >> proc { |x|
27
+ n = x[0]
28
+ x[1].inject(x[0]) do |n, y|
29
+ case y[0]
30
+ when '*'
31
+ n * y[1]
32
+ when '/'
33
+ n / y[1]
34
+ end
35
+ end
36
+ }
37
+ end
38
+
39
+ def prim
40
+ (token(:int) >> proc { |x| x[0].value.to_i }) |
41
+ ((token('(') - rule(:expr1) - token(')')) >> proc { |x| x[1] })
42
+ end
43
+
44
+ def parse(str)
45
+ tokenizer = StringTokenizer[
46
+ /\d+(?!\.\d)/ => :int,
47
+ /\d+\.\d+/ => :real,
48
+ ]
49
+ expr1.parse(tokenizer.generate(str))
50
+ end
51
+ end
52
+
53
+ class FastParser < Sample4Parser # caches each inherited grammar rule in an instance variable (||=) so it is built once per instance
54
+ def expr1
55
+ @expr1 ||= super()
56
+ end
57
+
58
+ def expr2
59
+ @expr2 ||= super()
60
+ end
61
+
62
+ def prim
63
+ @prim ||= super()
64
+ end
65
+ # NOTE(review): this override feeds plain Strings from split, while the inherited prim matches token(:int) and calls x[0].value — confirm it still parses.
66
+ def parse(str)
67
+ tokens = str.split(%r{(?:\s+)|([()+\-*/])}).reject { |x| x == '' }
68
+ expr1.parse(tokens)
69
+ end
70
+ end
71
+
72
+ ENV.fetch('TEST', nil) and return
73
+
74
+ parser = Sample4Parser.new
75
+ puts("1+10 = #{parser.parse('1+10')}")
76
+ puts("2-1*20+18 = #{parser.parse('2 - 1 * 20 + 18')}")
77
+ puts("2-(1-20) = #{parser.parse('2 - (1 - 20)')}")
78
+ puts("1+2-3 = #{parser.parse('1 + 2 - 3')}")
@@ -0,0 +1,60 @@
1
+ # frozen_string_literal: true
2
+
3
+ # -*- ruby -*-
4
+ # writing grammars in the substitution style.
5
+
6
+ require 'tdparser'
7
+ require 'tdparser/utils'
8
+
9
+ parser = TDParser.define do |g| # grammar written by assigning rules on g (substitution style)
10
+ g.plus = '+'
11
+ g.minus = '-'
12
+ g.mult = '*'
13
+ g.div = '/'
14
+ # expr1: an expr2 value followed by repeated (plus | minus) expr2 pairs; the action folds them left to right.
15
+ g.expr1 =
16
+ (g.expr2 - (((g.plus | g.minus) - g.expr2) * 0)) >> proc { |x|
17
+ x[1].inject(x[0]) do |n, y|
18
+ case y[0]
19
+ when '+'
20
+ n + y[1]
21
+ when '-'
22
+ n - y[1]
23
+ end
24
+ end
25
+ }
26
+ # expr2: a prim value followed by repeated (mult | div) prim pairs; the action folds them left to right.
27
+ g.expr2 =
28
+ (g.prim - (((g.mult | g.div) - g.prim) * 0)) >> proc { |x|
29
+ x[1].inject(x[0]) do |n, y|
30
+ case y[0]
31
+ when '*'
32
+ n * y[1]
33
+ when '/'
34
+ n / y[1]
35
+ end
36
+ end
37
+ }
38
+ # prim: the integer value of an :int token, or the middle element (the expr1 value) of a parenthesised group.
39
+ g.prim =
40
+ (g.token(:int) >> proc { |x| x[0].value.to_i }) |
41
+ ((g.token('(') - g.expr1 - g.token(')')) >> proc { |x| x[1] })
42
+ # parse: tokenizes str and runs expr1; it is called as parser.parse further down, so it presumably becomes a method of the defined grammar object — confirm.
43
+ def parse(str)
44
+ tokenizer = TDParser::StringTokenizer[
45
+ /\d+(?!\.\d)/ => :int,
46
+ /\d+\.\d+/ => :real,
47
+ ]
48
+ expr1.parse(tokenizer.generate(str))
49
+ end
50
+ end
51
+
52
+ if ENV['TEST']
53
+ Sample5Parser = parser
54
+ return
55
+ end
56
+
57
+ puts("1+10 = #{parser.parse('1+10')}")
58
+ puts("2-1*20+18 = #{parser.parse('2 - 1 * 20 + 18')}")
59
+ puts("2-(1-20) = #{parser.parse('2 - (1 - 20)')}")
60
+ puts("1+2-3 = #{parser.parse('1 + 2 - 3')}")
@@ -0,0 +1,51 @@
1
+ # frozen_string_literal: true
2
+
3
+ # -*- ruby -*-
4
+ # writing grammars using chainl().
5
+
6
+ require 'tdparser'
7
+ require 'tdparser/utils'
8
+
9
+ parser = TDParser.define do |g| # grammar for the four arithmetic operators, built with chainl
10
+ g.plus = '+'
11
+ g.minus = '-'
12
+ g.mult = '*'
13
+ g.div = '/'
14
+ # expr1: chainl over prim with two operator groups; judging by the indexing, the block gets x as [left, operator, right] — TODO confirm against chainl.
15
+ g.expr1 =
16
+ chainl(prim, mult | div, plus | minus) do |x|
17
+ case x[1]
18
+ when '+'
19
+ x[0] + x[2]
20
+ when '-'
21
+ x[0] - x[2]
22
+ when '*'
23
+ x[0] * x[2]
24
+ when '/'
25
+ x[0] / x[2]
26
+ end
27
+ end
28
+ # prim: the integer value of an :int token, or the middle element (the expr1 value) of a parenthesised group.
29
+ g.prim =
30
+ (token(:int) >> proc { |x| x[0].value.to_i }) |
31
+ ((token('(') - expr1 - token(')')) >> proc { |x| x[1] })
32
+ # parse: tokenizes str with a StringTokenizer (patterns labelled :int and :real) and runs expr1 on the generated tokens.
33
+ def parse(str)
34
+ tokenizer = TDParser::StringTokenizer[
35
+ /\d+(?!\.\d)/ => :int,
36
+ /\d+\.\d+/ => :real,
37
+ ]
38
+ expr1.parse(tokenizer.generate(str))
39
+ end
40
+ end
41
+
42
+ if ENV['TEST']
43
+ SampleExprParser = parser
44
+ return
45
+ end
46
+
47
+ puts("1 = #{parser.parse('1')}")
48
+ puts("1+10 = #{parser.parse('1+10')}")
49
+ puts("2-1*20+18 = #{parser.parse('2 - 1 * 20 + 18')}")
50
+ puts("2-(1-20) = #{parser.parse('2 - (1 - 20)')}")
51
+ puts("1+2-3 = #{parser.parse('1 + 2 - 3')}")
@@ -0,0 +1,28 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'tdparser'
4
+
5
+ parser = TDParser.define do |g|
6
+ g.lp = '('
7
+ g.rp = ')'
8
+ g.str = /\w+/
9
+
10
+ # Note that "g.elem*1" is a iteration of a sequence that consists
11
+ # of only "g.elem", but it is not a iteration of "g.elem".
12
+ g.list = (g.lp - (g.elem * 1) - g.rp) >> proc { |x| x[1].collect { |y| y[0] } }
13
+ g.elem = (g.str | g.list) >> proc { |x| x[0] }
14
+
15
+ def parse(str)
16
+ buff = str.split(/\s+|([()])/).select { |s| s.size.positive? }
17
+ list.parse(buff)
18
+ end
19
+ end
20
+
21
+ if ENV['TEST']
22
+ SampleListParser = parser
23
+ return
24
+ end
25
+
26
+ list = '(a (b c d) (e f g))'
27
+ r = parser.parse(list)
28
+ p r
@@ -0,0 +1,46 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'tdparser'
4
+ require 'benchmark'
5
+
6
+ # Disable auto optimization: reopen TDParser::Parser so `|` returns a bare ChoiceParser (presumably the stock `|` also optimizes the choice — confirm in the library).
7
+ module TDParser
8
+ class Parser
9
+ def |(other)
10
+ ChoiceParser.new(self, other)
11
+ end
12
+ end
13
+ end
14
+
15
+ parser = TDParser.define do |g| # three recursive rules over the same tokens; they are printed and timed later in this file
16
+ f = proc { |x| x.flatten } # shared action: flattens the nested match result
17
+ g.rule1 =
18
+ ((token('1') - token('2') - rule1 - token('a')) >> f) |
19
+ ((token('1') - token('2') - rule1 - token('b')) >> f) |
20
+ empty
21
+ # rule2: the same alternatives as rule1, with .optimize called explicitly on the whole choice.
22
+ g.rule2 =
23
+ (((token('1') - token('2') - rule2 - token('a')) >> f) |
24
+ ((token('1') - token('2') - rule2 - token('b')) >> f) |
25
+ empty).optimize
26
+ # rule3: the '1' '2' rule3 prefix is written once and only the trailing 'a' | 'b' token is a choice.
27
+ g.rule3 =
28
+ (((token('1') - token('2') - rule3 - (token('a') | token('b'))) >> f) |
29
+ empty)
30
+ end
31
+
32
+ puts(parser.rule1.to_s) # print the string form of each rule
33
+ puts(parser.rule2.to_s)
34
+ puts(parser.rule3.to_s)
35
+ # Benchmark: for each size i, time N parses per rule of an input made of i '1' '2' pairs followed by i 'b' tokens.
36
+ N = 10 # number of parses inside each timed report
37
+ Benchmark.bm do |x|
38
+ buff = %w[1 2]
39
+ b = ['b']
40
+ [5, 10, 15].each do |i|
41
+ puts('--')
42
+ x.report { N.times { $r1 = parser.rule1.parse((buff * i) + (b * i)) } } # last result is kept in a global
43
+ x.report { N.times { $r2 = parser.rule2.parse((buff * i) + (b * i)) } }
44
+ x.report { N.times { $r3 = parser.rule3.parse((buff * i) + (b * i)) } }
45
+ end
46
+ end
@@ -0,0 +1,61 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'tdparser'
4
+ require 'tdparser/utils'
5
+ require 'tdparser/xml'
6
+
7
+ translator = TDParser.define do |g|
8
+ extend TDParser::XMLParser
9
+
10
+ g.xml =
11
+ (element('a') do
12
+ element('b') do
13
+ g.xmlseq
14
+ end >> dom_constructor(&:children)
15
+ end >> dom_constructor do |node|
16
+ node.name = 'AB'
17
+ node
18
+ end) |
19
+ (element(String) do
20
+ g.xmlseq
21
+ end >> dom_constructor do |node|
22
+ node.name = node.name.upcase
23
+ node
24
+ end) |
25
+ (doctype do
26
+ g.xmlseq
27
+ end >> dom_constructor { |node| node }) |
28
+ (text >> proc { |x| x[0] }) |
29
+ (elementdecl >> proc { |x| x[0] }) |
30
+ (xmldecl >> proc { |x| x[0] }) |
31
+ (comment >> proc { |x| x[0] }) |
32
+ (any_node >> proc { |x| x[0] })
33
+
34
+ g.xmlseq =
35
+ ((g.xml * 0) >> proc { |x| x[0].collect { |y| y[0] } }) |
36
+ def translate(src)
37
+ gen = TDParser::XMLParser::XMLTokenGenerator.new(src)
38
+ xmlseq.parse(gen)
39
+ end
40
+ end
41
+
42
+ if ENV['TEST']
43
+ XMLTranslator = translator
44
+ return
45
+ end
46
+ # Demo run: translate a small document, add each returned node to a fresh REXML::Document, and print it.
47
+ seq = translator.translate(<<~EOS)
48
+ <?xml version="1.0" ?>
49
+ <!DOCTYPE body [
50
+ <!ELEMENT body (#PCDATA, strong*)>
51
+ <!ELEMENT strong (#PCDATA)>
52
+ ]>
53
+ <list>
54
+ <!-- comment -->
55
+ <a><b><c>hoge</c></b></a>
56
+ <b>b?</b>
57
+ </list>
58
+ EOS
59
+ doc = REXML::Document.new
60
+ seq.each { |x| doc.add(x) }
61
+ puts(doc)
metadata ADDED
@@ -0,0 +1,70 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: tdparser
3
+ version: !ruby/object:Gem::Version
4
+ version: 1.5.0
5
+ platform: ruby
6
+ authors:
7
+ - Takaaki Tateishi
8
+ - gemmaro
9
+ autorequire:
10
+ bindir: bin
11
+ cert_chain: []
12
+ date: 2024-11-08 00:00:00.000000000 Z
13
+ dependencies: []
14
+ description: TDParser (formerly TDP4R) is a top-down parser library that consists
15
+ of parser combinators and utility functions.
16
+ email:
17
+ - ttate@ttsky.net
18
+ - gemmaro.dev@gmail.com
19
+ executables: []
20
+ extensions: []
21
+ extra_rdoc_files: []
22
+ files:
23
+ - ".dir-locals.el"
24
+ - ".envrc"
25
+ - ".rubocop.yml"
26
+ - CHANGELOG.md
27
+ - COPYING
28
+ - README
29
+ - Rakefile
30
+ - doc/faq.rdoc
31
+ - doc/guide.rdoc
32
+ - lib/tdparser.rb
33
+ - lib/tdparser/utils.rb
34
+ - lib/tdparser/version.rb
35
+ - lib/tdparser/xml.rb
36
+ - manifest.scm
37
+ - samples/sample1.rb
38
+ - samples/sample2.rb
39
+ - samples/sample3.rb
40
+ - samples/sample4.rb
41
+ - samples/sample5.rb
42
+ - samples/sample_expr.rb
43
+ - samples/sample_list.rb
44
+ - samples/sample_optimize.rb
45
+ - samples/sample_xml.rb
46
+ homepage:
47
+ licenses:
48
+ - BSD-3-Clause
49
+ metadata:
50
+ rubygems_mfa_required: 'true'
51
+ post_install_message:
52
+ rdoc_options: []
53
+ require_paths:
54
+ - lib
55
+ required_ruby_version: !ruby/object:Gem::Requirement
56
+ requirements:
57
+ - - ">="
58
+ - !ruby/object:Gem::Version
59
+ version: '3.1'
60
+ required_rubygems_version: !ruby/object:Gem::Requirement
61
+ requirements:
62
+ - - ">="
63
+ - !ruby/object:Gem::Version
64
+ version: '0'
65
+ requirements: []
66
+ rubygems_version: 3.3.26
67
+ signing_key:
68
+ specification_version: 4
69
+ summary: Top down parser library
70
+ test_files: []