pegparse 0.1.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml ADDED
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA256:
3
+ metadata.gz: c35eb89599b2cf50fb3076cd4b48873624940fc5e0ad8c07aaaa1e05c0992640
4
+ data.tar.gz: d02335bee92250709be7ab1e04871d6fff127548acb20979fe273738c3ccd8fa
5
+ SHA512:
6
+ metadata.gz: 763f0630ece3da62bb793259033307135776f02afe7cb312f9e730e207b8a9ae8ceee501a73fdea45c4ccd426f2df96220697e3bee98e608fadbdfc8b5d4da52
7
+ data.tar.gz: 89b15e6e06db3659ea15d97aa83de2eb4f37a59cc12be0f0337a85e665b46eae34ba7b344ce00a3bbfc62ada052f32f2fe3b6917a18e42e892214d9d485a13d9
data/.gitignore ADDED
@@ -0,0 +1,10 @@
1
+ /.bundle/
2
+ /.yardoc
3
+ /_yardoc/
4
+ /coverage/
5
+ /doc/
6
+ /pkg/
7
+ /spec/reports/
8
+ /tmp/
9
+ .vscode
10
+ Gemfile.lock
data/.rubocop.yml ADDED
@@ -0,0 +1,13 @@
1
+ AllCops:
2
+ TargetRubyVersion: 2.4
3
+
4
+ Style/StringLiterals:
5
+ Enabled: true
6
+ EnforcedStyle: double_quotes
7
+
8
+ Style/StringLiteralsInInterpolation:
9
+ Enabled: true
10
+ EnforcedStyle: double_quotes
11
+
12
+ Layout/LineLength:
13
+ Max: 120
data/Gemfile ADDED
@@ -0,0 +1,14 @@
1
+ # frozen_string_literal: true
2
+
3
+ source "https://rubygems.org"
4
+
5
+ # Specify your gem's dependencies in pegparse.gemspec
6
+ gemspec
7
+
8
+ gem "rake", "~> 13.0"
9
+
10
+ gem "minitest", "~> 5.0"
11
+
12
+ gem "rubocop", "~> 1.7"
13
+
14
+ gem "debug"
data/LICENSE.txt ADDED
@@ -0,0 +1,21 @@
1
+ The MIT License (MIT)
2
+
3
+ Copyright (c) 2021 Riki Ishikawa
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in
13
+ all copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
21
+ THE SOFTWARE.
data/README.md ADDED
@@ -0,0 +1,133 @@
1
+
2
+ # Pegparse
3
+
4
+ Pegparse is a library for creating recursive descent parsers.
5
+
6
+ It provides a parser base class with helper methods for:
7
+ - PEG semantics
8
+ - binary-operations
9
+ - quoted-strings
10
+ - comments aware skip
11
+ - indent level checking
12
+ - here-documents
13
+
14
+
15
+ ## Installation
16
+
17
+ Add this line to your application's Gemfile:
18
+
19
+ ```ruby
20
+ gem 'pegparse'
21
+ ```
22
+
23
+ And then execute:
24
+
25
+ $ bundle install
26
+
27
+ Or install it yourself as:
28
+
29
+ $ gem install pegparse
30
+
31
+
32
+ ## Usage
33
+
34
+ 1. Create a class that inherits from `Pegparse::ParserBase`.
35
+ 2. Set entrypoint with `start_rule_symbol`.
36
+ 3. Write each parsing rule as a method.
37
+
38
+ ```ruby
39
+ require 'pegparse'
40
+
41
+ class MyParser < Pegparse::ParserBase
42
+ def initialize(scanner_or_context)
43
+ super(scanner_or_context)
44
+ self.start_rule_symbol = :number_rule
45
+ end
46
+
47
+ def number_rule
48
+ digits = one_or_more { # digits becomes ['1', '2']
49
+ read(/[0-9]/)
50
+ }
51
+ decimal = optional { # decimal is '34'
52
+ decimal_rule()
53
+ }
54
+ return [digits.join.to_i, decimal&.to_i]
55
+ end
56
+
57
+ def decimal_rule
58
+ read('.')
59
+ read(/[0-9]+/) # decimal_rule returns '34'
60
+ end
61
+ end
62
+
63
+ MyParser.new(nil).parse(StringScanner.new('12.34')) # => [12, 34]
64
+ ```
65
+
66
+ ### Core methods
67
+
68
+ - `read(str_or_regexp)` : Try to consume input. On success, return the matched string. On failure, backtrack.
69
+ - `peek(str_or_regexp)` : Peek input. If success, return string.
70
+ - `peek{ ... }` : Peek input. If success, return block result.
71
+ - `optional{ ... }` : Match only if available. (PEG's option operator('?'))
72
+ - `zero_or_more{ ... }` : Repeat matching. (PEG's repeat operator('*'))
73
+ - `one_or_more{ ... }` : Repeat matching. (PEG's repeat operator('+'))
74
+ - `choice(proc, proc, ...)` : Choice matching (PEG's choice operator('/'))
75
+ - `backtrack()` : Make backtrack.
76
+
77
+ ### Helper methods
78
+
79
+ - `sp()` : Spaces. (Space characters or comments)
80
+ - `inline_sp()` : Spaces without line feed.
81
+ - `deeper_sp()` : Spaces that either contain no line feed, or lead to a line with deeper indent than the previous line.
82
+ - `lf()` : Spaces containing a line feed.
83
+ - `separative(separator){ ... }` : Repeat matching with separator.
84
+ - `string_like(end_pattern, normal_pattern){ ... }` : String like "" and ''. Block is for special char handlings like escaping.
85
+ - `borrow_next_line{ ... }` : Skip the current line and temporarily parse the next line. Lines used this way become unmatchable by normal processing. (For here-documents)
86
+ - `borrowed_area()` : Only matches to lines used by `borrow_next_line`.
87
+ - `Pegparse::BiopRuleChain` : Binary operator helper class.
88
+
89
+ You can see sample parser implementations under `/samples`.
90
+
91
+ ### debug
92
+
93
+ Use `Pegparse::ParserCore#best_errors` to find parsing error location.
94
+ `best_errors` returns farthest location where parsing failed.
95
+ It also returns the deepest rule name.
96
+ You can improve message by decorating your rule method with `rule`.
97
+ ```ruby
98
+ rule def your_rule
99
+ ...
100
+ end
101
+ ```
102
+
103
+
104
+ ### VSCode
105
+
106
+ If you want to debug your parser with VSCode by breakpoint or step-by-step execution, add this config to your launch.json.
107
+ (debug gem newer than 1.4.0 required)
108
+ Then all code inside the gem will be skipped during step-by-step execution in VSCode.
109
+
110
+ ```json
111
+ {
112
+ "type": "rdbg",
113
+ "name": "Debug specified user program with rdbg",
114
+ "request": "launch",
115
+ "script": "${workspaceFolder}/YOUR_PARSER_HERE.rb",
116
+ "args": [],
117
+ "env": {
118
+ "RUBY_DEBUG_SKIP_PATH": [
119
+ "YOUR_GEM_DIRECTORY_HERE",
120
+ ],
121
+ }
122
+ }
123
+ ```
124
+
125
+
126
+
127
+ ## Contributing
128
+
129
+ Bug reports and pull requests are welcome on GitHub at https://github.com/jljse/pegparse.
130
+
131
+ ## License
132
+
133
+ The gem is available as open source under the terms of the [MIT License](https://opensource.org/licenses/MIT).
data/Rakefile ADDED
@@ -0,0 +1,16 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "bundler/gem_tasks"
4
+ require "rake/testtask"
5
+
6
+ Rake::TestTask.new(:test) do |t|
7
+ t.libs << "test"
8
+ t.libs << "lib"
9
+ t.test_files = FileList["test/**/*_test.rb"]
10
+ end
11
+
12
+ require "rubocop/rake_task"
13
+
14
+ RuboCop::RakeTask.new
15
+
16
+ task default: %i[test rubocop]
data/bin/console ADDED
@@ -0,0 +1,15 @@
1
+ #!/usr/bin/env ruby
2
+ # frozen_string_literal: true
3
+
4
+ require "bundler/setup"
5
+ require "pegparse"
6
+
7
+ # You can add fixtures and/or initialization code here to make experimenting
8
+ # with your gem easier. You can also use a different console, if you like.
9
+
10
+ # (If you use this, don't forget to add pry to your Gemfile!)
11
+ # require "pry"
12
+ # Pry.start
13
+
14
+ require "irb"
15
+ IRB.start(__FILE__)
data/bin/setup ADDED
@@ -0,0 +1,8 @@
1
+ #!/usr/bin/env bash
2
+ set -euo pipefail
3
+ IFS=$'\n\t'
4
+ set -vx
5
+
6
+ bundle install
7
+
8
+ # Do any other automated setup that you need to do here
@@ -0,0 +1,113 @@
1
+ require_relative 'parser_base'
2
+
3
# Binary operator rule helper.
#
# Mixed into a parser class (see .based_on) to parse chains of left-associative
# binary operators. Operators are registered with #left_op, one call per
# precedence level, and the operand rule is registered with #term.
module Pegparse::BiopRuleChain
  # @!parse include Pegparse::ParserCore

  # Create new parser class derived from passed one.
  # If you want to customize parser behavior, override method in exec_block.
  # @param parser_class [Class] Pegparse::ParserBase or one of its subclasses
  # @param exec_block [Proc] optional block evaluated with class_exec on the new class
  # @return [Class<Pegparse::BiopRuleChainImitation>]
  # @raise [ArgumentError] if parser_class is not a class derived from Pegparse::ParserBase
  def self.based_on(parser_class, &exec_block)
    # Check is_a?(Class) first so that a non-class argument is reported as
    # ArgumentError instead of failing with NoMethodError.
    unless parser_class.is_a?(Class) && parser_class <= Pegparse::ParserBase
      raise ArgumentError, "parser_class must be Pegparse::ParserBase or its subclass: #{parser_class.inspect}"
    end

    klass = Class.new(parser_class) do
      include Pegparse::BiopRuleChain
    end
    klass.class_exec(&exec_block) if exec_block

    klass
  end

  # Sets #start_rule as the entry point and starts with no operators and no term rule.
  # @param scanner_or_context forwarded unchanged to the base parser's initialize
  def initialize(scanner_or_context)
    super(scanner_or_context)
    @start_rule_symbol = :start_rule
    @operators = []
    @term = nil
  end

  # Default construction of matching result. (override this if you want)
  # @return [Array] [op, lhs, rhs]
  def construct_result(lhs, op, rhs)
    [op, lhs, rhs]
  end

  # Default matching rule of spaces before operator. (override this if you want)
  # This rule will be used when you pass String or Regexp to #left_op.
  def operator_sp
    sp()
  end

  # Default matching rule of spaces before operand. (override this if you want)
  def operand_sp
    sp()
  end

  # Create match proc for operator.
  # @param operator_matcher [Array, Proc, String, Regexp]
  # @return [Proc]
  # @raise [ArgumentError] if operator_matcher is none of the supported types
  private def get_operator_matcher(operator_matcher)
    case operator_matcher
    when Array
      # Operators of the same precedence: try each alternative in order.
      ops = operator_matcher.map { |x| get_operator_matcher(x) }
      -> { choice(*ops) }
    when Proc
      # Caller supplied matcher is used as-is (operator_sp is NOT applied).
      operator_matcher
    when String, Regexp
      -> {
        operator_sp()
        read(operator_matcher)
      }
    else
      raise ArgumentError, "unsupported operator matcher: #{operator_matcher.inspect}"
    end
  end

  # Add left-associative binary operators.
  # Call in order of operators precedence.
  # If you have multiple operators in same precedence, pass Array as parameter.
  # @param operator_matcher [String, Regexp, Array, Proc]
  # @return [Pegparse::BiopRuleChainImitation] self, so calls can be chained
  def left_op(operator_matcher)
    @operators << get_operator_matcher(operator_matcher)
    self
  end

  # Set terminal matching rule.
  # @param term_block [Proc] called (without arguments) to match one operand
  # @return [nil]
  def term(term_block)
    @term = term_block
    nil
  end

  # Match expression of the operators which have specified precedence level.
  # Levels are indexes into the list built by #left_op; once the level is past
  # the last registered operator, the term rule is matched instead.
  # @param operator_level [Integer]
  # @return [Object] the term result, or nested #construct_result values
  private def match(operator_level)
    return @term.call if operator_level >= @operators.size

    lhs = match(operator_level + 1)

    operands = zero_or_more {
      # Each level holds exactly one Proc (see #get_operator_matcher).
      op = choice(@operators[operator_level])
      operand_sp()
      rhs = match(operator_level + 1)

      [op, rhs]
    }

    # Fold from the left so that "a - b - c" becomes ((a - b) - c).
    operands.inject(lhs) { |subtree, (op, rhs)|
      construct_result(subtree, op, rhs)
    }
  end

  # entry point
  private def start_rule
    match(0)
  end
end
107
+
108
# This is an imitation class that exists only for documentation
# (it is the type named in the YARD @return tags of Pegparse::BiopRuleChain).
# The runtime never uses an instance of it.
class Pegparse::BiopRuleChainImitation < Pegparse::ParserBase
  include Pegparse::BiopRuleChain
end
113
+
@@ -0,0 +1,35 @@
1
+
2
module Pegparse
  # One borrowed region of the input.
  # start_pos is inclusive and end_pos is exclusive (see BorrowedAreas#conflicted_area).
  # marker_pos is the position compared against on backtrack
  # (presumably where the borrow was requested -- confirm against the caller).
  BorrowedArea = Struct.new(:marker_pos, :start_pos, :end_pos, keyword_init: true)

  # Ordered list of BorrowedArea entries.
  class BorrowedAreas
    def initialize
      @areas = []
    end

    # Append an area to the end of the list.
    # @param area [Pegparse::BorrowedArea]
    def add_area(area)
      @areas.push(area)
    end

    # Find the first registered area which contains pos.
    # @param pos [Integer]
    # @return [Pegparse::BorrowedArea, nil] nil when no area contains pos
    def conflicted_area(pos)
      @areas.each do |area|
        return area if area.start_pos <= pos && pos < area.end_pos
      end
      nil
    end

    # Drop every area whose marker is located after pos.
    # @param pos [Integer]
    # @return [Array, nil] result of Array#reject! (nil when nothing was dropped)
    def backtracked(pos)
      @areas.reject! { |area| area.marker_pos > pos }
    end

    # @return [Integer, nil] start_pos of the first area, nil when there is none
    def borrowed_area_start_pos
      head = @areas.first
      head ? head.start_pos : nil
    end

    # @return [Integer, nil] end_pos of the last area, nil when there is none
    def borrowed_area_end_pos
      tail = @areas.last
      tail ? tail.end_pos : nil
    end
  end
end
@@ -0,0 +1,61 @@
1
+
2
module Pegparse
  # Count line number and indent level.
  #
  # The input is reported incrementally through #memo. All positions are byte
  # offsets from the head of the whole input, because chunks are scanned byte
  # by byte (this matches StringScanner#pos, which ParserBase passes in).
  class LineCounter
    SPACE = " ".ord
    TAB = "\t".ord
    NEWLINE = "\n".ord

    def initialize
      # Start position of each known line.
      @line_start_pos = [0]
      # Position of the first non-indent byte found so far in each known line.
      @line_start_pos_noindent = [0]
      # End of the region already reported through #memo.
      @farthest_pos = 0
    end

    # update with partial string
    # @param pos [Integer] position of str relative to whole input (byte offset)
    # @param str [String] partial string
    # @raise [ArgumentError] if str would leave an unscanned gap after the known region
    def memo(pos, str)
      # Use bytesize (not size): the scan below indexes str by byte, so the end
      # position has to be in bytes too. With the character count, multibyte
      # input left @farthest_pos short and the next contiguous chunk was rejected.
      end_pos = pos + str.bytesize
      return if end_pos < @farthest_pos
      if pos > @farthest_pos
        raise ArgumentError, "pos #{pos} is beyond the scanned region (farthest: #{@farthest_pos})"
      end

      row, * = position(pos)
      str.each_byte.with_index do |byte, index|
        case byte
        when SPACE, TAB
          # If the whitespace directly follows the indent known so far,
          # the indent of this line becomes one deeper.
          @line_start_pos_noindent[row] += 1 if pos + index == @line_start_pos_noindent[row]
        when NEWLINE
          next_line_start_pos = pos + index + 1
          # Register the next line only if it is not known yet (chunks may overlap).
          if @line_start_pos.last < next_line_start_pos
            @line_start_pos << next_line_start_pos
            @line_start_pos_noindent << next_line_start_pos
          end
          row += 1
        end
      end

      @farthest_pos = end_pos if @farthest_pos < end_pos
    end

    # get line number and offset in the line for pos
    # @param pos [Integer]
    # @return [Array<Integer>] [line index (0 origin), offset from the line head]
    def position(pos)
      if pos >= @line_start_pos.last
        line_count = @line_start_pos.size - 1
      else
        # First line which starts after pos; pos belongs to the line before it.
        after_pos_line_head = @line_start_pos.bsearch_index { |x| x > pos }
        line_count = after_pos_line_head - 1
      end
      char_count = pos - @line_start_pos[line_count]

      [line_count, char_count]
    end

    # get indent level for the line including pos
    # @param pos [Integer]
    # @return [Integer] number of leading spaces/tabs found so far in that line
    def indent(pos)
      line_count, * = position(pos)
      @line_start_pos_noindent[line_count] - @line_start_pos[line_count]
    end
  end
end
@@ -0,0 +1,139 @@
1
+ require_relative 'parser_core'
2
+
3
# Parser base class (reusable rules)
#
# Builds space skipping, separator loops and string-literal matching on top of
# the primitives inherited from Pegparse::ParserCore.
class Pegparse::ParserBase < Pegparse::ParserCore
  # @param scanner_or_context forwarded unchanged to Pegparse::ParserCore#initialize
  def initialize(scanner_or_context)
    super
  end

  # match for spaces
  # One or more of: blanks, a newline, a borrowed area, a line comment, a block comment.
  def _
    space_items = [
      -> { read(/[ \t\r]+/) },
      -> { read(/\n/) },
      -> { borrowed_area() },
      -> { line_comment() },
      -> { block_comment() },
    ]
    one_or_more { choice(*space_items) }
  end

  # Line comment rule. The default never matches; override it in your parser.
  # Example body: read(/#[^\n]*/)
  def line_comment
    backtrack
  end

  # Block comment rule. The default never matches; override it in your parser.
  # Example body for /* ... */ comments:
  #   ret = ""
  #   ret << read('/*')
  #   ret << zero_or_more {
  #     part = read(/[^*]*/)
  #     break if peek('*/')
  #     part << '*' if optional('*')
  #   }.join
  #   ret << read('*/')
  #   ret
  rule def block_comment
    backtrack
  end

  # match for spaces (optional: also succeeds when there is no space)
  def sp
    optional { _ }
  end

  # match for spaces without newline
  # Backtracks when the matched spaces moved the scanner to another line.
  def inline_sp
    line_before = @context.line_counter.position(@context.scanner.pos).first
    spaces = optional { _ }
    line_after = @context.line_counter.position(@context.scanner.pos).first
    backtrack() unless line_before == line_after
    spaces
  end

  # match for spaces (if spaces cross to the next line, it must have deeper indent than previous line)
  # The indent to compare with is the last entry of @indent_stack.
  def deeper_sp
    start_line = @context.line_counter.position(@context.scanner.pos).first
    required_indent = @indent_stack.last
    raise StandardError unless required_indent
    spaces = optional { _ }
    reached_line = @context.line_counter.position(@context.scanner.pos).first
    reached_indent = @context.line_counter.indent(@context.scanner.pos)
    backtrack() if start_line != reached_line && required_indent >= reached_indent
    spaces
  end

  # match for spaces (must contain newline)
  # Backtracks when the scanner is still on the same line after the spaces.
  def lf
    line_before = @context.line_counter.position(@context.scanner.pos).first
    spaces = optional { _ }
    line_after = @context.line_counter.position(@context.scanner.pos).first
    backtrack() if line_before == line_after
    spaces
  end

  # loop with separator
  # @param separator_matcher [Regexp, String, Proc] A Proc is called as-is;
  #   a String/Regexp is read with optional spaces on both sides.
  # @param allow_additional_separator [Boolean] Allow redundant separator at tail.
  # @return [Array] Results of repeat_block (empty when the first item did not match).
  def separative(separator_matcher, allow_additional_separator: false, &repeat_block)
    separator_proc =
      if separator_matcher.is_a?(Proc)
        separator_matcher
      else
        -> {
          sp()
          read(separator_matcher)
          sp()
        }
      end

    items = []
    optional {
      items << repeat_block.call()
      followers = zero_or_more {
        separator_proc.call()
        repeat_block.call()
      }
      items.concat(followers)
      optional { separator_proc.call() } if allow_additional_separator
    }
    items
  end

  # string literal
  # @param end_pattern [String, Regexp] End of literal (e.g. "'", "\"")
  # @param normal_pattern [Regexp] Pattern for string without special process (e.g. /[^'\\]*/)
  # @param special_process [Proc] Process for special characters. Block should return processed result.
  # @return [Array<String,Object>] Match result. Result has one or more elements.
  #   If block returned non-string result, array has multiple elements.
  def string_like(end_pattern, normal_pattern, &special_process)
    parts = []
    buffer = ''
    while true
      buffer << read(normal_pattern)
      # Stop at the terminator, at end of input, or when nothing can handle special chars.
      break if peek(end_pattern) || eos? || !special_process
      processed = special_process.call()
      break unless processed
      case processed
      when String
        buffer << processed
      else
        # Non-string result splits the literal: flush the text collected so far.
        parts << buffer if buffer.size > 0
        parts << processed
        buffer = ''
      end
    end
    parts << buffer if buffer.size > 0

    parts.empty? ? [''] : parts
  end

end
@@ -0,0 +1,19 @@
1
+ require_relative "parser_errors"
2
+ require_relative "line_counter"
3
+ require_relative "borrowed_areas"
4
+
5
# Holder of the state for one parsing run: the scanner, the rule stack,
# collected errors, the line counter and the borrowed areas.
class Pegparse::ParserContext
  attr_accessor :scanner, :rule_stack, :errors, :line_counter, :borrowed_areas

  # @param scanner input scanner, stored as-is
  #   (presumably a StringScanner, as in the README example -- confirm against ParserCore)
  def initialize(scanner)
    @scanner = scanner
    @rule_stack = []
    @errors = Pegparse::ParserErrors.new
    @line_counter = Pegparse::LineCounter.new
    @borrowed_areas = Pegparse::BorrowedAreas.new
  end
end