dolos 0.2.0 → 0.3.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.rubocop.yml +2 -0
- data/README.md +20 -8
- data/benchmarks/json/json.rb +31 -8
- data/benchmarks/json/nested_json_1m.json +20557 -0
- data/benchmarks/letter.rb +78 -0
- data/docs/.nojekyll +0 -0
- data/docs/README.md +22 -0
- data/docs/_sidebar.md +4 -0
- data/docs/getting_started.md +52 -0
- data/docs/index.html +26 -0
- data/examples/letter.rb +3 -3
- data/lib/dolos/common.rb +44 -0
- data/lib/dolos/parsers.rb +25 -16
- data/lib/dolos/result.rb +12 -3
- data/lib/dolos/string_io_wrapper.rb +3 -8
- data/lib/dolos/version.rb +1 -1
- data/lib/dolos.rb +53 -45
- data/sig/dolos/common_parsers.rbs +11 -0
- data/sig/dolos/parser.rbs +6 -2
- data/sig/dolos/parser_state.rbs +1 -1
- data/sig/dolos/parsers.rbs +4 -0
- data/sig/dolos/result.rbs +7 -0
- metadata +13 -5
- data/lib/dolos_common_parsers/common_parsers.rb +0 -34
- /data/benchmarks/json/{nested_json.json → nested_json_166.json} +0 -0
- /data/docs/{dolos_stable_diff.png → images/dolos_stable_diff.png} +0 -0
@@ -0,0 +1,78 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
require 'bundler/setup'
|
3
|
+
require 'dolos'
|
4
|
+
require 'dolos_common_parsers/common_parsers'
|
5
|
+
require 'benchmark/ips'
|
6
|
+
|
7
|
+
include Dolos
|
8
|
+
|
9
|
+
# Include common parsers
|
10
|
+
# In future this can be more structured, moved them to separate module to prevent breaking changes
|
11
|
+
include Dolos::CommonParsers
|
12
|
+
|
13
|
+
# Library usage example
|
14
|
+
# Parse out a name and address from a letter
|
15
|
+
# For higher difficulty, we will not split this into multiple lines, but instead parse it all at once
|
16
|
+
letter = <<-LETTER
|
17
|
+
Mr. Vardeniui Pavardeniui
|
18
|
+
AB „Lietuvos Paštas“
|
19
|
+
Totorių g. 8
|
20
|
+
01121 Vilnius
|
21
|
+
LETTER
|
22
|
+
|
23
|
+
# Combine with 'or'
|
24
|
+
honorific = c("Mr. ") | c("Mrs. ") | c("Ms. ")
|
25
|
+
|
26
|
+
# Can be parsed with any_char, which will include the needed letters
|
27
|
+
# Or combine LT letters with latin alphabet
|
28
|
+
alpha_with_lt = char_in("ąčęėįšųūžĄČĘĖĮŠŲŪŽ") | alpha
|
29
|
+
|
30
|
+
# Capture all letters in a row and join them,
|
31
|
+
# because they are captured as elements of array by each alpha_with_lt parser.
|
32
|
+
first_name = alpha_with_lt.rep.map(&:join).capture!
|
33
|
+
last_name = alpha_with_lt.rep.map(&:join).capture!
|
34
|
+
|
35
|
+
# Combine first line parsers
|
36
|
+
# Consume zero or more whitespace, after that honorific must follow and so on
|
37
|
+
name_line = ws_rep0 & honorific & first_name & ws & last_name & eol
|
38
|
+
|
39
|
+
# Next line is company info
|
40
|
+
# We could choose to accept UAB and AB or just AB and etc.
|
41
|
+
# 'c("AB")' is for case-sensitive string. 'string' can also be used
|
42
|
+
company_type = c("AB")
|
43
|
+
quote_open = c("„")
|
44
|
+
quote_close = c("“")
|
45
|
+
|
46
|
+
# Consume LT alphabet with whitespace
|
47
|
+
company_name = (alpha_with_lt | ws).rep.map(&:join).capture!
|
48
|
+
company_info = company_type & ws_rep0 & quote_open & company_name & quote_close
|
49
|
+
second_line = ws_rep0 & company_info & eol
|
50
|
+
|
51
|
+
# Address line
|
52
|
+
# 'char_while' will consume characters while passed predicate is true
|
53
|
+
# This could be an alternative to previous 'alpha_with_lt' approach
|
54
|
+
# After that result is captured and mapped to hash
|
55
|
+
# Mapping to a hash so that at the end it's easy to tell tuples apart
|
56
|
+
# Also while mapping, doing some cleaning with '.strip'
|
57
|
+
street_name = char_while(->(char) { !char.match(/\d/) }).map { |s| { street: s.strip } }.capture!
|
58
|
+
building = digits.map { |s| { building: s.strip } }.capture!
|
59
|
+
address_line = ws_rep0 & street_name & building & eol
|
60
|
+
|
61
|
+
# City line
|
62
|
+
# All digits can be matched here or 'digits.rep(5)' could be used. Also joining with map.
|
63
|
+
postcode = digits.map { |s| { postcode: s.strip } }.capture!
|
64
|
+
city = alpha_with_lt.rep.map(&:join).map { |s| { city: s.strip } }.capture!
|
65
|
+
city_line = ws_rep0 & postcode & ws & city & eol
|
66
|
+
|
67
|
+
# Full letter parser which is combined from all previous parsers. All previous parsers can be run separately.
|
68
|
+
letter_parser = name_line & second_line & address_line & city_line
|
69
|
+
result = letter_parser.run(letter)
|
70
|
+
|
71
|
+
puts result.success?
|
72
|
+
|
73
|
+
Benchmark.ips do |x|
|
74
|
+
x.report('letter benchmark') do
|
75
|
+
letter_parser.run(letter)
|
76
|
+
end
|
77
|
+
x.compare!
|
78
|
+
end
|
data/docs/.nojekyll
ADDED
File without changes
|
data/docs/README.md
ADDED
@@ -0,0 +1,22 @@
|
|
1
|
+
# Dolos
|
2
|
+
|
3
|
+
## What is Dolos?
|
4
|
+
Dolos is a parser combinator library for Ruby. It is inspired by FastParse and Scala Parser Combinators.
|
5
|
+
|
6
|
+
## What are parser combinators?
|
7
|
+
Parser combinators are a way to build parsers from smaller parsers. For example, you can build a parser for a number from a parser for a digit.
|
8
|
+
This is a very simple example, but it can be used to build more complex parsers.
|
9
|
+
Parsers are lazy and only run when needed. This allows you to build complex parsers before passing input to them.
|
10
|
+
```ruby
|
11
|
+
hello = string("Hello")
|
12
|
+
greeting = hello >> c(" ") >> string("Ruby developer!")
|
13
|
+
greeting.run("Hello Ruby developer!") # => Success
|
14
|
+
```
|
15
|
+
|
16
|
+
## What's different from alternatives?
|
17
|
+
This library focuses on two things:
|
18
|
+
- Parsers integrate well into Ruby code. There is no need to keep them in separate classes.
|
19
|
+
- Fine-grained control over parsers. You can `map` and adjust each parser separately.
|
20
|
+
- Two ways of capturing values: traditional `>>`, other product operators to construct value and `capture!`
|
21
|
+
- For simple parsers `capture!` can be used to very quickly capture values into flat arrays
|
22
|
+
- Running parsers will not throw exceptions and instead return a result object. Exceptions don't play well with parsing.
|
data/docs/_sidebar.md
ADDED
@@ -0,0 +1,52 @@
|
|
1
|
+
# Getting started
|
2
|
+
|
3
|
+
## Installation
|
4
|
+
|
5
|
+
Install the gem and add it to your Gemfile:
|
6
|
+
```shell
|
7
|
+
$ bundle add dolos
|
8
|
+
```
|
9
|
+
Or manually:
|
10
|
+
```ruby
|
11
|
+
gem 'dolos'
|
12
|
+
```
|
13
|
+
|
14
|
+
## Usage
|
15
|
+
|
16
|
+
Two things to do:
|
17
|
+
- require library
|
18
|
+
- include module `Dolos` and `Dolos::Common`
|
19
|
+
|
20
|
+
```ruby
|
21
|
+
require 'dolos'
|
22
|
+
|
23
|
+
include Dolos
|
24
|
+
include Dolos::Common # Common parsers
|
25
|
+
```
|
26
|
+
|
27
|
+
### Basic parsers
|
28
|
+
|
29
|
+
A simple parser which matches one word.
|
30
|
+
|
31
|
+
```ruby
|
32
|
+
require 'dolos'
|
33
|
+
include Dolos
|
34
|
+
|
35
|
+
hello = c("Hello") # c("") is an alias for string(""). Can be read as: case-sensitive string match
|
36
|
+
|
37
|
+
hello.run("Hello").success? # => true
|
38
|
+
|
39
|
+
hello.run("hello").success? # => false
|
40
|
+
```
|
41
|
+
|
42
|
+
After defining a parser, it can be run with the `run('my-input')` method. It returns a `Result` object.
|
43
|
+
|
44
|
+
### Result
|
45
|
+
|
46
|
+
Result can be either `Success` or `Failure`. It can be checked with `success?` or `failure?` methods.
|
47
|
+
|
48
|
+
Success will also have a `value` property which will contain the result of the parser. There is also `captures`, but
|
49
|
+
that's for later.
|
50
|
+
|
51
|
+
|
52
|
+
Failure has an `inspect` method which returns a string with the error message. It also shows the error position.
|
data/docs/index.html
ADDED
@@ -0,0 +1,26 @@
|
|
1
|
+
<!DOCTYPE html>
|
2
|
+
<html lang="en">
|
3
|
+
<head>
|
4
|
+
<meta charset="UTF-8">
|
5
|
+
<title>Document</title>
|
6
|
+
<meta http-equiv="X-UA-Compatible" content="IE=edge,chrome=1" />
|
7
|
+
<meta name="description" content="Description">
|
8
|
+
<meta name="viewport" content="width=device-width, initial-scale=1.0, minimum-scale=1.0">
|
9
|
+
<link rel="stylesheet" href="//cdn.jsdelivr.net/npm/docsify@4/lib/themes/vue.css">
|
10
|
+
</head>
|
11
|
+
<body>
|
12
|
+
<div id="app"></div>
|
13
|
+
<script>
|
14
|
+
window.$docsify = {
|
15
|
+
name: '',
|
16
|
+
repo: ''
|
17
|
+
}
|
18
|
+
window.$docsify = {
|
19
|
+
loadSidebar: true
|
20
|
+
}
|
21
|
+
</script>
|
22
|
+
<!-- Docsify v4 -->
|
23
|
+
<script src="//cdn.jsdelivr.net/npm/docsify@4"></script>
|
24
|
+
<script src="//cdn.jsdelivr.net/npm/prismjs@1/components/prism-ruby.min.js"></script>
|
25
|
+
</body>
|
26
|
+
</html>
|
data/examples/letter.rb
CHANGED
@@ -1,12 +1,12 @@
|
|
1
1
|
# frozen_string_literal: true
|
2
|
-
|
3
|
-
|
2
|
+
require 'dolos'
|
3
|
+
require 'dolos_common_parsers/common_parsers'
|
4
4
|
|
5
5
|
include Dolos
|
6
6
|
|
7
7
|
# Include common parsers
|
8
8
|
# In future this can be more structured, moved them to separate module to prevent breaking changes
|
9
|
-
include Dolos::
|
9
|
+
include Dolos::Common
|
10
10
|
|
11
11
|
# Library usage example
|
12
12
|
# Parse out a name and address from a letter
|
data/lib/dolos/common.rb
ADDED
@@ -0,0 +1,44 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module Dolos
|
4
|
+
# Common parsers
|
5
|
+
# Separated from the main library to improve them later on
|
6
|
+
# These will change, new ones will be added. Once API stabilises, we will see what to do
|
7
|
+
# We have to be careful what is in the scope when we include this main module
|
8
|
+
# Probably a package of parsers following some RFC will be added as well.
|
9
|
+
# Keeping them separate for now
|
10
|
+
module Common
|
11
|
+
def ws
|
12
|
+
regex(/\s/)
|
13
|
+
end
|
14
|
+
|
15
|
+
def ws_rep0
|
16
|
+
regex(/\s*/)
|
17
|
+
end
|
18
|
+
|
19
|
+
def eol
|
20
|
+
regex(/\n|\r\n|\r/)
|
21
|
+
end
|
22
|
+
|
23
|
+
def digit
|
24
|
+
regex(/\d/)
|
25
|
+
end
|
26
|
+
|
27
|
+
def int
|
28
|
+
digit.map(&:to_i)
|
29
|
+
end
|
30
|
+
|
31
|
+
# Capture as string
|
32
|
+
def digits
|
33
|
+
regex(/\d+/)
|
34
|
+
end
|
35
|
+
|
36
|
+
def alpha_num
|
37
|
+
regex(/[a-zA-Z0-9]/)
|
38
|
+
end
|
39
|
+
|
40
|
+
def alpha
|
41
|
+
regex(/[a-zA-Z]/)
|
42
|
+
end
|
43
|
+
end
|
44
|
+
end
|
data/lib/dolos/parsers.rb
CHANGED
@@ -2,10 +2,16 @@
|
|
2
2
|
|
3
3
|
module Dolos
|
4
4
|
module Parsers
|
5
|
+
|
6
|
+
# String parser
|
7
|
+
# Matches exactly the given string
|
8
|
+
# string('hello').run('hello') => Success.new('hello', 5)
|
9
|
+
# Alias: c, for case-sensitive. Ex: c('hello').run('hello') => Success.new('hello', 5)
|
5
10
|
def string(str)
|
11
|
+
utf8_str = str.encode('UTF-8')
|
12
|
+
|
6
13
|
Parser.new do |state|
|
7
14
|
state.input.mark_offset
|
8
|
-
utf8_str = str.encode('UTF-8')
|
9
15
|
if state.input.matches?(utf8_str)
|
10
16
|
Success.new(utf8_str, str.bytesize)
|
11
17
|
else
|
@@ -13,16 +19,19 @@ module Dolos
|
|
13
19
|
got_error = state.input.io.string.byteslice(state.input.backup, advanced)
|
14
20
|
state.input.rollback
|
15
21
|
Failure.new(
|
16
|
-
"Expected #{str.inspect} but got #{got_error.inspect}",
|
22
|
+
-> { "Expected #{str.inspect} but got #{got_error.inspect}" },
|
17
23
|
advanced,
|
18
24
|
state
|
19
25
|
)
|
20
26
|
end
|
21
27
|
end
|
22
28
|
end
|
23
|
-
|
24
29
|
alias_method :c, :string
|
25
30
|
|
31
|
+
# Regex parser
|
32
|
+
# Accepts a regex, matches the regex against the input
|
33
|
+
# parser = regex(/\d+/)
|
34
|
+
# result = parser.run('123') # => Success.new('123', 3)
|
26
35
|
def regex(pattern)
|
27
36
|
Parser.new do |state|
|
28
37
|
state.input.mark_offset
|
@@ -32,7 +41,7 @@ module Dolos
|
|
32
41
|
advanced = state.input.offset
|
33
42
|
state.input.rollback
|
34
43
|
Failure.new(
|
35
|
-
"Expected pattern #{pattern.inspect} but got #{state.input.io.string.inspect}",
|
44
|
+
-> { "Expected pattern #{pattern.inspect} but got #{state.input.io.string.inspect}" },
|
36
45
|
advanced,
|
37
46
|
state
|
38
47
|
)
|
@@ -40,6 +49,8 @@ module Dolos
|
|
40
49
|
end
|
41
50
|
end
|
42
51
|
|
52
|
+
# Matches any character
|
53
|
+
# any_char.run('a') # => Success.new('a', 1)
|
43
54
|
def any_char
|
44
55
|
Parser.new do |state|
|
45
56
|
state.input.mark_offset
|
@@ -52,7 +63,7 @@ module Dolos
|
|
52
63
|
advanced = state.input.offset
|
53
64
|
state.input.rollback
|
54
65
|
Failure.new(
|
55
|
-
'Expected any character but got end of input',
|
66
|
+
-> { 'Expected any character but got end of input' },
|
56
67
|
advanced,
|
57
68
|
state
|
58
69
|
)
|
@@ -61,23 +72,24 @@ module Dolos
|
|
61
72
|
end
|
62
73
|
|
63
74
|
# Matches any character in a string
|
75
|
+
# Passed string can be imagined as a set of characters
|
64
76
|
# Example:
|
65
77
|
# char_in('abc').run('b') # => Success.new('b', 1)
|
66
78
|
def char_in(characters_string)
|
67
|
-
|
79
|
+
characters_set = characters_string.chars
|
68
80
|
|
69
81
|
Parser.new do |state|
|
70
82
|
state.input.mark_offset
|
71
83
|
|
72
84
|
char, bytesize = state.input.peek(1)
|
73
85
|
|
74
|
-
if char &&
|
86
|
+
if char && characters_set.include?(char)
|
75
87
|
Success.new(char, bytesize)
|
76
88
|
else
|
77
89
|
advanced = state.input.offset
|
78
90
|
state.input.rollback
|
79
91
|
Failure.new(
|
80
|
-
"Expected one of #{
|
92
|
+
-> { "Expected one of #{characters_set.to_a.inspect} but got #{char.inspect}" },
|
81
93
|
advanced,
|
82
94
|
state
|
83
95
|
)
|
@@ -90,18 +102,18 @@ module Dolos
|
|
90
102
|
state.input.mark_offset
|
91
103
|
|
92
104
|
buffer = String.new
|
93
|
-
|
94
|
-
char, bytesize = state.input.peek(1)
|
95
|
-
break if char.nil? || !predicate.call(char)
|
105
|
+
char, bytesize = state.input.peek(1)
|
96
106
|
|
107
|
+
while char && predicate.call(char)
|
97
108
|
buffer << char
|
98
109
|
state.input.advance(bytesize)
|
110
|
+
char, bytesize = state.input.peek(1)
|
99
111
|
end
|
100
112
|
|
101
113
|
if buffer.empty?
|
102
114
|
advanced = state.input.offset
|
103
115
|
Failure.new(
|
104
|
-
"Predicate never returned true",
|
116
|
+
-> { "Predicate never returned true" },
|
105
117
|
advanced,
|
106
118
|
state
|
107
119
|
)
|
@@ -111,7 +123,6 @@ module Dolos
|
|
111
123
|
end
|
112
124
|
end
|
113
125
|
|
114
|
-
# Unstable API
|
115
126
|
def recursive(&block)
|
116
127
|
recursive_parser = nil
|
117
128
|
|
@@ -120,7 +131,7 @@ module Dolos
|
|
120
131
|
|
121
132
|
recursive_parser.call.run_with_state(state).tap do |result|
|
122
133
|
if result.failure?
|
123
|
-
error_msg = "Error in recursive structure around position #{state.input.offset}: #{result.message}"
|
134
|
+
error_msg = -> { "Error in recursive structure around position #{state.input.offset}: #{result.message}" }
|
124
135
|
Failure.new(error_msg, state.input.offset, state)
|
125
136
|
end
|
126
137
|
end
|
@@ -130,7 +141,5 @@ module Dolos
|
|
130
141
|
placeholder
|
131
142
|
end
|
132
143
|
|
133
|
-
|
134
|
-
|
135
144
|
end
|
136
145
|
end
|
data/lib/dolos/result.rb
CHANGED
@@ -55,12 +55,21 @@ module Dolos
|
|
55
55
|
end
|
56
56
|
|
57
57
|
class Failure < Result
|
58
|
-
attr_reader
|
58
|
+
attr_reader :error_position, :state
|
59
59
|
|
60
|
-
def initialize(
|
61
|
-
@
|
60
|
+
def initialize(message_proc, error_position, state)
|
61
|
+
@message_proc = message_proc
|
62
62
|
@error_position = error_position
|
63
63
|
@state = state
|
64
|
+
@message_evaluated = false
|
65
|
+
end
|
66
|
+
|
67
|
+
def message
|
68
|
+
unless @message_evaluated
|
69
|
+
@message_value = @message_proc.call
|
70
|
+
@message_evaluated = true
|
71
|
+
end
|
72
|
+
@message_value
|
64
73
|
end
|
65
74
|
|
66
75
|
def inspect
|
@@ -22,12 +22,7 @@ module Dolos
|
|
22
22
|
|
23
23
|
def matches?(utf8_str)
|
24
24
|
read = io.read(utf8_str.bytesize)
|
25
|
-
|
26
|
-
if read.nil?
|
27
|
-
false
|
28
|
-
else
|
29
|
-
read.force_encoding('UTF-8') == utf8_str
|
30
|
-
end
|
25
|
+
!read.nil? && read.force_encoding('UTF-8') == utf8_str
|
31
26
|
end
|
32
27
|
|
33
28
|
def advance(bytesize)
|
@@ -61,8 +56,8 @@ module Dolos
|
|
61
56
|
remaining_data = io.read
|
62
57
|
io.seek(current_position)
|
63
58
|
|
64
|
-
if
|
65
|
-
matched_string =
|
59
|
+
if remaining_data =~ /\A#{pattern}/
|
60
|
+
matched_string = $&
|
66
61
|
io.seek(current_position + matched_string.bytesize)
|
67
62
|
return matched_string
|
68
63
|
end
|
data/lib/dolos/version.rb
CHANGED
data/lib/dolos.rb
CHANGED
@@ -10,80 +10,76 @@ module Dolos
|
|
10
10
|
include Parsers
|
11
11
|
|
12
12
|
class Parser
|
13
|
-
|
14
13
|
attr_accessor :parser_proc
|
15
|
-
|
16
14
|
def initialize(&block)
|
17
15
|
@parser_proc = block
|
18
16
|
end
|
19
17
|
|
18
|
+
# Run the parser with the given input
|
19
|
+
# Returns a Result<Success|Failure>
|
20
|
+
# string("hello").run("hello") => Success.new("hello", 5)
|
20
21
|
def run(input)
|
21
22
|
run_with_state(ParserState.new(input))
|
22
23
|
end
|
23
24
|
|
25
|
+
|
24
26
|
def run_with_state(state)
|
25
|
-
result = parser_proc.call(state)
|
26
|
-
if result.success?
|
27
|
-
state.last_success_position = state.input.offset
|
28
|
-
end
|
27
|
+
result = @parser_proc.call(state)
|
28
|
+
state.last_success_position = state.input.offset if result.success?
|
29
29
|
result
|
30
30
|
end
|
31
31
|
|
32
|
+
# Capture the result of the parser
|
33
|
+
# p = string("hello").capture!
|
34
|
+
# p.run("hello").captures => ["hello"]
|
35
|
+
# Captures is a flat array of all captured values
|
32
36
|
def capture!(wrap_in = nil)
|
33
37
|
Parser.new do |state|
|
34
38
|
result = run_with_state(state)
|
35
|
-
|
36
|
-
result.capture!(wrap_in)
|
37
|
-
else
|
38
|
-
result
|
39
|
-
end
|
39
|
+
result.success? ? result.capture!(wrap_in) : result
|
40
40
|
end
|
41
41
|
end
|
42
42
|
|
43
|
-
#
|
43
|
+
# Map the captures of the parser
|
44
|
+
# p = string("hello").map_captures { |captures| captures.map(&:upcase) }
|
45
|
+
# p.run("hello") => Success.new("hello", 5, ["HELLO"])
|
46
|
+
# This only maps over captures, not the value
|
44
47
|
def map_captures(&block)
|
45
48
|
Parser.new do |state|
|
46
49
|
result = run_with_state(state)
|
47
|
-
|
48
|
-
Success.new(result.value, result.length, block.call(result.captures))
|
49
|
-
else
|
50
|
-
result
|
51
|
-
end
|
50
|
+
result.success? ? Success.new(result.value, result.length, block.call(result.captures)) : result
|
52
51
|
end
|
53
52
|
end
|
54
53
|
|
55
|
-
#
|
54
|
+
# Map the result of the parser
|
55
|
+
# p = string("hello").map { |s| s.upcase }
|
56
|
+
# p.run("hello") => Success.new("HELLO", 5)
|
56
57
|
def map(&block)
|
57
58
|
Parser.new do |state|
|
58
59
|
result = run_with_state(state)
|
59
|
-
|
60
|
-
Success.new(block.call(result.value), result.length, result.captures)
|
61
|
-
else
|
62
|
-
result
|
63
|
-
end
|
60
|
+
result.success? ? Success.new(block.call(result.value), result.length, result.captures) : result
|
64
61
|
end
|
65
62
|
end
|
66
63
|
|
64
|
+
# Combine the result of the parser with another parser
|
67
65
|
def combine(&block)
|
68
66
|
Parser.new do |state|
|
69
67
|
result = run_with_state(state)
|
68
|
+
|
70
69
|
if result.success?
|
70
|
+
state.input.advance(result.length)
|
71
71
|
new_parser = block.call(result.value, result.captures)
|
72
|
-
|
73
|
-
new_state.input.advance(result.length)
|
74
|
-
new_parser.run_with_state(new_state)
|
72
|
+
new_parser.run_with_state(state)
|
75
73
|
else
|
76
74
|
result
|
77
75
|
end
|
78
76
|
end
|
79
77
|
end
|
80
78
|
|
81
|
-
|
82
|
-
|
83
|
-
|
84
|
-
|
85
|
-
end
|
86
|
-
|
79
|
+
# Combine the result of the parser with another parser
|
80
|
+
# Has an alias of `&`
|
81
|
+
# p = string("hello") & string("world")
|
82
|
+
# p.run("helloworld") => Success.new(["hello", "world"], 10)
|
87
83
|
def product(other_parser)
|
88
84
|
combine do |value1, capture1|
|
89
85
|
other_parser.map do |value2|
|
@@ -95,6 +91,10 @@ module Dolos
|
|
95
91
|
end
|
96
92
|
alias_method :&, :product
|
97
93
|
|
94
|
+
|
95
|
+
# Combine the result of the parser with another parser
|
96
|
+
# Discards the result of the second parser
|
97
|
+
# p = string("hello") << string("world")
|
98
98
|
def product_l(other_parser)
|
99
99
|
combine do |value1, capture1|
|
100
100
|
other_parser.map do |_|
|
@@ -105,6 +105,9 @@ module Dolos
|
|
105
105
|
end
|
106
106
|
end
|
107
107
|
|
108
|
+
# Combine the result of the parser with another parser
|
109
|
+
# Discards the result of the first parser
|
110
|
+
# p = string("hello") >> string("world")
|
108
111
|
def product_r(other_parser)
|
109
112
|
combine do |_, capture1|
|
110
113
|
other_parser.map do |value2|
|
@@ -118,6 +121,10 @@ module Dolos
|
|
118
121
|
alias_method :<<, :product_l
|
119
122
|
alias_method :>>, :product_r
|
120
123
|
|
124
|
+
# Combine the result of the parser with another parser
|
125
|
+
# If the first parser fails, it will try the second parser
|
126
|
+
# p = string("hello") | string("world") | string("!")
|
127
|
+
# p.run("hello") => Success.new("hello", 5)
|
121
128
|
def choice(other_parser)
|
122
129
|
Parser.new do |state|
|
123
130
|
result = run_with_state(state)
|
@@ -130,6 +137,9 @@ module Dolos
|
|
130
137
|
end
|
131
138
|
alias_method :|, :choice
|
132
139
|
|
140
|
+
|
141
|
+
# Repeat the parser n times
|
142
|
+
# Separator is optional; it's another parser that will be run between each repetition
|
133
143
|
# rep0 # 0 or more
|
134
144
|
# rep # 1 or more
|
135
145
|
# rep(n = 2) # exactly 2
|
@@ -140,10 +150,9 @@ module Dolos
|
|
140
150
|
values = []
|
141
151
|
captures = []
|
142
152
|
count = 0
|
143
|
-
state.input.mark_offset
|
144
153
|
|
145
154
|
loop do
|
146
|
-
result = run_with_state(state.dup
|
155
|
+
result = run_with_state(state) # Removing .dup for performance. Be cautious of side effects.
|
147
156
|
|
148
157
|
if result.failure? || count >= n_max
|
149
158
|
break
|
@@ -155,7 +164,7 @@ module Dolos
|
|
155
164
|
count += 1
|
156
165
|
|
157
166
|
if separator && count < n_max
|
158
|
-
sep_result = separator.run_with_state(state.dup
|
167
|
+
sep_result = separator.run_with_state(state) # Removing .dup for performance. Be cautious of side effects.
|
159
168
|
break if sep_result.failure?
|
160
169
|
|
161
170
|
state.input.advance(sep_result.length)
|
@@ -163,10 +172,9 @@ module Dolos
|
|
163
172
|
end
|
164
173
|
|
165
174
|
if count < n_min
|
166
|
-
error_pos = state.input.offset
|
167
175
|
Failure.new(
|
168
|
-
"Expected parser to match at least #{n_min} times but matched only #{count} times",
|
169
|
-
|
176
|
+
-> { "Expected parser to match at least #{n_min} times but matched only #{count} times" },
|
177
|
+
state.input.offset,
|
170
178
|
state
|
171
179
|
)
|
172
180
|
else
|
@@ -175,11 +183,16 @@ module Dolos
|
|
175
183
|
end
|
176
184
|
end
|
177
185
|
|
186
|
+
# Repeat the parser zero or more times
|
187
|
+
# c(" ").rep0.run(" ") => Success.new([" ", " ", " "], 3)
|
178
188
|
def zero_or_more
|
179
189
|
repeat(n_min: 0, n_max: Float::INFINITY)
|
180
190
|
end
|
181
191
|
alias_method :rep0, :zero_or_more
|
182
192
|
|
193
|
+
# Repeat the parser one or more times
|
194
|
+
# Same as rep0, but must match at least once
|
195
|
+
# c(" ").rep.run("A") => Failure.new("...")
|
183
196
|
def one_or_more(exactly = nil)
|
184
197
|
if exactly.nil?
|
185
198
|
repeat(n_min: 1, n_max: Float::INFINITY)
|
@@ -189,6 +202,8 @@ module Dolos
|
|
189
202
|
end
|
190
203
|
alias_method :rep, :one_or_more
|
191
204
|
|
205
|
+
# Make parser optional
|
206
|
+
# c(" ").opt.run("A") => Success.new([], 0)
|
192
207
|
def optional
|
193
208
|
Parser.new do |state|
|
194
209
|
result = run_with_state(state.dup)
|
@@ -201,7 +216,6 @@ module Dolos
|
|
201
216
|
end
|
202
217
|
alias_method :opt, :optional
|
203
218
|
|
204
|
-
# Unstable API
|
205
219
|
# Used to declare lazy parser to avoid infinite loops in recursive parsers
|
206
220
|
def lazy
|
207
221
|
parser_memo = nil
|
@@ -212,11 +226,5 @@ module Dolos
|
|
212
226
|
end
|
213
227
|
end
|
214
228
|
|
215
|
-
private
|
216
|
-
|
217
|
-
def combine_and_discard_empty(*arrays)
|
218
|
-
arrays.compact.reject { |arr| arr.is_a?(Array) && arr.empty? }
|
219
|
-
end
|
220
|
-
|
221
229
|
end
|
222
230
|
end
|