dolos 0.2.0 → 0.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.rubocop.yml +2 -0
- data/README.md +20 -8
- data/benchmarks/json/json.rb +31 -8
- data/benchmarks/json/nested_json_1m.json +20557 -0
- data/benchmarks/letter.rb +78 -0
- data/docs/.nojekyll +0 -0
- data/docs/README.md +22 -0
- data/docs/_sidebar.md +4 -0
- data/docs/getting_started.md +52 -0
- data/docs/index.html +26 -0
- data/examples/letter.rb +3 -3
- data/lib/dolos/common.rb +44 -0
- data/lib/dolos/parsers.rb +25 -16
- data/lib/dolos/result.rb +12 -3
- data/lib/dolos/string_io_wrapper.rb +3 -8
- data/lib/dolos/version.rb +1 -1
- data/lib/dolos.rb +53 -45
- data/sig/dolos/common_parsers.rbs +11 -0
- data/sig/dolos/parser.rbs +6 -2
- data/sig/dolos/parser_state.rbs +1 -1
- data/sig/dolos/parsers.rbs +4 -0
- data/sig/dolos/result.rbs +7 -0
- metadata +13 -5
- data/lib/dolos_common_parsers/common_parsers.rb +0 -34
- /data/benchmarks/json/{nested_json.json → nested_json_166.json} +0 -0
- /data/docs/{dolos_stable_diff.png → images/dolos_stable_diff.png} +0 -0
data/benchmarks/letter.rb
ADDED
@@ -0,0 +1,78 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
require 'bundler/setup'
|
3
|
+
require 'dolos'
|
4
|
+
require 'dolos_common_parsers/common_parsers'
|
5
|
+
require 'benchmark/ips'
|
6
|
+
|
7
|
+
include Dolos
|
8
|
+
|
9
|
+
# Include common parsers
|
10
|
+
# In future this can be more structured, moved them to separate module to prevent breaking changes
|
11
|
+
include Dolos::CommonParsers
|
12
|
+
|
13
|
+
# Library usage example
|
14
|
+
# Parse out a name and address from a letter
|
15
|
+
# For higher difficulty, we will not split this into multiple lines, but instead parse it all at once
|
16
|
+
letter = <<-LETTER
|
17
|
+
Mr. Vardeniui Pavardeniui
|
18
|
+
AB „Lietuvos Paštas“
|
19
|
+
Totorių g. 8
|
20
|
+
01121 Vilnius
|
21
|
+
LETTER
|
22
|
+
|
23
|
+
# Combine with 'or'
|
24
|
+
honorific = c("Mr. ") | c("Mrs. ") | c("Ms. ")
|
25
|
+
|
26
|
+
# Can be parsed any_char which will include needed letters
|
27
|
+
# Or combine LT letters with latin alphabet
|
28
|
+
alpha_with_lt = char_in("ąčęėįšųūžĄČĘĖĮŠŲŪŽ") | alpha
|
29
|
+
|
30
|
+
# Capture all letters in a row and join them,
|
31
|
+
# because they are captured as elements of array by each alpha_with_lt parser.
|
32
|
+
first_name = alpha_with_lt.rep.map(&:join).capture!
|
33
|
+
last_name = alpha_with_lt.rep.map(&:join).capture!
|
34
|
+
|
35
|
+
# Combine first line parsers
|
36
|
+
# Consume zero or more whitespace, after that honorific must follow and so on
|
37
|
+
name_line = ws_rep0 & honorific & first_name & ws & last_name & eol
|
38
|
+
|
39
|
+
# Next line is company info
|
40
|
+
# We could choose to accept UAB and AB or just AB and etc.
|
41
|
+
# 'c("AB")' is for case-sensitive string. 'string' can also be used
|
42
|
+
company_type = c("AB")
|
43
|
+
quote_open = c("„")
|
44
|
+
quote_close = c("“")
|
45
|
+
|
46
|
+
# Consume LT alphabet with whitespace
|
47
|
+
company_name = (alpha_with_lt | ws).rep.map(&:join).capture!
|
48
|
+
company_info = company_type & ws_rep0 & quote_open & company_name & quote_close
|
49
|
+
second_line = ws_rep0 & company_info & eol
|
50
|
+
|
51
|
+
# Address line
|
52
|
+
# 'char_while' will consume characters while passed predicate is true
|
53
|
+
# This could be an alternative to previous 'alpha_with_lt' approach
|
54
|
+
# After that result is captured and mapped to hash
|
55
|
+
# Mapping to hash so at the end its easy to tell tuples apart
|
56
|
+
# Also while mapping, doing some cleaning with '.strip'
|
57
|
+
street_name = char_while(->(char) { !char.match(/\d/) }).map { |s| { street: s.strip } }.capture!
|
58
|
+
building = digits.map { |s| { building: s.strip } }.capture!
|
59
|
+
address_line = ws_rep0 & street_name & building & eol
|
60
|
+
|
61
|
+
# City line
|
62
|
+
# All digits can be matched here or 'digits.rep(5)' could be used. Also joining with map.
|
63
|
+
postcode = digits.map { |s| { postcode: s.strip } }.capture!
|
64
|
+
city = alpha_with_lt.rep.map(&:join).map { |s| { city: s.strip } }.capture!
|
65
|
+
city_line = ws_rep0 & postcode & ws & city & eol
|
66
|
+
|
67
|
+
# Full letter parser which is combined from all previous parsers. All previous parsers can be ran separately.
|
68
|
+
letter_parser = name_line & second_line & address_line & city_line
|
69
|
+
result = letter_parser.run(letter)
|
70
|
+
|
71
|
+
puts result.success?
|
72
|
+
|
73
|
+
Benchmark.ips do |x|
|
74
|
+
x.report('letter benchmark') do
|
75
|
+
letter_parser.run(letter)
|
76
|
+
end
|
77
|
+
x.compare!
|
78
|
+
end
|
data/docs/.nojekyll
ADDED
File without changes
|
data/docs/README.md
ADDED
@@ -0,0 +1,22 @@
|
|
1
|
+
# Dolos
|
2
|
+
|
3
|
+
## What is Dolos?
|
4
|
+
Dolos is a parser combinator library for Ruby. It is inspired by FastParse and Scala Parser Combinators.
|
5
|
+
|
6
|
+
## What are parser combinators?
|
7
|
+
Parser combinators are a way to build parsers from smaller parsers. For example, you can build a parser for a number from a parser for a digit.
|
8
|
+
This is a very simple example, but it can be used to build more complex parsers.
|
9
|
+
Parsers are lazy and only run when needed. This allows you to build complex parsers before passing input to them.
|
10
|
+
```ruby
|
11
|
+
hello = string("Hello")
|
12
|
+
greeting = hello >> c(" ") >> string("Ruby developer!")
|
13
|
+
greeting.run("Hello Ruby developer!") # => Success
|
14
|
+
```
|
15
|
+
|
16
|
+
## What's different from alternatives?
|
17
|
+
This library focuses on the following things:
|
18
|
+
- Parsers integrate well into Ruby code. There is no need to keep them in separate classes.
|
19
|
+
- Fine grained control over parsers. You can `map` and adjust each parser separately
|
20
|
+
- Two ways of capturing values: traditional `>>`, other product operators to construct value and `capture!`
|
21
|
+
- For simple parsers `capture!` can be used to very quickly capture values into flat arrays
|
22
|
+
- Running parsers will not throw exceptions and instead return a result object. Exceptions don't play well with parsing.
|
data/docs/_sidebar.md
ADDED
data/docs/getting_started.md
ADDED
@@ -0,0 +1,52 @@
|
|
1
|
+
# Getting started
|
2
|
+
|
3
|
+
## Installation
|
4
|
+
|
5
|
+
Install the gem and add it to your Gemfile:
|
6
|
+
```shell
|
7
|
+
$ bundle add dolos
|
8
|
+
```
|
9
|
+
Or manually:
|
10
|
+
```ruby
|
11
|
+
gem 'dolos'
|
12
|
+
```
|
13
|
+
|
14
|
+
## Usage
|
15
|
+
|
16
|
+
Two things to do:
|
17
|
+
- require library
|
18
|
+
- include module `Dolos` and `Dolos::Common`
|
19
|
+
|
20
|
+
```ruby
|
21
|
+
require 'dolos'
|
22
|
+
|
23
|
+
include Dolos
|
24
|
+
include Dolos::Common # Common parsers
|
25
|
+
```
|
26
|
+
|
27
|
+
### Basic parsers
|
28
|
+
|
29
|
+
A simple parser which matches one word.
|
30
|
+
|
31
|
+
```ruby
|
32
|
+
require 'dolos'
|
33
|
+
include Dolos
|
34
|
+
|
35
|
+
hello = c("Hello") # c("") is an alias for string(""). Can be read as: case-sensitive string match
|
36
|
+
|
37
|
+
hello.run("Hello").success? # => true
|
38
|
+
|
39
|
+
hello.run("hello").success? # => false
|
40
|
+
```
|
41
|
+
|
42
|
+
After defining a parser, it can be run with the `run('my-input')` method. It returns a `Result` object.
|
43
|
+
|
44
|
+
### Result
|
45
|
+
|
46
|
+
Result can be either `Success` or `Failure`. It can be checked with `success?` or `failure?` methods.
|
47
|
+
|
48
|
+
Success will also have a `value` property which will contain the result of the parser. There is also `captures`, but
|
49
|
+
that's for later.
|
50
|
+
|
51
|
+
|
52
|
+
Failure will have an `inspect` method which will return a string with the error message. It will show the error position as well.
|
data/docs/index.html
ADDED
@@ -0,0 +1,26 @@
|
|
1
|
+
<!DOCTYPE html>
|
2
|
+
<html lang="en">
|
3
|
+
<head>
|
4
|
+
<meta charset="UTF-8">
|
5
|
+
<title>Document</title>
|
6
|
+
<meta http-equiv="X-UA-Compatible" content="IE=edge,chrome=1" />
|
7
|
+
<meta name="description" content="Description">
|
8
|
+
<meta name="viewport" content="width=device-width, initial-scale=1.0, minimum-scale=1.0">
|
9
|
+
<link rel="stylesheet" href="//cdn.jsdelivr.net/npm/docsify@4/lib/themes/vue.css">
|
10
|
+
</head>
|
11
|
+
<body>
|
12
|
+
<div id="app"></div>
|
13
|
+
<script>
|
14
|
+
window.$docsify = {
|
15
|
+
name: '',
|
16
|
+
repo: ''
|
17
|
+
}
|
18
|
+
window.$docsify = {
|
19
|
+
loadSidebar: true
|
20
|
+
}
|
21
|
+
</script>
|
22
|
+
<!-- Docsify v4 -->
|
23
|
+
<script src="//cdn.jsdelivr.net/npm/docsify@4"></script>
|
24
|
+
<script src="//cdn.jsdelivr.net/npm/prismjs@1/components/prism-ruby.min.js"></script>
|
25
|
+
</body>
|
26
|
+
</html>
|
data/examples/letter.rb
CHANGED
@@ -1,12 +1,12 @@
|
|
1
1
|
# frozen_string_literal: true
|
2
|
-
|
3
|
-
|
2
|
+
require 'dolos'
|
3
|
+
require 'dolos_common_parsers/common_parsers'
|
4
4
|
|
5
5
|
include Dolos
|
6
6
|
|
7
7
|
# Include common parsers
|
8
8
|
# In future this can be more structured, moved them to separate module to prevent breaking changes
|
9
|
-
include Dolos::
|
9
|
+
include Dolos::Common
|
10
10
|
|
11
11
|
# Library usage example
|
12
12
|
# Parse out a name and address from a letter
|
data/lib/dolos/common.rb
ADDED
@@ -0,0 +1,44 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module Dolos
|
4
|
+
# Common parsers
|
5
|
+
# Separated from the main library to improve them later on
|
6
|
+
# These will change, new ones will be added. Once API stabilises, we will see what to do
|
7
|
+
# We have to be careful what is in the scope when we include this main module
|
8
|
+
# Probably a package of parsers following some RFC will be added as well.
|
9
|
+
# Keeping them separate for now
|
10
|
+
module Common
|
11
|
+
def ws
|
12
|
+
regex(/\s/)
|
13
|
+
end
|
14
|
+
|
15
|
+
def ws_rep0
|
16
|
+
regex(/\s*/)
|
17
|
+
end
|
18
|
+
|
19
|
+
def eol
|
20
|
+
regex(/\n|\r\n|\r/)
|
21
|
+
end
|
22
|
+
|
23
|
+
def digit
|
24
|
+
regex(/\d/)
|
25
|
+
end
|
26
|
+
|
27
|
+
def int
|
28
|
+
digit.map(&:to_i)
|
29
|
+
end
|
30
|
+
|
31
|
+
# Capture as string
|
32
|
+
def digits
|
33
|
+
regex(/\d+/)
|
34
|
+
end
|
35
|
+
|
36
|
+
def alpha_num
|
37
|
+
regex(/[a-zA-Z0-9]/)
|
38
|
+
end
|
39
|
+
|
40
|
+
def alpha
|
41
|
+
regex(/[a-zA-Z]/)
|
42
|
+
end
|
43
|
+
end
|
44
|
+
end
|
data/lib/dolos/parsers.rb
CHANGED
@@ -2,10 +2,16 @@
|
|
2
2
|
|
3
3
|
module Dolos
|
4
4
|
module Parsers
|
5
|
+
|
6
|
+
# String parser
|
7
|
+
# Matches exactly the given string
|
8
|
+
# string('hello').run('hello') => Success.new('hello', 5)
|
9
|
+
# Alias: c, for case-sensitive. Ex: c('hello').run('hello') => Success.new('hello', 5)
|
5
10
|
def string(str)
|
11
|
+
utf8_str = str.encode('UTF-8')
|
12
|
+
|
6
13
|
Parser.new do |state|
|
7
14
|
state.input.mark_offset
|
8
|
-
utf8_str = str.encode('UTF-8')
|
9
15
|
if state.input.matches?(utf8_str)
|
10
16
|
Success.new(utf8_str, str.bytesize)
|
11
17
|
else
|
@@ -13,16 +19,19 @@ module Dolos
|
|
13
19
|
got_error = state.input.io.string.byteslice(state.input.backup, advanced)
|
14
20
|
state.input.rollback
|
15
21
|
Failure.new(
|
16
|
-
"Expected #{str.inspect} but got #{got_error.inspect}",
|
22
|
+
-> { "Expected #{str.inspect} but got #{got_error.inspect}" },
|
17
23
|
advanced,
|
18
24
|
state
|
19
25
|
)
|
20
26
|
end
|
21
27
|
end
|
22
28
|
end
|
23
|
-
|
24
29
|
alias_method :c, :string
|
25
30
|
|
31
|
+
# Regex parser
|
32
|
+
# Accepts a regex, matches the regex against the input
|
33
|
+
# parser = regex(/\d+/)
|
34
|
+
# result = parser.run('123') # => Success.new('123', 3)
|
26
35
|
def regex(pattern)
|
27
36
|
Parser.new do |state|
|
28
37
|
state.input.mark_offset
|
@@ -32,7 +41,7 @@ module Dolos
|
|
32
41
|
advanced = state.input.offset
|
33
42
|
state.input.rollback
|
34
43
|
Failure.new(
|
35
|
-
"Expected pattern #{pattern.inspect} but got #{state.input.io.string.inspect}",
|
44
|
+
-> { "Expected pattern #{pattern.inspect} but got #{state.input.io.string.inspect}" },
|
36
45
|
advanced,
|
37
46
|
state
|
38
47
|
)
|
@@ -40,6 +49,8 @@ module Dolos
|
|
40
49
|
end
|
41
50
|
end
|
42
51
|
|
52
|
+
# Matches any character
|
53
|
+
# any_char.run('a') # => Success.new('a', 1)
|
43
54
|
def any_char
|
44
55
|
Parser.new do |state|
|
45
56
|
state.input.mark_offset
|
@@ -52,7 +63,7 @@ module Dolos
|
|
52
63
|
advanced = state.input.offset
|
53
64
|
state.input.rollback
|
54
65
|
Failure.new(
|
55
|
-
'Expected any character but got end of input',
|
66
|
+
-> { 'Expected any character but got end of input' },
|
56
67
|
advanced,
|
57
68
|
state
|
58
69
|
)
|
@@ -61,23 +72,24 @@ module Dolos
|
|
61
72
|
end
|
62
73
|
|
63
74
|
# Matches any character in a string
|
75
|
+
# Passed string can be imagined as a set of characters
|
64
76
|
# Example:
|
65
77
|
# char_in('abc').run('b') # => Success.new('b', 1)
|
66
78
|
def char_in(characters_string)
|
67
|
-
|
79
|
+
characters_set = characters_string.chars
|
68
80
|
|
69
81
|
Parser.new do |state|
|
70
82
|
state.input.mark_offset
|
71
83
|
|
72
84
|
char, bytesize = state.input.peek(1)
|
73
85
|
|
74
|
-
if char &&
|
86
|
+
if char && characters_set.include?(char)
|
75
87
|
Success.new(char, bytesize)
|
76
88
|
else
|
77
89
|
advanced = state.input.offset
|
78
90
|
state.input.rollback
|
79
91
|
Failure.new(
|
80
|
-
"Expected one of #{
|
92
|
+
-> { "Expected one of #{characters_set.to_a.inspect} but got #{char.inspect}" },
|
81
93
|
advanced,
|
82
94
|
state
|
83
95
|
)
|
@@ -90,18 +102,18 @@ module Dolos
|
|
90
102
|
state.input.mark_offset
|
91
103
|
|
92
104
|
buffer = String.new
|
93
|
-
|
94
|
-
char, bytesize = state.input.peek(1)
|
95
|
-
break if char.nil? || !predicate.call(char)
|
105
|
+
char, bytesize = state.input.peek(1)
|
96
106
|
|
107
|
+
while char && predicate.call(char)
|
97
108
|
buffer << char
|
98
109
|
state.input.advance(bytesize)
|
110
|
+
char, bytesize = state.input.peek(1)
|
99
111
|
end
|
100
112
|
|
101
113
|
if buffer.empty?
|
102
114
|
advanced = state.input.offset
|
103
115
|
Failure.new(
|
104
|
-
"Predicate never returned true",
|
116
|
+
-> { "Predicate never returned true" },
|
105
117
|
advanced,
|
106
118
|
state
|
107
119
|
)
|
@@ -111,7 +123,6 @@ module Dolos
|
|
111
123
|
end
|
112
124
|
end
|
113
125
|
|
114
|
-
# Unstable API
|
115
126
|
def recursive(&block)
|
116
127
|
recursive_parser = nil
|
117
128
|
|
@@ -120,7 +131,7 @@ module Dolos
|
|
120
131
|
|
121
132
|
recursive_parser.call.run_with_state(state).tap do |result|
|
122
133
|
if result.failure?
|
123
|
-
error_msg = "Error in recursive structure around position #{state.input.offset}: #{result.message}"
|
134
|
+
error_msg = -> { "Error in recursive structure around position #{state.input.offset}: #{result.message}" }
|
124
135
|
Failure.new(error_msg, state.input.offset, state)
|
125
136
|
end
|
126
137
|
end
|
@@ -130,7 +141,5 @@ module Dolos
|
|
130
141
|
placeholder
|
131
142
|
end
|
132
143
|
|
133
|
-
|
134
|
-
|
135
144
|
end
|
136
145
|
end
|
data/lib/dolos/result.rb
CHANGED
@@ -55,12 +55,21 @@ module Dolos
|
|
55
55
|
end
|
56
56
|
|
57
57
|
class Failure < Result
|
58
|
-
attr_reader
|
58
|
+
attr_reader :error_position, :state
|
59
59
|
|
60
|
-
def initialize(
|
61
|
-
@
|
60
|
+
def initialize(message_proc, error_position, state)
|
61
|
+
@message_proc = message_proc
|
62
62
|
@error_position = error_position
|
63
63
|
@state = state
|
64
|
+
@message_evaluated = false
|
65
|
+
end
|
66
|
+
|
67
|
+
def message
|
68
|
+
unless @message_evaluated
|
69
|
+
@message_value = @message_proc.call
|
70
|
+
@message_evaluated = true
|
71
|
+
end
|
72
|
+
@message_value
|
64
73
|
end
|
65
74
|
|
66
75
|
def inspect
|
data/lib/dolos/string_io_wrapper.rb
CHANGED
@@ -22,12 +22,7 @@ module Dolos
|
|
22
22
|
|
23
23
|
def matches?(utf8_str)
|
24
24
|
read = io.read(utf8_str.bytesize)
|
25
|
-
|
26
|
-
if read.nil?
|
27
|
-
false
|
28
|
-
else
|
29
|
-
read.force_encoding('UTF-8') == utf8_str
|
30
|
-
end
|
25
|
+
!read.nil? && read.force_encoding('UTF-8') == utf8_str
|
31
26
|
end
|
32
27
|
|
33
28
|
def advance(bytesize)
|
@@ -61,8 +56,8 @@ module Dolos
|
|
61
56
|
remaining_data = io.read
|
62
57
|
io.seek(current_position)
|
63
58
|
|
64
|
-
if
|
65
|
-
matched_string =
|
59
|
+
if remaining_data =~ /\A#{pattern}/
|
60
|
+
matched_string = $&
|
66
61
|
io.seek(current_position + matched_string.bytesize)
|
67
62
|
return matched_string
|
68
63
|
end
|
data/lib/dolos/version.rb
CHANGED
data/lib/dolos.rb
CHANGED
@@ -10,80 +10,76 @@ module Dolos
|
|
10
10
|
include Parsers
|
11
11
|
|
12
12
|
class Parser
|
13
|
-
|
14
13
|
attr_accessor :parser_proc
|
15
|
-
|
16
14
|
def initialize(&block)
|
17
15
|
@parser_proc = block
|
18
16
|
end
|
19
17
|
|
18
|
+
# Run the parser with the given input
|
19
|
+
# Returns a Result<Success|Failure>
|
20
|
+
# string("hello").run("hello") => Success.new("hello", 5)
|
20
21
|
def run(input)
|
21
22
|
run_with_state(ParserState.new(input))
|
22
23
|
end
|
23
24
|
|
25
|
+
|
24
26
|
def run_with_state(state)
|
25
|
-
result = parser_proc.call(state)
|
26
|
-
if result.success?
|
27
|
-
state.last_success_position = state.input.offset
|
28
|
-
end
|
27
|
+
result = @parser_proc.call(state)
|
28
|
+
state.last_success_position = state.input.offset if result.success?
|
29
29
|
result
|
30
30
|
end
|
31
31
|
|
32
|
+
# Capture the result of the parser
|
33
|
+
# p = string("hello").capture!
|
34
|
+
# p.run("hello").captures => ["hello"]
|
35
|
+
# Captures is a flat array of all captured values
|
32
36
|
def capture!(wrap_in = nil)
|
33
37
|
Parser.new do |state|
|
34
38
|
result = run_with_state(state)
|
35
|
-
|
36
|
-
result.capture!(wrap_in)
|
37
|
-
else
|
38
|
-
result
|
39
|
-
end
|
39
|
+
result.success? ? result.capture!(wrap_in) : result
|
40
40
|
end
|
41
41
|
end
|
42
42
|
|
43
|
-
#
|
43
|
+
# Map the captures of the parser
|
44
|
+
# p = string("hello").map_captures { |captures| captures.map(&:upcase) }
|
45
|
+
# p.run("hello") => Success.new("hello", 5, ["HELLO"])
|
46
|
+
# This only maps over captures, not the value
|
44
47
|
def map_captures(&block)
|
45
48
|
Parser.new do |state|
|
46
49
|
result = run_with_state(state)
|
47
|
-
|
48
|
-
Success.new(result.value, result.length, block.call(result.captures))
|
49
|
-
else
|
50
|
-
result
|
51
|
-
end
|
50
|
+
result.success? ? Success.new(result.value, result.length, block.call(result.captures)) : result
|
52
51
|
end
|
53
52
|
end
|
54
53
|
|
55
|
-
#
|
54
|
+
# Map the result of the parser
|
55
|
+
# p = string("hello").map { |s| s.upcase }
|
56
|
+
# p.run("hello") => Success.new("HELLO", 5)
|
56
57
|
def map(&block)
|
57
58
|
Parser.new do |state|
|
58
59
|
result = run_with_state(state)
|
59
|
-
|
60
|
-
Success.new(block.call(result.value), result.length, result.captures)
|
61
|
-
else
|
62
|
-
result
|
63
|
-
end
|
60
|
+
result.success? ? Success.new(block.call(result.value), result.length, result.captures) : result
|
64
61
|
end
|
65
62
|
end
|
66
63
|
|
64
|
+
# Combine the result of the parser with another parser
|
67
65
|
def combine(&block)
|
68
66
|
Parser.new do |state|
|
69
67
|
result = run_with_state(state)
|
68
|
+
|
70
69
|
if result.success?
|
70
|
+
state.input.advance(result.length)
|
71
71
|
new_parser = block.call(result.value, result.captures)
|
72
|
-
|
73
|
-
new_state.input.advance(result.length)
|
74
|
-
new_parser.run_with_state(new_state)
|
72
|
+
new_parser.run_with_state(state)
|
75
73
|
else
|
76
74
|
result
|
77
75
|
end
|
78
76
|
end
|
79
77
|
end
|
80
78
|
|
81
|
-
|
82
|
-
|
83
|
-
|
84
|
-
|
85
|
-
end
|
86
|
-
|
79
|
+
# Combine the result of the parser with another parser
|
80
|
+
# Has an alias of `&`
|
81
|
+
# p = string("hello") & string("world")
|
82
|
+
# p.run("helloworld") => Success.new(["hello", "world"], 10)
|
87
83
|
def product(other_parser)
|
88
84
|
combine do |value1, capture1|
|
89
85
|
other_parser.map do |value2|
|
@@ -95,6 +91,10 @@ module Dolos
|
|
95
91
|
end
|
96
92
|
alias_method :&, :product
|
97
93
|
|
94
|
+
|
95
|
+
# Combine the result of the parser with another parser
|
96
|
+
# Discards the result of the second parser
|
97
|
+
# p = string("hello") << string("world")
|
98
98
|
def product_l(other_parser)
|
99
99
|
combine do |value1, capture1|
|
100
100
|
other_parser.map do |_|
|
@@ -105,6 +105,9 @@ module Dolos
|
|
105
105
|
end
|
106
106
|
end
|
107
107
|
|
108
|
+
# Combine the result of the parser with another parser
|
109
|
+
# Discards the result of the first parser
|
110
|
+
# p = string("hello") >> string("world")
|
108
111
|
def product_r(other_parser)
|
109
112
|
combine do |_, capture1|
|
110
113
|
other_parser.map do |value2|
|
@@ -118,6 +121,10 @@ module Dolos
|
|
118
121
|
alias_method :<<, :product_l
|
119
122
|
alias_method :>>, :product_r
|
120
123
|
|
124
|
+
# Combine the result of the parser with another parser
|
125
|
+
# If the first parser fails, it will try the second parser
|
126
|
+
# p = string("hello") | string("world") | string("!")
|
127
|
+
# p.run("hello") => Success.new("hello", 5)
|
121
128
|
def choice(other_parser)
|
122
129
|
Parser.new do |state|
|
123
130
|
result = run_with_state(state)
|
@@ -130,6 +137,9 @@ module Dolos
|
|
130
137
|
end
|
131
138
|
alias_method :|, :choice
|
132
139
|
|
140
|
+
|
141
|
+
# Repeat the parser n times
|
142
|
+
# Separator is optional, it's another parser that will be run between each repetition
|
133
143
|
# rep0 # 0 or more
|
134
144
|
# rep # 1 or more
|
135
145
|
# rep(n = 2) # exactly 2
|
@@ -140,10 +150,9 @@ module Dolos
|
|
140
150
|
values = []
|
141
151
|
captures = []
|
142
152
|
count = 0
|
143
|
-
state.input.mark_offset
|
144
153
|
|
145
154
|
loop do
|
146
|
-
result = run_with_state(state.dup
|
155
|
+
result = run_with_state(state) # Removing .dup for performance. Be cautious of side effects.
|
147
156
|
|
148
157
|
if result.failure? || count >= n_max
|
149
158
|
break
|
@@ -155,7 +164,7 @@ module Dolos
|
|
155
164
|
count += 1
|
156
165
|
|
157
166
|
if separator && count < n_max
|
158
|
-
sep_result = separator.run_with_state(state.dup
|
167
|
+
sep_result = separator.run_with_state(state) # Removing .dup for performance. Be cautious of side effects.
|
159
168
|
break if sep_result.failure?
|
160
169
|
|
161
170
|
state.input.advance(sep_result.length)
|
@@ -163,10 +172,9 @@ module Dolos
|
|
163
172
|
end
|
164
173
|
|
165
174
|
if count < n_min
|
166
|
-
error_pos = state.input.offset
|
167
175
|
Failure.new(
|
168
|
-
"Expected parser to match at least #{n_min} times but matched only #{count} times",
|
169
|
-
|
176
|
+
-> { "Expected parser to match at least #{n_min} times but matched only #{count} times" },
|
177
|
+
state.input.offset,
|
170
178
|
state
|
171
179
|
)
|
172
180
|
else
|
@@ -175,11 +183,16 @@ module Dolos
|
|
175
183
|
end
|
176
184
|
end
|
177
185
|
|
186
|
+
# Repeat the parser zero or more times
|
187
|
+
# c(" ").rep0.run(" ") => Success.new([" ", " ", " "], 3)
|
178
188
|
def zero_or_more
|
179
189
|
repeat(n_min: 0, n_max: Float::INFINITY)
|
180
190
|
end
|
181
191
|
alias_method :rep0, :zero_or_more
|
182
192
|
|
193
|
+
# Repeat the parser one or more times
|
194
|
+
# Same as rep0, but must match at least once
|
195
|
+
# c(" ").rep.run("A") => Failure.new("...")
|
183
196
|
def one_or_more(exactly = nil)
|
184
197
|
if exactly.nil?
|
185
198
|
repeat(n_min: 1, n_max: Float::INFINITY)
|
@@ -189,6 +202,8 @@ module Dolos
|
|
189
202
|
end
|
190
203
|
alias_method :rep, :one_or_more
|
191
204
|
|
205
|
+
# Make parser optional
|
206
|
+
# c(" ").opt.run("A") => Success.new([], 0)
|
192
207
|
def optional
|
193
208
|
Parser.new do |state|
|
194
209
|
result = run_with_state(state.dup)
|
@@ -201,7 +216,6 @@ module Dolos
|
|
201
216
|
end
|
202
217
|
alias_method :opt, :optional
|
203
218
|
|
204
|
-
# Unstable API
|
205
219
|
# Used to declare lazy parser to avoid infinite loops in recursive parsers
|
206
220
|
def lazy
|
207
221
|
parser_memo = nil
|
@@ -212,11 +226,5 @@ module Dolos
|
|
212
226
|
end
|
213
227
|
end
|
214
228
|
|
215
|
-
private
|
216
|
-
|
217
|
-
def combine_and_discard_empty(*arrays)
|
218
|
-
arrays.compact.reject { |arr| arr.is_a?(Array) && arr.empty? }
|
219
|
-
end
|
220
|
-
|
221
229
|
end
|
222
230
|
end
|