dolos 0.2.0 → 0.2.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +14 -5
- data/benchmarks/json/json.rb +31 -8
- data/benchmarks/json/nested_json_1m.json +20557 -0
- data/benchmarks/letter.rb +78 -0
- data/examples/letter.rb +2 -2
- data/lib/dolos/parsers.rb +13 -15
- data/lib/dolos/result.rb +12 -3
- data/lib/dolos/string_io_wrapper.rb +3 -8
- data/lib/dolos/version.rb +1 -1
- data/lib/dolos.rb +13 -39
- data/lib/dolos_common_parsers/common_parsers.rb +4 -0
- data/sig/dolos/common_parsers.rbs +11 -0
- data/sig/dolos/parser.rbs +6 -2
- data/sig/dolos/parser_state.rbs +1 -1
- data/sig/dolos/parsers.rbs +4 -0
- data/sig/dolos/result.rbs +7 -0
- metadata +5 -3
- /data/benchmarks/json/{nested_json.json → nested_json_166.json} +0 -0
@@ -0,0 +1,78 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
require 'bundler/setup'
|
3
|
+
require 'dolos'
|
4
|
+
require 'dolos_common_parsers/common_parsers'
|
5
|
+
require 'benchmark/ips'
|
6
|
+
|
7
|
+
include Dolos
|
8
|
+
|
9
|
+
# Include common parsers
|
10
|
+
# In future this can be more structured, moved them to separate module to prevent breaking changes
|
11
|
+
include Dolos::CommonParsers
|
12
|
+
|
13
|
+
# Library usage example
|
14
|
+
# Parse out a name and address from a letter
|
15
|
+
# For higher difficulty, we will not split this into multiple lines, but instead parse it all at once
|
16
|
+
letter = <<-LETTER
|
17
|
+
Mr. Vardeniui Pavardeniui
|
18
|
+
AB „Lietuvos Paštas“
|
19
|
+
Totorių g. 8
|
20
|
+
01121 Vilnius
|
21
|
+
LETTER
|
22
|
+
|
23
|
+
# Combine with 'or'
|
24
|
+
honorific = c("Mr. ") | c("Mrs. ") | c("Ms. ")
|
25
|
+
|
26
|
+
# Can be parsed any_char which will include needed letters
|
27
|
+
# Or combine LT letters with latin alphabet
|
28
|
+
alpha_with_lt = char_in("ąčęėįšųūžĄČĘĖĮŠŲŪŽ") | alpha
|
29
|
+
|
30
|
+
# Capture all letters in a row and join them,
|
31
|
+
# because they are captured as elements of array by each alpha_with_lt parser.
|
32
|
+
first_name = alpha_with_lt.rep.map(&:join).capture!
|
33
|
+
last_name = alpha_with_lt.rep.map(&:join).capture!
|
34
|
+
|
35
|
+
# Combine first line parsers
|
36
|
+
# Consume zero or more whitespace, after that honorific must follow and so on
|
37
|
+
name_line = ws_rep0 & honorific & first_name & ws & last_name & eol
|
38
|
+
|
39
|
+
# Next line is company info
|
40
|
+
# We could choose to accept UAB and AB or just AB and etc.
|
41
|
+
# 'c("AB")' is for case-sensitive string. 'string' can also be used
|
42
|
+
company_type = c("AB")
|
43
|
+
quote_open = c("„")
|
44
|
+
quote_close = c("“")
|
45
|
+
|
46
|
+
# Consume LT alphabet with whitespace
|
47
|
+
company_name = (alpha_with_lt | ws).rep.map(&:join).capture!
|
48
|
+
company_info = company_type & ws_rep0 & quote_open & company_name & quote_close
|
49
|
+
second_line = ws_rep0 & company_info & eol
|
50
|
+
|
51
|
+
# Address line
|
52
|
+
# 'char_while' will consume characters while passed predicate is true
|
53
|
+
# This could be an alternative to previous 'alpha_with_lt' approach
|
54
|
+
# After that result is captured and mapped to hash
|
55
|
+
# Mapping to hash so at the end its easy to tell tuples apart
|
56
|
+
# Also while mapping, doing some cleaning with '.strip'
|
57
|
+
street_name = char_while(->(char) { !char.match(/\d/) }).map { |s| { street: s.strip } }.capture!
|
58
|
+
building = digits.map { |s| { building: s.strip } }.capture!
|
59
|
+
address_line = ws_rep0 & street_name & building & eol
|
60
|
+
|
61
|
+
# City line
|
62
|
+
# All digits can be matched here or 'digits.rep(5)' could be used. Also joining with map.
|
63
|
+
postcode = digits.map { |s| { postcode: s.strip } }.capture!
|
64
|
+
city = alpha_with_lt.rep.map(&:join).map { |s| { city: s.strip } }.capture!
|
65
|
+
city_line = ws_rep0 & postcode & ws & city & eol
|
66
|
+
|
67
|
+
# Full letter parser which is combined from all previous parsers. All previous parsers can be ran separately.
|
68
|
+
letter_parser = name_line & second_line & address_line & city_line
|
69
|
+
result = letter_parser.run(letter)
|
70
|
+
|
71
|
+
puts result.success?
|
72
|
+
|
73
|
+
Benchmark.ips do |x|
|
74
|
+
x.report('letter benchmark') do
|
75
|
+
letter_parser.run(letter)
|
76
|
+
end
|
77
|
+
x.compare!
|
78
|
+
end
|
data/examples/letter.rb
CHANGED
data/lib/dolos/parsers.rb
CHANGED
@@ -3,9 +3,10 @@
|
|
3
3
|
module Dolos
|
4
4
|
module Parsers
|
5
5
|
def string(str)
|
6
|
+
utf8_str = str.encode('UTF-8')
|
7
|
+
|
6
8
|
Parser.new do |state|
|
7
9
|
state.input.mark_offset
|
8
|
-
utf8_str = str.encode('UTF-8')
|
9
10
|
if state.input.matches?(utf8_str)
|
10
11
|
Success.new(utf8_str, str.bytesize)
|
11
12
|
else
|
@@ -13,7 +14,7 @@ module Dolos
|
|
13
14
|
got_error = state.input.io.string.byteslice(state.input.backup, advanced)
|
14
15
|
state.input.rollback
|
15
16
|
Failure.new(
|
16
|
-
"Expected #{str.inspect} but got #{got_error.inspect}",
|
17
|
+
-> { "Expected #{str.inspect} but got #{got_error.inspect}" },
|
17
18
|
advanced,
|
18
19
|
state
|
19
20
|
)
|
@@ -32,7 +33,7 @@ module Dolos
|
|
32
33
|
advanced = state.input.offset
|
33
34
|
state.input.rollback
|
34
35
|
Failure.new(
|
35
|
-
"Expected pattern #{pattern.inspect} but got #{state.input.io.string.inspect}",
|
36
|
+
-> { "Expected pattern #{pattern.inspect} but got #{state.input.io.string.inspect}" },
|
36
37
|
advanced,
|
37
38
|
state
|
38
39
|
)
|
@@ -52,7 +53,7 @@ module Dolos
|
|
52
53
|
advanced = state.input.offset
|
53
54
|
state.input.rollback
|
54
55
|
Failure.new(
|
55
|
-
'Expected any character but got end of input',
|
56
|
+
-> { 'Expected any character but got end of input' },
|
56
57
|
advanced,
|
57
58
|
state
|
58
59
|
)
|
@@ -64,20 +65,20 @@ module Dolos
|
|
64
65
|
# Example:
|
65
66
|
# char_in('abc').run('b') # => Success.new('b', 1)
|
66
67
|
def char_in(characters_string)
|
67
|
-
|
68
|
+
characters_set = characters_string.chars
|
68
69
|
|
69
70
|
Parser.new do |state|
|
70
71
|
state.input.mark_offset
|
71
72
|
|
72
73
|
char, bytesize = state.input.peek(1)
|
73
74
|
|
74
|
-
if char &&
|
75
|
+
if char && characters_set.include?(char)
|
75
76
|
Success.new(char, bytesize)
|
76
77
|
else
|
77
78
|
advanced = state.input.offset
|
78
79
|
state.input.rollback
|
79
80
|
Failure.new(
|
80
|
-
"Expected one of #{
|
81
|
+
-> { "Expected one of #{characters_set.to_a.inspect} but got #{char.inspect}" },
|
81
82
|
advanced,
|
82
83
|
state
|
83
84
|
)
|
@@ -90,18 +91,18 @@ module Dolos
|
|
90
91
|
state.input.mark_offset
|
91
92
|
|
92
93
|
buffer = String.new
|
93
|
-
|
94
|
-
char, bytesize = state.input.peek(1)
|
95
|
-
break if char.nil? || !predicate.call(char)
|
94
|
+
char, bytesize = state.input.peek(1)
|
96
95
|
|
96
|
+
while char && predicate.call(char)
|
97
97
|
buffer << char
|
98
98
|
state.input.advance(bytesize)
|
99
|
+
char, bytesize = state.input.peek(1)
|
99
100
|
end
|
100
101
|
|
101
102
|
if buffer.empty?
|
102
103
|
advanced = state.input.offset
|
103
104
|
Failure.new(
|
104
|
-
"Predicate never returned true",
|
105
|
+
-> { "Predicate never returned true" },
|
105
106
|
advanced,
|
106
107
|
state
|
107
108
|
)
|
@@ -111,7 +112,6 @@ module Dolos
|
|
111
112
|
end
|
112
113
|
end
|
113
114
|
|
114
|
-
# Unstable API
|
115
115
|
def recursive(&block)
|
116
116
|
recursive_parser = nil
|
117
117
|
|
@@ -120,7 +120,7 @@ module Dolos
|
|
120
120
|
|
121
121
|
recursive_parser.call.run_with_state(state).tap do |result|
|
122
122
|
if result.failure?
|
123
|
-
error_msg = "Error in recursive structure around position #{state.input.offset}: #{result.message}"
|
123
|
+
error_msg = -> { "Error in recursive structure around position #{state.input.offset}: #{result.message}" }
|
124
124
|
Failure.new(error_msg, state.input.offset, state)
|
125
125
|
end
|
126
126
|
end
|
@@ -130,7 +130,5 @@ module Dolos
|
|
130
130
|
placeholder
|
131
131
|
end
|
132
132
|
|
133
|
-
|
134
|
-
|
135
133
|
end
|
136
134
|
end
|
data/lib/dolos/result.rb
CHANGED
@@ -55,12 +55,21 @@ module Dolos
|
|
55
55
|
end
|
56
56
|
|
57
57
|
class Failure < Result
|
58
|
-
attr_reader
|
58
|
+
attr_reader :error_position, :state
|
59
59
|
|
60
|
-
def initialize(
|
61
|
-
@
|
60
|
+
def initialize(message_proc, error_position, state)
|
61
|
+
@message_proc = message_proc
|
62
62
|
@error_position = error_position
|
63
63
|
@state = state
|
64
|
+
@message_evaluated = false
|
65
|
+
end
|
66
|
+
|
67
|
+
def message
|
68
|
+
unless @message_evaluated
|
69
|
+
@message_value = @message_proc.call
|
70
|
+
@message_evaluated = true
|
71
|
+
end
|
72
|
+
@message_value
|
64
73
|
end
|
65
74
|
|
66
75
|
def inspect
|
@@ -22,12 +22,7 @@ module Dolos
|
|
22
22
|
|
23
23
|
def matches?(utf8_str)
|
24
24
|
read = io.read(utf8_str.bytesize)
|
25
|
-
|
26
|
-
if read.nil?
|
27
|
-
false
|
28
|
-
else
|
29
|
-
read.force_encoding('UTF-8') == utf8_str
|
30
|
-
end
|
25
|
+
!read.nil? && read.force_encoding('UTF-8') == utf8_str
|
31
26
|
end
|
32
27
|
|
33
28
|
def advance(bytesize)
|
@@ -61,8 +56,8 @@ module Dolos
|
|
61
56
|
remaining_data = io.read
|
62
57
|
io.seek(current_position)
|
63
58
|
|
64
|
-
if
|
65
|
-
matched_string =
|
59
|
+
if remaining_data =~ /\A#{pattern}/
|
60
|
+
matched_string = $&
|
66
61
|
io.seek(current_position + matched_string.bytesize)
|
67
62
|
return matched_string
|
68
63
|
end
|
data/lib/dolos/version.rb
CHANGED
data/lib/dolos.rb
CHANGED
@@ -10,9 +10,7 @@ module Dolos
|
|
10
10
|
include Parsers
|
11
11
|
|
12
12
|
class Parser
|
13
|
-
|
14
13
|
attr_accessor :parser_proc
|
15
|
-
|
16
14
|
def initialize(&block)
|
17
15
|
@parser_proc = block
|
18
16
|
end
|
@@ -22,33 +20,23 @@ module Dolos
|
|
22
20
|
end
|
23
21
|
|
24
22
|
def run_with_state(state)
|
25
|
-
result = parser_proc.call(state)
|
26
|
-
if result.success?
|
27
|
-
state.last_success_position = state.input.offset
|
28
|
-
end
|
23
|
+
result = @parser_proc.call(state)
|
24
|
+
state.last_success_position = state.input.offset if result.success?
|
29
25
|
result
|
30
26
|
end
|
31
27
|
|
32
28
|
def capture!(wrap_in = nil)
|
33
29
|
Parser.new do |state|
|
34
30
|
result = run_with_state(state)
|
35
|
-
|
36
|
-
result.capture!(wrap_in)
|
37
|
-
else
|
38
|
-
result
|
39
|
-
end
|
31
|
+
result.success? ? result.capture!(wrap_in) : result
|
40
32
|
end
|
41
33
|
end
|
42
34
|
|
43
|
-
# Will call
|
35
|
+
# Will call `map` on captures
|
44
36
|
def map_captures(&block)
|
45
37
|
Parser.new do |state|
|
46
38
|
result = run_with_state(state)
|
47
|
-
|
48
|
-
Success.new(result.value, result.length, block.call(result.captures))
|
49
|
-
else
|
50
|
-
result
|
51
|
-
end
|
39
|
+
result.success? ? Success.new(result.value, result.length, block.call(result.captures)) : result
|
52
40
|
end
|
53
41
|
end
|
54
42
|
|
@@ -56,22 +44,18 @@ module Dolos
|
|
56
44
|
def map(&block)
|
57
45
|
Parser.new do |state|
|
58
46
|
result = run_with_state(state)
|
59
|
-
|
60
|
-
Success.new(block.call(result.value), result.length, result.captures)
|
61
|
-
else
|
62
|
-
result
|
63
|
-
end
|
47
|
+
result.success? ? Success.new(block.call(result.value), result.length, result.captures) : result
|
64
48
|
end
|
65
49
|
end
|
66
50
|
|
67
51
|
def combine(&block)
|
68
52
|
Parser.new do |state|
|
69
53
|
result = run_with_state(state)
|
54
|
+
|
70
55
|
if result.success?
|
56
|
+
state.input.advance(result.length)
|
71
57
|
new_parser = block.call(result.value, result.captures)
|
72
|
-
|
73
|
-
new_state.input.advance(result.length)
|
74
|
-
new_parser.run_with_state(new_state)
|
58
|
+
new_parser.run_with_state(state)
|
75
59
|
else
|
76
60
|
result
|
77
61
|
end
|
@@ -140,10 +124,9 @@ module Dolos
|
|
140
124
|
values = []
|
141
125
|
captures = []
|
142
126
|
count = 0
|
143
|
-
state.input.mark_offset
|
144
127
|
|
145
128
|
loop do
|
146
|
-
result = run_with_state(state.dup
|
129
|
+
result = run_with_state(state) # Removing .dup for performance. Be cautious of side effects.
|
147
130
|
|
148
131
|
if result.failure? || count >= n_max
|
149
132
|
break
|
@@ -155,7 +138,7 @@ module Dolos
|
|
155
138
|
count += 1
|
156
139
|
|
157
140
|
if separator && count < n_max
|
158
|
-
sep_result = separator.run_with_state(state.dup
|
141
|
+
sep_result = separator.run_with_state(state) # Removing .dup for performance. Be cautious of side effects.
|
159
142
|
break if sep_result.failure?
|
160
143
|
|
161
144
|
state.input.advance(sep_result.length)
|
@@ -163,10 +146,9 @@ module Dolos
|
|
163
146
|
end
|
164
147
|
|
165
148
|
if count < n_min
|
166
|
-
error_pos = state.input.offset
|
167
149
|
Failure.new(
|
168
|
-
"Expected parser to match at least #{n_min} times but matched only #{count} times",
|
169
|
-
|
150
|
+
-> { "Expected parser to match at least #{n_min} times but matched only #{count} times" },
|
151
|
+
state.input.offset,
|
170
152
|
state
|
171
153
|
)
|
172
154
|
else
|
@@ -174,7 +156,6 @@ module Dolos
|
|
174
156
|
end
|
175
157
|
end
|
176
158
|
end
|
177
|
-
|
178
159
|
def zero_or_more
|
179
160
|
repeat(n_min: 0, n_max: Float::INFINITY)
|
180
161
|
end
|
@@ -201,7 +182,6 @@ module Dolos
|
|
201
182
|
end
|
202
183
|
alias_method :opt, :optional
|
203
184
|
|
204
|
-
# Unstable API
|
205
185
|
# Used to declare lazy parser to avoid infinite loops in recursive parsers
|
206
186
|
def lazy
|
207
187
|
parser_memo = nil
|
@@ -212,11 +192,5 @@ module Dolos
|
|
212
192
|
end
|
213
193
|
end
|
214
194
|
|
215
|
-
private
|
216
|
-
|
217
|
-
def combine_and_discard_empty(*arrays)
|
218
|
-
arrays.compact.reject { |arr| arr.is_a?(Array) && arr.empty? }
|
219
|
-
end
|
220
|
-
|
221
195
|
end
|
222
196
|
end
|
@@ -1,5 +1,16 @@
|
|
1
1
|
module Dolos
|
2
2
|
module CommonParsers
|
3
|
+
def digit: -> Parser[String]
|
4
|
+
def digits: -> Parser[String]
|
5
|
+
|
6
|
+
def int: -> Parser[Integer]
|
7
|
+
|
8
|
+
def eol: -> Parser[String]
|
9
|
+
|
3
10
|
def ws: -> Parser[String]
|
11
|
+
def ws_rep0: -> Parser[String]
|
12
|
+
|
13
|
+
def alpha: -> Parser[String]
|
14
|
+
def alphanum: -> Parser[String]
|
4
15
|
end
|
5
16
|
end
|
data/sig/dolos/parser.rbs
CHANGED
@@ -4,16 +4,20 @@ module Dolos
|
|
4
4
|
def initialize: (^(ParserState) -> Result[A]) -> Parser[A]
|
5
5
|
def capture!: -> Parser[A]
|
6
6
|
def choice: [B](Parser[B])-> Parser[A | B]
|
7
|
+
def combine: [B](^(A, B) -> Parser[B]) -> Parser[B]
|
7
8
|
def flat_map: [B](Parser[A], ^(A) -> Parser[B]) -> Parser[B]
|
8
9
|
def flatten: -> Parser[A]
|
9
10
|
def map: [B](^(A) -> B) -> Parser[B]
|
10
|
-
def
|
11
|
+
def map_captures: [B](^(A) -> B) -> Parser[B]
|
11
12
|
def optional: -> Parser[A?]
|
12
13
|
def product: [B](Parser[A]) -> Parser[B]
|
14
|
+
def product_l: [B](Parser[B]) -> Parser[B]
|
15
|
+
def product_r: [B](Parser[B]) -> Parser[A]
|
13
16
|
def run: (String) -> Result[A]
|
14
17
|
def run_with_state: (ParserState) -> Result[A]
|
15
|
-
def repeat: (Integer, Integer)-> Parser[Array[A]]
|
18
|
+
def repeat: [B](Integer, Integer, Parser[B]?)-> Parser[Array[A]]
|
16
19
|
def zero_or_more: -> Parser[Array[A]]
|
17
20
|
def one_or_more: (Integer?) -> Parser[Array[A]]
|
21
|
+
def lazy: -> Parser[A]
|
18
22
|
end
|
19
23
|
end
|
data/sig/dolos/parser_state.rbs
CHANGED
data/sig/dolos/parsers.rbs
CHANGED
@@ -1,6 +1,10 @@
|
|
1
1
|
module Dolos
|
2
2
|
module Parsers
|
3
3
|
def any_char: -> Parser[String]
|
4
|
+
def char_in: -> Parser[String]
|
5
|
+
def char_while : -> Parser[String]
|
6
|
+
def recursive: [A,B,C]() { (Parser[A]) -> Parser[B] } -> Parser[C]
|
7
|
+
|
4
8
|
def regex: (Regexp) -> Parser[String]
|
5
9
|
def string: (String)-> Parser[String]
|
6
10
|
end
|
data/sig/dolos/result.rbs
CHANGED
@@ -15,6 +15,11 @@ module Dolos
|
|
15
15
|
end
|
16
16
|
|
17
17
|
class Failure < Result[bot]
|
18
|
+
@message_proc: ^-> String
|
19
|
+
@message_evaluated: bool
|
20
|
+
@message_value: String
|
21
|
+
@state: ParserState
|
22
|
+
|
18
23
|
attr_reader committed: bool
|
19
24
|
attr_reader error_position: Integer
|
20
25
|
attr_reader message: String
|
@@ -25,6 +30,8 @@ module Dolos
|
|
25
30
|
|
26
31
|
def map: [B](^(bot) -> B) -> Result[B]
|
27
32
|
|
33
|
+
def pretty_print: -> String
|
34
|
+
|
28
35
|
def success?: -> bool
|
29
36
|
end
|
30
37
|
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: dolos
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.2.
|
4
|
+
version: 0.2.1
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- benetis
|
8
8
|
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date: 2023-08-
|
11
|
+
date: 2023-08-22 00:00:00.000000000 Z
|
12
12
|
dependencies: []
|
13
13
|
description: Parser combinators library for Ruby. In active development, not stable
|
14
14
|
yet.
|
@@ -24,7 +24,9 @@ files:
|
|
24
24
|
- README.md
|
25
25
|
- Rakefile
|
26
26
|
- benchmarks/json/json.rb
|
27
|
-
- benchmarks/json/
|
27
|
+
- benchmarks/json/nested_json_166.json
|
28
|
+
- benchmarks/json/nested_json_1m.json
|
29
|
+
- benchmarks/letter.rb
|
28
30
|
- docs/dolos_stable_diff.png
|
29
31
|
- examples/letter.rb
|
30
32
|
- lib/dolos.rb
|
File without changes
|