dolos 0.1.2 → 0.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +41 -30
- data/benchmarks/json/json.rb +60 -0
- data/benchmarks/json/nested_json.json +2541 -0
- data/{lib/example.rb → examples/letter.rb} +14 -14
- data/lib/dolos/parser_state.rb +3 -1
- data/lib/dolos/parsers.rb +40 -7
- data/lib/dolos/result.rb +49 -15
- data/lib/dolos/string_io_wrapper.rb +0 -1
- data/lib/dolos/version.rb +1 -1
- data/lib/dolos.rb +75 -15
- data/lib/dolos_common_parsers/common_parsers.rb +5 -2
- data/sig/dolos/parser_state.rbs +1 -0
- data/sig/dolos/result.rbs +1 -0
- metadata +5 -3
@@ -1,6 +1,6 @@
|
|
1
1
|
# frozen_string_literal: true
|
2
2
|
require_relative 'dolos'
|
3
|
-
require_relative 'dolos_common_parsers/common_parsers'
|
3
|
+
require_relative 'dolos_common_parsers/arsers/common_parsers'
|
4
4
|
|
5
5
|
include Dolos
|
6
6
|
|
@@ -27,12 +27,12 @@ alpha_with_lt = char_in("ąčęėįšųūžĄČĘĖĮŠŲŪŽ") | alpha
|
|
27
27
|
|
28
28
|
# Capture all letters in a row and join them,
|
29
29
|
# because they are captured as elements of array by each alpha_with_lt parser.
|
30
|
-
first_name = alpha_with_lt.rep.
|
31
|
-
last_name = alpha_with_lt.rep.
|
30
|
+
first_name = alpha_with_lt.rep.map(&:join).capture!
|
31
|
+
last_name = alpha_with_lt.rep.map(&:join).capture!
|
32
32
|
|
33
33
|
# Combine first line parsers
|
34
34
|
# Consume zero or more whitespace, after that honorific must follow and so on
|
35
|
-
name_line = ws.rep0
|
35
|
+
name_line = ws.rep0 & honorific & first_name & ws & last_name & eol
|
36
36
|
|
37
37
|
# Next line is company info
|
38
38
|
# We could choose to accept UAB and AB or just AB and etc.
|
@@ -42,9 +42,9 @@ quote_open = c("„")
|
|
42
42
|
quote_close = c("“")
|
43
43
|
|
44
44
|
# Consume LT alphabet with whitespace
|
45
|
-
company_name = (alpha_with_lt | ws).rep.
|
46
|
-
company_info = company_type
|
47
|
-
second_line = ws.rep0
|
45
|
+
company_name = (alpha_with_lt | ws).rep.map(&:join).capture!
|
46
|
+
company_info = company_type & ws.rep0 & quote_open & company_name & quote_close
|
47
|
+
second_line = ws.rep0 & company_info & eol
|
48
48
|
|
49
49
|
# Address line
|
50
50
|
# 'char_while' will consume characters while passed predicate is true
|
@@ -52,18 +52,18 @@ second_line = ws.rep0 >> company_info >> eol
|
|
52
52
|
# After that result is captured and mapped to hash
|
53
53
|
# Mapping to hash so at the end its easy to tell tuples apart
|
54
54
|
# Also while mapping, doing some cleaning with '.strip'
|
55
|
-
street_name = char_while(->(char) { !char.match(/\d/) }).
|
56
|
-
building = digits.
|
57
|
-
address_line = ws.rep0
|
55
|
+
street_name = char_while(->(char) { !char.match(/\d/) }).map { |s| { street: s.strip } }.capture!
|
56
|
+
building = digits.map { |s| { building: s.strip } }.capture!
|
57
|
+
address_line = ws.rep0 & street_name & building & eol
|
58
58
|
|
59
59
|
# City line
|
60
60
|
# All digits can be matched here or 'digits.rep(5)' could be used. Also joining with map.
|
61
|
-
postcode = digits.
|
62
|
-
city = alpha_with_lt.rep.
|
63
|
-
city_line = ws.rep0
|
61
|
+
postcode = digits.map { |s| { postcode: s.strip } }.capture!
|
62
|
+
city = alpha_with_lt.rep.map(&:join).map { |s| { city: s.strip } }.capture!
|
63
|
+
city_line = ws.rep0 & postcode & ws & city & eol
|
64
64
|
|
65
65
|
# Full letter parser which is combined from all previous parsers. All previous parsers can be ran separately.
|
66
|
-
letter_parser = name_line
|
66
|
+
letter_parser = name_line & second_line & address_line & city_line
|
67
67
|
result = letter_parser.run(letter)
|
68
68
|
|
69
69
|
pp result.captures
|
data/lib/dolos/parser_state.rb
CHANGED
data/lib/dolos/parsers.rb
CHANGED
@@ -10,14 +10,17 @@ module Dolos
|
|
10
10
|
Success.new(utf8_str, str.bytesize)
|
11
11
|
else
|
12
12
|
advanced = state.input.offset
|
13
|
+
got_error = state.input.io.string.byteslice(state.input.backup, advanced)
|
13
14
|
state.input.rollback
|
14
15
|
Failure.new(
|
15
|
-
"Expected #{str.inspect} but got #{
|
16
|
-
advanced
|
16
|
+
"Expected #{str.inspect} but got #{got_error.inspect}",
|
17
|
+
advanced,
|
18
|
+
state
|
17
19
|
)
|
18
20
|
end
|
19
21
|
end
|
20
22
|
end
|
23
|
+
|
21
24
|
alias_method :c, :string
|
22
25
|
|
23
26
|
def regex(pattern)
|
@@ -30,13 +33,13 @@ module Dolos
|
|
30
33
|
state.input.rollback
|
31
34
|
Failure.new(
|
32
35
|
"Expected pattern #{pattern.inspect} but got #{state.input.io.string.inspect}",
|
33
|
-
advanced
|
36
|
+
advanced,
|
37
|
+
state
|
34
38
|
)
|
35
39
|
end
|
36
40
|
end
|
37
41
|
end
|
38
42
|
|
39
|
-
|
40
43
|
def any_char
|
41
44
|
Parser.new do |state|
|
42
45
|
state.input.mark_offset
|
@@ -48,7 +51,11 @@ module Dolos
|
|
48
51
|
else
|
49
52
|
advanced = state.input.offset
|
50
53
|
state.input.rollback
|
51
|
-
Failure.new(
|
54
|
+
Failure.new(
|
55
|
+
'Expected any character but got end of input',
|
56
|
+
advanced,
|
57
|
+
state
|
58
|
+
)
|
52
59
|
end
|
53
60
|
end
|
54
61
|
end
|
@@ -71,7 +78,8 @@ module Dolos
|
|
71
78
|
state.input.rollback
|
72
79
|
Failure.new(
|
73
80
|
"Expected one of #{characters_array.inspect} but got #{char.inspect}",
|
74
|
-
advanced
|
81
|
+
advanced,
|
82
|
+
state
|
75
83
|
)
|
76
84
|
end
|
77
85
|
end
|
@@ -92,12 +100,37 @@ module Dolos
|
|
92
100
|
|
93
101
|
if buffer.empty?
|
94
102
|
advanced = state.input.offset
|
95
|
-
Failure.new(
|
103
|
+
Failure.new(
|
104
|
+
"Predicate never returned true",
|
105
|
+
advanced,
|
106
|
+
state
|
107
|
+
)
|
96
108
|
else
|
97
109
|
Success.new(buffer, 0)
|
98
110
|
end
|
99
111
|
end
|
100
112
|
end
|
101
113
|
|
114
|
+
# Unstable API
|
115
|
+
def recursive(&block)
|
116
|
+
recursive_parser = nil
|
117
|
+
|
118
|
+
placeholder = Parser.new do |state|
|
119
|
+
raise "Recursive parser accessed before it was initialized!" if recursive_parser.nil?
|
120
|
+
|
121
|
+
recursive_parser.call.run_with_state(state).tap do |result|
|
122
|
+
if result.failure?
|
123
|
+
error_msg = "Error in recursive structure around position #{state.input.offset}: #{result.message}"
|
124
|
+
Failure.new(error_msg, state.input.offset, state)
|
125
|
+
end
|
126
|
+
end
|
127
|
+
end
|
128
|
+
|
129
|
+
recursive_parser = -> { block.call(placeholder) }
|
130
|
+
placeholder
|
131
|
+
end
|
132
|
+
|
133
|
+
|
134
|
+
|
102
135
|
end
|
103
136
|
end
|
data/lib/dolos/result.rb
CHANGED
@@ -10,20 +10,21 @@ module Dolos
|
|
10
10
|
def initialize(value, length, captures = [])
|
11
11
|
@value = value
|
12
12
|
@length = length
|
13
|
-
# @captures = captures || value
|
14
13
|
@captures = captures
|
15
14
|
end
|
16
15
|
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
|
21
|
-
|
16
|
+
# can be some named capture, :street, {:street => capture }
|
17
|
+
# or an array, [], [capture]
|
18
|
+
def capture!(wrap_in = nil)
|
19
|
+
mapped_value = self.value # use the transformed value here
|
20
|
+
|
21
|
+
if wrap_in.is_a?(Array)
|
22
|
+
save_capture([mapped_value])
|
23
|
+
elsif wrap_in.is_a?(Symbol)
|
24
|
+
save_capture({ wrap_in => mapped_value })
|
22
25
|
else
|
23
|
-
|
26
|
+
save_capture(mapped_value)
|
24
27
|
end
|
25
|
-
|
26
|
-
Success.new(value, length, captures)
|
27
28
|
end
|
28
29
|
|
29
30
|
def inspect
|
@@ -37,21 +38,54 @@ module Dolos
|
|
37
38
|
def failure?
|
38
39
|
false
|
39
40
|
end
|
41
|
+
|
42
|
+
private
|
43
|
+
|
44
|
+
def save_capture(val)
|
45
|
+
if val.is_a?(Array)
|
46
|
+
val.each do |v|
|
47
|
+
captures << v
|
48
|
+
end
|
49
|
+
else
|
50
|
+
captures << val
|
51
|
+
end
|
52
|
+
|
53
|
+
Success.new(val, length, captures)
|
54
|
+
end
|
40
55
|
end
|
41
56
|
|
42
57
|
class Failure < Result
|
43
|
-
attr_reader :message, :
|
58
|
+
attr_reader :message, :error_position, :state
|
44
59
|
|
45
|
-
def initialize(message,
|
60
|
+
def initialize(message, error_position, state)
|
46
61
|
@message = message
|
47
|
-
@
|
62
|
+
@error_position = error_position
|
63
|
+
@state = state
|
48
64
|
end
|
49
65
|
|
50
66
|
def inspect
|
67
|
+
pretty_print
|
68
|
+
end
|
69
|
+
|
70
|
+
def pretty_print
|
71
|
+
input_string = state.input.io.string
|
72
|
+
|
73
|
+
pointer = "^" # This will point to the error position
|
74
|
+
|
75
|
+
context_range = 10 # Chars before and after the error to display
|
76
|
+
|
77
|
+
start_index = [error_position - context_range, 0].max
|
78
|
+
end_index = [error_position + context_range, input_string.length].max
|
79
|
+
|
80
|
+
substring = input_string[start_index..end_index]
|
81
|
+
|
82
|
+
padding = error_position - start_index
|
83
|
+
|
51
84
|
[
|
52
|
-
"Failure",
|
53
|
-
|
54
|
-
"
|
85
|
+
"Failure: #{message}",
|
86
|
+
substring,
|
87
|
+
"#{' ' * padding}#{pointer}",
|
88
|
+
"Error Position: #{error_position}, Last Success Position: #{state.last_success_position}"
|
55
89
|
].join("\n")
|
56
90
|
end
|
57
91
|
|
data/lib/dolos/version.rb
CHANGED
data/lib/dolos.rb
CHANGED
@@ -22,21 +22,26 @@ module Dolos
|
|
22
22
|
end
|
23
23
|
|
24
24
|
def run_with_state(state)
|
25
|
-
parser_proc.call(state)
|
25
|
+
result = parser_proc.call(state)
|
26
|
+
if result.success?
|
27
|
+
state.last_success_position = state.input.offset
|
28
|
+
end
|
29
|
+
result
|
26
30
|
end
|
27
31
|
|
28
|
-
def capture!
|
32
|
+
def capture!(wrap_in = nil)
|
29
33
|
Parser.new do |state|
|
30
34
|
result = run_with_state(state)
|
31
35
|
if result.success?
|
32
|
-
result.capture!
|
36
|
+
result.capture!(wrap_in)
|
33
37
|
else
|
34
38
|
result
|
35
39
|
end
|
36
40
|
end
|
37
41
|
end
|
38
42
|
|
39
|
-
|
43
|
+
# Will call block on captures
|
44
|
+
def map_captures(&block)
|
40
45
|
Parser.new do |state|
|
41
46
|
result = run_with_state(state)
|
42
47
|
if result.success?
|
@@ -47,7 +52,8 @@ module Dolos
|
|
47
52
|
end
|
48
53
|
end
|
49
54
|
|
50
|
-
|
55
|
+
# Will call block on tuple of value
|
56
|
+
def map(&block)
|
51
57
|
Parser.new do |state|
|
52
58
|
result = run_with_state(state)
|
53
59
|
if result.success?
|
@@ -58,7 +64,7 @@ module Dolos
|
|
58
64
|
end
|
59
65
|
end
|
60
66
|
|
61
|
-
def
|
67
|
+
def combine(&block)
|
62
68
|
Parser.new do |state|
|
63
69
|
result = run_with_state(state)
|
64
70
|
if result.success?
|
@@ -73,22 +79,44 @@ module Dolos
|
|
73
79
|
end
|
74
80
|
|
75
81
|
def flatten
|
76
|
-
|
82
|
+
map_captures do |captures|
|
77
83
|
captures.flatten
|
78
84
|
end
|
79
85
|
end
|
80
86
|
|
81
87
|
def product(other_parser)
|
82
|
-
|
83
|
-
other_parser.
|
88
|
+
combine do |value1, capture1|
|
89
|
+
other_parser.map do |value2|
|
84
90
|
[value1, value2]
|
85
|
-
end.
|
91
|
+
end.map_captures do |capture2|
|
92
|
+
[capture1, capture2].flatten
|
93
|
+
end
|
94
|
+
end
|
95
|
+
end
|
96
|
+
alias_method :&, :product
|
97
|
+
|
98
|
+
def product_l(other_parser)
|
99
|
+
combine do |value1, capture1|
|
100
|
+
other_parser.map do |_|
|
101
|
+
value1
|
102
|
+
end.map_captures do |capture2|
|
86
103
|
[capture1, capture2].flatten
|
87
104
|
end
|
88
105
|
end
|
89
106
|
end
|
90
107
|
|
91
|
-
|
108
|
+
def product_r(other_parser)
|
109
|
+
combine do |_, capture1|
|
110
|
+
other_parser.map do |value2|
|
111
|
+
value2
|
112
|
+
end.map_captures do |capture2|
|
113
|
+
[capture1, capture2].flatten
|
114
|
+
end
|
115
|
+
end
|
116
|
+
end
|
117
|
+
|
118
|
+
alias_method :<<, :product_l
|
119
|
+
alias_method :>>, :product_r
|
92
120
|
|
93
121
|
def choice(other_parser)
|
94
122
|
Parser.new do |state|
|
@@ -107,25 +135,40 @@ module Dolos
|
|
107
135
|
# rep(n = 2) # exactly 2
|
108
136
|
# repeat(n_min: 2, n_max: 4) # 2 to 4
|
109
137
|
# repeat(n_min: 2) # 2 or more
|
110
|
-
def repeat(n_min:, n_max: Float::INFINITY)
|
138
|
+
def repeat(n_min:, n_max: Float::INFINITY, separator: nil)
|
111
139
|
Parser.new do |state|
|
112
140
|
values = []
|
113
141
|
captures = []
|
114
142
|
count = 0
|
143
|
+
state.input.mark_offset
|
115
144
|
|
116
|
-
|
145
|
+
loop do
|
117
146
|
result = run_with_state(state.dup)
|
118
147
|
|
119
|
-
|
148
|
+
if result.failure? || count >= n_max
|
149
|
+
break
|
150
|
+
end
|
120
151
|
|
121
152
|
values << result.value
|
122
153
|
captures.concat(result.captures)
|
123
154
|
state.input.advance(result.length)
|
124
155
|
count += 1
|
156
|
+
|
157
|
+
if separator && count < n_max
|
158
|
+
sep_result = separator.run_with_state(state.dup)
|
159
|
+
break if sep_result.failure?
|
160
|
+
|
161
|
+
state.input.advance(sep_result.length)
|
162
|
+
end
|
125
163
|
end
|
126
164
|
|
127
165
|
if count < n_min
|
128
|
-
|
166
|
+
error_pos = state.input.offset
|
167
|
+
Failure.new(
|
168
|
+
"Expected parser to match at least #{n_min} times but matched only #{count} times",
|
169
|
+
error_pos,
|
170
|
+
state
|
171
|
+
)
|
129
172
|
else
|
130
173
|
Success.new(values, 0, captures)
|
131
174
|
end
|
@@ -158,5 +201,22 @@ module Dolos
|
|
158
201
|
end
|
159
202
|
alias_method :opt, :optional
|
160
203
|
|
204
|
+
# Unstable API
|
205
|
+
# Used to declare lazy parser to avoid infinite loops in recursive parsers
|
206
|
+
def lazy
|
207
|
+
parser_memo = nil
|
208
|
+
|
209
|
+
Parser.new do |state|
|
210
|
+
parser_memo ||= self
|
211
|
+
parser_memo.run_with_state(state)
|
212
|
+
end
|
213
|
+
end
|
214
|
+
|
215
|
+
private
|
216
|
+
|
217
|
+
def combine_and_discard_empty(*arrays)
|
218
|
+
arrays.compact.reject { |arr| arr.is_a?(Array) && arr.empty? }
|
219
|
+
end
|
220
|
+
|
161
221
|
end
|
162
222
|
end
|
@@ -10,9 +10,12 @@ module Dolos
|
|
10
10
|
regex(/\n|\r\n|\r/)
|
11
11
|
end
|
12
12
|
|
13
|
-
# Capture as String and convert to integer
|
14
13
|
def digit
|
15
|
-
regex(/\d/)
|
14
|
+
regex(/\d/)
|
15
|
+
end
|
16
|
+
|
17
|
+
def int
|
18
|
+
digit.map(&:to_i)
|
16
19
|
end
|
17
20
|
|
18
21
|
# Capture as string
|
data/sig/dolos/parser_state.rbs
CHANGED
data/sig/dolos/result.rbs
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: dolos
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.
|
4
|
+
version: 0.2.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- benetis
|
8
8
|
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date: 2023-08-
|
11
|
+
date: 2023-08-19 00:00:00.000000000 Z
|
12
12
|
dependencies: []
|
13
13
|
description: Parser combinators library for Ruby. In active development, not stable
|
14
14
|
yet.
|
@@ -23,7 +23,10 @@ files:
|
|
23
23
|
- LICENSE.txt
|
24
24
|
- README.md
|
25
25
|
- Rakefile
|
26
|
+
- benchmarks/json/json.rb
|
27
|
+
- benchmarks/json/nested_json.json
|
26
28
|
- docs/dolos_stable_diff.png
|
29
|
+
- examples/letter.rb
|
27
30
|
- lib/dolos.rb
|
28
31
|
- lib/dolos/parser_state.rb
|
29
32
|
- lib/dolos/parsers.rb
|
@@ -31,7 +34,6 @@ files:
|
|
31
34
|
- lib/dolos/string_io_wrapper.rb
|
32
35
|
- lib/dolos/version.rb
|
33
36
|
- lib/dolos_common_parsers/common_parsers.rb
|
34
|
-
- lib/example.rb
|
35
37
|
- sig/dolos.rbs
|
36
38
|
- sig/dolos/common_parsers.rbs
|
37
39
|
- sig/dolos/parser.rbs
|