dolos 0.1.3 → 0.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +38 -31
- data/benchmarks/json/json.rb +60 -0
- data/benchmarks/json/nested_json.json +2541 -0
- data/{lib/example.rb → examples/letter.rb} +14 -14
- data/lib/dolos/parsers.rb +21 -0
- data/lib/dolos/result.rb +24 -9
- data/lib/dolos/version.rb +1 -1
- data/lib/dolos.rb +63 -13
- data/lib/dolos_common_parsers/common_parsers.rb +5 -2
- metadata +5 -3
@@ -1,6 +1,6 @@
|
|
1
1
|
# frozen_string_literal: true
|
2
2
|
require_relative 'dolos'
|
3
|
-
require_relative 'dolos_common_parsers/common_parsers'
|
3
|
+
require_relative 'dolos_common_parsers/arsers/common_parsers'
|
4
4
|
|
5
5
|
include Dolos
|
6
6
|
|
@@ -27,12 +27,12 @@ alpha_with_lt = char_in("ąčęėįšųūžĄČĘĖĮŠŲŪŽ") | alpha
|
|
27
27
|
|
28
28
|
# Capture all letters in a row and join them,
|
29
29
|
# because each alpha_with_lt parser captures them as separate elements of an array.
|
30
|
-
first_name = alpha_with_lt.rep.
|
31
|
-
last_name = alpha_with_lt.rep.
|
30
|
+
first_name = alpha_with_lt.rep.map(&:join).capture!
|
31
|
+
last_name = alpha_with_lt.rep.map(&:join).capture!
|
32
32
|
|
33
33
|
# Combine first line parsers
|
34
34
|
# Consume zero or more whitespace, after that honorific must follow and so on
|
35
|
-
name_line = ws.rep0
|
35
|
+
name_line = ws.rep0 & honorific & first_name & ws & last_name & eol
|
36
36
|
|
37
37
|
# Next line is company info
|
38
38
|
# We could choose to accept both UAB and AB, or just AB, etc.
|
@@ -42,9 +42,9 @@ quote_open = c("„")
|
|
42
42
|
quote_close = c("“")
|
43
43
|
|
44
44
|
# Consume LT alphabet with whitespace
|
45
|
-
company_name = (alpha_with_lt | ws).rep.
|
46
|
-
company_info = company_type
|
47
|
-
second_line = ws.rep0
|
45
|
+
company_name = (alpha_with_lt | ws).rep.map(&:join).capture!
|
46
|
+
company_info = company_type & ws.rep0 & quote_open & company_name & quote_close
|
47
|
+
second_line = ws.rep0 & company_info & eol
|
48
48
|
|
49
49
|
# Address line
|
50
50
|
# 'char_while' will consume characters while passed predicate is true
|
@@ -52,18 +52,18 @@ second_line = ws.rep0 >> company_info >> eol
|
|
52
52
|
# After that result is captured and mapped to hash
|
53
53
|
# Mapping to hash so at the end it's easy to tell tuples apart
|
54
54
|
# Also while mapping, doing some cleaning with '.strip'
|
55
|
-
street_name = char_while(->(char) { !char.match(/\d/) }).
|
56
|
-
building = digits.
|
57
|
-
address_line = ws.rep0
|
55
|
+
street_name = char_while(->(char) { !char.match(/\d/) }).map { |s| { street: s.strip } }.capture!
|
56
|
+
building = digits.map { |s| { building: s.strip } }.capture!
|
57
|
+
address_line = ws.rep0 & street_name & building & eol
|
58
58
|
|
59
59
|
# City line
|
60
60
|
# All digits can be matched here or 'digits.rep(5)' could be used. Also joining with map.
|
61
|
-
postcode = digits.
|
62
|
-
city = alpha_with_lt.rep.
|
63
|
-
city_line = ws.rep0
|
61
|
+
postcode = digits.map { |s| { postcode: s.strip } }.capture!
|
62
|
+
city = alpha_with_lt.rep.map(&:join).map { |s| { city: s.strip } }.capture!
|
63
|
+
city_line = ws.rep0 & postcode & ws & city & eol
|
64
64
|
|
65
65
|
# Full letter parser which is combined from all previous parsers. All previous parsers can be run separately.
|
66
|
-
letter_parser = name_line
|
66
|
+
letter_parser = name_line & second_line & address_line & city_line
|
67
67
|
result = letter_parser.run(letter)
|
68
68
|
|
69
69
|
pp result.captures
|
data/lib/dolos/parsers.rb
CHANGED
@@ -111,5 +111,26 @@ module Dolos
|
|
111
111
|
end
|
112
112
|
end
|
113
113
|
|
114
|
+
# Unstable API
|
115
|
+
def recursive(&block)
|
116
|
+
recursive_parser = nil
|
117
|
+
|
118
|
+
placeholder = Parser.new do |state|
|
119
|
+
raise "Recursive parser accessed before it was initialized!" if recursive_parser.nil?
|
120
|
+
|
121
|
+
recursive_parser.call.run_with_state(state).tap do |result|
|
122
|
+
if result.failure?
|
123
|
+
error_msg = "Error in recursive structure around position #{state.input.offset}: #{result.message}"
|
124
|
+
Failure.new(error_msg, state.input.offset, state)
|
125
|
+
end
|
126
|
+
end
|
127
|
+
end
|
128
|
+
|
129
|
+
recursive_parser = -> { block.call(placeholder) }
|
130
|
+
placeholder
|
131
|
+
end
|
132
|
+
|
133
|
+
|
134
|
+
|
114
135
|
end
|
115
136
|
end
|
data/lib/dolos/result.rb
CHANGED
@@ -10,20 +10,21 @@ module Dolos
|
|
10
10
|
def initialize(value, length, captures = [])
|
11
11
|
@value = value
|
12
12
|
@length = length
|
13
|
-
# @captures = captures || value
|
14
13
|
@captures = captures
|
15
14
|
end
|
16
15
|
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
|
21
|
-
|
16
|
+
# wrap_in can be a Symbol naming the capture, e.g. :street saves {:street => capture },
|
17
|
+
# or an array, [], [capture]
|
18
|
+
def capture!(wrap_in = nil)
|
19
|
+
mapped_value = self.value # use the transformed value here
|
20
|
+
|
21
|
+
if wrap_in.is_a?(Array)
|
22
|
+
save_capture([mapped_value])
|
23
|
+
elsif wrap_in.is_a?(Symbol)
|
24
|
+
save_capture({ wrap_in => mapped_value })
|
22
25
|
else
|
23
|
-
|
26
|
+
save_capture(mapped_value)
|
24
27
|
end
|
25
|
-
|
26
|
-
Success.new(value, length, captures)
|
27
28
|
end
|
28
29
|
|
29
30
|
def inspect
|
@@ -37,6 +38,20 @@ module Dolos
|
|
37
38
|
def failure?
|
38
39
|
false
|
39
40
|
end
|
41
|
+
|
42
|
+
private
|
43
|
+
|
44
|
+
def save_capture(val)
|
45
|
+
if val.is_a?(Array)
|
46
|
+
val.each do |v|
|
47
|
+
captures << v
|
48
|
+
end
|
49
|
+
else
|
50
|
+
captures << val
|
51
|
+
end
|
52
|
+
|
53
|
+
Success.new(val, length, captures)
|
54
|
+
end
|
40
55
|
end
|
41
56
|
|
42
57
|
class Failure < Result
|
data/lib/dolos/version.rb
CHANGED
data/lib/dolos.rb
CHANGED
@@ -29,18 +29,19 @@ module Dolos
|
|
29
29
|
result
|
30
30
|
end
|
31
31
|
|
32
|
-
def capture!
|
32
|
+
def capture!(wrap_in = nil)
|
33
33
|
Parser.new do |state|
|
34
34
|
result = run_with_state(state)
|
35
35
|
if result.success?
|
36
|
-
result.capture!
|
36
|
+
result.capture!(wrap_in)
|
37
37
|
else
|
38
38
|
result
|
39
39
|
end
|
40
40
|
end
|
41
41
|
end
|
42
42
|
|
43
|
-
|
43
|
+
# Will call block on captures
|
44
|
+
def map_captures(&block)
|
44
45
|
Parser.new do |state|
|
45
46
|
result = run_with_state(state)
|
46
47
|
if result.success?
|
@@ -51,7 +52,8 @@ module Dolos
|
|
51
52
|
end
|
52
53
|
end
|
53
54
|
|
54
|
-
|
55
|
+
# Will call block on tuple of value
|
56
|
+
def map(&block)
|
55
57
|
Parser.new do |state|
|
56
58
|
result = run_with_state(state)
|
57
59
|
if result.success?
|
@@ -62,7 +64,7 @@ module Dolos
|
|
62
64
|
end
|
63
65
|
end
|
64
66
|
|
65
|
-
def
|
67
|
+
def combine(&block)
|
66
68
|
Parser.new do |state|
|
67
69
|
result = run_with_state(state)
|
68
70
|
if result.success?
|
@@ -77,22 +79,44 @@ module Dolos
|
|
77
79
|
end
|
78
80
|
|
79
81
|
def flatten
|
80
|
-
|
82
|
+
map_captures do |captures|
|
81
83
|
captures.flatten
|
82
84
|
end
|
83
85
|
end
|
84
86
|
|
85
87
|
def product(other_parser)
|
86
|
-
|
87
|
-
other_parser.
|
88
|
+
combine do |value1, capture1|
|
89
|
+
other_parser.map do |value2|
|
88
90
|
[value1, value2]
|
89
|
-
end.
|
91
|
+
end.map_captures do |capture2|
|
92
|
+
[capture1, capture2].flatten
|
93
|
+
end
|
94
|
+
end
|
95
|
+
end
|
96
|
+
alias_method :&, :product
|
97
|
+
|
98
|
+
def product_l(other_parser)
|
99
|
+
combine do |value1, capture1|
|
100
|
+
other_parser.map do |_|
|
101
|
+
value1
|
102
|
+
end.map_captures do |capture2|
|
103
|
+
[capture1, capture2].flatten
|
104
|
+
end
|
105
|
+
end
|
106
|
+
end
|
107
|
+
|
108
|
+
def product_r(other_parser)
|
109
|
+
combine do |_, capture1|
|
110
|
+
other_parser.map do |value2|
|
111
|
+
value2
|
112
|
+
end.map_captures do |capture2|
|
90
113
|
[capture1, capture2].flatten
|
91
114
|
end
|
92
115
|
end
|
93
116
|
end
|
94
117
|
|
95
|
-
alias_method
|
118
|
+
alias_method :<<, :product_l
|
119
|
+
alias_method :>>, :product_r
|
96
120
|
|
97
121
|
def choice(other_parser)
|
98
122
|
Parser.new do |state|
|
@@ -111,22 +135,31 @@ module Dolos
|
|
111
135
|
# rep(n = 2) # exactly 2
|
112
136
|
# repeat(n_min: 2, n_max: 4) # 2 to 4
|
113
137
|
# repeat(n_min: 2) # 2 or more
|
114
|
-
def repeat(n_min:, n_max: Float::INFINITY)
|
138
|
+
def repeat(n_min:, n_max: Float::INFINITY, separator: nil)
|
115
139
|
Parser.new do |state|
|
116
140
|
values = []
|
117
141
|
captures = []
|
118
142
|
count = 0
|
119
143
|
state.input.mark_offset
|
120
144
|
|
121
|
-
|
145
|
+
loop do
|
122
146
|
result = run_with_state(state.dup)
|
123
147
|
|
124
|
-
|
148
|
+
if result.failure? || count >= n_max
|
149
|
+
break
|
150
|
+
end
|
125
151
|
|
126
152
|
values << result.value
|
127
153
|
captures.concat(result.captures)
|
128
154
|
state.input.advance(result.length)
|
129
155
|
count += 1
|
156
|
+
|
157
|
+
if separator && count < n_max
|
158
|
+
sep_result = separator.run_with_state(state.dup)
|
159
|
+
break if sep_result.failure?
|
160
|
+
|
161
|
+
state.input.advance(sep_result.length)
|
162
|
+
end
|
130
163
|
end
|
131
164
|
|
132
165
|
if count < n_min
|
@@ -168,5 +201,22 @@ module Dolos
|
|
168
201
|
end
|
169
202
|
alias_method :opt, :optional
|
170
203
|
|
204
|
+
# Unstable API
|
205
|
+
# Used to declare a lazy parser to avoid infinite loops in recursive parsers
|
206
|
+
def lazy
|
207
|
+
parser_memo = nil
|
208
|
+
|
209
|
+
Parser.new do |state|
|
210
|
+
parser_memo ||= self
|
211
|
+
parser_memo.run_with_state(state)
|
212
|
+
end
|
213
|
+
end
|
214
|
+
|
215
|
+
private
|
216
|
+
|
217
|
+
def combine_and_discard_empty(*arrays)
|
218
|
+
arrays.compact.reject { |arr| arr.is_a?(Array) && arr.empty? }
|
219
|
+
end
|
220
|
+
|
171
221
|
end
|
172
222
|
end
|
@@ -10,9 +10,12 @@ module Dolos
|
|
10
10
|
regex(/\n|\r\n|\r/)
|
11
11
|
end
|
12
12
|
|
13
|
-
# Capture as String and convert to integer
|
14
13
|
def digit
|
15
|
-
regex(/\d/)
|
14
|
+
regex(/\d/)
|
15
|
+
end
|
16
|
+
|
17
|
+
def int
|
18
|
+
digit.map(&:to_i)
|
16
19
|
end
|
17
20
|
|
18
21
|
# Capture as string
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: dolos
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.
|
4
|
+
version: 0.2.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- benetis
|
8
8
|
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date: 2023-08-
|
11
|
+
date: 2023-08-19 00:00:00.000000000 Z
|
12
12
|
dependencies: []
|
13
13
|
description: Parser combinators library for Ruby. In active development, not stable
|
14
14
|
yet.
|
@@ -23,7 +23,10 @@ files:
|
|
23
23
|
- LICENSE.txt
|
24
24
|
- README.md
|
25
25
|
- Rakefile
|
26
|
+
- benchmarks/json/json.rb
|
27
|
+
- benchmarks/json/nested_json.json
|
26
28
|
- docs/dolos_stable_diff.png
|
29
|
+
- examples/letter.rb
|
27
30
|
- lib/dolos.rb
|
28
31
|
- lib/dolos/parser_state.rb
|
29
32
|
- lib/dolos/parsers.rb
|
@@ -31,7 +34,6 @@ files:
|
|
31
34
|
- lib/dolos/string_io_wrapper.rb
|
32
35
|
- lib/dolos/version.rb
|
33
36
|
- lib/dolos_common_parsers/common_parsers.rb
|
34
|
-
- lib/example.rb
|
35
37
|
- sig/dolos.rbs
|
36
38
|
- sig/dolos/common_parsers.rbs
|
37
39
|
- sig/dolos/parser.rbs
|