dolos 0.1.2 → 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,6 +1,6 @@
1
1
  # frozen_string_literal: true
2
2
  require_relative 'dolos'
3
- require_relative 'dolos_common_parsers/common_parsers'
3
+ require_relative 'dolos_common_parsers/arsers/common_parsers'
4
4
 
5
5
  include Dolos
6
6
 
@@ -27,12 +27,12 @@ alpha_with_lt = char_in("ąčęėįšųūžĄČĘĖĮŠŲŪŽ") | alpha
27
27
 
28
28
  # Capture all letters in a row and join them,
29
29
  # because they are captured as elements of array by each alpha_with_lt parser.
30
- first_name = alpha_with_lt.rep.capture!.map(&:join)
31
- last_name = alpha_with_lt.rep.capture!.map(&:join)
30
+ first_name = alpha_with_lt.rep.map(&:join).capture!
31
+ last_name = alpha_with_lt.rep.map(&:join).capture!
32
32
 
33
33
  # Combine first line parsers
34
34
  # Consume zero or more whitespace, after that honorific must follow and so on
35
- name_line = ws.rep0 >> honorific >> first_name >> ws >> last_name >> eol
35
+ name_line = ws.rep0 & honorific & first_name & ws & last_name & eol
36
36
 
37
37
  # Next line is company info
38
38
  # We could choose to accept both UAB and AB, or just AB, etc.
@@ -42,9 +42,9 @@ quote_open = c("„")
42
42
  quote_close = c("“")
43
43
 
44
44
  # Consume LT alphabet with whitespace
45
- company_name = (alpha_with_lt | ws).rep.capture!.map(&:join)
46
- company_info = company_type >> ws.rep0 >> quote_open >> company_name >> quote_close
47
- second_line = ws.rep0 >> company_info >> eol
45
+ company_name = (alpha_with_lt | ws).rep.map(&:join).capture!
46
+ company_info = company_type & ws.rep0 & quote_open & company_name & quote_close
47
+ second_line = ws.rep0 & company_info & eol
48
48
 
49
49
  # Address line
50
50
  # 'char_while' will consume characters while passed predicate is true
@@ -52,18 +52,18 @@ second_line = ws.rep0 >> company_info >> eol
52
52
  # After that result is captured and mapped to hash
53
53
  # Mapping to hash so at the end it's easy to tell tuples apart
54
54
  # Also while mapping, doing some cleaning with '.strip'
55
- street_name = char_while(->(char) { !char.match(/\d/) }).capture!.map(&:first).map { |s| { street: s.strip } }
56
- building = digits.capture!.map(&:first).map { |s| { building: s.strip } }
57
- address_line = ws.rep0 >> street_name >> building >> eol
55
+ street_name = char_while(->(char) { !char.match(/\d/) }).map { |s| { street: s.strip } }.capture!
56
+ building = digits.map { |s| { building: s.strip } }.capture!
57
+ address_line = ws.rep0 & street_name & building & eol
58
58
 
59
59
  # City line
60
60
  # All digits can be matched here or 'digits.rep(5)' could be used. Also joining with map.
61
- postcode = digits.capture!.map(&:join).map { |s| { postcode: s.strip } }
62
- city = alpha_with_lt.rep.capture!.map(&:join).map { |s| { city: s.strip } }
63
- city_line = ws.rep0 >> postcode >> ws >> city >> eol
61
+ postcode = digits.map { |s| { postcode: s.strip } }.capture!
62
+ city = alpha_with_lt.rep.map(&:join).map { |s| { city: s.strip } }.capture!
63
+ city_line = ws.rep0 & postcode & ws & city & eol
64
64
 
65
65
  # Full letter parser which is combined from all previous parsers. All previous parsers can be run separately.
66
- letter_parser = name_line >> second_line >> address_line >> city_line
66
+ letter_parser = name_line & second_line & address_line & city_line
67
67
  result = letter_parser.run(letter)
68
68
 
69
69
  pp result.captures
@@ -3,10 +3,12 @@
3
3
  module Dolos
4
4
  class ParserState
5
5
  attr_reader :input
6
+ attr_accessor :last_success_position
6
7
 
7
8
  def initialize(input)
8
9
  @input = StringIOWrapper.new(input)
10
+ @last_success_position = 0
9
11
  end
10
12
  end
11
-
12
13
  end
14
+
data/lib/dolos/parsers.rb CHANGED
@@ -10,14 +10,17 @@ module Dolos
10
10
  Success.new(utf8_str, str.bytesize)
11
11
  else
12
12
  advanced = state.input.offset
13
+ got_error = state.input.io.string.byteslice(state.input.backup, advanced)
13
14
  state.input.rollback
14
15
  Failure.new(
15
- "Expected #{str.inspect} but got #{state.input.io.string.inspect}",
16
- advanced
16
+ "Expected #{str.inspect} but got #{got_error.inspect}",
17
+ advanced,
18
+ state
17
19
  )
18
20
  end
19
21
  end
20
22
  end
23
+
21
24
  alias_method :c, :string
22
25
 
23
26
  def regex(pattern)
@@ -30,13 +33,13 @@ module Dolos
30
33
  state.input.rollback
31
34
  Failure.new(
32
35
  "Expected pattern #{pattern.inspect} but got #{state.input.io.string.inspect}",
33
- advanced
36
+ advanced,
37
+ state
34
38
  )
35
39
  end
36
40
  end
37
41
  end
38
42
 
39
-
40
43
  def any_char
41
44
  Parser.new do |state|
42
45
  state.input.mark_offset
@@ -48,7 +51,11 @@ module Dolos
48
51
  else
49
52
  advanced = state.input.offset
50
53
  state.input.rollback
51
- Failure.new('Expected any character but got end of input', advanced)
54
+ Failure.new(
55
+ 'Expected any character but got end of input',
56
+ advanced,
57
+ state
58
+ )
52
59
  end
53
60
  end
54
61
  end
@@ -71,7 +78,8 @@ module Dolos
71
78
  state.input.rollback
72
79
  Failure.new(
73
80
  "Expected one of #{characters_array.inspect} but got #{char.inspect}",
74
- advanced
81
+ advanced,
82
+ state
75
83
  )
76
84
  end
77
85
  end
@@ -92,12 +100,37 @@ module Dolos
92
100
 
93
101
  if buffer.empty?
94
102
  advanced = state.input.offset
95
- Failure.new("Predicate never returned true", advanced)
103
+ Failure.new(
104
+ "Predicate never returned true",
105
+ advanced,
106
+ state
107
+ )
96
108
  else
97
109
  Success.new(buffer, 0)
98
110
  end
99
111
  end
100
112
  end
101
113
 
114
+ # Unstable API
115
+ def recursive(&block)
116
+ recursive_parser = nil
117
+
118
+ placeholder = Parser.new do |state|
119
+ raise "Recursive parser accessed before it was initialized!" if recursive_parser.nil?
120
+
121
+ recursive_parser.call.run_with_state(state).tap do |result|
122
+ if result.failure?
123
+ error_msg = "Error in recursive structure around position #{state.input.offset}: #{result.message}"
124
+ Failure.new(error_msg, state.input.offset, state)
125
+ end
126
+ end
127
+ end
128
+
129
+ recursive_parser = -> { block.call(placeholder) }
130
+ placeholder
131
+ end
132
+
133
+
134
+
102
135
  end
103
136
  end
data/lib/dolos/result.rb CHANGED
@@ -10,20 +10,21 @@ module Dolos
10
10
  def initialize(value, length, captures = [])
11
11
  @value = value
12
12
  @length = length
13
- # @captures = captures || value
14
13
  @captures = captures
15
14
  end
16
15
 
17
- def capture!
18
- if value.is_a?(Array)
19
- value.each do |v|
20
- captures << v
21
- end
16
+ # can be some named capture, :street, {:street => capture }
17
+ # or an array, [], [capture]
18
+ def capture!(wrap_in = nil)
19
+ mapped_value = self.value # use the transformed value here
20
+
21
+ if wrap_in.is_a?(Array)
22
+ save_capture([mapped_value])
23
+ elsif wrap_in.is_a?(Symbol)
24
+ save_capture({ wrap_in => mapped_value })
22
25
  else
23
- captures << value
26
+ save_capture(mapped_value)
24
27
  end
25
-
26
- Success.new(value, length, captures)
27
28
  end
28
29
 
29
30
  def inspect
@@ -37,21 +38,54 @@ module Dolos
37
38
  def failure?
38
39
  false
39
40
  end
41
+
42
+ private
43
+
44
+ def save_capture(val)
45
+ if val.is_a?(Array)
46
+ val.each do |v|
47
+ captures << v
48
+ end
49
+ else
50
+ captures << val
51
+ end
52
+
53
+ Success.new(val, length, captures)
54
+ end
40
55
  end
41
56
 
42
57
  class Failure < Result
43
- attr_reader :message, :committed
58
+ attr_reader :message, :error_position, :state
44
59
 
45
- def initialize(message, committed)
60
+ def initialize(message, error_position, state)
46
61
  @message = message
47
- @committed = committed
62
+ @error_position = error_position
63
+ @state = state
48
64
  end
49
65
 
50
66
  def inspect
67
+ pretty_print
68
+ end
69
+
70
+ def pretty_print
71
+ input_string = state.input.io.string
72
+
73
+ pointer = "^" # This will point to the error position
74
+
75
+ context_range = 10 # Chars before and after the error to display
76
+
77
+ start_index = [error_position - context_range, 0].max
78
+ end_index = [error_position + context_range, input_string.length].max
79
+
80
+ substring = input_string[start_index..end_index]
81
+
82
+ padding = error_position - start_index
83
+
51
84
  [
52
- "Failure",
53
- "message: #{message}",
54
- "committed: #{committed}"
85
+ "Failure: #{message}",
86
+ substring,
87
+ "#{' ' * padding}#{pointer}",
88
+ "Error Position: #{error_position}, Last Success Position: #{state.last_success_position}"
55
89
  ].join("\n")
56
90
  end
57
91
 
@@ -22,7 +22,6 @@ module Dolos
22
22
 
23
23
  def matches?(utf8_str)
24
24
  read = io.read(utf8_str.bytesize)
25
- io.seek(offset)
26
25
 
27
26
  if read.nil?
28
27
  false
data/lib/dolos/version.rb CHANGED
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module Dolos
4
- VERSION = "0.1.2"
4
+ VERSION = "0.2.0"
5
5
  end
data/lib/dolos.rb CHANGED
@@ -22,21 +22,26 @@ module Dolos
22
22
  end
23
23
 
24
24
  def run_with_state(state)
25
- parser_proc.call(state)
25
+ result = parser_proc.call(state)
26
+ if result.success?
27
+ state.last_success_position = state.input.offset
28
+ end
29
+ result
26
30
  end
27
31
 
28
- def capture!
32
+ def capture!(wrap_in = nil)
29
33
  Parser.new do |state|
30
34
  result = run_with_state(state)
31
35
  if result.success?
32
- result.capture!
36
+ result.capture!(wrap_in)
33
37
  else
34
38
  result
35
39
  end
36
40
  end
37
41
  end
38
42
 
39
- def map(&block)
43
+ # Will call block on captures
44
+ def map_captures(&block)
40
45
  Parser.new do |state|
41
46
  result = run_with_state(state)
42
47
  if result.success?
@@ -47,7 +52,8 @@ module Dolos
47
52
  end
48
53
  end
49
54
 
50
- def map_value(&block)
55
+ # Will call block on tuple of value
56
+ def map(&block)
51
57
  Parser.new do |state|
52
58
  result = run_with_state(state)
53
59
  if result.success?
@@ -58,7 +64,7 @@ module Dolos
58
64
  end
59
65
  end
60
66
 
61
- def flat_map(&block)
67
+ def combine(&block)
62
68
  Parser.new do |state|
63
69
  result = run_with_state(state)
64
70
  if result.success?
@@ -73,22 +79,44 @@ module Dolos
73
79
  end
74
80
 
75
81
  def flatten
76
- map do |captures|
82
+ map_captures do |captures|
77
83
  captures.flatten
78
84
  end
79
85
  end
80
86
 
81
87
  def product(other_parser)
82
- flat_map do |value1, capture1|
83
- other_parser.map_value do |value2|
88
+ combine do |value1, capture1|
89
+ other_parser.map do |value2|
84
90
  [value1, value2]
85
- end.map do |capture2|
91
+ end.map_captures do |capture2|
92
+ [capture1, capture2].flatten
93
+ end
94
+ end
95
+ end
96
+ alias_method :&, :product
97
+
98
+ def product_l(other_parser)
99
+ combine do |value1, capture1|
100
+ other_parser.map do |_|
101
+ value1
102
+ end.map_captures do |capture2|
86
103
  [capture1, capture2].flatten
87
104
  end
88
105
  end
89
106
  end
90
107
 
91
- alias_method :>>, :product
108
+ def product_r(other_parser)
109
+ combine do |_, capture1|
110
+ other_parser.map do |value2|
111
+ value2
112
+ end.map_captures do |capture2|
113
+ [capture1, capture2].flatten
114
+ end
115
+ end
116
+ end
117
+
118
+ alias_method :<<, :product_l
119
+ alias_method :>>, :product_r
92
120
 
93
121
  def choice(other_parser)
94
122
  Parser.new do |state|
@@ -107,25 +135,40 @@ module Dolos
107
135
  # rep(n = 2) # exactly 2
108
136
  # repeat(n_min: 2, n_max: 4) # 2 to 4
109
137
  # repeat(n_min: 2) # 2 or more
110
- def repeat(n_min:, n_max: Float::INFINITY)
138
+ def repeat(n_min:, n_max: Float::INFINITY, separator: nil)
111
139
  Parser.new do |state|
112
140
  values = []
113
141
  captures = []
114
142
  count = 0
143
+ state.input.mark_offset
115
144
 
116
- while count < n_max
145
+ loop do
117
146
  result = run_with_state(state.dup)
118
147
 
119
- break if result.failure?
148
+ if result.failure? || count >= n_max
149
+ break
150
+ end
120
151
 
121
152
  values << result.value
122
153
  captures.concat(result.captures)
123
154
  state.input.advance(result.length)
124
155
  count += 1
156
+
157
+ if separator && count < n_max
158
+ sep_result = separator.run_with_state(state.dup)
159
+ break if sep_result.failure?
160
+
161
+ state.input.advance(sep_result.length)
162
+ end
125
163
  end
126
164
 
127
165
  if count < n_min
128
- Failure.new("Expected parser to match at least #{n_min} times but matched only #{count} times", false)
166
+ error_pos = state.input.offset
167
+ Failure.new(
168
+ "Expected parser to match at least #{n_min} times but matched only #{count} times",
169
+ error_pos,
170
+ state
171
+ )
129
172
  else
130
173
  Success.new(values, 0, captures)
131
174
  end
@@ -158,5 +201,22 @@ module Dolos
158
201
  end
159
202
  alias_method :opt, :optional
160
203
 
204
+ # Unstable API
205
+ # Used to declare lazy parser to avoid infinite loops in recursive parsers
206
+ def lazy
207
+ parser_memo = nil
208
+
209
+ Parser.new do |state|
210
+ parser_memo ||= self
211
+ parser_memo.run_with_state(state)
212
+ end
213
+ end
214
+
215
+ private
216
+
217
+ def combine_and_discard_empty(*arrays)
218
+ arrays.compact.reject { |arr| arr.is_a?(Array) && arr.empty? }
219
+ end
220
+
161
221
  end
162
222
  end
@@ -10,9 +10,12 @@ module Dolos
10
10
  regex(/\n|\r\n|\r/)
11
11
  end
12
12
 
13
- # Capture as String and convert to integer
14
13
  def digit
15
- regex(/\d/).capture!.map { |capt| capt.map(&:to_i) }
14
+ regex(/\d/)
15
+ end
16
+
17
+ def int
18
+ digit.map(&:to_i)
16
19
  end
17
20
 
18
21
  # Capture as string
@@ -1,6 +1,7 @@
1
1
  module Dolos
2
2
  class ParserState
3
3
  attr_reader input: Dolos::StringIOWrapper
4
+ attr_accessor last_success_position: Integer
4
5
 
5
6
  def initialize: (String) -> void
6
7
  end
data/sig/dolos/result.rbs CHANGED
@@ -16,6 +16,7 @@ module Dolos
16
16
 
17
17
  class Failure < Result[bot]
18
18
  attr_reader committed: bool
19
+ attr_reader error_position: Integer
19
20
  attr_reader message: String
20
21
 
21
22
  def captures: -> []
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: dolos
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.2
4
+ version: 0.2.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - benetis
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2023-08-16 00:00:00.000000000 Z
11
+ date: 2023-08-19 00:00:00.000000000 Z
12
12
  dependencies: []
13
13
  description: Parser combinators library for Ruby. In active development, not stable
14
14
  yet.
@@ -23,7 +23,10 @@ files:
23
23
  - LICENSE.txt
24
24
  - README.md
25
25
  - Rakefile
26
+ - benchmarks/json/json.rb
27
+ - benchmarks/json/nested_json.json
26
28
  - docs/dolos_stable_diff.png
29
+ - examples/letter.rb
27
30
  - lib/dolos.rb
28
31
  - lib/dolos/parser_state.rb
29
32
  - lib/dolos/parsers.rb
@@ -31,7 +34,6 @@ files:
31
34
  - lib/dolos/string_io_wrapper.rb
32
35
  - lib/dolos/version.rb
33
36
  - lib/dolos_common_parsers/common_parsers.rb
34
- - lib/example.rb
35
37
  - sig/dolos.rbs
36
38
  - sig/dolos/common_parsers.rbs
37
39
  - sig/dolos/parser.rbs