dolos 0.1.2 → 0.2.0

Sign up to get free protection for your applications and to get access to all the features.
@@ -1,6 +1,6 @@
1
1
  # frozen_string_literal: true
2
2
  require_relative 'dolos'
3
- require_relative 'dolos_common_parsers/common_parsers'
3
+ require_relative 'dolos_common_parsers/arsers/common_parsers'
4
4
 
5
5
  include Dolos
6
6
 
@@ -27,12 +27,12 @@ alpha_with_lt = char_in("ąčęėįšųūžĄČĘĖĮŠŲŪŽ") | alpha
27
27
 
28
28
  # Capture all letters in a row and join them,
29
29
  # because they are captured as elements of an array by each alpha_with_lt parser.
30
- first_name = alpha_with_lt.rep.capture!.map(&:join)
31
- last_name = alpha_with_lt.rep.capture!.map(&:join)
30
+ first_name = alpha_with_lt.rep.map(&:join).capture!
31
+ last_name = alpha_with_lt.rep.map(&:join).capture!
32
32
 
33
33
  # Combine first line parsers
34
34
  # Consume zero or more whitespace, after that honorific must follow and so on
35
- name_line = ws.rep0 >> honorific >> first_name >> ws >> last_name >> eol
35
+ name_line = ws.rep0 & honorific & first_name & ws & last_name & eol
36
36
 
37
37
  # Next line is company info
38
38
  # We could choose to accept UAB and AB, or just AB, etc.
@@ -42,9 +42,9 @@ quote_open = c("„")
42
42
  quote_close = c("“")
43
43
 
44
44
  # Consume LT alphabet with whitespace
45
- company_name = (alpha_with_lt | ws).rep.capture!.map(&:join)
46
- company_info = company_type >> ws.rep0 >> quote_open >> company_name >> quote_close
47
- second_line = ws.rep0 >> company_info >> eol
45
+ company_name = (alpha_with_lt | ws).rep.map(&:join).capture!
46
+ company_info = company_type & ws.rep0 & quote_open & company_name & quote_close
47
+ second_line = ws.rep0 & company_info & eol
48
48
 
49
49
  # Address line
50
50
  # 'char_while' will consume characters while passed predicate is true
@@ -52,18 +52,18 @@ second_line = ws.rep0 >> company_info >> eol
52
52
  # After that, the result is captured and mapped to a hash
53
53
  # Mapping to a hash so that at the end it's easy to tell tuples apart
54
54
  # Also while mapping, doing some cleaning with '.strip'
55
- street_name = char_while(->(char) { !char.match(/\d/) }).capture!.map(&:first).map { |s| { street: s.strip } }
56
- building = digits.capture!.map(&:first).map { |s| { building: s.strip } }
57
- address_line = ws.rep0 >> street_name >> building >> eol
55
+ street_name = char_while(->(char) { !char.match(/\d/) }).map { |s| { street: s.strip } }.capture!
56
+ building = digits.map { |s| { building: s.strip } }.capture!
57
+ address_line = ws.rep0 & street_name & building & eol
58
58
 
59
59
  # City line
60
60
  # All digits can be matched here or 'digits.rep(5)' could be used. Also joining with map.
61
- postcode = digits.capture!.map(&:join).map { |s| { postcode: s.strip } }
62
- city = alpha_with_lt.rep.capture!.map(&:join).map { |s| { city: s.strip } }
63
- city_line = ws.rep0 >> postcode >> ws >> city >> eol
61
+ postcode = digits.map { |s| { postcode: s.strip } }.capture!
62
+ city = alpha_with_lt.rep.map(&:join).map { |s| { city: s.strip } }.capture!
63
+ city_line = ws.rep0 & postcode & ws & city & eol
64
64
 
65
65
  # Full letter parser which is combined from all previous parsers. All previous parsers can be run separately.
66
- letter_parser = name_line >> second_line >> address_line >> city_line
66
+ letter_parser = name_line & second_line & address_line & city_line
67
67
  result = letter_parser.run(letter)
68
68
 
69
69
  pp result.captures
@@ -3,10 +3,12 @@
3
3
  module Dolos
4
4
  class ParserState
5
5
  attr_reader :input
6
+ attr_accessor :last_success_position
6
7
 
7
8
  def initialize(input)
8
9
  @input = StringIOWrapper.new(input)
10
+ @last_success_position = 0
9
11
  end
10
12
  end
11
-
12
13
  end
14
+
data/lib/dolos/parsers.rb CHANGED
@@ -10,14 +10,17 @@ module Dolos
10
10
  Success.new(utf8_str, str.bytesize)
11
11
  else
12
12
  advanced = state.input.offset
13
+ got_error = state.input.io.string.byteslice(state.input.backup, advanced)
13
14
  state.input.rollback
14
15
  Failure.new(
15
- "Expected #{str.inspect} but got #{state.input.io.string.inspect}",
16
- advanced
16
+ "Expected #{str.inspect} but got #{got_error.inspect}",
17
+ advanced,
18
+ state
17
19
  )
18
20
  end
19
21
  end
20
22
  end
23
+
21
24
  alias_method :c, :string
22
25
 
23
26
  def regex(pattern)
@@ -30,13 +33,13 @@ module Dolos
30
33
  state.input.rollback
31
34
  Failure.new(
32
35
  "Expected pattern #{pattern.inspect} but got #{state.input.io.string.inspect}",
33
- advanced
36
+ advanced,
37
+ state
34
38
  )
35
39
  end
36
40
  end
37
41
  end
38
42
 
39
-
40
43
  def any_char
41
44
  Parser.new do |state|
42
45
  state.input.mark_offset
@@ -48,7 +51,11 @@ module Dolos
48
51
  else
49
52
  advanced = state.input.offset
50
53
  state.input.rollback
51
- Failure.new('Expected any character but got end of input', advanced)
54
+ Failure.new(
55
+ 'Expected any character but got end of input',
56
+ advanced,
57
+ state
58
+ )
52
59
  end
53
60
  end
54
61
  end
@@ -71,7 +78,8 @@ module Dolos
71
78
  state.input.rollback
72
79
  Failure.new(
73
80
  "Expected one of #{characters_array.inspect} but got #{char.inspect}",
74
- advanced
81
+ advanced,
82
+ state
75
83
  )
76
84
  end
77
85
  end
@@ -92,12 +100,37 @@ module Dolos
92
100
 
93
101
  if buffer.empty?
94
102
  advanced = state.input.offset
95
- Failure.new("Predicate never returned true", advanced)
103
+ Failure.new(
104
+ "Predicate never returned true",
105
+ advanced,
106
+ state
107
+ )
96
108
  else
97
109
  Success.new(buffer, 0)
98
110
  end
99
111
  end
100
112
  end
101
113
 
114
+ # Unstable API
115
+ def recursive(&block)
116
+ recursive_parser = nil
117
+
118
+ placeholder = Parser.new do |state|
119
+ raise "Recursive parser accessed before it was initialized!" if recursive_parser.nil?
120
+
121
+ recursive_parser.call.run_with_state(state).tap do |result|
122
+ if result.failure?
123
+ error_msg = "Error in recursive structure around position #{state.input.offset}: #{result.message}"
124
+ Failure.new(error_msg, state.input.offset, state)
125
+ end
126
+ end
127
+ end
128
+
129
+ recursive_parser = -> { block.call(placeholder) }
130
+ placeholder
131
+ end
132
+
133
+
134
+
102
135
  end
103
136
  end
data/lib/dolos/result.rb CHANGED
@@ -10,20 +10,21 @@ module Dolos
10
10
  def initialize(value, length, captures = [])
11
11
  @value = value
12
12
  @length = length
13
- # @captures = captures || value
14
13
  @captures = captures
15
14
  end
16
15
 
17
- def capture!
18
- if value.is_a?(Array)
19
- value.each do |v|
20
- captures << v
21
- end
16
+ # can be some named capture, :street, {:street => capture }
17
+ # or an array, [], [capture]
18
+ def capture!(wrap_in = nil)
19
+ mapped_value = self.value # use the transformed value here
20
+
21
+ if wrap_in.is_a?(Array)
22
+ save_capture([mapped_value])
23
+ elsif wrap_in.is_a?(Symbol)
24
+ save_capture({ wrap_in => mapped_value })
22
25
  else
23
- captures << value
26
+ save_capture(mapped_value)
24
27
  end
25
-
26
- Success.new(value, length, captures)
27
28
  end
28
29
 
29
30
  def inspect
@@ -37,21 +38,54 @@ module Dolos
37
38
  def failure?
38
39
  false
39
40
  end
41
+
42
+ private
43
+
44
+ def save_capture(val)
45
+ if val.is_a?(Array)
46
+ val.each do |v|
47
+ captures << v
48
+ end
49
+ else
50
+ captures << val
51
+ end
52
+
53
+ Success.new(val, length, captures)
54
+ end
40
55
  end
41
56
 
42
57
  class Failure < Result
43
- attr_reader :message, :committed
58
+ attr_reader :message, :error_position, :state
44
59
 
45
- def initialize(message, committed)
60
+ def initialize(message, error_position, state)
46
61
  @message = message
47
- @committed = committed
62
+ @error_position = error_position
63
+ @state = state
48
64
  end
49
65
 
50
66
  def inspect
67
+ pretty_print
68
+ end
69
+
70
+ def pretty_print
71
+ input_string = state.input.io.string
72
+
73
+ pointer = "^" # This will point to the error position
74
+
75
+ context_range = 10 # Chars before and after the error to display
76
+
77
+ start_index = [error_position - context_range, 0].max
78
+ end_index = [error_position + context_range, input_string.length].max
79
+
80
+ substring = input_string[start_index..end_index]
81
+
82
+ padding = error_position - start_index
83
+
51
84
  [
52
- "Failure",
53
- "message: #{message}",
54
- "committed: #{committed}"
85
+ "Failure: #{message}",
86
+ substring,
87
+ "#{' ' * padding}#{pointer}",
88
+ "Error Position: #{error_position}, Last Success Position: #{state.last_success_position}"
55
89
  ].join("\n")
56
90
  end
57
91
 
@@ -22,7 +22,6 @@ module Dolos
22
22
 
23
23
  def matches?(utf8_str)
24
24
  read = io.read(utf8_str.bytesize)
25
- io.seek(offset)
26
25
 
27
26
  if read.nil?
28
27
  false
data/lib/dolos/version.rb CHANGED
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module Dolos
4
- VERSION = "0.1.2"
4
+ VERSION = "0.2.0"
5
5
  end
data/lib/dolos.rb CHANGED
@@ -22,21 +22,26 @@ module Dolos
22
22
  end
23
23
 
24
24
  def run_with_state(state)
25
- parser_proc.call(state)
25
+ result = parser_proc.call(state)
26
+ if result.success?
27
+ state.last_success_position = state.input.offset
28
+ end
29
+ result
26
30
  end
27
31
 
28
- def capture!
32
+ def capture!(wrap_in = nil)
29
33
  Parser.new do |state|
30
34
  result = run_with_state(state)
31
35
  if result.success?
32
- result.capture!
36
+ result.capture!(wrap_in)
33
37
  else
34
38
  result
35
39
  end
36
40
  end
37
41
  end
38
42
 
39
- def map(&block)
43
+ # Will call block on captures
44
+ def map_captures(&block)
40
45
  Parser.new do |state|
41
46
  result = run_with_state(state)
42
47
  if result.success?
@@ -47,7 +52,8 @@ module Dolos
47
52
  end
48
53
  end
49
54
 
50
- def map_value(&block)
55
+ # Will call block on tuple of value
56
+ def map(&block)
51
57
  Parser.new do |state|
52
58
  result = run_with_state(state)
53
59
  if result.success?
@@ -58,7 +64,7 @@ module Dolos
58
64
  end
59
65
  end
60
66
 
61
- def flat_map(&block)
67
+ def combine(&block)
62
68
  Parser.new do |state|
63
69
  result = run_with_state(state)
64
70
  if result.success?
@@ -73,22 +79,44 @@ module Dolos
73
79
  end
74
80
 
75
81
  def flatten
76
- map do |captures|
82
+ map_captures do |captures|
77
83
  captures.flatten
78
84
  end
79
85
  end
80
86
 
81
87
  def product(other_parser)
82
- flat_map do |value1, capture1|
83
- other_parser.map_value do |value2|
88
+ combine do |value1, capture1|
89
+ other_parser.map do |value2|
84
90
  [value1, value2]
85
- end.map do |capture2|
91
+ end.map_captures do |capture2|
92
+ [capture1, capture2].flatten
93
+ end
94
+ end
95
+ end
96
+ alias_method :&, :product
97
+
98
+ def product_l(other_parser)
99
+ combine do |value1, capture1|
100
+ other_parser.map do |_|
101
+ value1
102
+ end.map_captures do |capture2|
86
103
  [capture1, capture2].flatten
87
104
  end
88
105
  end
89
106
  end
90
107
 
91
- alias_method :>>, :product
108
+ def product_r(other_parser)
109
+ combine do |_, capture1|
110
+ other_parser.map do |value2|
111
+ value2
112
+ end.map_captures do |capture2|
113
+ [capture1, capture2].flatten
114
+ end
115
+ end
116
+ end
117
+
118
+ alias_method :<<, :product_l
119
+ alias_method :>>, :product_r
92
120
 
93
121
  def choice(other_parser)
94
122
  Parser.new do |state|
@@ -107,25 +135,40 @@ module Dolos
107
135
  # rep(n = 2) # exactly 2
108
136
  # repeat(n_min: 2, n_max: 4) # 2 to 4
109
137
  # repeat(n_min: 2) # 2 or more
110
- def repeat(n_min:, n_max: Float::INFINITY)
138
+ def repeat(n_min:, n_max: Float::INFINITY, separator: nil)
111
139
  Parser.new do |state|
112
140
  values = []
113
141
  captures = []
114
142
  count = 0
143
+ state.input.mark_offset
115
144
 
116
- while count < n_max
145
+ loop do
117
146
  result = run_with_state(state.dup)
118
147
 
119
- break if result.failure?
148
+ if result.failure? || count >= n_max
149
+ break
150
+ end
120
151
 
121
152
  values << result.value
122
153
  captures.concat(result.captures)
123
154
  state.input.advance(result.length)
124
155
  count += 1
156
+
157
+ if separator && count < n_max
158
+ sep_result = separator.run_with_state(state.dup)
159
+ break if sep_result.failure?
160
+
161
+ state.input.advance(sep_result.length)
162
+ end
125
163
  end
126
164
 
127
165
  if count < n_min
128
- Failure.new("Expected parser to match at least #{n_min} times but matched only #{count} times", false)
166
+ error_pos = state.input.offset
167
+ Failure.new(
168
+ "Expected parser to match at least #{n_min} times but matched only #{count} times",
169
+ error_pos,
170
+ state
171
+ )
129
172
  else
130
173
  Success.new(values, 0, captures)
131
174
  end
@@ -158,5 +201,22 @@ module Dolos
158
201
  end
159
202
  alias_method :opt, :optional
160
203
 
204
+ # Unstable API
205
+ # Used to declare lazy parser to avoid infinite loops in recursive parsers
206
+ def lazy
207
+ parser_memo = nil
208
+
209
+ Parser.new do |state|
210
+ parser_memo ||= self
211
+ parser_memo.run_with_state(state)
212
+ end
213
+ end
214
+
215
+ private
216
+
217
+ def combine_and_discard_empty(*arrays)
218
+ arrays.compact.reject { |arr| arr.is_a?(Array) && arr.empty? }
219
+ end
220
+
161
221
  end
162
222
  end
@@ -10,9 +10,12 @@ module Dolos
10
10
  regex(/\n|\r\n|\r/)
11
11
  end
12
12
 
13
- # Capture as String and convert to integer
14
13
  def digit
15
- regex(/\d/).capture!.map { |capt| capt.map(&:to_i) }
14
+ regex(/\d/)
15
+ end
16
+
17
+ def int
18
+ digit.map(&:to_i)
16
19
  end
17
20
 
18
21
  # Capture as string
@@ -1,6 +1,7 @@
1
1
  module Dolos
2
2
  class ParserState
3
3
  attr_reader input: Dolos::StringIOWrapper
4
+ attr_accessor last_success_position: Integer
4
5
 
5
6
  def initialize: (String) -> void
6
7
  end
data/sig/dolos/result.rbs CHANGED
@@ -16,6 +16,7 @@ module Dolos
16
16
 
17
17
  class Failure < Result[bot]
18
18
  attr_reader committed: bool
19
+ attr_reader error_position: Integer
19
20
  attr_reader message: String
20
21
 
21
22
  def captures: -> []
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: dolos
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.2
4
+ version: 0.2.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - benetis
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2023-08-16 00:00:00.000000000 Z
11
+ date: 2023-08-19 00:00:00.000000000 Z
12
12
  dependencies: []
13
13
  description: Parser combinators library for Ruby. In active development, not stable
14
14
  yet.
@@ -23,7 +23,10 @@ files:
23
23
  - LICENSE.txt
24
24
  - README.md
25
25
  - Rakefile
26
+ - benchmarks/json/json.rb
27
+ - benchmarks/json/nested_json.json
26
28
  - docs/dolos_stable_diff.png
29
+ - examples/letter.rb
27
30
  - lib/dolos.rb
28
31
  - lib/dolos/parser_state.rb
29
32
  - lib/dolos/parsers.rb
@@ -31,7 +34,6 @@ files:
31
34
  - lib/dolos/string_io_wrapper.rb
32
35
  - lib/dolos/version.rb
33
36
  - lib/dolos_common_parsers/common_parsers.rb
34
- - lib/example.rb
35
37
  - sig/dolos.rbs
36
38
  - sig/dolos/common_parsers.rbs
37
39
  - sig/dolos/parser.rbs