dolos 0.2.0 → 0.2.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,78 @@
1
+ # frozen_string_literal: true
2
+ require 'bundler/setup'
3
+ require 'dolos'
4
+ require 'dolos_common_parsers/common_parsers'
5
+ require 'benchmark/ips'
6
+
7
+ include Dolos
8
+
9
+ # Include common parsers
10
+ # In the future this can be more structured; they were moved to a separate module to prevent breaking changes
11
+ include Dolos::CommonParsers
12
+
13
+ # Library usage example
14
+ # Parse out a name and address from a letter
15
+ # For higher difficulty, we will not split this into multiple lines, but instead parse it all at once
16
+ letter = <<-LETTER
17
+ Mr. Vardeniui Pavardeniui
18
+ AB „Lietuvos Paštas“
19
+ Totorių g. 8
20
+ 01121 Vilnius
21
+ LETTER
22
+
23
+ # Combine with 'or'
24
+ honorific = c("Mr. ") | c("Mrs. ") | c("Ms. ")
25
+
26
+ # This could be parsed with any_char, which would include the needed letters
27
+ # Or combine LT letters with the Latin alphabet
28
+ alpha_with_lt = char_in("ąčęėįšųūžĄČĘĖĮŠŲŪŽ") | alpha
29
+
30
+ # Capture all letters in a row and join them,
31
+ # because they are captured as elements of array by each alpha_with_lt parser.
32
+ first_name = alpha_with_lt.rep.map(&:join).capture!
33
+ last_name = alpha_with_lt.rep.map(&:join).capture!
34
+
35
+ # Combine first line parsers
36
+ # Consume zero or more whitespace, after that honorific must follow and so on
37
+ name_line = ws_rep0 & honorific & first_name & ws & last_name & eol
38
+
39
+ # Next line is company info
40
+ # We could choose to accept UAB and AB, or just AB, etc.
41
+ # 'c("AB")' is for case-sensitive string. 'string' can also be used
42
+ company_type = c("AB")
43
+ quote_open = c("„")
44
+ quote_close = c("“")
45
+
46
+ # Consume LT alphabet with whitespace
47
+ company_name = (alpha_with_lt | ws).rep.map(&:join).capture!
48
+ company_info = company_type & ws_rep0 & quote_open & company_name & quote_close
49
+ second_line = ws_rep0 & company_info & eol
50
+
51
+ # Address line
52
+ # 'char_while' will consume characters while passed predicate is true
53
+ # This could be an alternative to previous 'alpha_with_lt' approach
54
+ # After that result is captured and mapped to hash
55
+ # Mapping to hash so at the end it's easy to tell tuples apart
56
+ # Also while mapping, doing some cleaning with '.strip'
57
+ street_name = char_while(->(char) { !char.match(/\d/) }).map { |s| { street: s.strip } }.capture!
58
+ building = digits.map { |s| { building: s.strip } }.capture!
59
+ address_line = ws_rep0 & street_name & building & eol
60
+
61
+ # City line
62
+ # All digits can be matched here or 'digits.rep(5)' could be used. Also joining with map.
63
+ postcode = digits.map { |s| { postcode: s.strip } }.capture!
64
+ city = alpha_with_lt.rep.map(&:join).map { |s| { city: s.strip } }.capture!
65
+ city_line = ws_rep0 & postcode & ws & city & eol
66
+
67
+ # Full letter parser which is combined from all previous parsers. All previous parsers can be run separately.
68
+ letter_parser = name_line & second_line & address_line & city_line
69
+ result = letter_parser.run(letter)
70
+
71
+ puts result.success?
72
+
73
+ Benchmark.ips do |x|
74
+ x.report('letter benchmark') do
75
+ letter_parser.run(letter)
76
+ end
77
+ x.compare!
78
+ end
data/examples/letter.rb CHANGED
@@ -1,6 +1,6 @@
1
1
  # frozen_string_literal: true
2
- require_relative 'dolos'
3
- require_relative 'dolos_common_parsers/arsers/common_parsers'
2
+ require 'dolos'
3
+ require 'dolos_common_parsers/common_parsers'
4
4
 
5
5
  include Dolos
6
6
 
data/lib/dolos/parsers.rb CHANGED
@@ -3,9 +3,10 @@
3
3
  module Dolos
4
4
  module Parsers
5
5
  def string(str)
6
+ utf8_str = str.encode('UTF-8')
7
+
6
8
  Parser.new do |state|
7
9
  state.input.mark_offset
8
- utf8_str = str.encode('UTF-8')
9
10
  if state.input.matches?(utf8_str)
10
11
  Success.new(utf8_str, str.bytesize)
11
12
  else
@@ -13,7 +14,7 @@ module Dolos
13
14
  got_error = state.input.io.string.byteslice(state.input.backup, advanced)
14
15
  state.input.rollback
15
16
  Failure.new(
16
- "Expected #{str.inspect} but got #{got_error.inspect}",
17
+ -> { "Expected #{str.inspect} but got #{got_error.inspect}" },
17
18
  advanced,
18
19
  state
19
20
  )
@@ -32,7 +33,7 @@ module Dolos
32
33
  advanced = state.input.offset
33
34
  state.input.rollback
34
35
  Failure.new(
35
- "Expected pattern #{pattern.inspect} but got #{state.input.io.string.inspect}",
36
+ -> { "Expected pattern #{pattern.inspect} but got #{state.input.io.string.inspect}" },
36
37
  advanced,
37
38
  state
38
39
  )
@@ -52,7 +53,7 @@ module Dolos
52
53
  advanced = state.input.offset
53
54
  state.input.rollback
54
55
  Failure.new(
55
- 'Expected any character but got end of input',
56
+ -> { 'Expected any character but got end of input' },
56
57
  advanced,
57
58
  state
58
59
  )
@@ -64,20 +65,20 @@ module Dolos
64
65
  # Example:
65
66
  # char_in('abc').run('b') # => Success.new('b', 1)
66
67
  def char_in(characters_string)
67
- characters_array = characters_string.chars
68
+ characters_set = characters_string.chars
68
69
 
69
70
  Parser.new do |state|
70
71
  state.input.mark_offset
71
72
 
72
73
  char, bytesize = state.input.peek(1)
73
74
 
74
- if char && characters_array.include?(char)
75
+ if char && characters_set.include?(char)
75
76
  Success.new(char, bytesize)
76
77
  else
77
78
  advanced = state.input.offset
78
79
  state.input.rollback
79
80
  Failure.new(
80
- "Expected one of #{characters_array.inspect} but got #{char.inspect}",
81
+ -> { "Expected one of #{characters_set.to_a.inspect} but got #{char.inspect}" },
81
82
  advanced,
82
83
  state
83
84
  )
@@ -90,18 +91,18 @@ module Dolos
90
91
  state.input.mark_offset
91
92
 
92
93
  buffer = String.new
93
- loop do
94
- char, bytesize = state.input.peek(1)
95
- break if char.nil? || !predicate.call(char)
94
+ char, bytesize = state.input.peek(1)
96
95
 
96
+ while char && predicate.call(char)
97
97
  buffer << char
98
98
  state.input.advance(bytesize)
99
+ char, bytesize = state.input.peek(1)
99
100
  end
100
101
 
101
102
  if buffer.empty?
102
103
  advanced = state.input.offset
103
104
  Failure.new(
104
- "Predicate never returned true",
105
+ -> { "Predicate never returned true" },
105
106
  advanced,
106
107
  state
107
108
  )
@@ -111,7 +112,6 @@ module Dolos
111
112
  end
112
113
  end
113
114
 
114
- # Unstable API
115
115
  def recursive(&block)
116
116
  recursive_parser = nil
117
117
 
@@ -120,7 +120,7 @@ module Dolos
120
120
 
121
121
  recursive_parser.call.run_with_state(state).tap do |result|
122
122
  if result.failure?
123
- error_msg = "Error in recursive structure around position #{state.input.offset}: #{result.message}"
123
+ error_msg = -> { "Error in recursive structure around position #{state.input.offset}: #{result.message}" }
124
124
  Failure.new(error_msg, state.input.offset, state)
125
125
  end
126
126
  end
@@ -130,7 +130,5 @@ module Dolos
130
130
  placeholder
131
131
  end
132
132
 
133
-
134
-
135
133
  end
136
134
  end
data/lib/dolos/result.rb CHANGED
@@ -55,12 +55,21 @@ module Dolos
55
55
  end
56
56
 
57
57
  class Failure < Result
58
- attr_reader :message, :error_position, :state
58
+ attr_reader :error_position, :state
59
59
 
60
- def initialize(message, error_position, state)
61
- @message = message
60
+ def initialize(message_proc, error_position, state)
61
+ @message_proc = message_proc
62
62
  @error_position = error_position
63
63
  @state = state
64
+ @message_evaluated = false
65
+ end
66
+
67
+ def message
68
+ unless @message_evaluated
69
+ @message_value = @message_proc.call
70
+ @message_evaluated = true
71
+ end
72
+ @message_value
64
73
  end
65
74
 
66
75
  def inspect
@@ -22,12 +22,7 @@ module Dolos
22
22
 
23
23
  def matches?(utf8_str)
24
24
  read = io.read(utf8_str.bytesize)
25
-
26
- if read.nil?
27
- false
28
- else
29
- read.force_encoding('UTF-8') == utf8_str
30
- end
25
+ !read.nil? && read.force_encoding('UTF-8') == utf8_str
31
26
  end
32
27
 
33
28
  def advance(bytesize)
@@ -61,8 +56,8 @@ module Dolos
61
56
  remaining_data = io.read
62
57
  io.seek(current_position)
63
58
 
64
- if (match_data = remaining_data.match(/\A#{pattern}/))
65
- matched_string = match_data[0]
59
+ if remaining_data =~ /\A#{pattern}/
60
+ matched_string = $&
66
61
  io.seek(current_position + matched_string.bytesize)
67
62
  return matched_string
68
63
  end
data/lib/dolos/version.rb CHANGED
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module Dolos
4
- VERSION = "0.2.0"
4
+ VERSION = "0.2.1"
5
5
  end
data/lib/dolos.rb CHANGED
@@ -10,9 +10,7 @@ module Dolos
10
10
  include Parsers
11
11
 
12
12
  class Parser
13
-
14
13
  attr_accessor :parser_proc
15
-
16
14
  def initialize(&block)
17
15
  @parser_proc = block
18
16
  end
@@ -22,33 +20,23 @@ module Dolos
22
20
  end
23
21
 
24
22
  def run_with_state(state)
25
- result = parser_proc.call(state)
26
- if result.success?
27
- state.last_success_position = state.input.offset
28
- end
23
+ result = @parser_proc.call(state)
24
+ state.last_success_position = state.input.offset if result.success?
29
25
  result
30
26
  end
31
27
 
32
28
  def capture!(wrap_in = nil)
33
29
  Parser.new do |state|
34
30
  result = run_with_state(state)
35
- if result.success?
36
- result.capture!(wrap_in)
37
- else
38
- result
39
- end
31
+ result.success? ? result.capture!(wrap_in) : result
40
32
  end
41
33
  end
42
34
 
43
- # Will call block on captures
35
+ # Will call `map` on captures
44
36
  def map_captures(&block)
45
37
  Parser.new do |state|
46
38
  result = run_with_state(state)
47
- if result.success?
48
- Success.new(result.value, result.length, block.call(result.captures))
49
- else
50
- result
51
- end
39
+ result.success? ? Success.new(result.value, result.length, block.call(result.captures)) : result
52
40
  end
53
41
  end
54
42
 
@@ -56,22 +44,18 @@ module Dolos
56
44
  def map(&block)
57
45
  Parser.new do |state|
58
46
  result = run_with_state(state)
59
- if result.success?
60
- Success.new(block.call(result.value), result.length, result.captures)
61
- else
62
- result
63
- end
47
+ result.success? ? Success.new(block.call(result.value), result.length, result.captures) : result
64
48
  end
65
49
  end
66
50
 
67
51
  def combine(&block)
68
52
  Parser.new do |state|
69
53
  result = run_with_state(state)
54
+
70
55
  if result.success?
56
+ state.input.advance(result.length)
71
57
  new_parser = block.call(result.value, result.captures)
72
- new_state = state.dup
73
- new_state.input.advance(result.length)
74
- new_parser.run_with_state(new_state)
58
+ new_parser.run_with_state(state)
75
59
  else
76
60
  result
77
61
  end
@@ -140,10 +124,9 @@ module Dolos
140
124
  values = []
141
125
  captures = []
142
126
  count = 0
143
- state.input.mark_offset
144
127
 
145
128
  loop do
146
- result = run_with_state(state.dup)
129
+ result = run_with_state(state) # Removing .dup for performance. Be cautious of side effects.
147
130
 
148
131
  if result.failure? || count >= n_max
149
132
  break
@@ -155,7 +138,7 @@ module Dolos
155
138
  count += 1
156
139
 
157
140
  if separator && count < n_max
158
- sep_result = separator.run_with_state(state.dup)
141
+ sep_result = separator.run_with_state(state) # Removing .dup for performance. Be cautious of side effects.
159
142
  break if sep_result.failure?
160
143
 
161
144
  state.input.advance(sep_result.length)
@@ -163,10 +146,9 @@ module Dolos
163
146
  end
164
147
 
165
148
  if count < n_min
166
- error_pos = state.input.offset
167
149
  Failure.new(
168
- "Expected parser to match at least #{n_min} times but matched only #{count} times",
169
- error_pos,
150
+ -> { "Expected parser to match at least #{n_min} times but matched only #{count} times" },
151
+ state.input.offset,
170
152
  state
171
153
  )
172
154
  else
@@ -174,7 +156,6 @@ module Dolos
174
156
  end
175
157
  end
176
158
  end
177
-
178
159
  def zero_or_more
179
160
  repeat(n_min: 0, n_max: Float::INFINITY)
180
161
  end
@@ -201,7 +182,6 @@ module Dolos
201
182
  end
202
183
  alias_method :opt, :optional
203
184
 
204
- # Unstable API
205
185
  # Used to declare lazy parser to avoid infinite loops in recursive parsers
206
186
  def lazy
207
187
  parser_memo = nil
@@ -212,11 +192,5 @@ module Dolos
212
192
  end
213
193
  end
214
194
 
215
- private
216
-
217
- def combine_and_discard_empty(*arrays)
218
- arrays.compact.reject { |arr| arr.is_a?(Array) && arr.empty? }
219
- end
220
-
221
195
  end
222
196
  end
@@ -6,6 +6,10 @@ module Dolos
6
6
  regex(/\s/)
7
7
  end
8
8
 
9
+ def ws_rep0
10
+ regex(/\s*/)
11
+ end
12
+
9
13
  def eol
10
14
  regex(/\n|\r\n|\r/)
11
15
  end
@@ -1,5 +1,16 @@
1
1
  module Dolos
2
2
  module CommonParsers
3
+ def digit: -> Parser[String]
4
+ def digits: -> Parser[String]
5
+
6
+ def int: -> Parser[Integer]
7
+
8
+ def eol: -> Parser[String]
9
+
3
10
  def ws: -> Parser[String]
11
+ def ws_rep0: -> Parser[String]
12
+
13
+ def alpha: -> Parser[String]
14
+ def alphanum: -> Parser[String]
4
15
  end
5
16
  end
data/sig/dolos/parser.rbs CHANGED
@@ -4,16 +4,20 @@ module Dolos
4
4
  def initialize: (^(ParserState) -> Result[A]) -> Parser[A]
5
5
  def capture!: -> Parser[A]
6
6
  def choice: [B](Parser[B])-> Parser[A | B]
7
+ def combine: [B](^(A, B) -> Parser[B]) -> Parser[B]
7
8
  def flat_map: [B](Parser[A], ^(A) -> Parser[B]) -> Parser[B]
8
9
  def flatten: -> Parser[A]
9
10
  def map: [B](^(A) -> B) -> Parser[B]
10
- def map_value: [B](^(A) -> B) -> Parser[B]
11
+ def map_captures: [B](^(A) -> B) -> Parser[B]
11
12
  def optional: -> Parser[A?]
12
13
  def product: [B](Parser[A]) -> Parser[B]
14
+ def product_l: [B](Parser[B]) -> Parser[B]
15
+ def product_r: [B](Parser[B]) -> Parser[A]
13
16
  def run: (String) -> Result[A]
14
17
  def run_with_state: (ParserState) -> Result[A]
15
- def repeat: (Integer, Integer)-> Parser[Array[A]]
18
+ def repeat: [B](Integer, Integer, Parser[B]?)-> Parser[Array[A]]
16
19
  def zero_or_more: -> Parser[Array[A]]
17
20
  def one_or_more: (Integer?) -> Parser[Array[A]]
21
+ def lazy: -> Parser[A]
18
22
  end
19
23
  end
@@ -1,6 +1,6 @@
1
1
  module Dolos
2
2
  class ParserState
3
- attr_reader input: Dolos::StringIOWrapper
3
+ attr_reader input: StringIOWrapper
4
4
  attr_accessor last_success_position: Integer
5
5
 
6
6
  def initialize: (String) -> void
@@ -1,6 +1,10 @@
1
1
  module Dolos
2
2
  module Parsers
3
3
  def any_char: -> Parser[String]
4
+ def char_in: -> Parser[String]
5
+ def char_while : -> Parser[String]
6
+ def recursive: [A,B,C]() { (Parser[A]) -> Parser[B] } -> Parser[C]
7
+
4
8
  def regex: (Regexp) -> Parser[String]
5
9
  def string: (String)-> Parser[String]
6
10
  end
data/sig/dolos/result.rbs CHANGED
@@ -15,6 +15,11 @@ module Dolos
15
15
  end
16
16
 
17
17
  class Failure < Result[bot]
18
+ @message_proc: ^-> String
19
+ @message_evaluated: bool
20
+ @message_value: String
21
+ @state: ParserState
22
+
18
23
  attr_reader committed: bool
19
24
  attr_reader error_position: Integer
20
25
  attr_reader message: String
@@ -25,6 +30,8 @@ module Dolos
25
30
 
26
31
  def map: [B](^(bot) -> B) -> Result[B]
27
32
 
33
+ def pretty_print: -> String
34
+
28
35
  def success?: -> bool
29
36
  end
30
37
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: dolos
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.2.0
4
+ version: 0.2.1
5
5
  platform: ruby
6
6
  authors:
7
7
  - benetis
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2023-08-19 00:00:00.000000000 Z
11
+ date: 2023-08-22 00:00:00.000000000 Z
12
12
  dependencies: []
13
13
  description: Parser combinators library for Ruby. In active development, not stable
14
14
  yet.
@@ -24,7 +24,9 @@ files:
24
24
  - README.md
25
25
  - Rakefile
26
26
  - benchmarks/json/json.rb
27
- - benchmarks/json/nested_json.json
27
+ - benchmarks/json/nested_json_166.json
28
+ - benchmarks/json/nested_json_1m.json
29
+ - benchmarks/letter.rb
28
30
  - docs/dolos_stable_diff.png
29
31
  - examples/letter.rb
30
32
  - lib/dolos.rb