dolos 0.2.0 → 0.2.1

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,78 @@
1
+ # frozen_string_literal: true
2
+ require 'bundler/setup'
3
+ require 'dolos'
4
+ require 'dolos_common_parsers/common_parsers'
5
+ require 'benchmark/ips'
6
+
7
+ include Dolos
8
+
9
+ # Include common parsers
10
+ # In the future this can be more structured; they were moved to a separate module to prevent breaking changes
11
+ include Dolos::CommonParsers
12
+
13
+ # Library usage example
14
+ # Parse out a name and address from a letter
15
+ # For higher difficulty, we will not split this into multiple lines, but instead parse it all at once
16
+ letter = <<-LETTER
17
+ Mr. Vardeniui Pavardeniui
18
+ AB „Lietuvos Paštas“
19
+ Totorių g. 8
20
+ 01121 Vilnius
21
+ LETTER
22
+
23
+ # Combine with 'or'
24
+ honorific = c("Mr. ") | c("Mrs. ") | c("Ms. ")
25
+
26
+ # This could be parsed with any_char, which would include the needed letters
27
+ # Or combine LT letters with latin alphabet
28
+ alpha_with_lt = char_in("ąčęėįšųūžĄČĘĖĮŠŲŪŽ") | alpha
29
+
30
+ # Capture all letters in a row and join them,
31
+ # because they are captured as elements of array by each alpha_with_lt parser.
32
+ first_name = alpha_with_lt.rep.map(&:join).capture!
33
+ last_name = alpha_with_lt.rep.map(&:join).capture!
34
+
35
+ # Combine first line parsers
36
+ # Consume zero or more whitespace, after that honorific must follow and so on
37
+ name_line = ws_rep0 & honorific & first_name & ws & last_name & eol
38
+
39
+ # Next line is company info
40
+ # We could choose to accept UAB and AB or just AB and etc.
41
+ # 'c("AB")' is for case-sensitive string. 'string' can also be used
42
+ company_type = c("AB")
43
+ quote_open = c("„")
44
+ quote_close = c("“")
45
+
46
+ # Consume LT alphabet with whitespace
47
+ company_name = (alpha_with_lt | ws).rep.map(&:join).capture!
48
+ company_info = company_type & ws_rep0 & quote_open & company_name & quote_close
49
+ second_line = ws_rep0 & company_info & eol
50
+
51
+ # Address line
52
+ # 'char_while' will consume characters while passed predicate is true
53
+ # This could be an alternative to previous 'alpha_with_lt' approach
54
+ # After that result is captured and mapped to hash
55
+ # Mapping to hash so that, at the end, it's easy to tell tuples apart
56
+ # Also while mapping, doing some cleaning with '.strip'
57
+ street_name = char_while(->(char) { !char.match(/\d/) }).map { |s| { street: s.strip } }.capture!
58
+ building = digits.map { |s| { building: s.strip } }.capture!
59
+ address_line = ws_rep0 & street_name & building & eol
60
+
61
+ # City line
62
+ # All digits can be matched here or 'digits.rep(5)' could be used. Also joining with map.
63
+ postcode = digits.map { |s| { postcode: s.strip } }.capture!
64
+ city = alpha_with_lt.rep.map(&:join).map { |s| { city: s.strip } }.capture!
65
+ city_line = ws_rep0 & postcode & ws & city & eol
66
+
67
+ # Full letter parser which is combined from all previous parsers. All previous parsers can be run separately.
68
+ letter_parser = name_line & second_line & address_line & city_line
69
+ result = letter_parser.run(letter)
70
+
71
+ puts result.success?
72
+
73
+ Benchmark.ips do |x|
74
+ x.report('letter benchmark') do
75
+ letter_parser.run(letter)
76
+ end
77
+ x.compare!
78
+ end
data/examples/letter.rb CHANGED
@@ -1,6 +1,6 @@
1
1
  # frozen_string_literal: true
2
- require_relative 'dolos'
3
- require_relative 'dolos_common_parsers/arsers/common_parsers'
2
+ require 'dolos'
3
+ require 'dolos_common_parsers/common_parsers'
4
4
 
5
5
  include Dolos
6
6
 
data/lib/dolos/parsers.rb CHANGED
@@ -3,9 +3,10 @@
3
3
  module Dolos
4
4
  module Parsers
5
5
  def string(str)
6
+ utf8_str = str.encode('UTF-8')
7
+
6
8
  Parser.new do |state|
7
9
  state.input.mark_offset
8
- utf8_str = str.encode('UTF-8')
9
10
  if state.input.matches?(utf8_str)
10
11
  Success.new(utf8_str, str.bytesize)
11
12
  else
@@ -13,7 +14,7 @@ module Dolos
13
14
  got_error = state.input.io.string.byteslice(state.input.backup, advanced)
14
15
  state.input.rollback
15
16
  Failure.new(
16
- "Expected #{str.inspect} but got #{got_error.inspect}",
17
+ -> { "Expected #{str.inspect} but got #{got_error.inspect}" },
17
18
  advanced,
18
19
  state
19
20
  )
@@ -32,7 +33,7 @@ module Dolos
32
33
  advanced = state.input.offset
33
34
  state.input.rollback
34
35
  Failure.new(
35
- "Expected pattern #{pattern.inspect} but got #{state.input.io.string.inspect}",
36
+ -> { "Expected pattern #{pattern.inspect} but got #{state.input.io.string.inspect}" },
36
37
  advanced,
37
38
  state
38
39
  )
@@ -52,7 +53,7 @@ module Dolos
52
53
  advanced = state.input.offset
53
54
  state.input.rollback
54
55
  Failure.new(
55
- 'Expected any character but got end of input',
56
+ -> { 'Expected any character but got end of input' },
56
57
  advanced,
57
58
  state
58
59
  )
@@ -64,20 +65,20 @@ module Dolos
64
65
  # Example:
65
66
  # char_in('abc').run('b') # => Success.new('b', 1)
66
67
  def char_in(characters_string)
67
- characters_array = characters_string.chars
68
+ characters_set = characters_string.chars
68
69
 
69
70
  Parser.new do |state|
70
71
  state.input.mark_offset
71
72
 
72
73
  char, bytesize = state.input.peek(1)
73
74
 
74
- if char && characters_array.include?(char)
75
+ if char && characters_set.include?(char)
75
76
  Success.new(char, bytesize)
76
77
  else
77
78
  advanced = state.input.offset
78
79
  state.input.rollback
79
80
  Failure.new(
80
- "Expected one of #{characters_array.inspect} but got #{char.inspect}",
81
+ -> { "Expected one of #{characters_set.to_a.inspect} but got #{char.inspect}" },
81
82
  advanced,
82
83
  state
83
84
  )
@@ -90,18 +91,18 @@ module Dolos
90
91
  state.input.mark_offset
91
92
 
92
93
  buffer = String.new
93
- loop do
94
- char, bytesize = state.input.peek(1)
95
- break if char.nil? || !predicate.call(char)
94
+ char, bytesize = state.input.peek(1)
96
95
 
96
+ while char && predicate.call(char)
97
97
  buffer << char
98
98
  state.input.advance(bytesize)
99
+ char, bytesize = state.input.peek(1)
99
100
  end
100
101
 
101
102
  if buffer.empty?
102
103
  advanced = state.input.offset
103
104
  Failure.new(
104
- "Predicate never returned true",
105
+ -> { "Predicate never returned true" },
105
106
  advanced,
106
107
  state
107
108
  )
@@ -111,7 +112,6 @@ module Dolos
111
112
  end
112
113
  end
113
114
 
114
- # Unstable API
115
115
  def recursive(&block)
116
116
  recursive_parser = nil
117
117
 
@@ -120,7 +120,7 @@ module Dolos
120
120
 
121
121
  recursive_parser.call.run_with_state(state).tap do |result|
122
122
  if result.failure?
123
- error_msg = "Error in recursive structure around position #{state.input.offset}: #{result.message}"
123
+ error_msg = -> { "Error in recursive structure around position #{state.input.offset}: #{result.message}" }
124
124
  Failure.new(error_msg, state.input.offset, state)
125
125
  end
126
126
  end
@@ -130,7 +130,5 @@ module Dolos
130
130
  placeholder
131
131
  end
132
132
 
133
-
134
-
135
133
  end
136
134
  end
data/lib/dolos/result.rb CHANGED
@@ -55,12 +55,21 @@ module Dolos
55
55
  end
56
56
 
57
57
  class Failure < Result
58
- attr_reader :message, :error_position, :state
58
+ attr_reader :error_position, :state
59
59
 
60
- def initialize(message, error_position, state)
61
- @message = message
60
+ def initialize(message_proc, error_position, state)
61
+ @message_proc = message_proc
62
62
  @error_position = error_position
63
63
  @state = state
64
+ @message_evaluated = false
65
+ end
66
+
67
+ def message
68
+ unless @message_evaluated
69
+ @message_value = @message_proc.call
70
+ @message_evaluated = true
71
+ end
72
+ @message_value
64
73
  end
65
74
 
66
75
  def inspect
@@ -22,12 +22,7 @@ module Dolos
22
22
 
23
23
  def matches?(utf8_str)
24
24
  read = io.read(utf8_str.bytesize)
25
-
26
- if read.nil?
27
- false
28
- else
29
- read.force_encoding('UTF-8') == utf8_str
30
- end
25
+ !read.nil? && read.force_encoding('UTF-8') == utf8_str
31
26
  end
32
27
 
33
28
  def advance(bytesize)
@@ -61,8 +56,8 @@ module Dolos
61
56
  remaining_data = io.read
62
57
  io.seek(current_position)
63
58
 
64
- if (match_data = remaining_data.match(/\A#{pattern}/))
65
- matched_string = match_data[0]
59
+ if remaining_data =~ /\A#{pattern}/
60
+ matched_string = $&
66
61
  io.seek(current_position + matched_string.bytesize)
67
62
  return matched_string
68
63
  end
data/lib/dolos/version.rb CHANGED
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module Dolos
4
- VERSION = "0.2.0"
4
+ VERSION = "0.2.1"
5
5
  end
data/lib/dolos.rb CHANGED
@@ -10,9 +10,7 @@ module Dolos
10
10
  include Parsers
11
11
 
12
12
  class Parser
13
-
14
13
  attr_accessor :parser_proc
15
-
16
14
  def initialize(&block)
17
15
  @parser_proc = block
18
16
  end
@@ -22,33 +20,23 @@ module Dolos
22
20
  end
23
21
 
24
22
  def run_with_state(state)
25
- result = parser_proc.call(state)
26
- if result.success?
27
- state.last_success_position = state.input.offset
28
- end
23
+ result = @parser_proc.call(state)
24
+ state.last_success_position = state.input.offset if result.success?
29
25
  result
30
26
  end
31
27
 
32
28
  def capture!(wrap_in = nil)
33
29
  Parser.new do |state|
34
30
  result = run_with_state(state)
35
- if result.success?
36
- result.capture!(wrap_in)
37
- else
38
- result
39
- end
31
+ result.success? ? result.capture!(wrap_in) : result
40
32
  end
41
33
  end
42
34
 
43
- # Will call block on captures
35
+ # Will call `map` on captures
44
36
  def map_captures(&block)
45
37
  Parser.new do |state|
46
38
  result = run_with_state(state)
47
- if result.success?
48
- Success.new(result.value, result.length, block.call(result.captures))
49
- else
50
- result
51
- end
39
+ result.success? ? Success.new(result.value, result.length, block.call(result.captures)) : result
52
40
  end
53
41
  end
54
42
 
@@ -56,22 +44,18 @@ module Dolos
56
44
  def map(&block)
57
45
  Parser.new do |state|
58
46
  result = run_with_state(state)
59
- if result.success?
60
- Success.new(block.call(result.value), result.length, result.captures)
61
- else
62
- result
63
- end
47
+ result.success? ? Success.new(block.call(result.value), result.length, result.captures) : result
64
48
  end
65
49
  end
66
50
 
67
51
  def combine(&block)
68
52
  Parser.new do |state|
69
53
  result = run_with_state(state)
54
+
70
55
  if result.success?
56
+ state.input.advance(result.length)
71
57
  new_parser = block.call(result.value, result.captures)
72
- new_state = state.dup
73
- new_state.input.advance(result.length)
74
- new_parser.run_with_state(new_state)
58
+ new_parser.run_with_state(state)
75
59
  else
76
60
  result
77
61
  end
@@ -140,10 +124,9 @@ module Dolos
140
124
  values = []
141
125
  captures = []
142
126
  count = 0
143
- state.input.mark_offset
144
127
 
145
128
  loop do
146
- result = run_with_state(state.dup)
129
+ result = run_with_state(state) # Removing .dup for performance. Be cautious of side effects.
147
130
 
148
131
  if result.failure? || count >= n_max
149
132
  break
@@ -155,7 +138,7 @@ module Dolos
155
138
  count += 1
156
139
 
157
140
  if separator && count < n_max
158
- sep_result = separator.run_with_state(state.dup)
141
+ sep_result = separator.run_with_state(state) # Removing .dup for performance. Be cautious of side effects.
159
142
  break if sep_result.failure?
160
143
 
161
144
  state.input.advance(sep_result.length)
@@ -163,10 +146,9 @@ module Dolos
163
146
  end
164
147
 
165
148
  if count < n_min
166
- error_pos = state.input.offset
167
149
  Failure.new(
168
- "Expected parser to match at least #{n_min} times but matched only #{count} times",
169
- error_pos,
150
+ -> { "Expected parser to match at least #{n_min} times but matched only #{count} times" },
151
+ state.input.offset,
170
152
  state
171
153
  )
172
154
  else
@@ -174,7 +156,6 @@ module Dolos
174
156
  end
175
157
  end
176
158
  end
177
-
178
159
  def zero_or_more
179
160
  repeat(n_min: 0, n_max: Float::INFINITY)
180
161
  end
@@ -201,7 +182,6 @@ module Dolos
201
182
  end
202
183
  alias_method :opt, :optional
203
184
 
204
- # Unstable API
205
185
  # Used to declare lazy parser to avoid infinite loops in recursive parsers
206
186
  def lazy
207
187
  parser_memo = nil
@@ -212,11 +192,5 @@ module Dolos
212
192
  end
213
193
  end
214
194
 
215
- private
216
-
217
- def combine_and_discard_empty(*arrays)
218
- arrays.compact.reject { |arr| arr.is_a?(Array) && arr.empty? }
219
- end
220
-
221
195
  end
222
196
  end
@@ -6,6 +6,10 @@ module Dolos
6
6
  regex(/\s/)
7
7
  end
8
8
 
9
+ def ws_rep0
10
+ regex(/\s*/)
11
+ end
12
+
9
13
  def eol
10
14
  regex(/\n|\r\n|\r/)
11
15
  end
@@ -1,5 +1,16 @@
1
1
  module Dolos
2
2
  module CommonParsers
3
+ def digit: -> Parser[String]
4
+ def digits: -> Parser[String]
5
+
6
+ def int: -> Parser[Integer]
7
+
8
+ def eol: -> Parser[String]
9
+
3
10
  def ws: -> Parser[String]
11
+ def ws_rep0: -> Parser[String]
12
+
13
+ def alpha: -> Parser[String]
14
+ def alphanum: -> Parser[String]
4
15
  end
5
16
  end
data/sig/dolos/parser.rbs CHANGED
@@ -4,16 +4,20 @@ module Dolos
4
4
  def initialize: (^(ParserState) -> Result[A]) -> Parser[A]
5
5
  def capture!: -> Parser[A]
6
6
  def choice: [B](Parser[B])-> Parser[A | B]
7
+ def combine: [B](^(A, B) -> Parser[B]) -> Parser[B]
7
8
  def flat_map: [B](Parser[A], ^(A) -> Parser[B]) -> Parser[B]
8
9
  def flatten: -> Parser[A]
9
10
  def map: [B](^(A) -> B) -> Parser[B]
10
- def map_value: [B](^(A) -> B) -> Parser[B]
11
+ def map_captures: [B](^(A) -> B) -> Parser[B]
11
12
  def optional: -> Parser[A?]
12
13
  def product: [B](Parser[A]) -> Parser[B]
14
+ def product_l: [B](Parser[B]) -> Parser[B]
15
+ def product_r: [B](Parser[B]) -> Parser[A]
13
16
  def run: (String) -> Result[A]
14
17
  def run_with_state: (ParserState) -> Result[A]
15
- def repeat: (Integer, Integer)-> Parser[Array[A]]
18
+ def repeat: [B](Integer, Integer, Parser[B]?)-> Parser[Array[A]]
16
19
  def zero_or_more: -> Parser[Array[A]]
17
20
  def one_or_more: (Integer?) -> Parser[Array[A]]
21
+ def lazy: -> Parser[A]
18
22
  end
19
23
  end
@@ -1,6 +1,6 @@
1
1
  module Dolos
2
2
  class ParserState
3
- attr_reader input: Dolos::StringIOWrapper
3
+ attr_reader input: StringIOWrapper
4
4
  attr_accessor last_success_position: Integer
5
5
 
6
6
  def initialize: (String) -> void
@@ -1,6 +1,10 @@
1
1
  module Dolos
2
2
  module Parsers
3
3
  def any_char: -> Parser[String]
4
+ def char_in: -> Parser[String]
5
+ def char_while : -> Parser[String]
6
+ def recursive: [A,B,C]() { (Parser[A]) -> Parser[B] } -> Parser[C]
7
+
4
8
  def regex: (Regexp) -> Parser[String]
5
9
  def string: (String)-> Parser[String]
6
10
  end
data/sig/dolos/result.rbs CHANGED
@@ -15,6 +15,11 @@ module Dolos
15
15
  end
16
16
 
17
17
  class Failure < Result[bot]
18
+ @message_proc: ^-> String
19
+ @message_evaluated: bool
20
+ @message_value: String
21
+ @state: ParserState
22
+
18
23
  attr_reader committed: bool
19
24
  attr_reader error_position: Integer
20
25
  attr_reader message: String
@@ -25,6 +30,8 @@ module Dolos
25
30
 
26
31
  def map: [B](^(bot) -> B) -> Result[B]
27
32
 
33
+ def pretty_print: -> String
34
+
28
35
  def success?: -> bool
29
36
  end
30
37
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: dolos
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.2.0
4
+ version: 0.2.1
5
5
  platform: ruby
6
6
  authors:
7
7
  - benetis
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2023-08-19 00:00:00.000000000 Z
11
+ date: 2023-08-22 00:00:00.000000000 Z
12
12
  dependencies: []
13
13
  description: Parser combinators library for Ruby. In active development, not stable
14
14
  yet.
@@ -24,7 +24,9 @@ files:
24
24
  - README.md
25
25
  - Rakefile
26
26
  - benchmarks/json/json.rb
27
- - benchmarks/json/nested_json.json
27
+ - benchmarks/json/nested_json_166.json
28
+ - benchmarks/json/nested_json_1m.json
29
+ - benchmarks/letter.rb
28
30
  - docs/dolos_stable_diff.png
29
31
  - examples/letter.rb
30
32
  - lib/dolos.rb