dolos 0.1.3 → 0.2.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +47 -31
- data/benchmarks/json/json.rb +83 -0
- data/benchmarks/json/nested_json_166.json +2541 -0
- data/benchmarks/json/nested_json_1m.json +20557 -0
- data/benchmarks/letter.rb +78 -0
- data/{lib/example.rb → examples/letter.rb} +15 -15
- data/lib/dolos/parsers.rb +30 -11
- data/lib/dolos/result.rb +36 -12
- data/lib/dolos/string_io_wrapper.rb +3 -8
- data/lib/dolos/version.rb +1 -1
- data/lib/dolos.rb +66 -42
- data/lib/dolos_common_parsers/common_parsers.rb +9 -2
- data/sig/dolos/common_parsers.rbs +11 -0
- data/sig/dolos/parser.rbs +6 -2
- data/sig/dolos/parser_state.rbs +1 -1
- data/sig/dolos/parsers.rbs +4 -0
- data/sig/dolos/result.rbs +7 -0
- metadata +7 -3
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: cfeffbbe4108e43ba81ccd210f2689910d310fc3947fe05ed22a9972e581e96a
|
4
|
+
data.tar.gz: e181ccca4725008464fc92670b8120cf55262ba469d6ec494270081eb902c241
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 27fe73f34d41692d31c00d30bc03869cdefcef4ee785acbb72da12c9bceb85b78357da7c3d6d1435598b40fb635fb5e71d56254c18597c8d7d424748afc7c098
|
7
|
+
data.tar.gz: 9dd78f5e313830742505bbeb1488ed643a700facc2193ed3e996eef795913a50bb660a2af2dcd7c9a73e6e5f84befa7e461b2cf10d4bd11f22d68901ecc59e9b
|
data/README.md
CHANGED
@@ -23,30 +23,34 @@ require 'dolos'
|
|
23
23
|
include Dolos
|
24
24
|
|
25
25
|
ws = c(" ")
|
26
|
-
parser = c("Parsers")
|
26
|
+
parser = c("Parsers") & ws & c("are") & ws & c("great!")
|
27
27
|
parser.run("Parsers are great!") # <Result::Success>
|
28
28
|
|
29
29
|
greeter = c("Hello")
|
30
|
-
greet_and_speak = greeter
|
30
|
+
greet_and_speak = greeter & c(", ") & parser
|
31
31
|
greet_and_speak.run("Hello, Parsers are great!") # <Result::Success>
|
32
32
|
```
|
33
33
|
|
34
34
|
### Letter address parser example
|
35
|
+
|
35
36
|
```ruby
|
36
37
|
require 'dolos'
|
37
38
|
require 'dolos_common_parsers/common_parsers'
|
38
39
|
|
40
|
+
include Dolos
|
41
|
+
# frozen_string_literal: true
|
42
|
+
require_relative 'dolos'
|
43
|
+
require_relative 'dolos_common_parsers/common_parsers'
|
44
|
+
|
39
45
|
include Dolos
|
40
46
|
|
41
47
|
# Include common parsers
|
42
|
-
# In future this can be more structured,
|
43
|
-
# moved them to separate module to prevent breaking changes
|
48
|
+
# In the future this can be more structured; they were moved to a separate module to prevent breaking changes
|
44
49
|
include Dolos::CommonParsers
|
45
50
|
|
46
51
|
# Library usage example
|
47
52
|
# Parse out a name and address from a letter
|
48
|
-
# For higher difficulty, we will not split this into multiple lines,
|
49
|
-
# but instead parse it all at once
|
53
|
+
# For higher difficulty, we will not split this into multiple lines, but instead parse it all at once
|
50
54
|
letter = <<-LETTER
|
51
55
|
Mr. Vardeniui Pavardeniui
|
52
56
|
AB „Lietuvos Paštas“
|
@@ -63,12 +67,12 @@ alpha_with_lt = char_in("ąčęėįšųūžĄČĘĖĮŠŲŪŽ") | alpha
|
|
63
67
|
|
64
68
|
# Capture all letters in a row and join them,
|
65
69
|
# because they are captured as elements of array by each alpha_with_lt parser.
|
66
|
-
first_name = alpha_with_lt.rep.
|
67
|
-
last_name = alpha_with_lt.rep.
|
70
|
+
first_name = alpha_with_lt.rep.map(&:join).capture!
|
71
|
+
last_name = alpha_with_lt.rep.map(&:join).capture!
|
68
72
|
|
69
73
|
# Combine first line parsers
|
70
74
|
# Consume zero or more whitespace, after that honorific must follow and so on
|
71
|
-
name_line = ws.rep0
|
75
|
+
name_line = ws.rep0 & honorific & first_name & ws & last_name & eol
|
72
76
|
|
73
77
|
# Next line is company info
|
74
78
|
# We could choose to accept UAB and AB or just AB and etc.
|
@@ -78,9 +82,9 @@ quote_open = c("„")
|
|
78
82
|
quote_close = c("“")
|
79
83
|
|
80
84
|
# Consume LT alphabet with whitespace
|
81
|
-
company_name = (alpha_with_lt | ws).rep.
|
82
|
-
company_info = company_type
|
83
|
-
second_line = ws.rep0
|
85
|
+
company_name = (alpha_with_lt | ws).rep.map(&:join).capture!
|
86
|
+
company_info = company_type & ws.rep0 & quote_open & company_name & quote_close
|
87
|
+
second_line = ws.rep0 & company_info & eol
|
84
88
|
|
85
89
|
# Address line
|
86
90
|
# 'char_while' will consume characters while passed predicate is true
|
@@ -88,35 +92,47 @@ second_line = ws.rep0 >> company_info >> eol
|
|
88
92
|
# After that result is captured and mapped to hash
|
89
93
|
# Mapping to hash so at the end it's easy to tell tuples apart
|
90
94
|
# Also while mapping, doing some cleaning with '.strip'
|
91
|
-
street_name = char_while(->(char) { !char.match(/\d/) })
|
92
|
-
|
93
|
-
|
94
|
-
.map { |s| { street: s.strip } }
|
95
|
-
building = digits.capture!.map(&:first).map { |s| { building: s.strip } }
|
96
|
-
address_line = ws.rep0 >> street_name >> building >> eol
|
95
|
+
street_name = char_while(->(char) { !char.match(/\d/) }).map { |s| { street: s.strip } }.capture!
|
96
|
+
building = digits.map { |s| { building: s.strip } }.capture!
|
97
|
+
address_line = ws.rep0 & street_name & building & eol
|
97
98
|
|
98
99
|
# City line
|
99
|
-
# All digits can be matched here or 'digits.rep(5)' could be used.
|
100
|
-
|
101
|
-
|
102
|
-
|
103
|
-
|
104
|
-
|
105
|
-
|
106
|
-
# Also, all previous parsers can be ran separately.
|
107
|
-
letter_parser = name_line >> second_line >> address_line >> city_line
|
100
|
+
# All digits can be matched here or 'digits.rep(5)' could be used. Also joining with map.
|
101
|
+
postcode = digits.map { |s| { postcode: s.strip } }.capture!
|
102
|
+
city = alpha_with_lt.rep.map(&:join).map { |s| { city: s.strip } }.capture!
|
103
|
+
city_line = ws.rep0 & postcode & ws & city & eol
|
104
|
+
|
105
|
+
# Full letter parser which is combined from all previous parsers. All previous parsers can be run separately.
|
106
|
+
letter_parser = name_line & second_line & address_line & city_line
|
108
107
|
result = letter_parser.run(letter)
|
109
108
|
|
110
|
-
# List of tuples
|
111
109
|
pp result.captures
|
112
|
-
# ["Vardeniui", "Pavardeniui", "Lietuvos Paštas", {:street=>"Totorių g."},
|
113
|
-
# {:building=>"8"}, {:postcode=>"01121"}, {:city=>"Vilnius"}]
|
114
110
|
|
115
111
|
```
|
116
112
|
### Roadmap
|
117
113
|
- Better error handling
|
118
|
-
- Recursive parser
|
119
114
|
- Benchmarks & parser tests
|
115
|
+
- Documentation
|
116
|
+
- Performance
|
117
|
+
|
118
|
+
### Benchmarks
|
119
|
+
`bundle exec ruby benchmarks/json/json.rb`
|
120
|
+
```
|
121
|
+
Dolos
|
122
|
+
nested json benchmark 8.426 (± 0.0%) i/s - 43.000 in 5.103600s
|
123
|
+
letter benchmark 3.145k (± 0.7%) i/s - 15.810k in 5.027961s
|
124
|
+
|
125
|
+
# Note: 23 times slower than the pure-Ruby specialized JSON parser (below) when used to parse JSON
|
126
|
+
nested json 166KB bench 8.189 (± 0.0%) i/s - 41.000 in 5.007158s
|
127
|
+
nested json 1MB bench 0.959 (± 0.0%) i/s - 5.000 in 5.230650s
|
128
|
+
|
129
|
+
-----------------------------------------------------------
|
130
|
+
Pure ruby (flori/json)
|
131
|
+
nested json 1MB bench 24.213 (± 4.1%) i/s - 122.000 in 5.042309s
|
132
|
+
nested json 166KB bench 188.070 (± 1.6%) i/s - 954.000 in 5.073788s
|
133
|
+
Ruby native (C)
|
134
|
+
nested json 1MB bench 309.519 (± 0.3%) i/s - 1.560k in 5.040164s
|
135
|
+
```
|
120
136
|
|
121
137
|
### Contributing
|
122
138
|
Contributors are welcome. Note: since the library is not yet stable, I recommend getting in touch with me before starting to work on something.
|
@@ -0,0 +1,83 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require 'benchmark/ips'
|
4
|
+
require 'bundler/setup'
|
5
|
+
require 'dolos'
|
6
|
+
require 'dolos_common_parsers/common_parsers'
|
7
|
+
|
8
|
+
include Dolos
|
9
|
+
include Dolos::CommonParsers
|
10
|
+
def comma = c(",")
|
11
|
+
|
12
|
+
def string_literal = (c("\"") >> char_while(->(ch) { ch != "\"" }).opt << c("\""))
|
13
|
+
|
14
|
+
def boolean = (c("true").map { true } | c("false").map { false })
|
15
|
+
|
16
|
+
def null = c("null").map { nil }
|
17
|
+
|
18
|
+
def array = recursive do |arr|
|
19
|
+
c("[") >> ws_rep0 >> value.repeat(n_min: 0, separator: (comma << ws_rep0)) << ws_rep0 << c("]")
|
20
|
+
end
|
21
|
+
|
22
|
+
def negative_sign = c("-").opt
|
23
|
+
|
24
|
+
def decimal_point = c('.').opt
|
25
|
+
|
26
|
+
def number = (negative_sign & digits & decimal_point & digits.opt).map do |tuple|
|
27
|
+
tuple.join.to_f
|
28
|
+
end
|
29
|
+
|
30
|
+
def value = number | object | string_literal | boolean | null | array
|
31
|
+
|
32
|
+
def key_line = ((string_literal << ws_rep0) << c(":") & ws_rep0 >> value).map do |tuple|
|
33
|
+
{ tuple[0] => tuple[1] }
|
34
|
+
end
|
35
|
+
|
36
|
+
def key_lines = (key_line << ws_rep0).repeat(n_min: 1, separator: (comma << ws_rep0 << eol.opt)).map do |arr|
|
37
|
+
arr.reduce({}) do |acc, hash|
|
38
|
+
acc.merge(hash)
|
39
|
+
end
|
40
|
+
end
|
41
|
+
|
42
|
+
def object = recursive do |obj|
|
43
|
+
c("{") >> ws_rep0 >> key_lines.opt << ws_rep0 << c("}")
|
44
|
+
end
|
45
|
+
|
46
|
+
def json_parser = ws_rep0 >> value
|
47
|
+
|
48
|
+
require 'json/pure'
|
49
|
+
|
50
|
+
|
51
|
+
json_from_file = File.read('benchmarks/json/nested_json_166.json')
|
52
|
+
|
53
|
+
result = json_parser.run(json_from_file)
|
54
|
+
puts result.success?
|
55
|
+
|
56
|
+
Benchmark.ips do |x|
|
57
|
+
x.report('nested json 166kb benchmark') do
|
58
|
+
json_parser.run(json_from_file)
|
59
|
+
end
|
60
|
+
x.report('Pure ruby json: nested json 166kb benchmark') do
|
61
|
+
JSON.parse(json_from_file)
|
62
|
+
end
|
63
|
+
x.compare!
|
64
|
+
end
|
65
|
+
|
66
|
+
json_from_file1m = File.read('benchmarks/json/nested_json_1m.json')
|
67
|
+
result1m = json_parser.run(json_from_file1m)
|
68
|
+
puts result1m.success?
|
69
|
+
|
70
|
+
# require 'json'
|
71
|
+
|
72
|
+
Benchmark.ips do |x|
|
73
|
+
# x.report('nested json 1mb benchmark') do
|
74
|
+
# json_parser.run(json_from_file1m)
|
75
|
+
# end
|
76
|
+
# x.report('Ruby native: nested json 1mb benchmark') do
|
77
|
+
# JSON.parse(json_from_file1m)
|
78
|
+
# end
|
79
|
+
# x.report('Pure ruby json: nested json 1mb benchmark') do
|
80
|
+
# JSON.parse(json_from_file1m)
|
81
|
+
# end
|
82
|
+
x.compare!
|
83
|
+
end
|