dolos 0.1.3 → 0.2.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: c8a83a078e5fa82aa8f6d28f3315597937b84c604d59e18f177a950594860e93
4
- data.tar.gz: bccf5fc9390189e43ef9c785baeaf93b85e00d1ce480c63057cf0cc26c468050
3
+ metadata.gz: cfeffbbe4108e43ba81ccd210f2689910d310fc3947fe05ed22a9972e581e96a
4
+ data.tar.gz: e181ccca4725008464fc92670b8120cf55262ba469d6ec494270081eb902c241
5
5
  SHA512:
6
- metadata.gz: 75fcc5acf6a6dfbd4eeaf7493f9f82a3d31b5d6b61c64f2d9ee3f6f7fe8856fa1392866cbde95ba12bdb06155f07ed7c9cfe46c24f4353c111bf3782f6033289
7
- data.tar.gz: '0608ddcb140a18c1d156ae8729f4c998b1abe4b257be8e02dddff796a3e7d861892ddfebcc6f54b744e9b03d381ed029ad81954b2ff484cc2621d5b7f804d3cb'
6
+ metadata.gz: 27fe73f34d41692d31c00d30bc03869cdefcef4ee785acbb72da12c9bceb85b78357da7c3d6d1435598b40fb635fb5e71d56254c18597c8d7d424748afc7c098
7
+ data.tar.gz: 9dd78f5e313830742505bbeb1488ed643a700facc2193ed3e996eef795913a50bb660a2af2dcd7c9a73e6e5f84befa7e461b2cf10d4bd11f22d68901ecc59e9b
data/README.md CHANGED
@@ -23,30 +23,34 @@ require 'dolos'
23
23
  include Dolos
24
24
 
25
25
  ws = c(" ")
26
- parser = c("Parsers") >> ws >> c("are") >> ws >> c("great!")
26
+ parser = c("Parsers") & ws & c("are") & ws & c("great!")
27
27
  parser.run("Parsers are great!") # <Result::Success>
28
28
 
29
29
  greeter = c("Hello")
30
- greet_and_speak = greeter >> c(", ") >> parser
30
+ greet_and_speak = greeter & c(", ") & parser
31
31
  greet_and_speak.run("Hello, Parsers are great!") # <Result::Success>
32
32
  ```
33
33
 
34
34
  ### Letter address parser example
35
+
35
36
  ```ruby
36
37
  require 'dolos'
37
38
  require 'dolos_common_parsers/common_parsers'
38
39
 
40
+ include Dolos
41
+ # frozen_string_literal: true
42
+ require_relative 'dolos'
43
+ require_relative 'dolos_common_parsers/common_parsers'
44
+
39
45
  include Dolos
40
46
 
41
47
  # Include common parsers
42
- # In future this can be more structured,
43
- # moved them to separate module to prevent breaking changes
48
+ # In the future this can be more structured; they were moved to a separate module to prevent breaking changes
44
49
  include Dolos::CommonParsers
45
50
 
46
51
  # Library usage example
47
52
  # Parse out a name and address from a letter
48
- # For higher difficulty, we will not split this into multiple lines,
49
- # but instead parse it all at once
53
+ # For higher difficulty, we will not split this into multiple lines, but instead parse it all at once
50
54
  letter = <<-LETTER
51
55
  Mr. Vardeniui Pavardeniui
52
56
  AB „Lietuvos Paštas“
@@ -63,12 +67,12 @@ alpha_with_lt = char_in("ąčęėįšųūžĄČĘĖĮŠŲŪŽ") | alpha
63
67
 
64
68
  # Capture all letters in a row and join them,
65
69
  # because they are captured as elements of array by each alpha_with_lt parser.
66
- first_name = alpha_with_lt.rep.capture!.map(&:join)
67
- last_name = alpha_with_lt.rep.capture!.map(&:join)
70
+ first_name = alpha_with_lt.rep.map(&:join).capture!
71
+ last_name = alpha_with_lt.rep.map(&:join).capture!
68
72
 
69
73
  # Combine first line parsers
70
74
  # Consume zero or more whitespace, after that honorific must follow and so on
71
- name_line = ws.rep0 >> honorific >> first_name >> ws >> last_name >> eol
75
+ name_line = ws.rep0 & honorific & first_name & ws & last_name & eol
72
76
 
73
77
  # Next line is company info
74
78
  # We could choose to accept UAB and AB or just AB and etc.
@@ -78,9 +82,9 @@ quote_open = c("„")
78
82
  quote_close = c("“")
79
83
 
80
84
  # Consume LT alphabet with whitespace
81
- company_name = (alpha_with_lt | ws).rep.capture!.map(&:join)
82
- company_info = company_type >> ws.rep0 >> quote_open >> company_name >> quote_close
83
- second_line = ws.rep0 >> company_info >> eol
85
+ company_name = (alpha_with_lt | ws).rep.map(&:join).capture!
86
+ company_info = company_type & ws.rep0 & quote_open & company_name & quote_close
87
+ second_line = ws.rep0 & company_info & eol
84
88
 
85
89
  # Address line
86
90
  # 'char_while' will consume characters while passed predicate is true
@@ -88,35 +92,47 @@ second_line = ws.rep0 >> company_info >> eol
88
92
  # After that result is captured and mapped to hash
89
93
  # Mapping to hash so at the end it's easy to tell tuples apart
90
94
  # Also while mapping, doing some cleaning with '.strip'
91
- street_name = char_while(->(char) { !char.match(/\d/) })
92
- .capture!
93
- .map(&:first)
94
- .map { |s| { street: s.strip } }
95
- building = digits.capture!.map(&:first).map { |s| { building: s.strip } }
96
- address_line = ws.rep0 >> street_name >> building >> eol
95
+ street_name = char_while(->(char) { !char.match(/\d/) }).map { |s| { street: s.strip } }.capture!
96
+ building = digits.map { |s| { building: s.strip } }.capture!
97
+ address_line = ws.rep0 & street_name & building & eol
97
98
 
98
99
  # City line
99
- # All digits can be matched here or 'digits.rep(5)' could be used.
100
- # Also joining with map results.
101
- postcode = digits.capture!.map(&:join).map { |s| { postcode: s.strip } }
102
- city = alpha_with_lt.rep.capture!.map(&:join).map { |s| { city: s.strip } }
103
- city_line = ws.rep0 >> postcode >> ws >> city >> eol
104
-
105
- # Full letter parser which is combined from all previous parsers.
106
- # Also, all previous parsers can be ran separately.
107
- letter_parser = name_line >> second_line >> address_line >> city_line
100
+ # All digits can be matched here or 'digits.rep(5)' could be used. Also joining with map.
101
+ postcode = digits.map { |s| { postcode: s.strip } }.capture!
102
+ city = alpha_with_lt.rep.map(&:join).map { |s| { city: s.strip } }.capture!
103
+ city_line = ws.rep0 & postcode & ws & city & eol
104
+
105
+ # Full letter parser which is combined from all previous parsers. All previous parsers can be run separately.
106
+ letter_parser = name_line & second_line & address_line & city_line
108
107
  result = letter_parser.run(letter)
109
108
 
110
- # List of tuples
111
109
  pp result.captures
112
- # ["Vardeniui", "Pavardeniui", "Lietuvos Paštas", {:street=>"Totorių g."},
113
- # {:building=>"8"}, {:postcode=>"01121"}, {:city=>"Vilnius"}]
114
110
 
115
111
  ```
116
112
  ### Roadmap
117
113
  - Better error handling
118
- - Recursive parser
119
114
  - Benchmarks & parser tests
115
+ - Documentation
116
+ - Performance
117
+
118
+ ### Benchmarks
119
+ `bundle exec ruby benchmarks/json/json.rb`
120
+ ```
121
+ Dolos
122
+ nested json benchmark 8.426 (± 0.0%) i/s - 43.000 in 5.103600s
123
+ letter benchmark 3.145k (± 0.7%) i/s - 15.810k in 5.027961s
124
+
125
+ # Note: when used to parse JSON, Dolos is about 23 times slower than the specialized pure-Ruby JSON parser (below)
126
+ nested json 166KB bench 8.189 (± 0.0%) i/s - 41.000 in 5.007158s
127
+ nested json 1MB bench 0.959 (± 0.0%) i/s - 5.000 in 5.230650s
128
+
129
+ -----------------------------------------------------------
130
+ Pure ruby (flori/json)
131
+ nested json 1MB bench 24.213 (± 4.1%) i/s - 122.000 in 5.042309s
132
+ nested json 166KB bench 188.070 (± 1.6%) i/s - 954.000 in 5.073788s
133
+ Ruby native (C)
134
+ nested json 1MB bench 309.519 (± 0.3%) i/s - 1.560k in 5.040164s
135
+ ```
120
136
 
121
137
  ### Contributing
122
138
  Contributors are welcome. Note: since the library is not yet stable, I recommend getting in touch with me before starting work on something.
@@ -0,0 +1,83 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'benchmark/ips'
4
+ require 'bundler/setup'
5
+ require 'dolos'
6
+ require 'dolos_common_parsers/common_parsers'
7
+
8
+ include Dolos
9
+ include Dolos::CommonParsers
10
+ def comma = c(",")
11
+
12
+ def string_literal = (c("\"") >> char_while(->(ch) { ch != "\"" }).opt << c("\""))
13
+
14
+ def boolean = (c("true").map { true } | c("false").map { false })
15
+
16
+ def null = c("null").map { nil }
17
+
18
+ def array = recursive do |arr|
19
+ c("[") >> ws_rep0 >> value.repeat(n_min: 0, separator: (comma << ws_rep0)) << ws_rep0 << c("]")
20
+ end
21
+
22
+ def negative_sign = c("-").opt
23
+
24
+ def decimal_point = c('.').opt
25
+
26
+ def number = (negative_sign & digits & decimal_point & digits.opt).map do |tuple|
27
+ tuple.join.to_f
28
+ end
29
+
30
+ def value = number | object | string_literal | boolean | null | array
31
+
32
+ def key_line = ((string_literal << ws_rep0) << c(":") & ws_rep0 >> value).map do |tuple|
33
+ { tuple[0] => tuple[1] }
34
+ end
35
+
36
+ def key_lines = (key_line << ws_rep0).repeat(n_min: 1, separator: (comma << ws_rep0 << eol.opt)).map do |arr|
37
+ arr.reduce({}) do |acc, hash|
38
+ acc.merge(hash)
39
+ end
40
+ end
41
+
42
+ def object = recursive do |obj|
43
+ c("{") >> ws_rep0 >> key_lines.opt << ws_rep0 << c("}")
44
+ end
45
+
46
+ def json_parser = ws_rep0 >> value
47
+
48
+ require 'json/pure'
49
+
50
+
51
+ json_from_file = File.read('benchmarks/json/nested_json_166.json')
52
+
53
+ result = json_parser.run(json_from_file)
54
+ puts result.success?
55
+
56
+ Benchmark.ips do |x|
57
+ x.report('nested json 166kb benchmark') do
58
+ json_parser.run(json_from_file)
59
+ end
60
+ x.report('Pure ruby json: nested json 166kb benchmark') do
61
+ JSON.parse(json_from_file)
62
+ end
63
+ x.compare!
64
+ end
65
+
66
+ json_from_file1m = File.read('benchmarks/json/nested_json_1m.json')
67
+ result1m = json_parser.run(json_from_file1m)
68
+ puts result1m.success?
69
+
70
+ # require 'json'
71
+
72
+ Benchmark.ips do |x|
73
+ # x.report('nested json 1mb benchmark') do
74
+ # json_parser.run(json_from_file1m)
75
+ # end
76
+ # x.report('Ruby native: nested json 1mb benchmark') do
77
+ # JSON.parse(json_from_file1m)
78
+ # end
79
+ # x.report('Pure ruby json: nested json 1mb benchmark') do
80
+ # JSON.parse(json_from_file1m)
81
+ # end
82
+ x.compare!
83
+ end