dolos 0.1.3 → 0.2.1

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: c8a83a078e5fa82aa8f6d28f3315597937b84c604d59e18f177a950594860e93
4
- data.tar.gz: bccf5fc9390189e43ef9c785baeaf93b85e00d1ce480c63057cf0cc26c468050
3
+ metadata.gz: cfeffbbe4108e43ba81ccd210f2689910d310fc3947fe05ed22a9972e581e96a
4
+ data.tar.gz: e181ccca4725008464fc92670b8120cf55262ba469d6ec494270081eb902c241
5
5
  SHA512:
6
- metadata.gz: 75fcc5acf6a6dfbd4eeaf7493f9f82a3d31b5d6b61c64f2d9ee3f6f7fe8856fa1392866cbde95ba12bdb06155f07ed7c9cfe46c24f4353c111bf3782f6033289
7
- data.tar.gz: '0608ddcb140a18c1d156ae8729f4c998b1abe4b257be8e02dddff796a3e7d861892ddfebcc6f54b744e9b03d381ed029ad81954b2ff484cc2621d5b7f804d3cb'
6
+ metadata.gz: 27fe73f34d41692d31c00d30bc03869cdefcef4ee785acbb72da12c9bceb85b78357da7c3d6d1435598b40fb635fb5e71d56254c18597c8d7d424748afc7c098
7
+ data.tar.gz: 9dd78f5e313830742505bbeb1488ed643a700facc2193ed3e996eef795913a50bb660a2af2dcd7c9a73e6e5f84befa7e461b2cf10d4bd11f22d68901ecc59e9b
data/README.md CHANGED
@@ -23,30 +23,34 @@ require 'dolos'
23
23
  include Dolos
24
24
 
25
25
  ws = c(" ")
26
- parser = c("Parsers") >> ws >> c("are") >> ws >> c("great!")
26
+ parser = c("Parsers") & ws & c("are") & ws & c("great!")
27
27
  parser.run("Parsers are great!") # <Result::Success>
28
28
 
29
29
  greeter = c("Hello")
30
- greet_and_speak = greeter >> c(", ") >> parser
30
+ greet_and_speak = greeter & c(", ") & parser
31
31
  greet_and_speak.run("Hello, Parsers are great!") # <Result::Success>
32
32
  ```
33
33
 
34
34
  ### Letter address parser example
35
+
35
36
  ```ruby
36
37
  require 'dolos'
37
38
  require 'dolos_common_parsers/common_parsers'
38
39
 
40
+ include Dolos
41
+ # frozen_string_literal: true
42
+ require_relative 'dolos'
43
+ require_relative 'dolos_common_parsers/common_parsers'
44
+
39
45
  include Dolos
40
46
 
41
47
  # Include common parsers
42
- # In future this can be more structured,
43
- # moved them to separate module to prevent breaking changes
48
+ # In the future this can be more structured; the common parsers were moved to a separate module to prevent breaking changes
44
49
  include Dolos::CommonParsers
45
50
 
46
51
  # Library usage example
47
52
  # Parse out a name and address from a letter
48
- # For higher difficulty, we will not split this into multiple lines,
49
- # but instead parse it all at once
53
+ # For higher difficulty, we will not split this into multiple lines, but instead parse it all at once
50
54
  letter = <<-LETTER
51
55
  Mr. Vardeniui Pavardeniui
52
56
  AB „Lietuvos Paštas“
@@ -63,12 +67,12 @@ alpha_with_lt = char_in("ąčęėįšųūžĄČĘĖĮŠŲŪŽ") | alpha
63
67
 
64
68
  # Capture all letters in a row and join them,
65
69
  # because they are captured as elements of an array by each alpha_with_lt parser.
66
- first_name = alpha_with_lt.rep.capture!.map(&:join)
67
- last_name = alpha_with_lt.rep.capture!.map(&:join)
70
+ first_name = alpha_with_lt.rep.map(&:join).capture!
71
+ last_name = alpha_with_lt.rep.map(&:join).capture!
68
72
 
69
73
  # Combine first line parsers
70
74
  # Consume zero or more whitespace, after that honorific must follow and so on
71
- name_line = ws.rep0 >> honorific >> first_name >> ws >> last_name >> eol
75
+ name_line = ws.rep0 & honorific & first_name & ws & last_name & eol
72
76
 
73
77
  # Next line is company info
74
78
  # We could choose to accept UAB and AB, or just AB, and so on.
@@ -78,9 +82,9 @@ quote_open = c("„")
78
82
  quote_close = c("“")
79
83
 
80
84
  # Consume LT alphabet with whitespace
81
- company_name = (alpha_with_lt | ws).rep.capture!.map(&:join)
82
- company_info = company_type >> ws.rep0 >> quote_open >> company_name >> quote_close
83
- second_line = ws.rep0 >> company_info >> eol
85
+ company_name = (alpha_with_lt | ws).rep.map(&:join).capture!
86
+ company_info = company_type & ws.rep0 & quote_open & company_name & quote_close
87
+ second_line = ws.rep0 & company_info & eol
84
88
 
85
89
  # Address line
86
90
  # 'char_while' will consume characters while passed predicate is true
@@ -88,35 +92,47 @@ second_line = ws.rep0 >> company_info >> eol
88
92
  # After that result is captured and mapped to hash
89
93
  # Mapping to hash so at the end it's easy to tell tuples apart
90
94
  # Also while mapping, doing some cleaning with '.strip'
91
- street_name = char_while(->(char) { !char.match(/\d/) })
92
- .capture!
93
- .map(&:first)
94
- .map { |s| { street: s.strip } }
95
- building = digits.capture!.map(&:first).map { |s| { building: s.strip } }
96
- address_line = ws.rep0 >> street_name >> building >> eol
95
+ street_name = char_while(->(char) { !char.match(/\d/) }).map { |s| { street: s.strip } }.capture!
96
+ building = digits.map { |s| { building: s.strip } }.capture!
97
+ address_line = ws.rep0 & street_name & building & eol
97
98
 
98
99
  # City line
99
- # All digits can be matched here or 'digits.rep(5)' could be used.
100
- # Also joining with map results.
101
- postcode = digits.capture!.map(&:join).map { |s| { postcode: s.strip } }
102
- city = alpha_with_lt.rep.capture!.map(&:join).map { |s| { city: s.strip } }
103
- city_line = ws.rep0 >> postcode >> ws >> city >> eol
104
-
105
- # Full letter parser which is combined from all previous parsers.
106
- # Also, all previous parsers can be ran separately.
107
- letter_parser = name_line >> second_line >> address_line >> city_line
100
+ # All digits can be matched here or 'digits.rep(5)' could be used. Also joining with map.
101
+ postcode = digits.map { |s| { postcode: s.strip } }.capture!
102
+ city = alpha_with_lt.rep.map(&:join).map { |s| { city: s.strip } }.capture!
103
+ city_line = ws.rep0 & postcode & ws & city & eol
104
+
105
+ # Full letter parser which is combined from all previous parsers. All previous parsers can be run separately.
106
+ letter_parser = name_line & second_line & address_line & city_line
108
107
  result = letter_parser.run(letter)
109
108
 
110
- # List of tuples
111
109
  pp result.captures
112
- # ["Vardeniui", "Pavardeniui", "Lietuvos Paštas", {:street=>"Totorių g."},
113
- # {:building=>"8"}, {:postcode=>"01121"}, {:city=>"Vilnius"}]
114
110
 
115
111
  ```
116
112
  ### Roadmap
117
113
  - Better error handling
118
- - Recursive parser
119
114
  - Benchmarks & parser tests
115
+ - Documentation
116
+ - Performance
117
+
118
+ ### Benchmarks
119
+ `bundle exec ruby benchmarks/json/json.rb`
120
+ ```
121
+ Dolos
122
+ nested json benchmark 8.426 (± 0.0%) i/s - 43.000 in 5.103600s
123
+ letter benchmark 3.145k (± 0.7%) i/s - 15.810k in 5.027961s
124
+
125
+ # Note: Dolos is 23 times slower than the specialized pure-Ruby JSON parser (below) when used to parse JSON
126
+ nested json 166KB bench 8.189 (± 0.0%) i/s - 41.000 in 5.007158s
127
+ nested json 1MB bench 0.959 (± 0.0%) i/s - 5.000 in 5.230650s
128
+
129
+ -----------------------------------------------------------
130
+ Pure ruby (flori/json)
131
+ nested json 1MB bench 24.213 (± 4.1%) i/s - 122.000 in 5.042309s
132
+ nested json 166KB bench 188.070 (± 1.6%) i/s - 954.000 in 5.073788s
133
+ Ruby native (C)
134
+ nested json 1MB bench 309.519 (± 0.3%) i/s - 1.560k in 5.040164s
135
+ ```
120
136
 
121
137
  ### Contributing
122
138
  Contributors are welcome. Note: since the library is not yet stable, I recommend getting in touch with me before starting to work on something.
@@ -0,0 +1,83 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'benchmark/ips'
4
+ require 'bundler/setup'
5
+ require 'dolos'
6
+ require 'dolos_common_parsers/common_parsers'
7
+
8
+ include Dolos
9
+ include Dolos::CommonParsers
10
+ def comma = c(",")
11
+
12
+ def string_literal = (c("\"") >> char_while(->(ch) { ch != "\"" }).opt << c("\""))
13
+
14
+ def boolean = (c("true").map { true } | c("false").map { false })
15
+
16
+ def null = c("null").map { nil }
17
+
18
+ def array = recursive do |arr|
19
+ c("[") >> ws_rep0 >> value.repeat(n_min: 0, separator: (comma << ws_rep0)) << ws_rep0 << c("]")
20
+ end
21
+
22
+ def negative_sign = c("-").opt
23
+
24
+ def decimal_point = c('.').opt
25
+
26
+ def number = (negative_sign & digits & decimal_point & digits.opt).map do |tuple|
27
+ tuple.join.to_f
28
+ end
29
+
30
+ def value = number | object | string_literal | boolean | null | array
31
+
32
+ def key_line = ((string_literal << ws_rep0) << c(":") & ws_rep0 >> value).map do |tuple|
33
+ { tuple[0] => tuple[1] }
34
+ end
35
+
36
+ def key_lines = (key_line << ws_rep0).repeat(n_min: 1, separator: (comma << ws_rep0 << eol.opt)).map do |arr|
37
+ arr.reduce({}) do |acc, hash|
38
+ acc.merge(hash)
39
+ end
40
+ end
41
+
42
+ def object = recursive do |obj|
43
+ c("{") >> ws_rep0 >> key_lines.opt << ws_rep0 << c("}")
44
+ end
45
+
46
+ def json_parser = ws_rep0 >> value
47
+
48
+ require 'json/pure'
49
+
50
+
51
+ json_from_file = File.read('benchmarks/json/nested_json_166.json')
52
+
53
+ result = json_parser.run(json_from_file)
54
+ puts result.success?
55
+
56
+ Benchmark.ips do |x|
57
+ x.report('nested json 166kb benchmark') do
58
+ json_parser.run(json_from_file)
59
+ end
60
+ x.report('Pure ruby json: nested json 166kb benchmark') do
61
+ JSON.parse(json_from_file)
62
+ end
63
+ x.compare!
64
+ end
65
+
66
+ json_from_file1m = File.read('benchmarks/json/nested_json_1m.json')
67
+ result1m = json_parser.run(json_from_file1m)
68
+ puts result1m.success?
69
+
70
+ # require 'json'
71
+
72
+ Benchmark.ips do |x|
73
+ # x.report('nested json 1mb benchmark') do
74
+ # json_parser.run(json_from_file1m)
75
+ # end
76
+ # x.report('Ruby native: nested json 1mb benchmark') do
77
+ # JSON.parse(json_from_file1m)
78
+ # end
79
+ # x.report('Pure ruby json: nested json 1mb benchmark') do
80
+ # JSON.parse(json_from_file1m)
81
+ # end
82
+ x.compare!
83
+ end