fast_jsonparser 0.3.0 → 0.4.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: cb4380ffb8ced606931028f66c05ddb3af40498a9aada56833be2f5ef0bc47b4
4
- data.tar.gz: 3a3159926d6f1b1b0d90431171b7c97a93df4082bc1371313c4fe31e6c4de0c8
3
+ metadata.gz: 2b9a3639a83aa2f68468df13dcc8922b244c727c00b2e040a7dd94f9b3832698
4
+ data.tar.gz: 49e6c154239a25e2ecc695288166914ee60a1500d7e2773ef25d05e39eddfa4d
5
5
  SHA512:
6
- metadata.gz: ffa4a69c6550db893fd93c1f13df5778004188f1f3b54944b2d20049366026bbf2720fb0d62dce124d279fab650ed4e2f12c763c48207323ec9a900da240b32a
7
- data.tar.gz: fe75932c18f3cf0d896536ffc1d4f8f30067fe85046c7c1bf1806e819674f3f0a48816240cc634ac5dd530eb06e80f7b2a35f63a91e5c4ff1e432e8bf5310df5
6
+ metadata.gz: '08ebbd273a4180261ab78c50874e7a5cf28afa7c1718be05fbc8bec93bd6652097c88ad2b0ac7eba184f4755a209cc0e9256551d04e1350ac1139d4141a50412'
7
+ data.tar.gz: 6400ba1f5f0f0083b4c2a75f947888ab782695c66c676a1bb35011efde40b7ce2b1ab38d0fbbfc76eda50e80cef6194cc8bf495e3f8cb6b1aef0c8855bf2ed76
data/.gitignore CHANGED
@@ -6,3 +6,5 @@
6
6
  /pkg/
7
7
  /spec/reports/
8
8
  /tmp/
9
+ *.so
10
+ *.bundle
@@ -0,0 +1,4 @@
1
+ # 0.4.0
2
+ * `load_many` accepts a `batch_size` parameter to parse documents larger than 1 MB in [PR #5](https://github.com/anilmaurya/fast_jsonparser/pull/5), thanks to [casperisfine](https://github.com/casperisfine)
3
+ * Add a `symbolize_keys` option, defaulting to true, in [PR #9](https://github.com/anilmaurya/fast_jsonparser/pull/9), thanks to [casperisfine](https://github.com/casperisfine)
4
+ * Parse string values as UTF-8 in [PR #10](https://github.com/anilmaurya/fast_jsonparser/pull/10), thanks to [casperisfine](https://github.com/casperisfine)
@@ -1,7 +1,7 @@
1
1
  PATH
2
2
  remote: .
3
3
  specs:
4
- fast_jsonparser (0.2.0)
4
+ fast_jsonparser (0.3.0)
5
5
 
6
6
  GEM
7
7
  remote: https://rubygems.org/
data/README.md CHANGED
@@ -107,9 +107,22 @@ Example: logs.json with following content
107
107
  "17/May/2015:08:05:23 +0000"
108
108
  "17/May/2015:08:05:24 +0000"
109
109
  ```
110
+ If a JSON document is larger than 1 MB (the default batch size), set the `batch_size` option
110
111
 
112
+ ```
113
+ FastJsonparser.load_many(f.path, batch_size: 2_000) {}
114
+ ```
115
+
116
+ 4. Accepts an optional `symbolize_keys` param (default: `symbolize_keys: true`)
117
+
118
+ If string keys are expected in the parsed result, use
119
+
120
+ ```
121
+ FastJsonparser.parse('{"one": 1, "two": 2}', symbolize_keys: false)
122
+
123
+ ```
111
124
 
112
- 4. Raise FastJsonparser::ParseError when invalid JSON provided for parsing
125
+ 5. Raise FastJsonparser::ParseError when invalid JSON is provided for parsing
113
126
 
114
127
  ```
115
128
  FastJsonparser.parse("123: 1") # FastJsonparser::ParseError (parse error)
@@ -124,9 +137,9 @@ FastJsonparser.parse("123: 1") # FastJsonparser::ParseError (parse error)
124
137
  ```
125
138
  ## Development
126
139
 
127
- After checking out the repo, run `bin/setup` to install dependencies. Then, run `rake test` to run the tests. You can also run `bin/console` for an interactive prompt that will allow you to experiment.
140
+ After checking out the repo, run `rake compile` to build the native extension. Then, run `rake test` to run the tests.
128
141
 
129
- To install this gem onto your local machine, run `bundle exec rake install`. To release a new version, update the version number in `version.rb`, and then run `bundle exec rake release`, which will create a git tag for the version, push git commits and tags, and push the `.gem` file to [rubygems.org](https://rubygems.org).
142
+ For more options, refer to https://github.com/rake-compiler/rake-compiler
130
143
 
131
144
  ## Contributing
132
145
 
data/Rakefile CHANGED
@@ -3,6 +3,7 @@ require "rake/testtask"
3
3
  require "rake/extensiontask"
4
4
 
5
5
  Rake::ExtensionTask.new("fast_jsonparser") do |ext|
6
+ ext.ext_dir = 'ext/fast_jsonparser'
6
7
  ext.lib_dir = "lib/fast_jsonparser"
7
8
  end
8
9
 
@@ -2,116 +2,122 @@
2
2
 
3
3
  #include "simdjson.h"
4
4
 
5
- VALUE rb_mFastJsonparser;
6
-
7
- VALUE rb_eFastJsonparserParseError;
5
+ VALUE rb_eFastJsonparserUnknownError, rb_eFastJsonparserParseError;
8
6
 
9
7
  using namespace simdjson;
10
8
 
11
9
  // Convert tape to Ruby's Object
12
- static VALUE make_ruby_object(dom::element element)
10
+ static VALUE make_ruby_object(dom::element element, bool symbolize_keys)
13
11
  {
14
- auto t = element.type();
15
- if (t == dom::element_type::ARRAY)
12
+ switch (element.type())
13
+ {
14
+ case dom::element_type::ARRAY:
16
15
  {
17
16
  VALUE ary = rb_ary_new();
18
17
  for (dom::element x : element)
19
18
  {
20
- VALUE e = make_ruby_object(x);
19
+ VALUE e = make_ruby_object(x, symbolize_keys);
21
20
  rb_ary_push(ary, e);
22
21
  }
23
22
  return ary;
24
23
  }
25
- else if (t == dom::element_type::OBJECT)
24
+ case dom::element_type::OBJECT:
26
25
  {
27
26
  VALUE hash = rb_hash_new();
28
27
  for (dom::key_value_pair field : dom::object(element))
29
28
  {
30
29
  std::string_view view(field.key);
31
- VALUE k = rb_intern(view.data());
32
- VALUE v = make_ruby_object(field.value);
33
- rb_hash_aset(hash, ID2SYM(k), v);
30
+ VALUE k = rb_utf8_str_new(view.data(), view.size());
31
+ if (symbolize_keys)
32
+ {
33
+ k = ID2SYM(rb_intern_str(k));
34
+ }
35
+ VALUE v = make_ruby_object(field.value, symbolize_keys);
36
+ rb_hash_aset(hash, k, v);
34
37
  }
35
38
  return hash;
36
39
  }
37
- else if (t == dom::element_type::INT64)
40
+ case dom::element_type::INT64:
38
41
  {
39
42
  return LONG2NUM(element.get<int64_t>());
40
43
  }
41
- else if (t == dom::element_type::UINT64)
44
+ case dom::element_type::UINT64:
42
45
  {
43
46
  return ULONG2NUM(element.get<uint64_t>());
44
47
  }
45
- else if (t == dom::element_type::DOUBLE)
48
+ case dom::element_type::DOUBLE:
46
49
  {
47
50
  return DBL2NUM(double(element));
48
51
  }
49
- else if (t == dom::element_type::STRING)
52
+ case dom::element_type::STRING:
50
53
  {
51
54
  std::string_view view(element);
52
- return rb_str_new(view.data(), view.size());
55
+ return rb_utf8_str_new(view.data(), view.size());
53
56
  }
54
- else if (t == dom::element_type::BOOL)
57
+ case dom::element_type::BOOL:
55
58
  {
56
59
  return bool(element) ? Qtrue : Qfalse;
57
60
  }
58
- else if (t == dom::element_type::NULL_VALUE)
61
+ case dom::element_type::NULL_VALUE:
59
62
  {
60
63
  return Qnil;
61
64
  }
65
+ }
62
66
  // unknown case (bug)
63
67
  rb_raise(rb_eException, "[BUG] must not happen");
64
68
  }
65
69
 
66
- static VALUE rb_fast_jsonparser_parse(VALUE self, VALUE arg)
70
+ static VALUE rb_fast_jsonparser_parse(VALUE self, VALUE arg, VALUE symbolize_keys)
67
71
  {
68
72
  Check_Type(arg, T_STRING);
69
73
 
70
74
  dom::parser parser;
71
75
  auto [doc, error] = parser.parse(RSTRING_PTR(arg), RSTRING_LEN(arg));
72
- if (error == SUCCESS)
76
+ if (error != SUCCESS)
73
77
  {
74
- return make_ruby_object(doc);
78
+ rb_raise(rb_eFastJsonparserParseError, "%s", error_message(error));
75
79
  }
76
- // TODO better error handling
77
- rb_raise(rb_eFastJsonparserParseError, "parse error");
78
- return Qnil;
80
+ return make_ruby_object(doc, RTEST(symbolize_keys));
79
81
  }
80
82
 
81
- static VALUE rb_fast_jsonparser_load(VALUE self, VALUE arg)
83
+ static VALUE rb_fast_jsonparser_load(VALUE self, VALUE arg, VALUE symbolize_keys)
82
84
  {
83
85
  Check_Type(arg, T_STRING);
84
86
 
85
87
  dom::parser parser;
86
88
  auto [doc, error] = parser.load(RSTRING_PTR(arg));
87
- if (error == SUCCESS)
89
+ if (error != SUCCESS)
88
90
  {
89
- return make_ruby_object(doc);
91
+ rb_raise(rb_eFastJsonparserParseError, "%s", error_message(error));
90
92
  }
91
- // TODO better error handling
92
- rb_raise(rb_eFastJsonparserParseError, "parse error");
93
- return Qnil;
93
+ return make_ruby_object(doc, RTEST(symbolize_keys));
94
94
  }
95
95
 
96
- static VALUE rb_fast_jsonparser_load_many(VALUE self, VALUE arg)
96
+ static VALUE rb_fast_jsonparser_load_many(VALUE self, VALUE arg, VALUE symbolize_keys, VALUE batch_size)
97
97
  {
98
98
  Check_Type(arg, T_STRING);
99
+ Check_Type(batch_size, T_FIXNUM);
99
100
 
100
- dom::parser parser;
101
- auto [docs, error] = parser.load_many(RSTRING_PTR(arg));
102
- if (error == SUCCESS)
101
+ try
103
102
  {
103
+ dom::parser parser;
104
+ auto [docs, error] = parser.load_many(RSTRING_PTR(arg), FIX2INT(batch_size));
105
+ if (error != SUCCESS)
106
+ {
107
+ rb_raise(rb_eFastJsonparserParseError, "%s", error_message(error));
108
+ }
109
+
104
110
  for (dom::element doc : docs)
105
111
  {
106
- if (rb_block_given_p())
107
- {
108
- rb_yield(make_ruby_object(doc));
109
- }
112
+ rb_yield(make_ruby_object(doc, RTEST(symbolize_keys)));
110
113
  }
114
+
111
115
  return Qnil;
112
116
  }
113
- rb_raise(rb_eFastJsonparserParseError, "parse error");
114
- return Qnil;
117
+ catch (simdjson::simdjson_error error)
118
+ {
119
+ rb_raise(rb_eFastJsonparserUnknownError, "%s", error.what());
120
+ }
115
121
  }
116
122
 
117
123
  extern "C"
@@ -119,10 +125,15 @@ extern "C"
119
125
 
120
126
  void Init_fast_jsonparser(void)
121
127
  {
122
- rb_mFastJsonparser = rb_define_module("FastJsonparser");
123
- rb_eFastJsonparserParseError = rb_define_class_under(rb_mFastJsonparser, "ParseError", rb_eStandardError);
124
- rb_define_module_function(rb_mFastJsonparser, "parse", reinterpret_cast<VALUE (*)(...)>(rb_fast_jsonparser_parse), 1);
125
- rb_define_module_function(rb_mFastJsonparser, "load", reinterpret_cast<VALUE (*)(...)>(rb_fast_jsonparser_load), 1);
126
- rb_define_module_function(rb_mFastJsonparser, "load_many", reinterpret_cast<VALUE (*)(...)>(rb_fast_jsonparser_load_many), 1);
128
+ VALUE rb_mFastJsonparser = rb_const_get(rb_cObject, rb_intern("FastJsonparser"));
129
+
130
+ rb_define_module_function(rb_mFastJsonparser, "_parse", reinterpret_cast<VALUE (*)(...)>(rb_fast_jsonparser_parse), 2);
131
+ rb_define_module_function(rb_mFastJsonparser, "_load", reinterpret_cast<VALUE (*)(...)>(rb_fast_jsonparser_load), 2);
132
+ rb_define_module_function(rb_mFastJsonparser, "_load_many", reinterpret_cast<VALUE (*)(...)>(rb_fast_jsonparser_load_many), 3);
133
+
134
+ rb_eFastJsonparserParseError = rb_const_get(rb_mFastJsonparser, rb_intern("ParseError"));
135
+ rb_global_variable(&rb_eFastJsonparserParseError);
136
+ rb_eFastJsonparserUnknownError = rb_const_get(rb_mFastJsonparser, rb_intern("UnknownError"));
137
+ rb_global_variable(&rb_eFastJsonparserUnknownError);
127
138
  }
128
139
  }
@@ -1,8 +1,36 @@
1
+ # frozen_string_literal: true
2
+
1
3
  require "fast_jsonparser/version"
2
4
 
3
5
  module FastJsonparser
4
- class Error < StandardError; end
5
- # Your code goes here...
6
- end
6
+ Error = Class.new(StandardError)
7
+ ParseError = Class.new(Error)
8
+ UnknownError = Class.new(Error)
9
+ BatchSizeTooSmall = Class.new(Error)
10
+
11
+ DEFAULT_BATCH_SIZE = 1_000_000 # from include/simdjson/dom/parser.h
12
+
13
+ class << self
14
+ def parse(source, symbolize_keys: true)
15
+ _parse(source, symbolize_keys)
16
+ end
7
17
 
8
- require "fast_jsonparser/fast_jsonparser" # loads cpp extension
18
+ def load(source, symbolize_keys: true)
19
+ _load(source, symbolize_keys)
20
+ end
21
+
22
+ def load_many(source, symbolize_keys: true, batch_size: DEFAULT_BATCH_SIZE, &block)
23
+ _load_many(source, symbolize_keys, batch_size, &block)
24
+ rescue UnknownError => error
25
+ case error.message
26
+ when "This parser can't support a document that big"
27
+ raise BatchSizeTooSmall, "One of the documents was bigger than the batch size (#{batch_size}B), try increasing it."
28
+ else
29
+ raise
30
+ end
31
+ end
32
+
33
+ require "fast_jsonparser/fast_jsonparser" # loads cpp extension
34
+ private :_parse, :_load, :_load_many
35
+ end
36
+ end
@@ -1,3 +1,3 @@
1
1
  module FastJsonparser
2
- VERSION = "0.3.0"
2
+ VERSION = "0.4.0"
3
3
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: fast_jsonparser
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.3.0
4
+ version: 0.4.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Anil Maurya
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2020-07-14 00:00:00.000000000 Z
11
+ date: 2020-07-22 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: bundler
@@ -104,6 +104,7 @@ extra_rdoc_files: []
104
104
  files:
105
105
  - ".gitignore"
106
106
  - ".travis.yml"
107
+ - CHANGELOG.md
107
108
  - CODE_OF_CONDUCT.md
108
109
  - Gemfile
109
110
  - Gemfile.lock
@@ -119,7 +120,6 @@ files:
119
120
  - ext/fast_jsonparser/simdjson.h
120
121
  - fast_jsonparser.gemspec
121
122
  - lib/fast_jsonparser.rb
122
- - lib/fast_jsonparser/fast_jsonparser.bundle
123
123
  - lib/fast_jsonparser/version.rb
124
124
  homepage: https://github.com/anilmaurya/fast_jsonparser
125
125
  licenses: