fast_jsonparser 0.3.0 → 0.4.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: cb4380ffb8ced606931028f66c05ddb3af40498a9aada56833be2f5ef0bc47b4
4
- data.tar.gz: 3a3159926d6f1b1b0d90431171b7c97a93df4082bc1371313c4fe31e6c4de0c8
3
+ metadata.gz: 2b9a3639a83aa2f68468df13dcc8922b244c727c00b2e040a7dd94f9b3832698
4
+ data.tar.gz: 49e6c154239a25e2ecc695288166914ee60a1500d7e2773ef25d05e39eddfa4d
5
5
  SHA512:
6
- metadata.gz: ffa4a69c6550db893fd93c1f13df5778004188f1f3b54944b2d20049366026bbf2720fb0d62dce124d279fab650ed4e2f12c763c48207323ec9a900da240b32a
7
- data.tar.gz: fe75932c18f3cf0d896536ffc1d4f8f30067fe85046c7c1bf1806e819674f3f0a48816240cc634ac5dd530eb06e80f7b2a35f63a91e5c4ff1e432e8bf5310df5
6
+ metadata.gz: '08ebbd273a4180261ab78c50874e7a5cf28afa7c1718be05fbc8bec93bd6652097c88ad2b0ac7eba184f4755a209cc0e9256551d04e1350ac1139d4141a50412'
7
+ data.tar.gz: 6400ba1f5f0f0083b4c2a75f947888ab782695c66c676a1bb35011efde40b7ce2b1ab38d0fbbfc76eda50e80cef6194cc8bf495e3f8cb6b1aef0c8855bf2ed76
data/.gitignore CHANGED
@@ -6,3 +6,5 @@
6
6
  /pkg/
7
7
  /spec/reports/
8
8
  /tmp/
9
+ *.so
10
+ *.bundle
@@ -0,0 +1,4 @@
1
+ # 0.4.0
2
+ * `load_many` accepts a `batch_size` parameter to parse documents larger than 1 MB in [PR #5](https://github.com/anilmaurya/fast_jsonparser/pull/5), thanks to [casperisfine](https://github.com/casperisfine)
3
+ * Add a `symbolize_keys` option, which defaults to `true`, in [PR #9](https://github.com/anilmaurya/fast_jsonparser/pull/9), thanks to [casperisfine](https://github.com/casperisfine)
4
+ * Parse string values as UTF-8 in [PR #10](https://github.com/anilmaurya/fast_jsonparser/pull/10), thanks to [casperisfine](https://github.com/casperisfine)
@@ -1,7 +1,7 @@
1
1
  PATH
2
2
  remote: .
3
3
  specs:
4
- fast_jsonparser (0.2.0)
4
+ fast_jsonparser (0.3.0)
5
5
 
6
6
  GEM
7
7
  remote: https://rubygems.org/
data/README.md CHANGED
@@ -107,9 +107,22 @@ Example: logs.json with following content
107
107
  "17/May/2015:08:05:23 +0000"
108
108
  "17/May/2015:08:05:24 +0000"
109
109
  ```
110
+ If a JSON document in the file is larger than 1 MB, pass the `batch_size` option:
110
111
 
112
+ ```
113
+ FastJsonparser.load_many(f.path, batch_size: 2_000) {}
114
+ ```
115
+
116
+ 4. Accept optional param :symbolize_keys (default symbolize_keys: true)
117
+
118
+ If string keys are expected in the parsed result, use:
119
+
120
+ ```
121
+ FastJsonparser.parse('{"one": 1, "two": 2}', symbolize_keys: false)
122
+
123
+ ```
111
124
 
112
- 4. Raise FastJsonparser::ParseError when invalid JSON provided for parsing
125
+ 5. Raise FastJsonparser::ParseError when invalid JSON is provided for parsing
113
126
 
114
127
  ```
115
128
  FastJsonparser.parse("123: 1") # FastJsonparser::ParseError (parse error)
@@ -124,9 +137,9 @@ FastJsonparser.parse("123: 1") # FastJsonparser::ParseError (parse error)
124
137
  ```
125
138
  ## Development
126
139
 
127
- After checking out the repo, run `bin/setup` to install dependencies. Then, run `rake test` to run the tests. You can also run `bin/console` for an interactive prompt that will allow you to experiment.
140
+ After checking out the repo, run `rake compile` to build the native extension. Then, run `rake test` to run the tests.
128
141
 
129
- To install this gem onto your local machine, run `bundle exec rake install`. To release a new version, update the version number in `version.rb`, and then run `bundle exec rake release`, which will create a git tag for the version, push git commits and tags, and push the `.gem` file to [rubygems.org](https://rubygems.org).
142
+ For more options, refer to https://github.com/rake-compiler/rake-compiler
130
143
 
131
144
  ## Contributing
132
145
 
data/Rakefile CHANGED
@@ -3,6 +3,7 @@ require "rake/testtask"
3
3
  require "rake/extensiontask"
4
4
 
5
5
  Rake::ExtensionTask.new("fast_jsonparser") do |ext|
6
+ ext.ext_dir = 'ext/fast_jsonparser'
6
7
  ext.lib_dir = "lib/fast_jsonparser"
7
8
  end
8
9
 
@@ -2,116 +2,122 @@
2
2
 
3
3
  #include "simdjson.h"
4
4
 
5
- VALUE rb_mFastJsonparser;
6
-
7
- VALUE rb_eFastJsonparserParseError;
5
+ VALUE rb_eFastJsonparserUnknownError, rb_eFastJsonparserParseError;
8
6
 
9
7
  using namespace simdjson;
10
8
 
11
9
  // Convert tape to Ruby's Object
12
- static VALUE make_ruby_object(dom::element element)
10
+ static VALUE make_ruby_object(dom::element element, bool symbolize_keys)
13
11
  {
14
- auto t = element.type();
15
- if (t == dom::element_type::ARRAY)
12
+ switch (element.type())
13
+ {
14
+ case dom::element_type::ARRAY:
16
15
  {
17
16
  VALUE ary = rb_ary_new();
18
17
  for (dom::element x : element)
19
18
  {
20
- VALUE e = make_ruby_object(x);
19
+ VALUE e = make_ruby_object(x, symbolize_keys);
21
20
  rb_ary_push(ary, e);
22
21
  }
23
22
  return ary;
24
23
  }
25
- else if (t == dom::element_type::OBJECT)
24
+ case dom::element_type::OBJECT:
26
25
  {
27
26
  VALUE hash = rb_hash_new();
28
27
  for (dom::key_value_pair field : dom::object(element))
29
28
  {
30
29
  std::string_view view(field.key);
31
- VALUE k = rb_intern(view.data());
32
- VALUE v = make_ruby_object(field.value);
33
- rb_hash_aset(hash, ID2SYM(k), v);
30
+ VALUE k = rb_utf8_str_new(view.data(), view.size());
31
+ if (symbolize_keys)
32
+ {
33
+ k = ID2SYM(rb_intern_str(k));
34
+ }
35
+ VALUE v = make_ruby_object(field.value, symbolize_keys);
36
+ rb_hash_aset(hash, k, v);
34
37
  }
35
38
  return hash;
36
39
  }
37
- else if (t == dom::element_type::INT64)
40
+ case dom::element_type::INT64:
38
41
  {
39
42
  return LONG2NUM(element.get<int64_t>());
40
43
  }
41
- else if (t == dom::element_type::UINT64)
44
+ case dom::element_type::UINT64:
42
45
  {
43
46
  return ULONG2NUM(element.get<uint64_t>());
44
47
  }
45
- else if (t == dom::element_type::DOUBLE)
48
+ case dom::element_type::DOUBLE:
46
49
  {
47
50
  return DBL2NUM(double(element));
48
51
  }
49
- else if (t == dom::element_type::STRING)
52
+ case dom::element_type::STRING:
50
53
  {
51
54
  std::string_view view(element);
52
- return rb_str_new(view.data(), view.size());
55
+ return rb_utf8_str_new(view.data(), view.size());
53
56
  }
54
- else if (t == dom::element_type::BOOL)
57
+ case dom::element_type::BOOL:
55
58
  {
56
59
  return bool(element) ? Qtrue : Qfalse;
57
60
  }
58
- else if (t == dom::element_type::NULL_VALUE)
61
+ case dom::element_type::NULL_VALUE:
59
62
  {
60
63
  return Qnil;
61
64
  }
65
+ }
62
66
  // unknown case (bug)
63
67
  rb_raise(rb_eException, "[BUG] must not happen");
64
68
  }
65
69
 
66
- static VALUE rb_fast_jsonparser_parse(VALUE self, VALUE arg)
70
+ static VALUE rb_fast_jsonparser_parse(VALUE self, VALUE arg, VALUE symbolize_keys)
67
71
  {
68
72
  Check_Type(arg, T_STRING);
69
73
 
70
74
  dom::parser parser;
71
75
  auto [doc, error] = parser.parse(RSTRING_PTR(arg), RSTRING_LEN(arg));
72
- if (error == SUCCESS)
76
+ if (error != SUCCESS)
73
77
  {
74
- return make_ruby_object(doc);
78
+ rb_raise(rb_eFastJsonparserParseError, "%s", error_message(error));
75
79
  }
76
- // TODO better error handling
77
- rb_raise(rb_eFastJsonparserParseError, "parse error");
78
- return Qnil;
80
+ return make_ruby_object(doc, RTEST(symbolize_keys));
79
81
  }
80
82
 
81
- static VALUE rb_fast_jsonparser_load(VALUE self, VALUE arg)
83
+ static VALUE rb_fast_jsonparser_load(VALUE self, VALUE arg, VALUE symbolize_keys)
82
84
  {
83
85
  Check_Type(arg, T_STRING);
84
86
 
85
87
  dom::parser parser;
86
88
  auto [doc, error] = parser.load(RSTRING_PTR(arg));
87
- if (error == SUCCESS)
89
+ if (error != SUCCESS)
88
90
  {
89
- return make_ruby_object(doc);
91
+ rb_raise(rb_eFastJsonparserParseError, "%s", error_message(error));
90
92
  }
91
- // TODO better error handling
92
- rb_raise(rb_eFastJsonparserParseError, "parse error");
93
- return Qnil;
93
+ return make_ruby_object(doc, RTEST(symbolize_keys));
94
94
  }
95
95
 
96
- static VALUE rb_fast_jsonparser_load_many(VALUE self, VALUE arg)
96
+ static VALUE rb_fast_jsonparser_load_many(VALUE self, VALUE arg, VALUE symbolize_keys, VALUE batch_size)
97
97
  {
98
98
  Check_Type(arg, T_STRING);
99
+ Check_Type(batch_size, T_FIXNUM);
99
100
 
100
- dom::parser parser;
101
- auto [docs, error] = parser.load_many(RSTRING_PTR(arg));
102
- if (error == SUCCESS)
101
+ try
103
102
  {
103
+ dom::parser parser;
104
+ auto [docs, error] = parser.load_many(RSTRING_PTR(arg), FIX2INT(batch_size));
105
+ if (error != SUCCESS)
106
+ {
107
+ rb_raise(rb_eFastJsonparserParseError, "%s", error_message(error));
108
+ }
109
+
104
110
  for (dom::element doc : docs)
105
111
  {
106
- if (rb_block_given_p())
107
- {
108
- rb_yield(make_ruby_object(doc));
109
- }
112
+ rb_yield(make_ruby_object(doc, RTEST(symbolize_keys)));
110
113
  }
114
+
111
115
  return Qnil;
112
116
  }
113
- rb_raise(rb_eFastJsonparserParseError, "parse error");
114
- return Qnil;
117
+ catch (simdjson::simdjson_error error)
118
+ {
119
+ rb_raise(rb_eFastJsonparserUnknownError, "%s", error.what());
120
+ }
115
121
  }
116
122
 
117
123
  extern "C"
@@ -119,10 +125,15 @@ extern "C"
119
125
 
120
126
  void Init_fast_jsonparser(void)
121
127
  {
122
- rb_mFastJsonparser = rb_define_module("FastJsonparser");
123
- rb_eFastJsonparserParseError = rb_define_class_under(rb_mFastJsonparser, "ParseError", rb_eStandardError);
124
- rb_define_module_function(rb_mFastJsonparser, "parse", reinterpret_cast<VALUE (*)(...)>(rb_fast_jsonparser_parse), 1);
125
- rb_define_module_function(rb_mFastJsonparser, "load", reinterpret_cast<VALUE (*)(...)>(rb_fast_jsonparser_load), 1);
126
- rb_define_module_function(rb_mFastJsonparser, "load_many", reinterpret_cast<VALUE (*)(...)>(rb_fast_jsonparser_load_many), 1);
128
+ VALUE rb_mFastJsonparser = rb_const_get(rb_cObject, rb_intern("FastJsonparser"));
129
+
130
+ rb_define_module_function(rb_mFastJsonparser, "_parse", reinterpret_cast<VALUE (*)(...)>(rb_fast_jsonparser_parse), 2);
131
+ rb_define_module_function(rb_mFastJsonparser, "_load", reinterpret_cast<VALUE (*)(...)>(rb_fast_jsonparser_load), 2);
132
+ rb_define_module_function(rb_mFastJsonparser, "_load_many", reinterpret_cast<VALUE (*)(...)>(rb_fast_jsonparser_load_many), 3);
133
+
134
+ rb_eFastJsonparserParseError = rb_const_get(rb_mFastJsonparser, rb_intern("ParseError"));
135
+ rb_global_variable(&rb_eFastJsonparserParseError);
136
+ rb_eFastJsonparserUnknownError = rb_const_get(rb_mFastJsonparser, rb_intern("UnknownError"));
137
+ rb_global_variable(&rb_eFastJsonparserUnknownError);
127
138
  }
128
139
  }
@@ -1,8 +1,36 @@
1
+ # frozen_string_literal: true
2
+
1
3
  require "fast_jsonparser/version"
2
4
 
3
5
  module FastJsonparser
4
- class Error < StandardError; end
5
- # Your code goes here...
6
- end
6
+ Error = Class.new(StandardError)
7
+ ParseError = Class.new(Error)
8
+ UnknownError = Class.new(Error)
9
+ BatchSizeTooSmall = Class.new(Error)
10
+
11
+ DEFAULT_BATCH_SIZE = 1_000_000 # from include/simdjson/dom/parser.h
12
+
13
+ class << self
14
+ def parse(source, symbolize_keys: true)
15
+ _parse(source, symbolize_keys)
16
+ end
7
17
 
8
- require "fast_jsonparser/fast_jsonparser" # loads cpp extension
18
+ def load(source, symbolize_keys: true)
19
+ _load(source, symbolize_keys)
20
+ end
21
+
22
+ def load_many(source, symbolize_keys: true, batch_size: DEFAULT_BATCH_SIZE, &block)
23
+ _load_many(source, symbolize_keys, batch_size, &block)
24
+ rescue UnknownError => error
25
+ case error.message
26
+ when "This parser can't support a document that big"
27
+ raise BatchSizeTooSmall, "One of the documents was bigger than the batch size (#{batch_size}B), try increasing it."
28
+ else
29
+ raise
30
+ end
31
+ end
32
+
33
+ require "fast_jsonparser/fast_jsonparser" # loads cpp extension
34
+ private :_parse, :_load, :_load_many
35
+ end
36
+ end
@@ -1,3 +1,3 @@
1
1
  module FastJsonparser
2
- VERSION = "0.3.0"
2
+ VERSION = "0.4.0"
3
3
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: fast_jsonparser
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.3.0
4
+ version: 0.4.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Anil Maurya
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2020-07-14 00:00:00.000000000 Z
11
+ date: 2020-07-22 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: bundler
@@ -104,6 +104,7 @@ extra_rdoc_files: []
104
104
  files:
105
105
  - ".gitignore"
106
106
  - ".travis.yml"
107
+ - CHANGELOG.md
107
108
  - CODE_OF_CONDUCT.md
108
109
  - Gemfile
109
110
  - Gemfile.lock
@@ -119,7 +120,6 @@ files:
119
120
  - ext/fast_jsonparser/simdjson.h
120
121
  - fast_jsonparser.gemspec
121
122
  - lib/fast_jsonparser.rb
122
- - lib/fast_jsonparser/fast_jsonparser.bundle
123
123
  - lib/fast_jsonparser/version.rb
124
124
  homepage: https://github.com/anilmaurya/fast_jsonparser
125
125
  licenses: