fast_jsonparser 0.3.0 → 0.4.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.gitignore +2 -0
- data/CHANGELOG.md +4 -0
- data/Gemfile.lock +1 -1
- data/README.md +16 -3
- data/Rakefile +1 -0
- data/ext/fast_jsonparser/fast_jsonparser.cpp +56 -45
- data/lib/fast_jsonparser.rb +32 -4
- data/lib/fast_jsonparser/version.rb +1 -1
- metadata +3 -3
- data/lib/fast_jsonparser/fast_jsonparser.bundle +0 -0
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 2b9a3639a83aa2f68468df13dcc8922b244c727c00b2e040a7dd94f9b3832698
|
4
|
+
data.tar.gz: 49e6c154239a25e2ecc695288166914ee60a1500d7e2773ef25d05e39eddfa4d
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: '08ebbd273a4180261ab78c50874e7a5cf28afa7c1718be05fbc8bec93bd6652097c88ad2b0ac7eba184f4755a209cc0e9256551d04e1350ac1139d4141a50412'
|
7
|
+
data.tar.gz: 6400ba1f5f0f0083b4c2a75f947888ab782695c66c676a1bb35011efde40b7ce2b1ab38d0fbbfc76eda50e80cef6194cc8bf495e3f8cb6b1aef0c8855bf2ed76
|
data/.gitignore
CHANGED
data/CHANGELOG.md
ADDED
@@ -0,0 +1,4 @@
|
|
1
|
+
# 0.4.0
|
2
|
+
* `load_many` accepts a `batch_size` parameter to parse documents larger than 1 MB in [PR #5](https://github.com/anilmaurya/fast_jsonparser/pull/5), thanks to [casperisfine](https://github.com/casperisfine)
|
3
|
+
* Add a `symbolize_keys` option, defaulting to true, in [PR #9](https://github.com/anilmaurya/fast_jsonparser/pull/9), thanks to [casperisfine](https://github.com/casperisfine)
|
4
|
+
* Parse string values as UTF-8 in [PR #10](https://github.com/anilmaurya/fast_jsonparser/pull/10), thanks to [casperisfine](https://github.com/casperisfine)
|
data/Gemfile.lock
CHANGED
data/README.md
CHANGED
@@ -107,9 +107,22 @@ Example: logs.json with following content
|
|
107
107
|
"17/May/2015:08:05:23 +0000"
|
108
108
|
"17/May/2015:08:05:24 +0000"
|
109
109
|
```
|
110
|
+
If the size of the JSON batch is greater than 1 MB, use the `batch_size` option:
|
110
111
|
|
112
|
+
```
|
113
|
+
FastJsonparser.load_many(f.path, batch_size: 2_000) {}
|
114
|
+
```
|
115
|
+
|
116
|
+
4. Accept optional param :symbolize_keys (default symbolize_keys: true)
|
117
|
+
|
118
|
+
If string keys are expected in the parsed result, use:
|
119
|
+
|
120
|
+
```
|
121
|
+
FastJsonparser.parse('{"one": 1, "two": 2}', symbolize_keys: false)
|
122
|
+
|
123
|
+
```
|
111
124
|
|
112
|
-
|
125
|
+
5. Raise FastJsonparser::ParseError when invalid JSON is provided for parsing
|
113
126
|
|
114
127
|
```
|
115
128
|
FastJsonparser.parse("123: 1") # FastJsonparser::ParseError (parse error)
|
@@ -124,9 +137,9 @@ FastJsonparser.parse("123: 1") # FastJsonparser::ParseError (parse error)
|
|
124
137
|
```
|
125
138
|
## Development
|
126
139
|
|
127
|
-
After checking out the repo, run `
|
140
|
+
After checking out the repo, run `rake compile` to install dependencies. Then, run `rake test` to run the tests.
|
128
141
|
|
129
|
-
|
142
|
+
For more options, refer to https://github.com/rake-compiler/rake-compiler
|
130
143
|
|
131
144
|
## Contributing
|
132
145
|
|
data/Rakefile
CHANGED
@@ -2,116 +2,122 @@
|
|
2
2
|
|
3
3
|
#include "simdjson.h"
|
4
4
|
|
5
|
-
VALUE
|
6
|
-
|
7
|
-
VALUE rb_eFastJsonparserParseError;
|
5
|
+
VALUE rb_eFastJsonparserUnknownError, rb_eFastJsonparserParseError;
|
8
6
|
|
9
7
|
using namespace simdjson;
|
10
8
|
|
11
9
|
// Convert tape to Ruby's Object
|
12
|
-
static VALUE make_ruby_object(dom::element element)
|
10
|
+
static VALUE make_ruby_object(dom::element element, bool symbolize_keys)
|
13
11
|
{
|
14
|
-
|
15
|
-
|
12
|
+
switch (element.type())
|
13
|
+
{
|
14
|
+
case dom::element_type::ARRAY:
|
16
15
|
{
|
17
16
|
VALUE ary = rb_ary_new();
|
18
17
|
for (dom::element x : element)
|
19
18
|
{
|
20
|
-
VALUE e = make_ruby_object(x);
|
19
|
+
VALUE e = make_ruby_object(x, symbolize_keys);
|
21
20
|
rb_ary_push(ary, e);
|
22
21
|
}
|
23
22
|
return ary;
|
24
23
|
}
|
25
|
-
|
24
|
+
case dom::element_type::OBJECT:
|
26
25
|
{
|
27
26
|
VALUE hash = rb_hash_new();
|
28
27
|
for (dom::key_value_pair field : dom::object(element))
|
29
28
|
{
|
30
29
|
std::string_view view(field.key);
|
31
|
-
VALUE k =
|
32
|
-
|
33
|
-
|
30
|
+
VALUE k = rb_utf8_str_new(view.data(), view.size());
|
31
|
+
if (symbolize_keys)
|
32
|
+
{
|
33
|
+
k = ID2SYM(rb_intern_str(k));
|
34
|
+
}
|
35
|
+
VALUE v = make_ruby_object(field.value, symbolize_keys);
|
36
|
+
rb_hash_aset(hash, k, v);
|
34
37
|
}
|
35
38
|
return hash;
|
36
39
|
}
|
37
|
-
|
40
|
+
case dom::element_type::INT64:
|
38
41
|
{
|
39
42
|
return LONG2NUM(element.get<int64_t>());
|
40
43
|
}
|
41
|
-
|
44
|
+
case dom::element_type::UINT64:
|
42
45
|
{
|
43
46
|
return ULONG2NUM(element.get<uint64_t>());
|
44
47
|
}
|
45
|
-
|
48
|
+
case dom::element_type::DOUBLE:
|
46
49
|
{
|
47
50
|
return DBL2NUM(double(element));
|
48
51
|
}
|
49
|
-
|
52
|
+
case dom::element_type::STRING:
|
50
53
|
{
|
51
54
|
std::string_view view(element);
|
52
|
-
return
|
55
|
+
return rb_utf8_str_new(view.data(), view.size());
|
53
56
|
}
|
54
|
-
|
57
|
+
case dom::element_type::BOOL:
|
55
58
|
{
|
56
59
|
return bool(element) ? Qtrue : Qfalse;
|
57
60
|
}
|
58
|
-
|
61
|
+
case dom::element_type::NULL_VALUE:
|
59
62
|
{
|
60
63
|
return Qnil;
|
61
64
|
}
|
65
|
+
}
|
62
66
|
// unknown case (bug)
|
63
67
|
rb_raise(rb_eException, "[BUG] must not happen");
|
64
68
|
}
|
65
69
|
|
66
|
-
static VALUE rb_fast_jsonparser_parse(VALUE self, VALUE arg)
|
70
|
+
static VALUE rb_fast_jsonparser_parse(VALUE self, VALUE arg, VALUE symbolize_keys)
|
67
71
|
{
|
68
72
|
Check_Type(arg, T_STRING);
|
69
73
|
|
70
74
|
dom::parser parser;
|
71
75
|
auto [doc, error] = parser.parse(RSTRING_PTR(arg), RSTRING_LEN(arg));
|
72
|
-
if (error
|
76
|
+
if (error != SUCCESS)
|
73
77
|
{
|
74
|
-
|
78
|
+
rb_raise(rb_eFastJsonparserParseError, "%s", error_message(error));
|
75
79
|
}
|
76
|
-
|
77
|
-
rb_raise(rb_eFastJsonparserParseError, "parse error");
|
78
|
-
return Qnil;
|
80
|
+
return make_ruby_object(doc, RTEST(symbolize_keys));
|
79
81
|
}
|
80
82
|
|
81
|
-
static VALUE rb_fast_jsonparser_load(VALUE self, VALUE arg)
|
83
|
+
static VALUE rb_fast_jsonparser_load(VALUE self, VALUE arg, VALUE symbolize_keys)
|
82
84
|
{
|
83
85
|
Check_Type(arg, T_STRING);
|
84
86
|
|
85
87
|
dom::parser parser;
|
86
88
|
auto [doc, error] = parser.load(RSTRING_PTR(arg));
|
87
|
-
if (error
|
89
|
+
if (error != SUCCESS)
|
88
90
|
{
|
89
|
-
|
91
|
+
rb_raise(rb_eFastJsonparserParseError, "%s", error_message(error));
|
90
92
|
}
|
91
|
-
|
92
|
-
rb_raise(rb_eFastJsonparserParseError, "parse error");
|
93
|
-
return Qnil;
|
93
|
+
return make_ruby_object(doc, RTEST(symbolize_keys));
|
94
94
|
}
|
95
95
|
|
96
|
-
static VALUE rb_fast_jsonparser_load_many(VALUE self, VALUE arg)
|
96
|
+
static VALUE rb_fast_jsonparser_load_many(VALUE self, VALUE arg, VALUE symbolize_keys, VALUE batch_size)
|
97
97
|
{
|
98
98
|
Check_Type(arg, T_STRING);
|
99
|
+
Check_Type(batch_size, T_FIXNUM);
|
99
100
|
|
100
|
-
|
101
|
-
auto [docs, error] = parser.load_many(RSTRING_PTR(arg));
|
102
|
-
if (error == SUCCESS)
|
101
|
+
try
|
103
102
|
{
|
103
|
+
dom::parser parser;
|
104
|
+
auto [docs, error] = parser.load_many(RSTRING_PTR(arg), FIX2INT(batch_size));
|
105
|
+
if (error != SUCCESS)
|
106
|
+
{
|
107
|
+
rb_raise(rb_eFastJsonparserParseError, "%s", error_message(error));
|
108
|
+
}
|
109
|
+
|
104
110
|
for (dom::element doc : docs)
|
105
111
|
{
|
106
|
-
|
107
|
-
{
|
108
|
-
rb_yield(make_ruby_object(doc));
|
109
|
-
}
|
112
|
+
rb_yield(make_ruby_object(doc, RTEST(symbolize_keys)));
|
110
113
|
}
|
114
|
+
|
111
115
|
return Qnil;
|
112
116
|
}
|
113
|
-
|
114
|
-
|
117
|
+
catch (simdjson::simdjson_error error)
|
118
|
+
{
|
119
|
+
rb_raise(rb_eFastJsonparserUnknownError, "%s", error.what());
|
120
|
+
}
|
115
121
|
}
|
116
122
|
|
117
123
|
extern "C"
|
@@ -119,10 +125,15 @@ extern "C"
|
|
119
125
|
|
120
126
|
void Init_fast_jsonparser(void)
|
121
127
|
{
|
122
|
-
rb_mFastJsonparser =
|
123
|
-
|
124
|
-
rb_define_module_function(rb_mFastJsonparser, "
|
125
|
-
rb_define_module_function(rb_mFastJsonparser, "
|
126
|
-
rb_define_module_function(rb_mFastJsonparser, "
|
128
|
+
VALUE rb_mFastJsonparser = rb_const_get(rb_cObject, rb_intern("FastJsonparser"));
|
129
|
+
|
130
|
+
rb_define_module_function(rb_mFastJsonparser, "_parse", reinterpret_cast<VALUE (*)(...)>(rb_fast_jsonparser_parse), 2);
|
131
|
+
rb_define_module_function(rb_mFastJsonparser, "_load", reinterpret_cast<VALUE (*)(...)>(rb_fast_jsonparser_load), 2);
|
132
|
+
rb_define_module_function(rb_mFastJsonparser, "_load_many", reinterpret_cast<VALUE (*)(...)>(rb_fast_jsonparser_load_many), 3);
|
133
|
+
|
134
|
+
rb_eFastJsonparserParseError = rb_const_get(rb_mFastJsonparser, rb_intern("ParseError"));
|
135
|
+
rb_global_variable(&rb_eFastJsonparserParseError);
|
136
|
+
rb_eFastJsonparserUnknownError = rb_const_get(rb_mFastJsonparser, rb_intern("UnknownError"));
|
137
|
+
rb_global_variable(&rb_eFastJsonparserUnknownError);
|
127
138
|
}
|
128
139
|
}
|
data/lib/fast_jsonparser.rb
CHANGED
@@ -1,8 +1,36 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
1
3
|
require "fast_jsonparser/version"
|
2
4
|
|
3
5
|
module FastJsonparser
|
4
|
-
|
5
|
-
|
6
|
-
|
6
|
+
Error = Class.new(StandardError)
|
7
|
+
ParseError = Class.new(Error)
|
8
|
+
UnknownError = Class.new(Error)
|
9
|
+
BatchSizeTooSmall = Class.new(Error)
|
10
|
+
|
11
|
+
DEFAULT_BATCH_SIZE = 1_000_000 # from include/simdjson/dom/parser.h
|
12
|
+
|
13
|
+
class << self
|
14
|
+
def parse(source, symbolize_keys: true)
|
15
|
+
_parse(source, symbolize_keys)
|
16
|
+
end
|
7
17
|
|
8
|
-
|
18
|
+
def load(source, symbolize_keys: true)
|
19
|
+
_load(source, symbolize_keys)
|
20
|
+
end
|
21
|
+
|
22
|
+
def load_many(source, symbolize_keys: true, batch_size: DEFAULT_BATCH_SIZE, &block)
|
23
|
+
_load_many(source, symbolize_keys, batch_size, &block)
|
24
|
+
rescue UnknownError => error
|
25
|
+
case error.message
|
26
|
+
when "This parser can't support a document that big"
|
27
|
+
raise BatchSizeTooSmall, "One of the documents was bigger than the batch size (#{batch_size}B), try increasing it."
|
28
|
+
else
|
29
|
+
raise
|
30
|
+
end
|
31
|
+
end
|
32
|
+
|
33
|
+
require "fast_jsonparser/fast_jsonparser" # loads cpp extension
|
34
|
+
private :_parse, :_load, :_load_many
|
35
|
+
end
|
36
|
+
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: fast_jsonparser
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.
|
4
|
+
version: 0.4.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Anil Maurya
|
8
8
|
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date: 2020-07-
|
11
|
+
date: 2020-07-22 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: bundler
|
@@ -104,6 +104,7 @@ extra_rdoc_files: []
|
|
104
104
|
files:
|
105
105
|
- ".gitignore"
|
106
106
|
- ".travis.yml"
|
107
|
+
- CHANGELOG.md
|
107
108
|
- CODE_OF_CONDUCT.md
|
108
109
|
- Gemfile
|
109
110
|
- Gemfile.lock
|
@@ -119,7 +120,6 @@ files:
|
|
119
120
|
- ext/fast_jsonparser/simdjson.h
|
120
121
|
- fast_jsonparser.gemspec
|
121
122
|
- lib/fast_jsonparser.rb
|
122
|
-
- lib/fast_jsonparser/fast_jsonparser.bundle
|
123
123
|
- lib/fast_jsonparser/version.rb
|
124
124
|
homepage: https://github.com/anilmaurya/fast_jsonparser
|
125
125
|
licenses:
|
Binary file
|