fast_jsonparser 0.3.0 → 0.4.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.gitignore +2 -0
- data/CHANGELOG.md +4 -0
- data/Gemfile.lock +1 -1
- data/README.md +16 -3
- data/Rakefile +1 -0
- data/ext/fast_jsonparser/fast_jsonparser.cpp +56 -45
- data/lib/fast_jsonparser.rb +32 -4
- data/lib/fast_jsonparser/version.rb +1 -1
- metadata +3 -3
- data/lib/fast_jsonparser/fast_jsonparser.bundle +0 -0
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 2b9a3639a83aa2f68468df13dcc8922b244c727c00b2e040a7dd94f9b3832698
|
4
|
+
data.tar.gz: 49e6c154239a25e2ecc695288166914ee60a1500d7e2773ef25d05e39eddfa4d
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: '08ebbd273a4180261ab78c50874e7a5cf28afa7c1718be05fbc8bec93bd6652097c88ad2b0ac7eba184f4755a209cc0e9256551d04e1350ac1139d4141a50412'
|
7
|
+
data.tar.gz: 6400ba1f5f0f0083b4c2a75f947888ab782695c66c676a1bb35011efde40b7ce2b1ab38d0fbbfc76eda50e80cef6194cc8bf495e3f8cb6b1aef0c8855bf2ed76
|
data/.gitignore
CHANGED
data/CHANGELOG.md
ADDED
@@ -0,0 +1,4 @@
|
|
1
|
+
# 0.4.0
|
2
|
+
* `load_many` accepts a `batch_size` parameter to parse documents larger than 1 MB, in [PR #5](https://github.com/anilmaurya/fast_jsonparser/pull/5), thanks to [casperisfine](https://github.com/casperisfine)
|
3
|
+
* Add a `symbolize_keys` option, which defaults to true, in [PR #9](https://github.com/anilmaurya/fast_jsonparser/pull/9), thanks to [casperisfine](https://github.com/casperisfine)
|
4
|
+
* Parse string values as UTF-8 in [PR #10](https://github.com/anilmaurya/fast_jsonparser/pull/10), thanks to [casperisfine](https://github.com/casperisfine)
|
data/Gemfile.lock
CHANGED
data/README.md
CHANGED
@@ -107,9 +107,22 @@ Example: logs.json with following content
|
|
107
107
|
"17/May/2015:08:05:23 +0000"
|
108
108
|
"17/May/2015:08:05:24 +0000"
|
109
109
|
```
|
110
|
+
If the size of a JSON batch is greater than 1 MB, use the `batch_size` option
|
110
111
|
|
112
|
+
```
|
113
|
+
FastJsonparser.load_many(f.path, batch_size: 2_000) {}
|
114
|
+
```
|
115
|
+
|
116
|
+
4. Accepts an optional `symbolize_keys` param (default: `symbolize_keys: true`)
|
117
|
+
|
118
|
+
If string keys are expected in the parsed result, use
|
119
|
+
|
120
|
+
```
|
121
|
+
FastJsonparser.parse('{"one": 1, "two": 2}', symbolize_keys: false)
|
122
|
+
|
123
|
+
```
|
111
124
|
|
112
|
-
|
125
|
+
5. Raises FastJsonparser::ParseError when invalid JSON is provided for parsing
|
113
126
|
|
114
127
|
```
|
115
128
|
FastJsonparser.parse("123: 1") # FastJsonparser::ParseError (parse error)
|
@@ -124,9 +137,9 @@ FastJsonparser.parse("123: 1") # FastJsonparser::ParseError (parse error)
|
|
124
137
|
```
|
125
138
|
## Development
|
126
139
|
|
127
|
-
After checking out the repo, run `
|
140
|
+
After checking out the repo, run `rake compile` to install dependencies. Then, run `rake test` to run the tests.
|
128
141
|
|
129
|
-
|
142
|
+
For more options, refer to https://github.com/rake-compiler/rake-compiler
|
130
143
|
|
131
144
|
## Contributing
|
132
145
|
|
data/Rakefile
CHANGED
@@ -2,116 +2,122 @@
|
|
2
2
|
|
3
3
|
#include "simdjson.h"
|
4
4
|
|
5
|
-
VALUE
|
6
|
-
|
7
|
-
VALUE rb_eFastJsonparserParseError;
|
5
|
+
VALUE rb_eFastJsonparserUnknownError, rb_eFastJsonparserParseError;
|
8
6
|
|
9
7
|
using namespace simdjson;
|
10
8
|
|
11
9
|
// Convert tape to Ruby's Object
|
12
|
-
static VALUE make_ruby_object(dom::element element)
|
10
|
+
static VALUE make_ruby_object(dom::element element, bool symbolize_keys)
|
13
11
|
{
|
14
|
-
|
15
|
-
|
12
|
+
switch (element.type())
|
13
|
+
{
|
14
|
+
case dom::element_type::ARRAY:
|
16
15
|
{
|
17
16
|
VALUE ary = rb_ary_new();
|
18
17
|
for (dom::element x : element)
|
19
18
|
{
|
20
|
-
VALUE e = make_ruby_object(x);
|
19
|
+
VALUE e = make_ruby_object(x, symbolize_keys);
|
21
20
|
rb_ary_push(ary, e);
|
22
21
|
}
|
23
22
|
return ary;
|
24
23
|
}
|
25
|
-
|
24
|
+
case dom::element_type::OBJECT:
|
26
25
|
{
|
27
26
|
VALUE hash = rb_hash_new();
|
28
27
|
for (dom::key_value_pair field : dom::object(element))
|
29
28
|
{
|
30
29
|
std::string_view view(field.key);
|
31
|
-
VALUE k =
|
32
|
-
|
33
|
-
|
30
|
+
VALUE k = rb_utf8_str_new(view.data(), view.size());
|
31
|
+
if (symbolize_keys)
|
32
|
+
{
|
33
|
+
k = ID2SYM(rb_intern_str(k));
|
34
|
+
}
|
35
|
+
VALUE v = make_ruby_object(field.value, symbolize_keys);
|
36
|
+
rb_hash_aset(hash, k, v);
|
34
37
|
}
|
35
38
|
return hash;
|
36
39
|
}
|
37
|
-
|
40
|
+
case dom::element_type::INT64:
|
38
41
|
{
|
39
42
|
return LONG2NUM(element.get<int64_t>());
|
40
43
|
}
|
41
|
-
|
44
|
+
case dom::element_type::UINT64:
|
42
45
|
{
|
43
46
|
return ULONG2NUM(element.get<uint64_t>());
|
44
47
|
}
|
45
|
-
|
48
|
+
case dom::element_type::DOUBLE:
|
46
49
|
{
|
47
50
|
return DBL2NUM(double(element));
|
48
51
|
}
|
49
|
-
|
52
|
+
case dom::element_type::STRING:
|
50
53
|
{
|
51
54
|
std::string_view view(element);
|
52
|
-
return
|
55
|
+
return rb_utf8_str_new(view.data(), view.size());
|
53
56
|
}
|
54
|
-
|
57
|
+
case dom::element_type::BOOL:
|
55
58
|
{
|
56
59
|
return bool(element) ? Qtrue : Qfalse;
|
57
60
|
}
|
58
|
-
|
61
|
+
case dom::element_type::NULL_VALUE:
|
59
62
|
{
|
60
63
|
return Qnil;
|
61
64
|
}
|
65
|
+
}
|
62
66
|
// unknown case (bug)
|
63
67
|
rb_raise(rb_eException, "[BUG] must not happen");
|
64
68
|
}
|
65
69
|
|
66
|
-
static VALUE rb_fast_jsonparser_parse(VALUE self, VALUE arg)
|
70
|
+
static VALUE rb_fast_jsonparser_parse(VALUE self, VALUE arg, VALUE symbolize_keys)
|
67
71
|
{
|
68
72
|
Check_Type(arg, T_STRING);
|
69
73
|
|
70
74
|
dom::parser parser;
|
71
75
|
auto [doc, error] = parser.parse(RSTRING_PTR(arg), RSTRING_LEN(arg));
|
72
|
-
if (error
|
76
|
+
if (error != SUCCESS)
|
73
77
|
{
|
74
|
-
|
78
|
+
rb_raise(rb_eFastJsonparserParseError, "%s", error_message(error));
|
75
79
|
}
|
76
|
-
|
77
|
-
rb_raise(rb_eFastJsonparserParseError, "parse error");
|
78
|
-
return Qnil;
|
80
|
+
return make_ruby_object(doc, RTEST(symbolize_keys));
|
79
81
|
}
|
80
82
|
|
81
|
-
static VALUE rb_fast_jsonparser_load(VALUE self, VALUE arg)
|
83
|
+
static VALUE rb_fast_jsonparser_load(VALUE self, VALUE arg, VALUE symbolize_keys)
|
82
84
|
{
|
83
85
|
Check_Type(arg, T_STRING);
|
84
86
|
|
85
87
|
dom::parser parser;
|
86
88
|
auto [doc, error] = parser.load(RSTRING_PTR(arg));
|
87
|
-
if (error
|
89
|
+
if (error != SUCCESS)
|
88
90
|
{
|
89
|
-
|
91
|
+
rb_raise(rb_eFastJsonparserParseError, "%s", error_message(error));
|
90
92
|
}
|
91
|
-
|
92
|
-
rb_raise(rb_eFastJsonparserParseError, "parse error");
|
93
|
-
return Qnil;
|
93
|
+
return make_ruby_object(doc, RTEST(symbolize_keys));
|
94
94
|
}
|
95
95
|
|
96
|
-
static VALUE rb_fast_jsonparser_load_many(VALUE self, VALUE arg)
|
96
|
+
static VALUE rb_fast_jsonparser_load_many(VALUE self, VALUE arg, VALUE symbolize_keys, VALUE batch_size)
|
97
97
|
{
|
98
98
|
Check_Type(arg, T_STRING);
|
99
|
+
Check_Type(batch_size, T_FIXNUM);
|
99
100
|
|
100
|
-
|
101
|
-
auto [docs, error] = parser.load_many(RSTRING_PTR(arg));
|
102
|
-
if (error == SUCCESS)
|
101
|
+
try
|
103
102
|
{
|
103
|
+
dom::parser parser;
|
104
|
+
auto [docs, error] = parser.load_many(RSTRING_PTR(arg), FIX2INT(batch_size));
|
105
|
+
if (error != SUCCESS)
|
106
|
+
{
|
107
|
+
rb_raise(rb_eFastJsonparserParseError, "%s", error_message(error));
|
108
|
+
}
|
109
|
+
|
104
110
|
for (dom::element doc : docs)
|
105
111
|
{
|
106
|
-
|
107
|
-
{
|
108
|
-
rb_yield(make_ruby_object(doc));
|
109
|
-
}
|
112
|
+
rb_yield(make_ruby_object(doc, RTEST(symbolize_keys)));
|
110
113
|
}
|
114
|
+
|
111
115
|
return Qnil;
|
112
116
|
}
|
113
|
-
|
114
|
-
|
117
|
+
catch (simdjson::simdjson_error error)
|
118
|
+
{
|
119
|
+
rb_raise(rb_eFastJsonparserUnknownError, "%s", error.what());
|
120
|
+
}
|
115
121
|
}
|
116
122
|
|
117
123
|
extern "C"
|
@@ -119,10 +125,15 @@ extern "C"
|
|
119
125
|
|
120
126
|
void Init_fast_jsonparser(void)
|
121
127
|
{
|
122
|
-
rb_mFastJsonparser =
|
123
|
-
|
124
|
-
rb_define_module_function(rb_mFastJsonparser, "
|
125
|
-
rb_define_module_function(rb_mFastJsonparser, "
|
126
|
-
rb_define_module_function(rb_mFastJsonparser, "
|
128
|
+
VALUE rb_mFastJsonparser = rb_const_get(rb_cObject, rb_intern("FastJsonparser"));
|
129
|
+
|
130
|
+
rb_define_module_function(rb_mFastJsonparser, "_parse", reinterpret_cast<VALUE (*)(...)>(rb_fast_jsonparser_parse), 2);
|
131
|
+
rb_define_module_function(rb_mFastJsonparser, "_load", reinterpret_cast<VALUE (*)(...)>(rb_fast_jsonparser_load), 2);
|
132
|
+
rb_define_module_function(rb_mFastJsonparser, "_load_many", reinterpret_cast<VALUE (*)(...)>(rb_fast_jsonparser_load_many), 3);
|
133
|
+
|
134
|
+
rb_eFastJsonparserParseError = rb_const_get(rb_mFastJsonparser, rb_intern("ParseError"));
|
135
|
+
rb_global_variable(&rb_eFastJsonparserParseError);
|
136
|
+
rb_eFastJsonparserUnknownError = rb_const_get(rb_mFastJsonparser, rb_intern("UnknownError"));
|
137
|
+
rb_global_variable(&rb_eFastJsonparserUnknownError);
|
127
138
|
}
|
128
139
|
}
|
data/lib/fast_jsonparser.rb
CHANGED
@@ -1,8 +1,36 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
1
3
|
require "fast_jsonparser/version"
|
2
4
|
|
3
5
|
module FastJsonparser
|
4
|
-
|
5
|
-
|
6
|
-
|
6
|
+
Error = Class.new(StandardError)
|
7
|
+
ParseError = Class.new(Error)
|
8
|
+
UnknownError = Class.new(Error)
|
9
|
+
BatchSizeTooSmall = Class.new(Error)
|
10
|
+
|
11
|
+
DEFAULT_BATCH_SIZE = 1_000_000 # from include/simdjson/dom/parser.h
|
12
|
+
|
13
|
+
class << self
|
14
|
+
def parse(source, symbolize_keys: true)
|
15
|
+
_parse(source, symbolize_keys)
|
16
|
+
end
|
7
17
|
|
8
|
-
|
18
|
+
def load(source, symbolize_keys: true)
|
19
|
+
_load(source, symbolize_keys)
|
20
|
+
end
|
21
|
+
|
22
|
+
def load_many(source, symbolize_keys: true, batch_size: DEFAULT_BATCH_SIZE, &block)
|
23
|
+
_load_many(source, symbolize_keys, batch_size, &block)
|
24
|
+
rescue UnknownError => error
|
25
|
+
case error.message
|
26
|
+
when "This parser can't support a document that big"
|
27
|
+
raise BatchSizeTooSmall, "One of the documents was bigger than the batch size (#{batch_size}B), try increasing it."
|
28
|
+
else
|
29
|
+
raise
|
30
|
+
end
|
31
|
+
end
|
32
|
+
|
33
|
+
require "fast_jsonparser/fast_jsonparser" # loads cpp extension
|
34
|
+
private :_parse, :_load, :_load_many
|
35
|
+
end
|
36
|
+
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: fast_jsonparser
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.
|
4
|
+
version: 0.4.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Anil Maurya
|
8
8
|
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date: 2020-07-
|
11
|
+
date: 2020-07-22 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: bundler
|
@@ -104,6 +104,7 @@ extra_rdoc_files: []
|
|
104
104
|
files:
|
105
105
|
- ".gitignore"
|
106
106
|
- ".travis.yml"
|
107
|
+
- CHANGELOG.md
|
107
108
|
- CODE_OF_CONDUCT.md
|
108
109
|
- Gemfile
|
109
110
|
- Gemfile.lock
|
@@ -119,7 +120,6 @@ files:
|
|
119
120
|
- ext/fast_jsonparser/simdjson.h
|
120
121
|
- fast_jsonparser.gemspec
|
121
122
|
- lib/fast_jsonparser.rb
|
122
|
-
- lib/fast_jsonparser/fast_jsonparser.bundle
|
123
123
|
- lib/fast_jsonparser/version.rb
|
124
124
|
homepage: https://github.com/anilmaurya/fast_jsonparser
|
125
125
|
licenses:
|
Binary file
|