yaji 0.0.1 → 0.0.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/HISTORY.markdown +7 -0
- data/README.markdown +27 -0
- data/ext/yaji/parser_ext.c +55 -29
- data/ext/yaji/parser_ext.h +3 -1
- data/lib/yaji/version.rb +1 -1
- data/test/test_parser.rb +9 -0
- data/yaji.gemspec +1 -0
- metadata +17 -5
data/HISTORY.markdown
ADDED
data/README.markdown
CHANGED
@@ -3,6 +3,18 @@ Yet another JSON iterator
|
|
3
3
|
|
4
4
|
YAJI is a ruby wrapper to YAJL providing iterator interface to streaming JSON parser.
|
5
5
|
|
6
|
+
INSTALL
|
7
|
+
-------
|
8
|
+
|
9
|
+
This gem depends on [yajl][1], so you need its development headers installed
|
10
|
+
on your system to build this gem. On the Debian GNU/Linux family that is something like:
|
11
|
+
|
12
|
+
sudo apt-get install libyajl-dev
|
13
|
+
|
14
|
+
Now you are ready to install the YAJI gem:
|
15
|
+
|
16
|
+
gem install yaji
|
17
|
+
|
6
18
|
USAGE
|
7
19
|
-----
|
8
20
|
|
@@ -13,6 +25,17 @@ YAJI::Parser initializer accepts `IO` instance or `String`.
|
|
13
25
|
YAJI::Parser.new('{"foo":"bar"}')
|
14
26
|
YAJI::Parser.new(File.open('data.json'))
|
15
27
|
|
28
|
+
There is integration with [curb][2], so you can pass a `Curl::Easy` instance
|
29
|
+
as input for the parser.
|
30
|
+
|
31
|
+
require 'curl'
|
32
|
+
curl = Curl::Easy.new('http://avsej.net/test.json')
|
33
|
+
parser = YAJI::Parser.new(curl)
|
34
|
+
parser.each.to_a.first #=> {"foo"=>"bar", "baz"=>{"nums"=>[42, 3.1415]}}
|
35
|
+
|
36
|
+
There is no strict requirement, though: it can be any instance responding
|
37
|
+
to `#on_body` and `#perform`.
|
38
|
+
|
16
39
|
Parser instance provides two iterators to get JSON data: event-oriented
|
17
40
|
and object-oriented. `YAJI::Parser#parse` yields tuple `[path, event,
|
18
41
|
value]` describing some parser event. For example, this code
|
@@ -66,6 +89,7 @@ code above will print two lines:
|
|
66
89
|
You can use this iterator when the data is huge and you'd like to allow
|
67
90
|
GC to collect yielded objects before the parser finishes its job.
|
68
91
|
|
92
|
+
|
69
93
|
LICENSE
|
70
94
|
-------
|
71
95
|
|
@@ -82,3 +106,6 @@ distributed under the License is distributed on an "AS IS" BASIS,
|
|
82
106
|
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
83
107
|
See the License for the specific language governing permissions and
|
84
108
|
limitations under the License.
|
109
|
+
|
110
|
+
[1]: http://lloyd.github.com/yajl/
|
111
|
+
[2]: https://rubygems.org/gems/curb/
|
data/ext/yaji/parser_ext.c
CHANGED
@@ -22,8 +22,21 @@
|
|
22
22
|
|
23
23
|
#define RB_P(OBJ) \
|
24
24
|
rb_funcall(rb_stderr, rb_intern("print"), 1, rb_funcall(OBJ, rb_intern("object_id"), 0)); \
|
25
|
+
rb_funcall(rb_stderr, rb_intern("print"), 1, rb_str_new_cstr(" ")); \
|
26
|
+
rb_funcall(rb_stderr, rb_intern("print"), 1, rb_funcall(OBJ, rb_intern("class"), 0)); \
|
27
|
+
rb_funcall(rb_stderr, rb_intern("print"), 1, rb_str_new_cstr(" ")); \
|
25
28
|
rb_funcall(rb_stderr, rb_intern("puts"), 1, rb_funcall(OBJ, rb_intern("inspect"), 0));
|
26
29
|
|
30
|
+
#define RERAISE_PARSER_ERROR(parser) \
|
31
|
+
{ \
|
32
|
+
unsigned char* emsg = yajl_get_error(parser->handle, 1, \
|
33
|
+
(const unsigned char*)RSTRING_PTR(p->chunk), \
|
34
|
+
RSTRING_LEN(p->chunk)); \
|
35
|
+
VALUE errobj = rb_exc_new2(c_parse_error, (const char*) emsg); \
|
36
|
+
yajl_free_error(parser->handle, emsg); \
|
37
|
+
rb_exc_raise(errobj); \
|
38
|
+
}
|
39
|
+
|
27
40
|
static int yaji_null(void *ctx)
|
28
41
|
{
|
29
42
|
yaji_parser* p = (yaji_parser*) DATA_PTR(ctx);
|
@@ -143,6 +156,26 @@ static int yaji_end_array(void *ctx)
|
|
143
156
|
return STATUS_CONTINUE;
|
144
157
|
}
|
145
158
|
|
159
|
+
static VALUE rb_yaji_parser_parse_chunk(VALUE chunk, VALUE self)
|
160
|
+
{
|
161
|
+
yajl_status rc;
|
162
|
+
yaji_parser* p = (yaji_parser*) DATA_PTR(self);
|
163
|
+
const char* buf = RSTRING_PTR(chunk);
|
164
|
+
unsigned int len = RSTRING_LEN(chunk);
|
165
|
+
int i;
|
166
|
+
|
167
|
+
p->events = rb_ary_new();
|
168
|
+
p->chunk = chunk;
|
169
|
+
rc = yajl_parse(p->handle, (const unsigned char*)buf, len);
|
170
|
+
if (rc == yajl_status_error) {
|
171
|
+
RERAISE_PARSER_ERROR(p);
|
172
|
+
}
|
173
|
+
for (i=0; i<RARRAY_LEN(p->events); i++) {
|
174
|
+
rb_funcall(p->parser_cb, id_call, 1, RARRAY_PTR(p->events)[i]);
|
175
|
+
}
|
176
|
+
return rb_funcall(chunk, id_bytesize, 0, NULL);
|
177
|
+
}
|
178
|
+
|
146
179
|
static VALUE rb_yaji_parser_new(int argc, VALUE *argv, VALUE klass)
|
147
180
|
{
|
148
181
|
yaji_parser* p;
|
@@ -155,12 +188,16 @@ static VALUE rb_yaji_parser_new(int argc, VALUE *argv, VALUE klass)
|
|
155
188
|
p->symbolize_keys = 0;
|
156
189
|
p->rbufsize = Qnil;
|
157
190
|
p->input = Qnil;
|
191
|
+
p->parser_cb = Qnil;
|
158
192
|
|
159
193
|
rb_scan_args(argc, argv, "11", &p->input, &opts);
|
160
194
|
if (TYPE(p->input) == T_STRING) {
|
161
195
|
p->input = rb_class_new_instance(1, &p->input, c_stringio);
|
196
|
+
} else if (rb_respond_to(p->input, id_perform) && rb_respond_to(p->input, id_on_body)) {
|
197
|
+
rb_block_call(p->input, id_on_body, 0, NULL, rb_yaji_parser_parse_chunk, obj);
|
162
198
|
} else if (!rb_respond_to(p->input, id_read)) {
|
163
|
-
rb_raise(c_parse_error, "input must be a String or IO"
|
199
|
+
rb_raise(c_parse_error, "input must be a String or IO or "
|
200
|
+
"something responding to #perform and #on_body e.g. Curl::Easy");
|
164
201
|
}
|
165
202
|
if (!NIL_P(opts)) {
|
166
203
|
Check_Type(opts, T_HASH);
|
@@ -190,50 +227,35 @@ static VALUE rb_yaji_parser_init(int argc, VALUE *argv, VALUE self)
|
|
190
227
|
return self;
|
191
228
|
}
|
192
229
|
|
193
|
-
#define RERAISE_PARSER_ERROR(parser, chunk, len) \
|
194
|
-
{ \
|
195
|
-
unsigned char* emsg = yajl_get_error(parser, 1, chunk, len); \
|
196
|
-
VALUE errobj = rb_exc_new2(c_parse_error, (const char*) emsg); \
|
197
|
-
yajl_free_error(parser, emsg); \
|
198
|
-
rb_exc_raise(errobj); \
|
199
|
-
}
|
200
|
-
|
201
230
|
static VALUE rb_yaji_parser_parse(int argc, VALUE* argv, VALUE self)
|
202
231
|
{
|
203
232
|
yajl_status rc;
|
204
|
-
int i;
|
205
233
|
yaji_parser* p = (yaji_parser*) DATA_PTR(self);
|
206
|
-
|
207
|
-
const char* chunk = NULL;
|
208
|
-
unsigned int len = 0;
|
234
|
+
int i;
|
209
235
|
|
210
|
-
rb_scan_args(argc, argv, "00&", &
|
236
|
+
rb_scan_args(argc, argv, "00&", &p->parser_cb);
|
211
237
|
RETURN_ENUMERATOR(self, argc, argv);
|
212
238
|
|
213
|
-
rbuf = rb_str_new(NULL, 0);
|
214
239
|
p->path = rb_ary_new();
|
215
240
|
p->path_str = rb_str_new("", 0);
|
216
|
-
|
217
|
-
|
218
|
-
|
219
|
-
|
220
|
-
|
221
|
-
|
222
|
-
|
223
|
-
|
224
|
-
}
|
225
|
-
for (i=0; i<RARRAY_LEN(p->events); i++) {
|
226
|
-
rb_funcall(proc, id_call, 1, RARRAY_PTR(p->events)[i]);
|
241
|
+
p->chunk = Qnil;
|
242
|
+
|
243
|
+
if (rb_respond_to(p->input, id_perform)) {
|
244
|
+
rb_funcall(p->input, id_perform, 0);
|
245
|
+
} else {
|
246
|
+
p->chunk = rb_str_new(NULL, 0);
|
247
|
+
while (rb_funcall(p->input, id_read, 2, p->rbufsize, p->chunk) != Qnil) {
|
248
|
+
rb_yaji_parser_parse_chunk(p->chunk, self);
|
227
249
|
}
|
228
250
|
}
|
229
251
|
|
230
252
|
p->events = rb_ary_new();
|
231
253
|
rc = yajl_parse_complete(p->handle);
|
232
254
|
if (rc == yajl_status_insufficient_data || rc == yajl_status_error) {
|
233
|
-
RERAISE_PARSER_ERROR(p
|
255
|
+
RERAISE_PARSER_ERROR(p);
|
234
256
|
}
|
235
257
|
for (i=0; i<RARRAY_LEN(p->events); i++) {
|
236
|
-
rb_funcall(
|
258
|
+
rb_funcall(p->parser_cb, id_call, 1, RARRAY_PTR(p->events)[i]);
|
237
259
|
}
|
238
260
|
|
239
261
|
return Qnil;
|
@@ -326,6 +348,8 @@ static void rb_yaji_parser_mark(void *parser)
|
|
326
348
|
rb_gc_mark(p->events);
|
327
349
|
rb_gc_mark(p->path);
|
328
350
|
rb_gc_mark(p->path_str);
|
351
|
+
rb_gc_mark(p->parser_cb);
|
352
|
+
rb_gc_mark(p->chunk);
|
329
353
|
}
|
330
354
|
}
|
331
355
|
|
@@ -345,7 +369,9 @@ void Init_parser_ext() {
|
|
345
369
|
id_call = rb_intern("call");
|
346
370
|
id_read = rb_intern("read");
|
347
371
|
id_parse = rb_intern("parse");
|
348
|
-
|
372
|
+
id_perform = rb_intern("perform");
|
373
|
+
id_on_body = rb_intern("on_body");
|
374
|
+
id_bytesize = rb_intern("bytesize");
|
349
375
|
|
350
376
|
sym_allow_comments = ID2SYM(rb_intern("allow_comments"));
|
351
377
|
sym_check_utf8 = ID2SYM(rb_intern("check_utf8"));
|
data/ext/yaji/parser_ext.h
CHANGED
@@ -41,7 +41,7 @@ static rb_encoding *utf8_encoding;
|
|
41
41
|
|
42
42
|
static VALUE m_yaji, c_yaji_parser, c_parse_error, c_stringio;
|
43
43
|
|
44
|
-
static ID id_call, id_read, id_parse,
|
44
|
+
static ID id_call, id_read, id_parse, id_perform, id_on_body, id_bytesize;
|
45
45
|
static ID sym_allow_comments, sym_check_utf8, sym_symbolize_keys,
|
46
46
|
sym_read_buffer_size, sym_null, sym_boolean, sym_number, sym_string,
|
47
47
|
sym_hash_key, sym_start_hash, sym_end_hash, sym_start_array,
|
@@ -79,6 +79,8 @@ typedef struct {
|
|
79
79
|
VALUE events;
|
80
80
|
VALUE path;
|
81
81
|
VALUE path_str;
|
82
|
+
VALUE parser_cb;
|
83
|
+
VALUE chunk;
|
82
84
|
yajl_handle handle;
|
83
85
|
yajl_parser_config config;
|
84
86
|
} yaji_parser;
|
data/lib/yaji/version.rb
CHANGED
data/test/test_parser.rb
CHANGED
@@ -1,5 +1,6 @@
|
|
1
1
|
require 'minitest/autorun'
|
2
2
|
require 'yaji'
|
3
|
+
require 'curb'
|
3
4
|
|
4
5
|
class TestParser < MiniTest::Unit::TestCase
|
5
6
|
|
@@ -136,6 +137,14 @@ class TestParser < MiniTest::Unit::TestCase
|
|
136
137
|
assert expected, objects
|
137
138
|
end
|
138
139
|
|
140
|
+
def test_it_could_curb_async_approach
|
141
|
+
curl = Curl::Easy.new('http://avsej.net/test.json')
|
142
|
+
parser = YAJI::Parser.new(curl)
|
143
|
+
object = parser.each.to_a.first
|
144
|
+
expected = {"foo"=>"bar", "baz"=>{"nums"=>[42, 3.1415]}}
|
145
|
+
assert expected, object
|
146
|
+
end
|
147
|
+
|
139
148
|
protected
|
140
149
|
|
141
150
|
def toys_json_str
|
data/yaji.gemspec
CHANGED
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: yaji
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0.
|
4
|
+
version: 0.0.2
|
5
5
|
prerelease:
|
6
6
|
platform: ruby
|
7
7
|
authors:
|
@@ -13,7 +13,7 @@ date: 2011-08-19 00:00:00.000000000Z
|
|
13
13
|
dependencies:
|
14
14
|
- !ruby/object:Gem::Dependency
|
15
15
|
name: rake-compiler
|
16
|
-
requirement: &
|
16
|
+
requirement: &8959040 !ruby/object:Gem::Requirement
|
17
17
|
none: false
|
18
18
|
requirements:
|
19
19
|
- - ! '>='
|
@@ -21,10 +21,10 @@ dependencies:
|
|
21
21
|
version: '0'
|
22
22
|
type: :development
|
23
23
|
prerelease: false
|
24
|
-
version_requirements: *
|
24
|
+
version_requirements: *8959040
|
25
25
|
- !ruby/object:Gem::Dependency
|
26
26
|
name: minitest
|
27
|
-
requirement: &
|
27
|
+
requirement: &8958360 !ruby/object:Gem::Requirement
|
28
28
|
none: false
|
29
29
|
requirements:
|
30
30
|
- - ! '>='
|
@@ -32,7 +32,18 @@ dependencies:
|
|
32
32
|
version: '0'
|
33
33
|
type: :development
|
34
34
|
prerelease: false
|
35
|
-
version_requirements: *
|
35
|
+
version_requirements: *8958360
|
36
|
+
- !ruby/object:Gem::Dependency
|
37
|
+
name: curb
|
38
|
+
requirement: &8957820 !ruby/object:Gem::Requirement
|
39
|
+
none: false
|
40
|
+
requirements:
|
41
|
+
- - ! '>='
|
42
|
+
- !ruby/object:Gem::Version
|
43
|
+
version: '0'
|
44
|
+
type: :development
|
45
|
+
prerelease: false
|
46
|
+
version_requirements: *8957820
|
36
47
|
description: YAJI is a ruby wrapper to YAJL providing iterator interface to streaming
|
37
48
|
JSON parser
|
38
49
|
email: info@couchbase.com
|
@@ -43,6 +54,7 @@ extra_rdoc_files: []
|
|
43
54
|
files:
|
44
55
|
- .gitignore
|
45
56
|
- Gemfile
|
57
|
+
- HISTORY.markdown
|
46
58
|
- LICENSE
|
47
59
|
- README.markdown
|
48
60
|
- Rakefile
|