yaji 0.0.1 → 0.0.2
Sign up to get free protection for your applications and to get access to all the features.
- data/HISTORY.markdown +7 -0
- data/README.markdown +27 -0
- data/ext/yaji/parser_ext.c +55 -29
- data/ext/yaji/parser_ext.h +3 -1
- data/lib/yaji/version.rb +1 -1
- data/test/test_parser.rb +9 -0
- data/yaji.gemspec +1 -0
- metadata +17 -5
data/HISTORY.markdown
ADDED
data/README.markdown
CHANGED
@@ -3,6 +3,18 @@ Yet another JSON iterator
|
|
3
3
|
|
4
4
|
YAJI is a ruby wrapper to YAJL providing iterator interface to streaming JSON parser.
|
5
5
|
|
6
|
+
INSTALL
|
7
|
+
-------
|
8
|
+
|
9
|
+
This gem depends on [yajl][1], so you need its development headers installed
|
10
|
+
on your system to build this gem. For the Debian GNU/Linux family it will be something like:
|
11
|
+
|
12
|
+
sudo apt-get install libyajl-dev
|
13
|
+
|
14
|
+
Now you are ready to install the YAJI gem:
|
15
|
+
|
16
|
+
gem install yaji
|
17
|
+
|
6
18
|
USAGE
|
7
19
|
-----
|
8
20
|
|
@@ -13,6 +25,17 @@ YAJI::Parser initializer accepts `IO` instance or `String`.
|
|
13
25
|
YAJI::Parser.new('{"foo":"bar"}')
|
14
26
|
YAJI::Parser.new(File.open('data.json'))
|
15
27
|
|
28
|
+
There is integration with [curb][2], so you can pass a `Curl::Easy` instance
|
29
|
+
as input for the parser.
|
30
|
+
|
31
|
+
require 'curl'
|
32
|
+
curl = Curl::Easy.new('http://avsej.net/test.json')
|
33
|
+
parser = YAJI::Parser.new(curl)
|
34
|
+
parser.each.to_a.first #=> {"foo"=>"bar", "baz"=>{"nums"=>[42, 3.1415]}}
|
35
|
+
|
36
|
+
There is no strict requirement, though: it could be any instance responding
|
37
|
+
to `#on_body` and `#perform`.
|
38
|
+
|
16
39
|
Parser instance provides two iterators to get JSON data: event-oriented
|
17
40
|
and object-oriented. `YAJI::Parser#parse` yields tuple `[path, event,
|
18
41
|
value]` describing some parser event. For example, this code
|
@@ -66,6 +89,7 @@ code above will print two lines:
|
|
66
89
|
You can use this iterator when the data is huge and you'd like to allow
|
67
90
|
GC to collect yielded objects before the parser finishes its job.
|
68
91
|
|
92
|
+
|
69
93
|
LICENSE
|
70
94
|
-------
|
71
95
|
|
@@ -82,3 +106,6 @@ distributed under the License is distributed on an "AS IS" BASIS,
|
|
82
106
|
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
83
107
|
See the License for the specific language governing permissions and
|
84
108
|
limitations under the License.
|
109
|
+
|
110
|
+
[1]: http://lloyd.github.com/yajl/
|
111
|
+
[2]: https://rubygems.org/gems/curb/
|
data/ext/yaji/parser_ext.c
CHANGED
@@ -22,8 +22,21 @@
|
|
22
22
|
|
23
23
|
#define RB_P(OBJ) \
|
24
24
|
rb_funcall(rb_stderr, rb_intern("print"), 1, rb_funcall(OBJ, rb_intern("object_id"), 0)); \
|
25
|
+
rb_funcall(rb_stderr, rb_intern("print"), 1, rb_str_new_cstr(" ")); \
|
26
|
+
rb_funcall(rb_stderr, rb_intern("print"), 1, rb_funcall(OBJ, rb_intern("class"), 0)); \
|
27
|
+
rb_funcall(rb_stderr, rb_intern("print"), 1, rb_str_new_cstr(" ")); \
|
25
28
|
rb_funcall(rb_stderr, rb_intern("puts"), 1, rb_funcall(OBJ, rb_intern("inspect"), 0));
|
26
29
|
|
30
|
+
#define RERAISE_PARSER_ERROR(parser) \
|
31
|
+
{ \
|
32
|
+
unsigned char* emsg = yajl_get_error(parser->handle, 1, \
|
33
|
+
(const unsigned char*)RSTRING_PTR(p->chunk), \
|
34
|
+
RSTRING_LEN(p->chunk)); \
|
35
|
+
VALUE errobj = rb_exc_new2(c_parse_error, (const char*) emsg); \
|
36
|
+
yajl_free_error(parser->handle, emsg); \
|
37
|
+
rb_exc_raise(errobj); \
|
38
|
+
}
|
39
|
+
|
27
40
|
static int yaji_null(void *ctx)
|
28
41
|
{
|
29
42
|
yaji_parser* p = (yaji_parser*) DATA_PTR(ctx);
|
@@ -143,6 +156,26 @@ static int yaji_end_array(void *ctx)
|
|
143
156
|
return STATUS_CONTINUE;
|
144
157
|
}
|
145
158
|
|
159
|
+
static VALUE rb_yaji_parser_parse_chunk(VALUE chunk, VALUE self)
|
160
|
+
{
|
161
|
+
yajl_status rc;
|
162
|
+
yaji_parser* p = (yaji_parser*) DATA_PTR(self);
|
163
|
+
const char* buf = RSTRING_PTR(chunk);
|
164
|
+
unsigned int len = RSTRING_LEN(chunk);
|
165
|
+
int i;
|
166
|
+
|
167
|
+
p->events = rb_ary_new();
|
168
|
+
p->chunk = chunk;
|
169
|
+
rc = yajl_parse(p->handle, (const unsigned char*)buf, len);
|
170
|
+
if (rc == yajl_status_error) {
|
171
|
+
RERAISE_PARSER_ERROR(p);
|
172
|
+
}
|
173
|
+
for (i=0; i<RARRAY_LEN(p->events); i++) {
|
174
|
+
rb_funcall(p->parser_cb, id_call, 1, RARRAY_PTR(p->events)[i]);
|
175
|
+
}
|
176
|
+
return rb_funcall(chunk, id_bytesize, 0, NULL);
|
177
|
+
}
|
178
|
+
|
146
179
|
static VALUE rb_yaji_parser_new(int argc, VALUE *argv, VALUE klass)
|
147
180
|
{
|
148
181
|
yaji_parser* p;
|
@@ -155,12 +188,16 @@ static VALUE rb_yaji_parser_new(int argc, VALUE *argv, VALUE klass)
|
|
155
188
|
p->symbolize_keys = 0;
|
156
189
|
p->rbufsize = Qnil;
|
157
190
|
p->input = Qnil;
|
191
|
+
p->parser_cb = Qnil;
|
158
192
|
|
159
193
|
rb_scan_args(argc, argv, "11", &p->input, &opts);
|
160
194
|
if (TYPE(p->input) == T_STRING) {
|
161
195
|
p->input = rb_class_new_instance(1, &p->input, c_stringio);
|
196
|
+
} else if (rb_respond_to(p->input, id_perform) && rb_respond_to(p->input, id_on_body)) {
|
197
|
+
rb_block_call(p->input, id_on_body, 0, NULL, rb_yaji_parser_parse_chunk, obj);
|
162
198
|
} else if (!rb_respond_to(p->input, id_read)) {
|
163
|
-
rb_raise(c_parse_error, "input must be a String or IO"
|
199
|
+
rb_raise(c_parse_error, "input must be a String or IO or "
|
200
|
+
"something responding to #perform and #on_body e.g. Curl::Easy");
|
164
201
|
}
|
165
202
|
if (!NIL_P(opts)) {
|
166
203
|
Check_Type(opts, T_HASH);
|
@@ -190,50 +227,35 @@ static VALUE rb_yaji_parser_init(int argc, VALUE *argv, VALUE self)
|
|
190
227
|
return self;
|
191
228
|
}
|
192
229
|
|
193
|
-
#define RERAISE_PARSER_ERROR(parser, chunk, len) \
|
194
|
-
{ \
|
195
|
-
unsigned char* emsg = yajl_get_error(parser, 1, chunk, len); \
|
196
|
-
VALUE errobj = rb_exc_new2(c_parse_error, (const char*) emsg); \
|
197
|
-
yajl_free_error(parser, emsg); \
|
198
|
-
rb_exc_raise(errobj); \
|
199
|
-
}
|
200
|
-
|
201
230
|
static VALUE rb_yaji_parser_parse(int argc, VALUE* argv, VALUE self)
|
202
231
|
{
|
203
232
|
yajl_status rc;
|
204
|
-
int i;
|
205
233
|
yaji_parser* p = (yaji_parser*) DATA_PTR(self);
|
206
|
-
|
207
|
-
const char* chunk = NULL;
|
208
|
-
unsigned int len = 0;
|
234
|
+
int i;
|
209
235
|
|
210
|
-
rb_scan_args(argc, argv, "00&", &
|
236
|
+
rb_scan_args(argc, argv, "00&", &p->parser_cb);
|
211
237
|
RETURN_ENUMERATOR(self, argc, argv);
|
212
238
|
|
213
|
-
rbuf = rb_str_new(NULL, 0);
|
214
239
|
p->path = rb_ary_new();
|
215
240
|
p->path_str = rb_str_new("", 0);
|
216
|
-
|
217
|
-
|
218
|
-
|
219
|
-
|
220
|
-
|
221
|
-
|
222
|
-
|
223
|
-
|
224
|
-
}
|
225
|
-
for (i=0; i<RARRAY_LEN(p->events); i++) {
|
226
|
-
rb_funcall(proc, id_call, 1, RARRAY_PTR(p->events)[i]);
|
241
|
+
p->chunk = Qnil;
|
242
|
+
|
243
|
+
if (rb_respond_to(p->input, id_perform)) {
|
244
|
+
rb_funcall(p->input, id_perform, 0);
|
245
|
+
} else {
|
246
|
+
p->chunk = rb_str_new(NULL, 0);
|
247
|
+
while (rb_funcall(p->input, id_read, 2, p->rbufsize, p->chunk) != Qnil) {
|
248
|
+
rb_yaji_parser_parse_chunk(p->chunk, self);
|
227
249
|
}
|
228
250
|
}
|
229
251
|
|
230
252
|
p->events = rb_ary_new();
|
231
253
|
rc = yajl_parse_complete(p->handle);
|
232
254
|
if (rc == yajl_status_insufficient_data || rc == yajl_status_error) {
|
233
|
-
RERAISE_PARSER_ERROR(p
|
255
|
+
RERAISE_PARSER_ERROR(p);
|
234
256
|
}
|
235
257
|
for (i=0; i<RARRAY_LEN(p->events); i++) {
|
236
|
-
rb_funcall(
|
258
|
+
rb_funcall(p->parser_cb, id_call, 1, RARRAY_PTR(p->events)[i]);
|
237
259
|
}
|
238
260
|
|
239
261
|
return Qnil;
|
@@ -326,6 +348,8 @@ static void rb_yaji_parser_mark(void *parser)
|
|
326
348
|
rb_gc_mark(p->events);
|
327
349
|
rb_gc_mark(p->path);
|
328
350
|
rb_gc_mark(p->path_str);
|
351
|
+
rb_gc_mark(p->parser_cb);
|
352
|
+
rb_gc_mark(p->chunk);
|
329
353
|
}
|
330
354
|
}
|
331
355
|
|
@@ -345,7 +369,9 @@ void Init_parser_ext() {
|
|
345
369
|
id_call = rb_intern("call");
|
346
370
|
id_read = rb_intern("read");
|
347
371
|
id_parse = rb_intern("parse");
|
348
|
-
|
372
|
+
id_perform = rb_intern("perform");
|
373
|
+
id_on_body = rb_intern("on_body");
|
374
|
+
id_bytesize = rb_intern("bytesize");
|
349
375
|
|
350
376
|
sym_allow_comments = ID2SYM(rb_intern("allow_comments"));
|
351
377
|
sym_check_utf8 = ID2SYM(rb_intern("check_utf8"));
|
data/ext/yaji/parser_ext.h
CHANGED
@@ -41,7 +41,7 @@ static rb_encoding *utf8_encoding;
|
|
41
41
|
|
42
42
|
static VALUE m_yaji, c_yaji_parser, c_parse_error, c_stringio;
|
43
43
|
|
44
|
-
static ID id_call, id_read, id_parse,
|
44
|
+
static ID id_call, id_read, id_parse, id_perform, id_on_body, id_bytesize;
|
45
45
|
static ID sym_allow_comments, sym_check_utf8, sym_symbolize_keys,
|
46
46
|
sym_read_buffer_size, sym_null, sym_boolean, sym_number, sym_string,
|
47
47
|
sym_hash_key, sym_start_hash, sym_end_hash, sym_start_array,
|
@@ -79,6 +79,8 @@ typedef struct {
|
|
79
79
|
VALUE events;
|
80
80
|
VALUE path;
|
81
81
|
VALUE path_str;
|
82
|
+
VALUE parser_cb;
|
83
|
+
VALUE chunk;
|
82
84
|
yajl_handle handle;
|
83
85
|
yajl_parser_config config;
|
84
86
|
} yaji_parser;
|
data/lib/yaji/version.rb
CHANGED
data/test/test_parser.rb
CHANGED
@@ -1,5 +1,6 @@
|
|
1
1
|
require 'minitest/autorun'
|
2
2
|
require 'yaji'
|
3
|
+
require 'curb'
|
3
4
|
|
4
5
|
class TestParser < MiniTest::Unit::TestCase
|
5
6
|
|
@@ -136,6 +137,14 @@ class TestParser < MiniTest::Unit::TestCase
|
|
136
137
|
assert expected, objects
|
137
138
|
end
|
138
139
|
|
140
|
+
def test_it_could_curb_async_approach
|
141
|
+
curl = Curl::Easy.new('http://avsej.net/test.json')
|
142
|
+
parser = YAJI::Parser.new(curl)
|
143
|
+
object = parser.each.to_a.first
|
144
|
+
expected = {"foo"=>"bar", "baz"=>{"nums"=>[42, 3.1415]}}
|
145
|
+
assert expected, object
|
146
|
+
end
|
147
|
+
|
139
148
|
protected
|
140
149
|
|
141
150
|
def toys_json_str
|
data/yaji.gemspec
CHANGED
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: yaji
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0.
|
4
|
+
version: 0.0.2
|
5
5
|
prerelease:
|
6
6
|
platform: ruby
|
7
7
|
authors:
|
@@ -13,7 +13,7 @@ date: 2011-08-19 00:00:00.000000000Z
|
|
13
13
|
dependencies:
|
14
14
|
- !ruby/object:Gem::Dependency
|
15
15
|
name: rake-compiler
|
16
|
-
requirement: &
|
16
|
+
requirement: &8959040 !ruby/object:Gem::Requirement
|
17
17
|
none: false
|
18
18
|
requirements:
|
19
19
|
- - ! '>='
|
@@ -21,10 +21,10 @@ dependencies:
|
|
21
21
|
version: '0'
|
22
22
|
type: :development
|
23
23
|
prerelease: false
|
24
|
-
version_requirements: *
|
24
|
+
version_requirements: *8959040
|
25
25
|
- !ruby/object:Gem::Dependency
|
26
26
|
name: minitest
|
27
|
-
requirement: &
|
27
|
+
requirement: &8958360 !ruby/object:Gem::Requirement
|
28
28
|
none: false
|
29
29
|
requirements:
|
30
30
|
- - ! '>='
|
@@ -32,7 +32,18 @@ dependencies:
|
|
32
32
|
version: '0'
|
33
33
|
type: :development
|
34
34
|
prerelease: false
|
35
|
-
version_requirements: *
|
35
|
+
version_requirements: *8958360
|
36
|
+
- !ruby/object:Gem::Dependency
|
37
|
+
name: curb
|
38
|
+
requirement: &8957820 !ruby/object:Gem::Requirement
|
39
|
+
none: false
|
40
|
+
requirements:
|
41
|
+
- - ! '>='
|
42
|
+
- !ruby/object:Gem::Version
|
43
|
+
version: '0'
|
44
|
+
type: :development
|
45
|
+
prerelease: false
|
46
|
+
version_requirements: *8957820
|
36
47
|
description: YAJI is a ruby wrapper to YAJL providing iterator interface to streaming
|
37
48
|
JSON parser
|
38
49
|
email: info@couchbase.com
|
@@ -43,6 +54,7 @@ extra_rdoc_files: []
|
|
43
54
|
files:
|
44
55
|
- .gitignore
|
45
56
|
- Gemfile
|
57
|
+
- HISTORY.markdown
|
46
58
|
- LICENSE
|
47
59
|
- README.markdown
|
48
60
|
- Rakefile
|