brianmario-yajl-ruby 0.4.8 → 0.5.0
Sign up to get free protection for your applications and to get access to all the features.
- data/CHANGELOG.rdoc +21 -0
- data/README.rdoc +13 -0
- data/VERSION.yml +2 -2
- data/benchmark/encode.rb +4 -3
- data/benchmark/encode_json_and_marshal.rb +4 -3
- data/benchmark/encode_json_and_yaml.rb +4 -3
- data/benchmark/parse.rb +5 -4
- data/benchmark/parse_json_and_marshal.rb +4 -3
- data/benchmark/parse_json_and_yaml.rb +4 -3
- data/benchmark/parse_stream.rb +48 -0
- data/benchmark/subjects/twitter_stream.json +430 -0
- data/examples/http/twitter_search_api.rb +15 -0
- data/examples/http/twitter_stream_api.rb +24 -0
- data/examples/parsing/from_file.rb +14 -0
- data/examples/parsing/from_stdin.rb +9 -0
- data/examples/parsing/from_string.rb +15 -0
- data/ext/api/yajl_parse.h +3 -0
- data/ext/extconf.rb +2 -1
- data/ext/yajl.c +5 -0
- data/ext/yajl_ext.c +235 -122
- data/ext/yajl_ext.h +49 -36
- data/ext/yajl_lex.c +7 -0
- data/ext/yajl_lex.h +2 -0
- data/ext/yajl_parser.c +3 -1
- data/lib/yajl.rb +18 -17
- data/lib/yajl/bzip2.rb +1 -1
- data/lib/yajl/bzip2/stream_reader.rb +1 -1
- data/lib/yajl/bzip2/stream_writer.rb +1 -1
- data/lib/yajl/deflate.rb +1 -1
- data/lib/yajl/deflate/stream_reader.rb +1 -1
- data/lib/yajl/deflate/stream_writer.rb +1 -1
- data/lib/yajl/gzip.rb +1 -1
- data/lib/yajl/gzip/stream_reader.rb +1 -1
- data/lib/yajl/gzip/stream_writer.rb +1 -1
- data/lib/yajl/http_stream.rb +21 -5
- data/spec/encoding/encoding_spec.rb +14 -9
- data/spec/http/http_spec.rb +1 -5
- data/spec/parsing/active_support_spec.rb +5 -3
- data/spec/parsing/chunked_spec.rb +72 -0
- data/spec/parsing/fixtures_spec.rb +4 -2
- data/spec/parsing/one_off_spec.rb +2 -1
- data/spec/spec_helper.rb +8 -1
- data/yajl-ruby.gemspec +17 -3
- metadata +16 -2
data/ext/yajl_ext.h
CHANGED
@@ -2,45 +2,58 @@
|
|
2
2
|
#include "api/yajl_gen.h"
|
3
3
|
#include <ruby.h>
|
4
4
|
|
5
|
-
#define READ_BUFSIZE
|
6
|
-
|
7
|
-
static VALUE cParseError, mYajl, mStream, mChunked;
|
8
|
-
static ID intern_io_read, intern_eof, intern_respond_to, intern_call, intern_keys, intern_to_s;
|
9
|
-
static int readBufferSize = READ_BUFSIZE;
|
10
|
-
static yajl_parser_config cfg = {1, 1};
|
11
|
-
|
12
|
-
yajl_handle streamParser, chunkedParser;
|
13
|
-
VALUE context = Qnil;
|
14
|
-
VALUE parse_complete_callback = Qnil;
|
15
|
-
|
16
|
-
void check_and_fire_callback(void * ctx);
|
17
|
-
void set_static_value(void * ctx, VALUE val);
|
18
|
-
|
19
|
-
static int found_null(void * ctx);
|
20
|
-
static int found_boolean(void * ctx, int boolean);
|
21
|
-
static int found_number(void * ctx, const char * numberVal, unsigned int numberLen);
|
22
|
-
static int found_string(void * ctx, const unsigned char * stringVal, unsigned int stringLen);
|
23
|
-
static int found_hash_key(void * ctx, const unsigned char * stringVal, unsigned int stringLen);
|
24
|
-
static int found_start_hash(void * ctx);
|
25
|
-
static int found_end_hash(void * ctx);
|
26
|
-
static int found_start_array(void * ctx);
|
27
|
-
static int found_end_array(void * ctx);
|
5
|
+
#define READ_BUFSIZE 65536
|
28
6
|
|
7
|
+
static VALUE cParseError, mYajl, cParser, cEncoder;
|
8
|
+
static ID intern_io_read, intern_eof, intern_call, intern_keys, intern_to_s,
|
9
|
+
sym_allow_comments, sym_check_utf8, sym_pretty, sym_indent;
|
10
|
+
|
11
|
+
#define GetParser(obj, sval) (sval = (struct yajl_parser_wrapper*)DATA_PTR(obj));
|
12
|
+
#define GetEncoder(obj, sval) (sval = (yajl_gen*)DATA_PTR(obj));
|
13
|
+
|
14
|
+
void yajl_check_and_fire_callback(void * ctx);
|
15
|
+
void yajl_set_static_value(void * ctx, VALUE val);
|
16
|
+
void yajl_encode_part(yajl_gen hand, VALUE obj, VALUE io);
|
17
|
+
|
18
|
+
static int yajl_found_null(void * ctx);
|
19
|
+
static int yajl_found_boolean(void * ctx, int boolean);
|
20
|
+
static int yajl_found_number(void * ctx, const char * numberVal, unsigned int numberLen);
|
21
|
+
static int yajl_found_string(void * ctx, const unsigned char * stringVal, unsigned int stringLen);
|
22
|
+
static int yajl_found_hash_key(void * ctx, const unsigned char * stringVal, unsigned int stringLen);
|
23
|
+
static int yajl_found_start_hash(void * ctx);
|
24
|
+
static int yajl_found_end_hash(void * ctx);
|
25
|
+
static int yajl_found_start_array(void * ctx);
|
26
|
+
static int yajl_found_end_array(void * ctx);
|
29
27
|
static yajl_callbacks callbacks = {
|
30
|
-
|
31
|
-
|
28
|
+
yajl_found_null,
|
29
|
+
yajl_found_boolean,
|
32
30
|
NULL,
|
33
31
|
NULL,
|
34
|
-
|
35
|
-
|
36
|
-
|
37
|
-
|
38
|
-
|
39
|
-
|
40
|
-
|
32
|
+
yajl_found_number,
|
33
|
+
yajl_found_string,
|
34
|
+
yajl_found_start_hash,
|
35
|
+
yajl_found_hash_key,
|
36
|
+
yajl_found_end_hash,
|
37
|
+
yajl_found_start_array,
|
38
|
+
yajl_found_end_array
|
41
39
|
};
|
42
40
|
|
43
|
-
|
44
|
-
|
45
|
-
|
46
|
-
|
41
|
+
struct yajl_parser_wrapper {
|
42
|
+
VALUE builderStack;
|
43
|
+
VALUE parse_complete_callback;
|
44
|
+
int nestedArrayLevel;
|
45
|
+
int nestedHashLevel;
|
46
|
+
yajl_handle parser;
|
47
|
+
};
|
48
|
+
static void yajl_parser_wrapper_free(void * wrapper);
|
49
|
+
static void yajl_parser_wrapper_mark(void * wrapper);
|
50
|
+
|
51
|
+
static VALUE rb_yajl_parser_new(int argc, VALUE * argv, VALUE self);
|
52
|
+
static VALUE rb_yajl_parser_init(int argc, VALUE * argv, VALUE self);
|
53
|
+
static VALUE rb_yajl_parser_parse(int argc, VALUE * argv, VALUE self);
|
54
|
+
static VALUE rb_yajl_parser_parse_chunk(VALUE self, VALUE chunk);
|
55
|
+
static VALUE rb_yajl_set_complete_cb(VALUE self, VALUE callback);
|
56
|
+
|
57
|
+
static VALUE rb_yajl_encoder_new(int argc, VALUE * argv, VALUE klass);
|
58
|
+
static VALUE rb_yajl_encoder_init(int argc, VALUE * argv, VALUE self);
|
59
|
+
static VALUE rb_yajl_encoder_encode(VALUE self, VALUE obj, VALUE io);
|
data/ext/yajl_lex.c
CHANGED
@@ -129,6 +129,13 @@ yajl_lex_alloc(yajl_alloc_funcs * alloc,
|
|
129
129
|
return lxr;
|
130
130
|
}
|
131
131
|
|
132
|
+
yajl_lexer
|
133
|
+
yajl_lex_realloc(yajl_lexer orig) {
|
134
|
+
yajl_lexer newLxr = yajl_lex_alloc(orig->alloc, orig->allowComments, orig->validateUTF8);
|
135
|
+
yajl_lex_free(orig);
|
136
|
+
return newLxr;
|
137
|
+
}
|
138
|
+
|
132
139
|
void
|
133
140
|
yajl_lex_free(yajl_lexer lxr)
|
134
141
|
{
|
data/ext/yajl_lex.h
CHANGED
data/ext/yajl_parser.c
CHANGED
@@ -307,7 +307,9 @@ yajl_do_parse(yajl_handle hand, unsigned int * offset,
|
|
307
307
|
{
|
308
308
|
yajl_state s = yajl_bs_current(hand->stateStack);
|
309
309
|
if (s == yajl_state_start) {
|
310
|
-
|
310
|
+
// HACK: is this even safe to do?
|
311
|
+
// yajl_bs_set(hand->stateStack, yajl_state_parse_complete);
|
312
|
+
yajl_reset_parser(hand);
|
311
313
|
} else if (s == yajl_state_map_need_val) {
|
312
314
|
yajl_bs_set(hand->stateStack, yajl_state_map_got_val);
|
313
315
|
} else {
|
data/lib/yajl.rb
CHANGED
@@ -13,28 +13,29 @@ require 'yajl_ext'
|
|
13
13
|
#
|
14
14
|
# Ruby bindings to the excellent Yajl (Yet Another JSON Parser) ANSI C library.
|
15
15
|
module Yajl
|
16
|
-
VERSION = "0.4.8"
|
16
|
+
VERSION = "0.5.0"
|
17
17
|
|
18
|
-
# == Yajl::Chunked
|
19
|
-
#
|
20
|
-
# This module contains methods for parsing JSON in chunks.
|
21
|
-
# The use case here is that the caller may not be able to get access to the IO to which
|
22
|
-
# JSON content is being received. Rendering Yajl::Stream dead to them.
|
23
|
-
#
|
24
|
-
# With the methods in this module, the caller will be able to pass in chunks of JSON content
|
25
|
-
# until a full object has been parsed from said content.
|
26
|
-
#
|
27
|
-
# In order for this process to work correctly, the caller needs to specify a callback which
|
28
|
-
# is passed the constructed object. The only requirement currently of this callback is that
|
29
|
-
# it respond to #call and accept a single parameter (the object that was created from parsing).
|
30
|
-
module Chunked; end
|
31
|
-
|
32
|
-
# == Yajl::Stream
|
18
|
+
# == Yajl::Parser
|
33
19
|
#
|
34
20
|
# This module contains methods for parsing JSON directly from an IO object.
|
35
21
|
#
|
36
22
|
# The only basic requirement currently is that the IO object responds to #read(len) and #eof?
|
37
23
|
#
|
38
24
|
# The IO is parsed until a complete JSON object has been read and a ruby object will be returned.
|
39
|
-
|
25
|
+
class Parser; end
|
26
|
+
|
27
|
+
# Deprecated
|
28
|
+
module Stream
|
29
|
+
# Deprecated
|
30
|
+
def self.parse(io)
|
31
|
+
STDERR.puts "WARNING: Yajl::Stream has be deprecated and will most likely be gone in the next release. Use the Yajl::Parser class instead."
|
32
|
+
Parser.new.parse(io)
|
33
|
+
end
|
34
|
+
|
35
|
+
# Deprecated
|
36
|
+
def self.encode(obj, io)
|
37
|
+
STDERR.puts "WARNING: Yajl::Stream has be deprecated and will most likely be gone in the next release. Use the Yajl::Encoder class instead."
|
38
|
+
Encoder.new.encode(obj, io)
|
39
|
+
end
|
40
|
+
end
|
40
41
|
end
|
data/lib/yajl/bzip2.rb
CHANGED
data/lib/yajl/deflate.rb
CHANGED
data/lib/yajl/gzip.rb
CHANGED
data/lib/yajl/http_stream.rb
CHANGED
@@ -1,6 +1,6 @@
|
|
1
1
|
# encoding: UTF-8
|
2
2
|
require 'socket' unless defined?(Socket)
|
3
|
-
require 'yajl' unless defined?(Yajl::
|
3
|
+
require 'yajl' unless defined?(Yajl::Parser)
|
4
4
|
|
5
5
|
module Yajl
|
6
6
|
# == Yajl::HttpStream
|
@@ -24,7 +24,7 @@ module Yajl
|
|
24
24
|
# 3. the response is read until the end of the headers
|
25
25
|
# 4. the _socket itself_ is passed directly to Yajl, for direct parsing off the stream;
|
26
26
|
# As it's being received over the wire!
|
27
|
-
def self.get(uri, opts = {})
|
27
|
+
def self.get(uri, opts = {}, &block)
|
28
28
|
user_agent = opts.has_key?(['User-Agent']) ? opts['User-Agent'] : "Yajl::HttpStream #{Yajl::VERSION}"
|
29
29
|
|
30
30
|
socket = TCPSocket.new(uri.host, uri.port)
|
@@ -61,9 +61,25 @@ module Yajl
|
|
61
61
|
end
|
62
62
|
end
|
63
63
|
end
|
64
|
-
|
64
|
+
parser = Yajl::Parser.new
|
65
65
|
if response_head[:headers]["Transfer-Encoding"] == 'chunked'
|
66
|
-
|
66
|
+
if block_given?
|
67
|
+
parser.on_parse_complete = block
|
68
|
+
chunkLeft = 0
|
69
|
+
while !socket.eof? && (size = socket.gets.hex)
|
70
|
+
next if size == 0
|
71
|
+
json = socket.read(size)
|
72
|
+
chunkLeft = size-json.size
|
73
|
+
if chunkLeft == 0
|
74
|
+
parser << json
|
75
|
+
else
|
76
|
+
# received only part of the chunk, grab the rest
|
77
|
+
parser << socket.read(chunkLeft)
|
78
|
+
end
|
79
|
+
end
|
80
|
+
else
|
81
|
+
raise Exception, "Chunked responses detected, but no block given to handle the chunks."
|
82
|
+
end
|
67
83
|
else
|
68
84
|
content_type = response_head[:headers]["Content-Type"].split('; ')
|
69
85
|
content_type = content_type.first
|
@@ -76,7 +92,7 @@ module Yajl
|
|
76
92
|
when "bzip2"
|
77
93
|
return Yajl::Bzip2::StreamReader.parse(socket)
|
78
94
|
else
|
79
|
-
return Yajl::
|
95
|
+
return Yajl::Parser.new.parse(socket)
|
80
96
|
end
|
81
97
|
else
|
82
98
|
raise InvalidContentType, "The response MIME type #{content_type}"
|
@@ -6,18 +6,23 @@ describe "Yajl JSON encoder" do
|
|
6
6
|
|
7
7
|
FILES.each do |file|
|
8
8
|
it "should encode #{File.basename(file)}" do
|
9
|
-
|
10
|
-
|
9
|
+
# we don't care about testing the stream subject as it has multiple JSON strings in it
|
10
|
+
if File.basename(file) != 'twitter_stream.json'
|
11
|
+
input = File.new(File.expand_path(file), 'r')
|
12
|
+
io = StringIO.new
|
13
|
+
parser = Yajl::Parser.new
|
14
|
+
encoder = Yajl::Encoder.new
|
11
15
|
|
12
|
-
|
13
|
-
|
14
|
-
|
15
|
-
|
16
|
+
hash = parser.parse(input)
|
17
|
+
output = encoder.encode(hash, io)
|
18
|
+
io.rewind
|
19
|
+
hash2 = parser.parse(io)
|
16
20
|
|
17
|
-
|
18
|
-
|
21
|
+
io.close
|
22
|
+
input.close
|
19
23
|
|
20
|
-
|
24
|
+
hash.should == hash2
|
25
|
+
end
|
21
26
|
end
|
22
27
|
end
|
23
28
|
end
|
data/spec/http/http_spec.rb
CHANGED
@@ -21,7 +21,7 @@ describe "Yajl HTTP GET request" do
|
|
21
21
|
@gzip = File.new(File.expand_path(File.dirname(__FILE__) + '/fixtures/http.gzip.dump'), 'r')
|
22
22
|
|
23
23
|
parse_off_headers(@raw)
|
24
|
-
@raw_template_hash = Yajl::
|
24
|
+
@raw_template_hash = Yajl::Parser.new.parse(@raw)
|
25
25
|
@raw.rewind
|
26
26
|
end
|
27
27
|
|
@@ -32,10 +32,6 @@ describe "Yajl HTTP GET request" do
|
|
32
32
|
@gzip.close unless @gzip.closed?
|
33
33
|
end
|
34
34
|
|
35
|
-
after(:each) do
|
36
|
-
GC.start
|
37
|
-
end
|
38
|
-
|
39
35
|
it "should parse a raw response" do
|
40
36
|
file = File.expand_path(File.dirname(__FILE__) + '/http/http.raw.dump')
|
41
37
|
uri = 'file://'+file
|
@@ -37,14 +37,16 @@ describe "ActiveSupport test cases" do
|
|
37
37
|
TESTS.each do |json, expected|
|
38
38
|
it "should be able to parse #{json}" do
|
39
39
|
lambda {
|
40
|
-
Yajl::
|
40
|
+
parser = Yajl::Parser.new
|
41
|
+
parser.parse(StringIO.new(json)).should == expected
|
41
42
|
}.should_not raise_error(Yajl::ParseError)
|
42
43
|
end
|
43
44
|
end
|
44
45
|
|
45
46
|
it "should fail parsing {: 1}" do
|
46
47
|
lambda {
|
47
|
-
Yajl::
|
48
|
-
|
48
|
+
parser = Yajl::Parser.new
|
49
|
+
parser.parse(StringIO.new("{: 1}"))
|
50
|
+
}.should raise_error(Yajl::ParseError)
|
49
51
|
end
|
50
52
|
end
|
@@ -0,0 +1,72 @@
|
|
1
|
+
# encoding: UTF-8
|
2
|
+
require File.expand_path(File.dirname(__FILE__) + '/../spec_helper.rb')
|
3
|
+
require 'stringio'
|
4
|
+
|
5
|
+
describe "Chunked parser" do
|
6
|
+
before(:all) do
|
7
|
+
@final = [{"abc" => 123}, {"def" => 456}]
|
8
|
+
end
|
9
|
+
|
10
|
+
before(:each) do
|
11
|
+
@callback = lambda { |hash|
|
12
|
+
# no-op
|
13
|
+
}
|
14
|
+
@parser = Yajl::Parser.new
|
15
|
+
@parser.on_parse_complete = @callback
|
16
|
+
end
|
17
|
+
|
18
|
+
it "should parse a single chunk" do
|
19
|
+
@callback.should_receive(:call).with(@final)
|
20
|
+
@parser << '[{"abc": 123},{"def": 456}]'
|
21
|
+
end
|
22
|
+
|
23
|
+
it "should parse a single chunk, 3 times" do
|
24
|
+
@callback.should_receive(:call).with(@final).exactly(3).times
|
25
|
+
@parser << '[{"abc": 123},{"def": 456}]'
|
26
|
+
@parser << '[{"abc": 123},{"def": 456}]'
|
27
|
+
@parser << '[{"abc": 123},{"def": 456}]'
|
28
|
+
end
|
29
|
+
|
30
|
+
it "should parse in two chunks" do
|
31
|
+
@callback.should_receive(:call).with(@final)
|
32
|
+
@parser << '[{"abc": 123},'
|
33
|
+
@parser << '{"def": 456}]'
|
34
|
+
end
|
35
|
+
|
36
|
+
it "should parse in 2 chunks, twice" do
|
37
|
+
@callback.should_receive(:call).with(@final).exactly(2).times
|
38
|
+
@parser << '[{"abc": 123},'
|
39
|
+
@parser << '{"def": 456}]'
|
40
|
+
@parser << '[{"abc": 123},'
|
41
|
+
@parser << '{"def": 456}]'
|
42
|
+
end
|
43
|
+
|
44
|
+
it "should parse 2 JSON strings, in 3 chunks" do
|
45
|
+
@callback.should_receive(:call).with(@final).exactly(2).times
|
46
|
+
@parser << '[{"abc": 123},'
|
47
|
+
@parser << '{"def": 456}][{"abc": 123},{"def":'
|
48
|
+
@parser << ' 456}]'
|
49
|
+
end
|
50
|
+
|
51
|
+
it "should parse 2 JSON strings in 1 chunk" do
|
52
|
+
@callback.should_receive(:call).with(@final).exactly(2).times
|
53
|
+
@parser << '[{"abc": 123},{"def": 456}][{"abc": 123},{"def": 456}]'
|
54
|
+
end
|
55
|
+
|
56
|
+
it "should parse 2 JSON strings from an IO" do
|
57
|
+
@callback.should_receive(:call).with(@final).exactly(2).times
|
58
|
+
@parser.parse(StringIO.new('[{"abc": 123},{"def": 456}][{"abc": 123},{"def": 456}]'))
|
59
|
+
end
|
60
|
+
|
61
|
+
it "should parse a JSON string an IO and fire callback once" do
|
62
|
+
@callback.should_receive(:call).with(@final)
|
63
|
+
@parser.parse(StringIO.new('[{"abc": 123},{"def": 456}]'))
|
64
|
+
end
|
65
|
+
|
66
|
+
it "should parse twitter_stream.json and fire callback 430 times" do
|
67
|
+
path = File.expand_path(File.dirname(__FILE__) + '/../../benchmark/subjects/twitter_stream.json')
|
68
|
+
json = File.new(path, 'r')
|
69
|
+
@callback.should_receive(:call).exactly(430).times
|
70
|
+
@parser.parse(json)
|
71
|
+
end
|
72
|
+
end
|