brianmario-yajl-ruby 0.4.8 → 0.5.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/CHANGELOG.rdoc +21 -0
- data/README.rdoc +13 -0
- data/VERSION.yml +2 -2
- data/benchmark/encode.rb +4 -3
- data/benchmark/encode_json_and_marshal.rb +4 -3
- data/benchmark/encode_json_and_yaml.rb +4 -3
- data/benchmark/parse.rb +5 -4
- data/benchmark/parse_json_and_marshal.rb +4 -3
- data/benchmark/parse_json_and_yaml.rb +4 -3
- data/benchmark/parse_stream.rb +48 -0
- data/benchmark/subjects/twitter_stream.json +430 -0
- data/examples/http/twitter_search_api.rb +15 -0
- data/examples/http/twitter_stream_api.rb +24 -0
- data/examples/parsing/from_file.rb +14 -0
- data/examples/parsing/from_stdin.rb +9 -0
- data/examples/parsing/from_string.rb +15 -0
- data/ext/api/yajl_parse.h +3 -0
- data/ext/extconf.rb +2 -1
- data/ext/yajl.c +5 -0
- data/ext/yajl_ext.c +235 -122
- data/ext/yajl_ext.h +49 -36
- data/ext/yajl_lex.c +7 -0
- data/ext/yajl_lex.h +2 -0
- data/ext/yajl_parser.c +3 -1
- data/lib/yajl.rb +18 -17
- data/lib/yajl/bzip2.rb +1 -1
- data/lib/yajl/bzip2/stream_reader.rb +1 -1
- data/lib/yajl/bzip2/stream_writer.rb +1 -1
- data/lib/yajl/deflate.rb +1 -1
- data/lib/yajl/deflate/stream_reader.rb +1 -1
- data/lib/yajl/deflate/stream_writer.rb +1 -1
- data/lib/yajl/gzip.rb +1 -1
- data/lib/yajl/gzip/stream_reader.rb +1 -1
- data/lib/yajl/gzip/stream_writer.rb +1 -1
- data/lib/yajl/http_stream.rb +21 -5
- data/spec/encoding/encoding_spec.rb +14 -9
- data/spec/http/http_spec.rb +1 -5
- data/spec/parsing/active_support_spec.rb +5 -3
- data/spec/parsing/chunked_spec.rb +72 -0
- data/spec/parsing/fixtures_spec.rb +4 -2
- data/spec/parsing/one_off_spec.rb +2 -1
- data/spec/spec_helper.rb +8 -1
- data/yajl-ruby.gemspec +17 -3
- metadata +16 -2
data/ext/yajl_ext.h
CHANGED
@@ -2,45 +2,58 @@
|
|
2
2
|
#include "api/yajl_gen.h"
|
3
3
|
#include <ruby.h>
|
4
4
|
|
5
|
-
#define READ_BUFSIZE
|
6
|
-
|
7
|
-
static VALUE cParseError, mYajl, mStream, mChunked;
|
8
|
-
static ID intern_io_read, intern_eof, intern_respond_to, intern_call, intern_keys, intern_to_s;
|
9
|
-
static int readBufferSize = READ_BUFSIZE;
|
10
|
-
static yajl_parser_config cfg = {1, 1};
|
11
|
-
|
12
|
-
yajl_handle streamParser, chunkedParser;
|
13
|
-
VALUE context = Qnil;
|
14
|
-
VALUE parse_complete_callback = Qnil;
|
15
|
-
|
16
|
-
void check_and_fire_callback(void * ctx);
|
17
|
-
void set_static_value(void * ctx, VALUE val);
|
18
|
-
|
19
|
-
static int found_null(void * ctx);
|
20
|
-
static int found_boolean(void * ctx, int boolean);
|
21
|
-
static int found_number(void * ctx, const char * numberVal, unsigned int numberLen);
|
22
|
-
static int found_string(void * ctx, const unsigned char * stringVal, unsigned int stringLen);
|
23
|
-
static int found_hash_key(void * ctx, const unsigned char * stringVal, unsigned int stringLen);
|
24
|
-
static int found_start_hash(void * ctx);
|
25
|
-
static int found_end_hash(void * ctx);
|
26
|
-
static int found_start_array(void * ctx);
|
27
|
-
static int found_end_array(void * ctx);
|
5
|
+
#define READ_BUFSIZE 65536
|
28
6
|
|
7
|
+
static VALUE cParseError, mYajl, cParser, cEncoder;
|
8
|
+
static ID intern_io_read, intern_eof, intern_call, intern_keys, intern_to_s,
|
9
|
+
sym_allow_comments, sym_check_utf8, sym_pretty, sym_indent;
|
10
|
+
|
11
|
+
#define GetParser(obj, sval) (sval = (struct yajl_parser_wrapper*)DATA_PTR(obj));
|
12
|
+
#define GetEncoder(obj, sval) (sval = (yajl_gen*)DATA_PTR(obj));
|
13
|
+
|
14
|
+
void yajl_check_and_fire_callback(void * ctx);
|
15
|
+
void yajl_set_static_value(void * ctx, VALUE val);
|
16
|
+
void yajl_encode_part(yajl_gen hand, VALUE obj, VALUE io);
|
17
|
+
|
18
|
+
static int yajl_found_null(void * ctx);
|
19
|
+
static int yajl_found_boolean(void * ctx, int boolean);
|
20
|
+
static int yajl_found_number(void * ctx, const char * numberVal, unsigned int numberLen);
|
21
|
+
static int yajl_found_string(void * ctx, const unsigned char * stringVal, unsigned int stringLen);
|
22
|
+
static int yajl_found_hash_key(void * ctx, const unsigned char * stringVal, unsigned int stringLen);
|
23
|
+
static int yajl_found_start_hash(void * ctx);
|
24
|
+
static int yajl_found_end_hash(void * ctx);
|
25
|
+
static int yajl_found_start_array(void * ctx);
|
26
|
+
static int yajl_found_end_array(void * ctx);
|
29
27
|
static yajl_callbacks callbacks = {
|
30
|
-
|
31
|
-
|
28
|
+
yajl_found_null,
|
29
|
+
yajl_found_boolean,
|
32
30
|
NULL,
|
33
31
|
NULL,
|
34
|
-
|
35
|
-
|
36
|
-
|
37
|
-
|
38
|
-
|
39
|
-
|
40
|
-
|
32
|
+
yajl_found_number,
|
33
|
+
yajl_found_string,
|
34
|
+
yajl_found_start_hash,
|
35
|
+
yajl_found_hash_key,
|
36
|
+
yajl_found_end_hash,
|
37
|
+
yajl_found_start_array,
|
38
|
+
yajl_found_end_array
|
41
39
|
};
|
42
40
|
|
43
|
-
|
44
|
-
|
45
|
-
|
46
|
-
|
41
|
+
struct yajl_parser_wrapper {
|
42
|
+
VALUE builderStack;
|
43
|
+
VALUE parse_complete_callback;
|
44
|
+
int nestedArrayLevel;
|
45
|
+
int nestedHashLevel;
|
46
|
+
yajl_handle parser;
|
47
|
+
};
|
48
|
+
static void yajl_parser_wrapper_free(void * wrapper);
|
49
|
+
static void yajl_parser_wrapper_mark(void * wrapper);
|
50
|
+
|
51
|
+
static VALUE rb_yajl_parser_new(int argc, VALUE * argv, VALUE self);
|
52
|
+
static VALUE rb_yajl_parser_init(int argc, VALUE * argv, VALUE self);
|
53
|
+
static VALUE rb_yajl_parser_parse(int argc, VALUE * argv, VALUE self);
|
54
|
+
static VALUE rb_yajl_parser_parse_chunk(VALUE self, VALUE chunk);
|
55
|
+
static VALUE rb_yajl_set_complete_cb(VALUE self, VALUE callback);
|
56
|
+
|
57
|
+
static VALUE rb_yajl_encoder_new(int argc, VALUE * argv, VALUE klass);
|
58
|
+
static VALUE rb_yajl_encoder_init(int argc, VALUE * argv, VALUE self);
|
59
|
+
static VALUE rb_yajl_encoder_encode(VALUE self, VALUE obj, VALUE io);
|
data/ext/yajl_lex.c
CHANGED
@@ -129,6 +129,13 @@ yajl_lex_alloc(yajl_alloc_funcs * alloc,
|
|
129
129
|
return lxr;
|
130
130
|
}
|
131
131
|
|
132
|
+
yajl_lexer
|
133
|
+
yajl_lex_realloc(yajl_lexer orig) {
|
134
|
+
yajl_lexer newLxr = yajl_lex_alloc(orig->alloc, orig->allowComments, orig->validateUTF8);
|
135
|
+
yajl_lex_free(orig);
|
136
|
+
return newLxr;
|
137
|
+
}
|
138
|
+
|
132
139
|
void
|
133
140
|
yajl_lex_free(yajl_lexer lxr)
|
134
141
|
{
|
data/ext/yajl_lex.h
CHANGED
data/ext/yajl_parser.c
CHANGED
@@ -307,7 +307,9 @@ yajl_do_parse(yajl_handle hand, unsigned int * offset,
|
|
307
307
|
{
|
308
308
|
yajl_state s = yajl_bs_current(hand->stateStack);
|
309
309
|
if (s == yajl_state_start) {
|
310
|
-
|
310
|
+
// HACK: is this even safe to do?
|
311
|
+
// yajl_bs_set(hand->stateStack, yajl_state_parse_complete);
|
312
|
+
yajl_reset_parser(hand);
|
311
313
|
} else if (s == yajl_state_map_need_val) {
|
312
314
|
yajl_bs_set(hand->stateStack, yajl_state_map_got_val);
|
313
315
|
} else {
|
data/lib/yajl.rb
CHANGED
@@ -13,28 +13,29 @@ require 'yajl_ext'
|
|
13
13
|
#
|
14
14
|
# Ruby bindings to the excellent Yajl (Yet Another JSON Parser) ANSI C library.
|
15
15
|
module Yajl
|
16
|
-
VERSION = "0.
|
16
|
+
VERSION = "0.5.0"
|
17
17
|
|
18
|
-
# == Yajl::
|
19
|
-
#
|
20
|
-
# This module contains methods for parsing JSON in chunks.
|
21
|
-
# The use case here is that the caller may not be able to get access to the IO to which
|
22
|
-
# JSON content is being received. Rendering Yajl::Stream dead to them.
|
23
|
-
#
|
24
|
-
# With the methods in this module, the caller will be able to pass in chunks of JSON content
|
25
|
-
# until a full object has been parsed from said content.
|
26
|
-
#
|
27
|
-
# In order for this process to work correctly, the caller needs to specify a callback which
|
28
|
-
# is passed the constructed object. The only requirement currently of this callback is that
|
29
|
-
# it respond to #call and accept a single parameter (the object that was created from parsing).
|
30
|
-
module Chunked; end
|
31
|
-
|
32
|
-
# == Yajl::Stream
|
18
|
+
# == Yajl::Parser
|
33
19
|
#
|
34
20
|
# This class contains methods for parsing JSON directly from an IO object.
|
35
21
|
#
|
36
22
|
# The only basic requirement currently is that the IO object respond to #read(len) and eof?
|
37
23
|
#
|
38
24
|
# The IO is parsed until a complete JSON object has been read and a ruby object will be returned.
|
39
|
-
|
25
|
+
class Parser; end
|
26
|
+
|
27
|
+
# Deprecated
|
28
|
+
module Stream
|
29
|
+
# Deprecated
|
30
|
+
def self.parse(io)
|
31
|
+
STDERR.puts "WARNING: Yajl::Stream has be deprecated and will most likely be gone in the next release. Use the Yajl::Parser class instead."
|
32
|
+
Parser.new.parse(io)
|
33
|
+
end
|
34
|
+
|
35
|
+
# Deprecated
|
36
|
+
def self.encode(obj, io)
|
37
|
+
STDERR.puts "WARNING: Yajl::Stream has be deprecated and will most likely be gone in the next release. Use the Yajl::Encoder class instead."
|
38
|
+
Encoder.new.encode(obj, io)
|
39
|
+
end
|
40
|
+
end
|
40
41
|
end
|
data/lib/yajl/bzip2.rb
CHANGED
data/lib/yajl/deflate.rb
CHANGED
data/lib/yajl/gzip.rb
CHANGED
data/lib/yajl/http_stream.rb
CHANGED
@@ -1,6 +1,6 @@
|
|
1
1
|
# encoding: UTF-8
|
2
2
|
require 'socket' unless defined?(Socket)
|
3
|
-
require 'yajl' unless defined?(Yajl::
|
3
|
+
require 'yajl' unless defined?(Yajl::Parser)
|
4
4
|
|
5
5
|
module Yajl
|
6
6
|
# == Yajl::HttpStream
|
@@ -24,7 +24,7 @@ module Yajl
|
|
24
24
|
# 3. the response is read until the end of the headers
|
25
25
|
# 4. the _socket itself_ is passed directly to Yajl, for direct parsing off the stream;
|
26
26
|
# As it's being received over the wire!
|
27
|
-
def self.get(uri, opts = {})
|
27
|
+
def self.get(uri, opts = {}, &block)
|
28
28
|
user_agent = opts.has_key?(['User-Agent']) ? opts['User-Agent'] : "Yajl::HttpStream #{Yajl::VERSION}"
|
29
29
|
|
30
30
|
socket = TCPSocket.new(uri.host, uri.port)
|
@@ -61,9 +61,25 @@ module Yajl
|
|
61
61
|
end
|
62
62
|
end
|
63
63
|
end
|
64
|
-
|
64
|
+
parser = Yajl::Parser.new
|
65
65
|
if response_head[:headers]["Transfer-Encoding"] == 'chunked'
|
66
|
-
|
66
|
+
if block_given?
|
67
|
+
parser.on_parse_complete = block
|
68
|
+
chunkLeft = 0
|
69
|
+
while !socket.eof? && (size = socket.gets.hex)
|
70
|
+
next if size == 0
|
71
|
+
json = socket.read(size)
|
72
|
+
chunkLeft = size-json.size
|
73
|
+
if chunkLeft == 0
|
74
|
+
parser << json
|
75
|
+
else
|
76
|
+
# received only part of the chunk, grab the rest
|
77
|
+
parser << socket.read(chunkLeft)
|
78
|
+
end
|
79
|
+
end
|
80
|
+
else
|
81
|
+
raise Exception, "Chunked responses detected, but no block given to handle the chunks."
|
82
|
+
end
|
67
83
|
else
|
68
84
|
content_type = response_head[:headers]["Content-Type"].split('; ')
|
69
85
|
content_type = content_type.first
|
@@ -76,7 +92,7 @@ module Yajl
|
|
76
92
|
when "bzip2"
|
77
93
|
return Yajl::Bzip2::StreamReader.parse(socket)
|
78
94
|
else
|
79
|
-
return Yajl::
|
95
|
+
return Yajl::Parser.new.parse(socket)
|
80
96
|
end
|
81
97
|
else
|
82
98
|
raise InvalidContentType, "The response MIME type #{content_type}"
|
@@ -6,18 +6,23 @@ describe "Yajl JSON encoder" do
|
|
6
6
|
|
7
7
|
FILES.each do |file|
|
8
8
|
it "should encode #{File.basename(file)}" do
|
9
|
-
|
10
|
-
|
9
|
+
# we don't care about testing the stream subject as it has multiple JSON strings in it
|
10
|
+
if File.basename(file) != 'twitter_stream.json'
|
11
|
+
input = File.new(File.expand_path(file), 'r')
|
12
|
+
io = StringIO.new
|
13
|
+
parser = Yajl::Parser.new
|
14
|
+
encoder = Yajl::Encoder.new
|
11
15
|
|
12
|
-
|
13
|
-
|
14
|
-
|
15
|
-
|
16
|
+
hash = parser.parse(input)
|
17
|
+
output = encoder.encode(hash, io)
|
18
|
+
io.rewind
|
19
|
+
hash2 = parser.parse(io)
|
16
20
|
|
17
|
-
|
18
|
-
|
21
|
+
io.close
|
22
|
+
input.close
|
19
23
|
|
20
|
-
|
24
|
+
hash.should == hash2
|
25
|
+
end
|
21
26
|
end
|
22
27
|
end
|
23
28
|
end
|
data/spec/http/http_spec.rb
CHANGED
@@ -21,7 +21,7 @@ describe "Yajl HTTP GET request" do
|
|
21
21
|
@gzip = File.new(File.expand_path(File.dirname(__FILE__) + '/fixtures/http.gzip.dump'), 'r')
|
22
22
|
|
23
23
|
parse_off_headers(@raw)
|
24
|
-
@raw_template_hash = Yajl::
|
24
|
+
@raw_template_hash = Yajl::Parser.new.parse(@raw)
|
25
25
|
@raw.rewind
|
26
26
|
end
|
27
27
|
|
@@ -32,10 +32,6 @@ describe "Yajl HTTP GET request" do
|
|
32
32
|
@gzip.close unless @gzip.closed?
|
33
33
|
end
|
34
34
|
|
35
|
-
after(:each) do
|
36
|
-
GC.start
|
37
|
-
end
|
38
|
-
|
39
35
|
it "should parse a raw response" do
|
40
36
|
file = File.expand_path(File.dirname(__FILE__) + '/http/http.raw.dump')
|
41
37
|
uri = 'file://'+file
|
@@ -37,14 +37,16 @@ describe "ActiveSupport test cases" do
|
|
37
37
|
TESTS.each do |json, expected|
|
38
38
|
it "should be able to parse #{json}" do
|
39
39
|
lambda {
|
40
|
-
Yajl::
|
40
|
+
parser = Yajl::Parser.new
|
41
|
+
parser.parse(StringIO.new(json)).should == expected
|
41
42
|
}.should_not raise_error(Yajl::ParseError)
|
42
43
|
end
|
43
44
|
end
|
44
45
|
|
45
46
|
it "should fail parsing {: 1}" do
|
46
47
|
lambda {
|
47
|
-
Yajl::
|
48
|
-
|
48
|
+
parser = Yajl::Parser.new
|
49
|
+
parser.parse(StringIO.new("{: 1}"))
|
50
|
+
}.should raise_error(Yajl::ParseError)
|
49
51
|
end
|
50
52
|
end
|
@@ -0,0 +1,72 @@
|
|
1
|
+
# encoding: UTF-8
|
2
|
+
require File.expand_path(File.dirname(__FILE__) + '/../spec_helper.rb')
|
3
|
+
require 'stringio'
|
4
|
+
|
5
|
+
describe "Chunked parser" do
|
6
|
+
before(:all) do
|
7
|
+
@final = [{"abc" => 123}, {"def" => 456}]
|
8
|
+
end
|
9
|
+
|
10
|
+
before(:each) do
|
11
|
+
@callback = lambda { |hash|
|
12
|
+
# no-op
|
13
|
+
}
|
14
|
+
@parser = Yajl::Parser.new
|
15
|
+
@parser.on_parse_complete = @callback
|
16
|
+
end
|
17
|
+
|
18
|
+
it "should parse a single chunk" do
|
19
|
+
@callback.should_receive(:call).with(@final)
|
20
|
+
@parser << '[{"abc": 123},{"def": 456}]'
|
21
|
+
end
|
22
|
+
|
23
|
+
it "should parse a single chunk, 3 times" do
|
24
|
+
@callback.should_receive(:call).with(@final).exactly(3).times
|
25
|
+
@parser << '[{"abc": 123},{"def": 456}]'
|
26
|
+
@parser << '[{"abc": 123},{"def": 456}]'
|
27
|
+
@parser << '[{"abc": 123},{"def": 456}]'
|
28
|
+
end
|
29
|
+
|
30
|
+
it "should parse in two chunks" do
|
31
|
+
@callback.should_receive(:call).with(@final)
|
32
|
+
@parser << '[{"abc": 123},'
|
33
|
+
@parser << '{"def": 456}]'
|
34
|
+
end
|
35
|
+
|
36
|
+
it "should parse in 2 chunks, twice" do
|
37
|
+
@callback.should_receive(:call).with(@final).exactly(2).times
|
38
|
+
@parser << '[{"abc": 123},'
|
39
|
+
@parser << '{"def": 456}]'
|
40
|
+
@parser << '[{"abc": 123},'
|
41
|
+
@parser << '{"def": 456}]'
|
42
|
+
end
|
43
|
+
|
44
|
+
it "should parse 2 JSON strings, in 3 chunks" do
|
45
|
+
@callback.should_receive(:call).with(@final).exactly(2).times
|
46
|
+
@parser << '[{"abc": 123},'
|
47
|
+
@parser << '{"def": 456}][{"abc": 123},{"def":'
|
48
|
+
@parser << ' 456}]'
|
49
|
+
end
|
50
|
+
|
51
|
+
it "should parse 2 JSON strings in 1 chunk" do
|
52
|
+
@callback.should_receive(:call).with(@final).exactly(2).times
|
53
|
+
@parser << '[{"abc": 123},{"def": 456}][{"abc": 123},{"def": 456}]'
|
54
|
+
end
|
55
|
+
|
56
|
+
it "should parse 2 JSON strings from an IO" do
|
57
|
+
@callback.should_receive(:call).with(@final).exactly(2).times
|
58
|
+
@parser.parse(StringIO.new('[{"abc": 123},{"def": 456}][{"abc": 123},{"def": 456}]'))
|
59
|
+
end
|
60
|
+
|
61
|
+
it "should parse a JSON string an IO and fire callback once" do
|
62
|
+
@callback.should_receive(:call).with(@final)
|
63
|
+
@parser.parse(StringIO.new('[{"abc": 123},{"def": 456}]'))
|
64
|
+
end
|
65
|
+
|
66
|
+
it "should parse twitter_stream.json and fire callback 430 times" do
|
67
|
+
path = File.expand_path(File.dirname(__FILE__) + '/../../benchmark/subjects/twitter_stream.json')
|
68
|
+
json = File.new(path, 'r')
|
69
|
+
@callback.should_receive(:call).exactly(430).times
|
70
|
+
@parser.parse(json)
|
71
|
+
end
|
72
|
+
end
|