brianmario-yajl-ruby 0.4.8 → 0.5.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (44)
  1. data/CHANGELOG.rdoc +21 -0
  2. data/README.rdoc +13 -0
  3. data/VERSION.yml +2 -2
  4. data/benchmark/encode.rb +4 -3
  5. data/benchmark/encode_json_and_marshal.rb +4 -3
  6. data/benchmark/encode_json_and_yaml.rb +4 -3
  7. data/benchmark/parse.rb +5 -4
  8. data/benchmark/parse_json_and_marshal.rb +4 -3
  9. data/benchmark/parse_json_and_yaml.rb +4 -3
  10. data/benchmark/parse_stream.rb +48 -0
  11. data/benchmark/subjects/twitter_stream.json +430 -0
  12. data/examples/http/twitter_search_api.rb +15 -0
  13. data/examples/http/twitter_stream_api.rb +24 -0
  14. data/examples/parsing/from_file.rb +14 -0
  15. data/examples/parsing/from_stdin.rb +9 -0
  16. data/examples/parsing/from_string.rb +15 -0
  17. data/ext/api/yajl_parse.h +3 -0
  18. data/ext/extconf.rb +2 -1
  19. data/ext/yajl.c +5 -0
  20. data/ext/yajl_ext.c +235 -122
  21. data/ext/yajl_ext.h +49 -36
  22. data/ext/yajl_lex.c +7 -0
  23. data/ext/yajl_lex.h +2 -0
  24. data/ext/yajl_parser.c +3 -1
  25. data/lib/yajl.rb +18 -17
  26. data/lib/yajl/bzip2.rb +1 -1
  27. data/lib/yajl/bzip2/stream_reader.rb +1 -1
  28. data/lib/yajl/bzip2/stream_writer.rb +1 -1
  29. data/lib/yajl/deflate.rb +1 -1
  30. data/lib/yajl/deflate/stream_reader.rb +1 -1
  31. data/lib/yajl/deflate/stream_writer.rb +1 -1
  32. data/lib/yajl/gzip.rb +1 -1
  33. data/lib/yajl/gzip/stream_reader.rb +1 -1
  34. data/lib/yajl/gzip/stream_writer.rb +1 -1
  35. data/lib/yajl/http_stream.rb +21 -5
  36. data/spec/encoding/encoding_spec.rb +14 -9
  37. data/spec/http/http_spec.rb +1 -5
  38. data/spec/parsing/active_support_spec.rb +5 -3
  39. data/spec/parsing/chunked_spec.rb +72 -0
  40. data/spec/parsing/fixtures_spec.rb +4 -2
  41. data/spec/parsing/one_off_spec.rb +2 -1
  42. data/spec/spec_helper.rb +8 -1
  43. data/yajl-ruby.gemspec +17 -3
  44. metadata +16 -2
data/ext/yajl_ext.h CHANGED
@@ -2,45 +2,58 @@
2
2
  #include "api/yajl_gen.h"
3
3
  #include <ruby.h>
4
4
 
5
- #define READ_BUFSIZE 4096
6
-
7
- static VALUE cParseError, mYajl, mStream, mChunked;
8
- static ID intern_io_read, intern_eof, intern_respond_to, intern_call, intern_keys, intern_to_s;
9
- static int readBufferSize = READ_BUFSIZE;
10
- static yajl_parser_config cfg = {1, 1};
11
-
12
- yajl_handle streamParser, chunkedParser;
13
- VALUE context = Qnil;
14
- VALUE parse_complete_callback = Qnil;
15
-
16
- void check_and_fire_callback(void * ctx);
17
- void set_static_value(void * ctx, VALUE val);
18
-
19
- static int found_null(void * ctx);
20
- static int found_boolean(void * ctx, int boolean);
21
- static int found_number(void * ctx, const char * numberVal, unsigned int numberLen);
22
- static int found_string(void * ctx, const unsigned char * stringVal, unsigned int stringLen);
23
- static int found_hash_key(void * ctx, const unsigned char * stringVal, unsigned int stringLen);
24
- static int found_start_hash(void * ctx);
25
- static int found_end_hash(void * ctx);
26
- static int found_start_array(void * ctx);
27
- static int found_end_array(void * ctx);
5
+ #define READ_BUFSIZE 65536
28
6
 
7
+ static VALUE cParseError, mYajl, cParser, cEncoder;
8
+ static ID intern_io_read, intern_eof, intern_call, intern_keys, intern_to_s,
9
+ sym_allow_comments, sym_check_utf8, sym_pretty, sym_indent;
10
+
11
+ #define GetParser(obj, sval) (sval = (struct yajl_parser_wrapper*)DATA_PTR(obj));
12
+ #define GetEncoder(obj, sval) (sval = (yajl_gen*)DATA_PTR(obj));
13
+
14
+ void yajl_check_and_fire_callback(void * ctx);
15
+ void yajl_set_static_value(void * ctx, VALUE val);
16
+ void yajl_encode_part(yajl_gen hand, VALUE obj, VALUE io);
17
+
18
+ static int yajl_found_null(void * ctx);
19
+ static int yajl_found_boolean(void * ctx, int boolean);
20
+ static int yajl_found_number(void * ctx, const char * numberVal, unsigned int numberLen);
21
+ static int yajl_found_string(void * ctx, const unsigned char * stringVal, unsigned int stringLen);
22
+ static int yajl_found_hash_key(void * ctx, const unsigned char * stringVal, unsigned int stringLen);
23
+ static int yajl_found_start_hash(void * ctx);
24
+ static int yajl_found_end_hash(void * ctx);
25
+ static int yajl_found_start_array(void * ctx);
26
+ static int yajl_found_end_array(void * ctx);
29
27
  static yajl_callbacks callbacks = {
30
- found_null,
31
- found_boolean,
28
+ yajl_found_null,
29
+ yajl_found_boolean,
32
30
  NULL,
33
31
  NULL,
34
- found_number,
35
- found_string,
36
- found_start_hash,
37
- found_hash_key,
38
- found_end_hash,
39
- found_start_array,
40
- found_end_array
32
+ yajl_found_number,
33
+ yajl_found_string,
34
+ yajl_found_start_hash,
35
+ yajl_found_hash_key,
36
+ yajl_found_end_hash,
37
+ yajl_found_start_array,
38
+ yajl_found_end_array
41
39
  };
42
40
 
43
- static VALUE t_setParseComplete(VALUE self, VALUE callback);
44
- static VALUE t_parseSome(VALUE self, VALUE string);
45
- static VALUE t_parse(VALUE self, VALUE io);
46
- static VALUE t_encode(VALUE self, VALUE obj, VALUE io);
41
+ struct yajl_parser_wrapper {
42
+ VALUE builderStack;
43
+ VALUE parse_complete_callback;
44
+ int nestedArrayLevel;
45
+ int nestedHashLevel;
46
+ yajl_handle parser;
47
+ };
48
+ static void yajl_parser_wrapper_free(void * wrapper);
49
+ static void yajl_parser_wrapper_mark(void * wrapper);
50
+
51
+ static VALUE rb_yajl_parser_new(int argc, VALUE * argv, VALUE self);
52
+ static VALUE rb_yajl_parser_init(int argc, VALUE * argv, VALUE self);
53
+ static VALUE rb_yajl_parser_parse(int argc, VALUE * argv, VALUE self);
54
+ static VALUE rb_yajl_parser_parse_chunk(VALUE self, VALUE chunk);
55
+ static VALUE rb_yajl_set_complete_cb(VALUE self, VALUE callback);
56
+
57
+ static VALUE rb_yajl_encoder_new(int argc, VALUE * argv, VALUE klass);
58
+ static VALUE rb_yajl_encoder_init(int argc, VALUE * argv, VALUE self);
59
+ static VALUE rb_yajl_encoder_encode(VALUE self, VALUE obj, VALUE io);
data/ext/yajl_lex.c CHANGED
@@ -129,6 +129,13 @@ yajl_lex_alloc(yajl_alloc_funcs * alloc,
129
129
  return lxr;
130
130
  }
131
131
 
132
+ yajl_lexer
133
+ yajl_lex_realloc(yajl_lexer orig) {
134
+ yajl_lexer newLxr = yajl_lex_alloc(orig->alloc, orig->allowComments, orig->validateUTF8);
135
+ yajl_lex_free(orig);
136
+ return newLxr;
137
+ }
138
+
132
139
  void
133
140
  yajl_lex_free(yajl_lexer lxr)
134
141
  {
data/ext/yajl_lex.h CHANGED
@@ -67,6 +67,8 @@ yajl_lexer yajl_lex_alloc(yajl_alloc_funcs * alloc,
67
67
  unsigned int allowComments,
68
68
  unsigned int validateUTF8);
69
69
 
70
+ yajl_lexer yajl_lex_realloc(yajl_lexer orig);
71
+
70
72
  void yajl_lex_free(yajl_lexer lexer);
71
73
 
72
74
  /**
data/ext/yajl_parser.c CHANGED
@@ -307,7 +307,9 @@ yajl_do_parse(yajl_handle hand, unsigned int * offset,
307
307
  {
308
308
  yajl_state s = yajl_bs_current(hand->stateStack);
309
309
  if (s == yajl_state_start) {
310
- yajl_bs_set(hand->stateStack, yajl_state_parse_complete);
310
+ // HACK: is this even safe to do?
311
+ // yajl_bs_set(hand->stateStack, yajl_state_parse_complete);
312
+ yajl_reset_parser(hand);
311
313
  } else if (s == yajl_state_map_need_val) {
312
314
  yajl_bs_set(hand->stateStack, yajl_state_map_got_val);
313
315
  } else {
data/lib/yajl.rb CHANGED
@@ -13,28 +13,29 @@ require 'yajl_ext'
13
13
  #
14
14
  # Ruby bindings to the excellent Yajl (Yet Another JSON Parser) ANSI C library.
15
15
  module Yajl
16
- VERSION = "0.4.8"
16
+ VERSION = "0.5.0"
17
17
 
18
- # == Yajl::Chunked
19
- #
20
- # This module contains methods for parsing JSON in chunks.
21
- # The use case here is that the caller may not be able to get access to the IO to which
22
- # JSON content is being received. Rendering Yajl::Stream dead to them.
23
- #
24
- # With the methods in this module, the caller will be able to pass in chunks of JSON content
25
- # until a full object has been parsed from said content.
26
- #
27
- # In order for this process to work correctly, the caller needs to specify a callback which
28
- # is passed the constructed object. The only requirement currently of this callback is that
29
- # it respond to #call and accept a single parameter (the object that was created from parsing).
30
- module Chunked; end
31
-
32
- # == Yajl::Stream
18
+ # == Yajl::Parser
33
19
  #
34
20
  # This module contains methods for parsing JSON directly from an IO object.
35
21
  #
36
22
  # The only basic requirment currently is that the IO object respond to #read(len) and eof?
37
23
  #
38
24
  # The IO is parsed until a complete JSON object has been read and a ruby object will be returned.
39
- module Stream; end
25
+ class Parser; end
26
+
27
+ # Deprecated
28
+ module Stream
29
+ # Deprecated
30
+ def self.parse(io)
31
+ STDERR.puts "WARNING: Yajl::Stream has be deprecated and will most likely be gone in the next release. Use the Yajl::Parser class instead."
32
+ Parser.new.parse(io)
33
+ end
34
+
35
+ # Deprecated
36
+ def self.encode(obj, io)
37
+ STDERR.puts "WARNING: Yajl::Stream has be deprecated and will most likely be gone in the next release. Use the Yajl::Encoder class instead."
38
+ Encoder.new.encode(obj, io)
39
+ end
40
+ end
40
41
  end
data/lib/yajl/bzip2.rb CHANGED
@@ -1,6 +1,6 @@
1
1
  # encoding: UTF-8
2
2
 
3
- require 'yajl' unless defined?(Yajl::Stream)
3
+ require 'yajl' unless defined?(Yajl::Parser)
4
4
 
5
5
  begin
6
6
  require 'bzip2' unless defined?(Bzip2)
data/lib/yajl/bzip2/stream_reader.rb CHANGED
@@ -15,7 +15,7 @@ module Yajl
15
15
  end
16
16
 
17
17
  def self.parse(io)
18
- Yajl::Stream.parse(new(io))
18
+ Yajl::Parser.new.parse(new(io))
19
19
  end
20
20
  end
21
21
  end
data/lib/yajl/bzip2/stream_writer.rb CHANGED
@@ -4,7 +4,7 @@ module Yajl
4
4
  # === Yajl::Bzip2::StreamWriter
5
5
  class StreamWriter < ::Bzip2::Writer
6
6
  def self.encode(obj, io)
7
- Yajl::Stream.encode(obj, new(io))
7
+ Yajl::Encoder.new.encode(obj, new(io))
8
8
  end
9
9
  end
10
10
  end
data/lib/yajl/deflate.rb CHANGED
@@ -1,6 +1,6 @@
1
1
  # encoding: UTF-8
2
2
 
3
- require 'yajl' unless defined?(Yajl::Stream)
3
+ require 'yajl' unless defined?(Yajl::Parser)
4
4
  require 'zlib' unless defined?(Zlib)
5
5
  require 'yajl/deflate/stream_reader.rb'
6
6
  require 'yajl/deflate/stream_writer.rb'
data/lib/yajl/deflate/stream_reader.rb CHANGED
@@ -19,7 +19,7 @@ module Yajl
19
19
  alias :eof? :finished?
20
20
 
21
21
  def self.parse(io, options=nil)
22
- Yajl::Stream.parse(new(io, options))
22
+ Yajl::Parser.new.parse(new(io, options))
23
23
  end
24
24
  end
25
25
  end
data/lib/yajl/deflate/stream_writer.rb CHANGED
@@ -9,7 +9,7 @@ module Yajl
9
9
  end
10
10
 
11
11
  def self.encode(obj, io)
12
- Yajl::Stream.encode(obj, new(io))
12
+ Yajl::Encoder.new.encode(obj, new(io))
13
13
  end
14
14
  end
15
15
  end
data/lib/yajl/gzip.rb CHANGED
@@ -1,6 +1,6 @@
1
1
  # encoding: UTF-8
2
2
 
3
- require 'yajl' unless defined?(Yajl::Stream)
3
+ require 'yajl' unless defined?(Yajl::Parser)
4
4
  require 'zlib' unless defined?(Zlib)
5
5
  require 'yajl/gzip/stream_reader.rb'
6
6
  require 'yajl/gzip/stream_writer.rb'
data/lib/yajl/gzip/stream_reader.rb CHANGED
@@ -15,7 +15,7 @@ module Yajl
15
15
  end
16
16
 
17
17
  def self.parse(io)
18
- Yajl::Stream.parse(new(io))
18
+ Yajl::Parser.new.parse(new(io))
19
19
  end
20
20
  end
21
21
  end
data/lib/yajl/gzip/stream_writer.rb CHANGED
@@ -4,7 +4,7 @@ module Yajl
4
4
  # === Yajl::Gzip::StreamWriter
5
5
  class StreamWriter < ::Zlib::GzipWriter
6
6
  def self.encode(obj, io)
7
- Yajl::Stream.encode(obj, new(io))
7
+ Yajl::Encoder.new.encode(obj, new(io))
8
8
  end
9
9
  end
10
10
  end
data/lib/yajl/http_stream.rb CHANGED
@@ -1,6 +1,6 @@
1
1
  # encoding: UTF-8
2
2
  require 'socket' unless defined?(Socket)
3
- require 'yajl' unless defined?(Yajl::Stream)
3
+ require 'yajl' unless defined?(Yajl::Parser)
4
4
 
5
5
  module Yajl
6
6
  # == Yajl::HttpStream
@@ -24,7 +24,7 @@ module Yajl
24
24
  # 3. the response is read until the end of the headers
25
25
  # 4. the _socket itself_ is passed directly to Yajl, for direct parsing off the stream;
26
26
  # As it's being received over the wire!
27
- def self.get(uri, opts = {})
27
+ def self.get(uri, opts = {}, &block)
28
28
  user_agent = opts.has_key?(['User-Agent']) ? opts['User-Agent'] : "Yajl::HttpStream #{Yajl::VERSION}"
29
29
 
30
30
  socket = TCPSocket.new(uri.host, uri.port)
@@ -61,9 +61,25 @@ module Yajl
61
61
  end
62
62
  end
63
63
  end
64
-
64
+ parser = Yajl::Parser.new
65
65
  if response_head[:headers]["Transfer-Encoding"] == 'chunked'
66
- raise Exception, "Chunked responses not supported yet (I'm working on this)"
66
+ if block_given?
67
+ parser.on_parse_complete = block
68
+ chunkLeft = 0
69
+ while !socket.eof? && (size = socket.gets.hex)
70
+ next if size == 0
71
+ json = socket.read(size)
72
+ chunkLeft = size-json.size
73
+ if chunkLeft == 0
74
+ parser << json
75
+ else
76
+ # received only part of the chunk, grab the rest
77
+ parser << socket.read(chunkLeft)
78
+ end
79
+ end
80
+ else
81
+ raise Exception, "Chunked responses detected, but no block given to handle the chunks."
82
+ end
67
83
  else
68
84
  content_type = response_head[:headers]["Content-Type"].split('; ')
69
85
  content_type = content_type.first
@@ -76,7 +92,7 @@ module Yajl
76
92
  when "bzip2"
77
93
  return Yajl::Bzip2::StreamReader.parse(socket)
78
94
  else
79
- return Yajl::Stream.parse(socket)
95
+ return Yajl::Parser.new.parse(socket)
80
96
  end
81
97
  else
82
98
  raise InvalidContentType, "The response MIME type #{content_type}"
data/spec/encoding/encoding_spec.rb CHANGED
@@ -6,18 +6,23 @@ describe "Yajl JSON encoder" do
6
6
 
7
7
  FILES.each do |file|
8
8
  it "should encode #{File.basename(file)}" do
9
- input = File.new(File.expand_path(file), 'r')
10
- hash = Yajl::Stream.parse(input)
9
+ # we don't care about testing the stream subject as it has multiple JSON strings in it
10
+ if File.basename(file) != 'twitter_stream.json'
11
+ input = File.new(File.expand_path(file), 'r')
12
+ io = StringIO.new
13
+ parser = Yajl::Parser.new
14
+ encoder = Yajl::Encoder.new
11
15
 
12
- io = StringIO.new
13
- output = Yajl::Stream.encode(hash, io)
14
- io.rewind
15
- hash2 = Yajl::Stream.parse(io)
16
+ hash = parser.parse(input)
17
+ output = encoder.encode(hash, io)
18
+ io.rewind
19
+ hash2 = parser.parse(io)
16
20
 
17
- io.close
18
- input.close
21
+ io.close
22
+ input.close
19
23
 
20
- hash.should == hash2
24
+ hash.should == hash2
25
+ end
21
26
  end
22
27
  end
23
28
  end
data/spec/http/http_spec.rb CHANGED
@@ -21,7 +21,7 @@ describe "Yajl HTTP GET request" do
21
21
  @gzip = File.new(File.expand_path(File.dirname(__FILE__) + '/fixtures/http.gzip.dump'), 'r')
22
22
 
23
23
  parse_off_headers(@raw)
24
- @raw_template_hash = Yajl::Stream.parse(@raw)
24
+ @raw_template_hash = Yajl::Parser.new.parse(@raw)
25
25
  @raw.rewind
26
26
  end
27
27
 
@@ -32,10 +32,6 @@ describe "Yajl HTTP GET request" do
32
32
  @gzip.close unless @gzip.closed?
33
33
  end
34
34
 
35
- after(:each) do
36
- GC.start
37
- end
38
-
39
35
  it "should parse a raw response" do
40
36
  file = File.expand_path(File.dirname(__FILE__) + '/http/http.raw.dump')
41
37
  uri = 'file://'+file
data/spec/parsing/active_support_spec.rb CHANGED
@@ -37,14 +37,16 @@ describe "ActiveSupport test cases" do
37
37
  TESTS.each do |json, expected|
38
38
  it "should be able to parse #{json}" do
39
39
  lambda {
40
- Yajl::Stream.parse(StringIO.new(json)).should == expected
40
+ parser = Yajl::Parser.new
41
+ parser.parse(StringIO.new(json)).should == expected
41
42
  }.should_not raise_error(Yajl::ParseError)
42
43
  end
43
44
  end
44
45
 
45
46
  it "should fail parsing {: 1}" do
46
47
  lambda {
47
- Yajl::Stream.parse(StringIO.new("{: 1}"))
48
- }.should raise_error(Yajl::ParseError)
48
+ parser = Yajl::Parser.new
49
+ parser.parse(StringIO.new("{: 1}"))
50
+ }.should raise_error(Yajl::ParseError)
49
51
  end
50
52
  end
data/spec/parsing/chunked_spec.rb ADDED
@@ -0,0 +1,72 @@
1
+ # encoding: UTF-8
2
+ require File.expand_path(File.dirname(__FILE__) + '/../spec_helper.rb')
3
+ require 'stringio'
4
+
5
+ describe "Chunked parser" do
6
+ before(:all) do
7
+ @final = [{"abc" => 123}, {"def" => 456}]
8
+ end
9
+
10
+ before(:each) do
11
+ @callback = lambda { |hash|
12
+ # no-op
13
+ }
14
+ @parser = Yajl::Parser.new
15
+ @parser.on_parse_complete = @callback
16
+ end
17
+
18
+ it "should parse a single chunk" do
19
+ @callback.should_receive(:call).with(@final)
20
+ @parser << '[{"abc": 123},{"def": 456}]'
21
+ end
22
+
23
+ it "should parse a single chunk, 3 times" do
24
+ @callback.should_receive(:call).with(@final).exactly(3).times
25
+ @parser << '[{"abc": 123},{"def": 456}]'
26
+ @parser << '[{"abc": 123},{"def": 456}]'
27
+ @parser << '[{"abc": 123},{"def": 456}]'
28
+ end
29
+
30
+ it "should parse in two chunks" do
31
+ @callback.should_receive(:call).with(@final)
32
+ @parser << '[{"abc": 123},'
33
+ @parser << '{"def": 456}]'
34
+ end
35
+
36
+ it "should parse in 2 chunks, twice" do
37
+ @callback.should_receive(:call).with(@final).exactly(2).times
38
+ @parser << '[{"abc": 123},'
39
+ @parser << '{"def": 456}]'
40
+ @parser << '[{"abc": 123},'
41
+ @parser << '{"def": 456}]'
42
+ end
43
+
44
+ it "should parse 2 JSON strings, in 3 chunks" do
45
+ @callback.should_receive(:call).with(@final).exactly(2).times
46
+ @parser << '[{"abc": 123},'
47
+ @parser << '{"def": 456}][{"abc": 123},{"def":'
48
+ @parser << ' 456}]'
49
+ end
50
+
51
+ it "should parse 2 JSON strings in 1 chunk" do
52
+ @callback.should_receive(:call).with(@final).exactly(2).times
53
+ @parser << '[{"abc": 123},{"def": 456}][{"abc": 123},{"def": 456}]'
54
+ end
55
+
56
+ it "should parse 2 JSON strings from an IO" do
57
+ @callback.should_receive(:call).with(@final).exactly(2).times
58
+ @parser.parse(StringIO.new('[{"abc": 123},{"def": 456}][{"abc": 123},{"def": 456}]'))
59
+ end
60
+
61
+ it "should parse a JSON string an IO and fire callback once" do
62
+ @callback.should_receive(:call).with(@final)
63
+ @parser.parse(StringIO.new('[{"abc": 123},{"def": 456}]'))
64
+ end
65
+
66
+ it "should parse twitter_stream.json and fire callback 430 times" do
67
+ path = File.expand_path(File.dirname(__FILE__) + '/../../benchmark/subjects/twitter_stream.json')
68
+ json = File.new(path, 'r')
69
+ @callback.should_receive(:call).exactly(430).times
70
+ @parser.parse(json)
71
+ end
72
+ end