brianmario-yajl-ruby 0.4.8 → 0.5.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (44) hide show
  1. data/CHANGELOG.rdoc +21 -0
  2. data/README.rdoc +13 -0
  3. data/VERSION.yml +2 -2
  4. data/benchmark/encode.rb +4 -3
  5. data/benchmark/encode_json_and_marshal.rb +4 -3
  6. data/benchmark/encode_json_and_yaml.rb +4 -3
  7. data/benchmark/parse.rb +5 -4
  8. data/benchmark/parse_json_and_marshal.rb +4 -3
  9. data/benchmark/parse_json_and_yaml.rb +4 -3
  10. data/benchmark/parse_stream.rb +48 -0
  11. data/benchmark/subjects/twitter_stream.json +430 -0
  12. data/examples/http/twitter_search_api.rb +15 -0
  13. data/examples/http/twitter_stream_api.rb +24 -0
  14. data/examples/parsing/from_file.rb +14 -0
  15. data/examples/parsing/from_stdin.rb +9 -0
  16. data/examples/parsing/from_string.rb +15 -0
  17. data/ext/api/yajl_parse.h +3 -0
  18. data/ext/extconf.rb +2 -1
  19. data/ext/yajl.c +5 -0
  20. data/ext/yajl_ext.c +235 -122
  21. data/ext/yajl_ext.h +49 -36
  22. data/ext/yajl_lex.c +7 -0
  23. data/ext/yajl_lex.h +2 -0
  24. data/ext/yajl_parser.c +3 -1
  25. data/lib/yajl.rb +18 -17
  26. data/lib/yajl/bzip2.rb +1 -1
  27. data/lib/yajl/bzip2/stream_reader.rb +1 -1
  28. data/lib/yajl/bzip2/stream_writer.rb +1 -1
  29. data/lib/yajl/deflate.rb +1 -1
  30. data/lib/yajl/deflate/stream_reader.rb +1 -1
  31. data/lib/yajl/deflate/stream_writer.rb +1 -1
  32. data/lib/yajl/gzip.rb +1 -1
  33. data/lib/yajl/gzip/stream_reader.rb +1 -1
  34. data/lib/yajl/gzip/stream_writer.rb +1 -1
  35. data/lib/yajl/http_stream.rb +21 -5
  36. data/spec/encoding/encoding_spec.rb +14 -9
  37. data/spec/http/http_spec.rb +1 -5
  38. data/spec/parsing/active_support_spec.rb +5 -3
  39. data/spec/parsing/chunked_spec.rb +72 -0
  40. data/spec/parsing/fixtures_spec.rb +4 -2
  41. data/spec/parsing/one_off_spec.rb +2 -1
  42. data/spec/spec_helper.rb +8 -1
  43. data/yajl-ruby.gemspec +17 -3
  44. metadata +16 -2
data/ext/yajl_ext.h CHANGED
@@ -2,45 +2,58 @@
2
2
  #include "api/yajl_gen.h"
3
3
  #include <ruby.h>
4
4
 
5
- #define READ_BUFSIZE 4096
6
-
7
- static VALUE cParseError, mYajl, mStream, mChunked;
8
- static ID intern_io_read, intern_eof, intern_respond_to, intern_call, intern_keys, intern_to_s;
9
- static int readBufferSize = READ_BUFSIZE;
10
- static yajl_parser_config cfg = {1, 1};
11
-
12
- yajl_handle streamParser, chunkedParser;
13
- VALUE context = Qnil;
14
- VALUE parse_complete_callback = Qnil;
15
-
16
- void check_and_fire_callback(void * ctx);
17
- void set_static_value(void * ctx, VALUE val);
18
-
19
- static int found_null(void * ctx);
20
- static int found_boolean(void * ctx, int boolean);
21
- static int found_number(void * ctx, const char * numberVal, unsigned int numberLen);
22
- static int found_string(void * ctx, const unsigned char * stringVal, unsigned int stringLen);
23
- static int found_hash_key(void * ctx, const unsigned char * stringVal, unsigned int stringLen);
24
- static int found_start_hash(void * ctx);
25
- static int found_end_hash(void * ctx);
26
- static int found_start_array(void * ctx);
27
- static int found_end_array(void * ctx);
5
+ #define READ_BUFSIZE 65536
28
6
 
7
+ static VALUE cParseError, mYajl, cParser, cEncoder;
8
+ static ID intern_io_read, intern_eof, intern_call, intern_keys, intern_to_s,
9
+ sym_allow_comments, sym_check_utf8, sym_pretty, sym_indent;
10
+
11
+ #define GetParser(obj, sval) (sval = (struct yajl_parser_wrapper*)DATA_PTR(obj));
12
+ #define GetEncoder(obj, sval) (sval = (yajl_gen*)DATA_PTR(obj));
13
+
14
+ void yajl_check_and_fire_callback(void * ctx);
15
+ void yajl_set_static_value(void * ctx, VALUE val);
16
+ void yajl_encode_part(yajl_gen hand, VALUE obj, VALUE io);
17
+
18
+ static int yajl_found_null(void * ctx);
19
+ static int yajl_found_boolean(void * ctx, int boolean);
20
+ static int yajl_found_number(void * ctx, const char * numberVal, unsigned int numberLen);
21
+ static int yajl_found_string(void * ctx, const unsigned char * stringVal, unsigned int stringLen);
22
+ static int yajl_found_hash_key(void * ctx, const unsigned char * stringVal, unsigned int stringLen);
23
+ static int yajl_found_start_hash(void * ctx);
24
+ static int yajl_found_end_hash(void * ctx);
25
+ static int yajl_found_start_array(void * ctx);
26
+ static int yajl_found_end_array(void * ctx);
29
27
  static yajl_callbacks callbacks = {
30
- found_null,
31
- found_boolean,
28
+ yajl_found_null,
29
+ yajl_found_boolean,
32
30
  NULL,
33
31
  NULL,
34
- found_number,
35
- found_string,
36
- found_start_hash,
37
- found_hash_key,
38
- found_end_hash,
39
- found_start_array,
40
- found_end_array
32
+ yajl_found_number,
33
+ yajl_found_string,
34
+ yajl_found_start_hash,
35
+ yajl_found_hash_key,
36
+ yajl_found_end_hash,
37
+ yajl_found_start_array,
38
+ yajl_found_end_array
41
39
  };
42
40
 
43
- static VALUE t_setParseComplete(VALUE self, VALUE callback);
44
- static VALUE t_parseSome(VALUE self, VALUE string);
45
- static VALUE t_parse(VALUE self, VALUE io);
46
- static VALUE t_encode(VALUE self, VALUE obj, VALUE io);
41
+ struct yajl_parser_wrapper {
42
+ VALUE builderStack;
43
+ VALUE parse_complete_callback;
44
+ int nestedArrayLevel;
45
+ int nestedHashLevel;
46
+ yajl_handle parser;
47
+ };
48
+ static void yajl_parser_wrapper_free(void * wrapper);
49
+ static void yajl_parser_wrapper_mark(void * wrapper);
50
+
51
+ static VALUE rb_yajl_parser_new(int argc, VALUE * argv, VALUE self);
52
+ static VALUE rb_yajl_parser_init(int argc, VALUE * argv, VALUE self);
53
+ static VALUE rb_yajl_parser_parse(int argc, VALUE * argv, VALUE self);
54
+ static VALUE rb_yajl_parser_parse_chunk(VALUE self, VALUE chunk);
55
+ static VALUE rb_yajl_set_complete_cb(VALUE self, VALUE callback);
56
+
57
+ static VALUE rb_yajl_encoder_new(int argc, VALUE * argv, VALUE klass);
58
+ static VALUE rb_yajl_encoder_init(int argc, VALUE * argv, VALUE self);
59
+ static VALUE rb_yajl_encoder_encode(VALUE self, VALUE obj, VALUE io);
data/ext/yajl_lex.c CHANGED
@@ -129,6 +129,13 @@ yajl_lex_alloc(yajl_alloc_funcs * alloc,
129
129
  return lxr;
130
130
  }
131
131
 
132
+ yajl_lexer
133
+ yajl_lex_realloc(yajl_lexer orig) {
134
+ yajl_lexer newLxr = yajl_lex_alloc(orig->alloc, orig->allowComments, orig->validateUTF8);
135
+ yajl_lex_free(orig);
136
+ return newLxr;
137
+ }
138
+
132
139
  void
133
140
  yajl_lex_free(yajl_lexer lxr)
134
141
  {
data/ext/yajl_lex.h CHANGED
@@ -67,6 +67,8 @@ yajl_lexer yajl_lex_alloc(yajl_alloc_funcs * alloc,
67
67
  unsigned int allowComments,
68
68
  unsigned int validateUTF8);
69
69
 
70
+ yajl_lexer yajl_lex_realloc(yajl_lexer orig);
71
+
70
72
  void yajl_lex_free(yajl_lexer lexer);
71
73
 
72
74
  /**
data/ext/yajl_parser.c CHANGED
@@ -307,7 +307,9 @@ yajl_do_parse(yajl_handle hand, unsigned int * offset,
307
307
  {
308
308
  yajl_state s = yajl_bs_current(hand->stateStack);
309
309
  if (s == yajl_state_start) {
310
- yajl_bs_set(hand->stateStack, yajl_state_parse_complete);
310
+ // HACK: is this even safe to do?
311
+ // yajl_bs_set(hand->stateStack, yajl_state_parse_complete);
312
+ yajl_reset_parser(hand);
311
313
  } else if (s == yajl_state_map_need_val) {
312
314
  yajl_bs_set(hand->stateStack, yajl_state_map_got_val);
313
315
  } else {
data/lib/yajl.rb CHANGED
@@ -13,28 +13,29 @@ require 'yajl_ext'
13
13
  #
14
14
  # Ruby bindings to the excellent Yajl (Yet Another JSON Parser) ANSI C library.
15
15
  module Yajl
16
- VERSION = "0.4.8"
16
+ VERSION = "0.5.0"
17
17
 
18
- # == Yajl::Chunked
19
- #
20
- # This module contains methods for parsing JSON in chunks.
21
- # The use case here is that the caller may not be able to get access to the IO to which
22
- # JSON content is being received. Rendering Yajl::Stream dead to them.
23
- #
24
- # With the methods in this module, the caller will be able to pass in chunks of JSON content
25
- # until a full object has been parsed from said content.
26
- #
27
- # In order for this process to work correctly, the caller needs to specify a callback which
28
- # is passed the constructed object. The only requirement currently of this callback is that
29
- # it respond to #call and accept a single parameter (the object that was created from parsing).
30
- module Chunked; end
31
-
32
- # == Yajl::Stream
18
+ # == Yajl::Parser
33
19
  #
34
20
  # This module contains methods for parsing JSON directly from an IO object.
35
21
  #
36
22
  # The only basic requirement currently is that the IO object respond to #read(len) and eof?
37
23
  #
38
24
  # The IO is parsed until a complete JSON object has been read and a ruby object will be returned.
39
- module Stream; end
25
+ class Parser; end
26
+
27
+ # Deprecated
28
+ module Stream
29
+ # Deprecated
30
+ def self.parse(io)
31
+ STDERR.puts "WARNING: Yajl::Stream has be deprecated and will most likely be gone in the next release. Use the Yajl::Parser class instead."
32
+ Parser.new.parse(io)
33
+ end
34
+
35
+ # Deprecated
36
+ def self.encode(obj, io)
37
+ STDERR.puts "WARNING: Yajl::Stream has be deprecated and will most likely be gone in the next release. Use the Yajl::Encoder class instead."
38
+ Encoder.new.encode(obj, io)
39
+ end
40
+ end
40
41
  end
data/lib/yajl/bzip2.rb CHANGED
@@ -1,6 +1,6 @@
1
1
  # encoding: UTF-8
2
2
 
3
- require 'yajl' unless defined?(Yajl::Stream)
3
+ require 'yajl' unless defined?(Yajl::Parser)
4
4
 
5
5
  begin
6
6
  require 'bzip2' unless defined?(Bzip2)
@@ -15,7 +15,7 @@ module Yajl
15
15
  end
16
16
 
17
17
  def self.parse(io)
18
- Yajl::Stream.parse(new(io))
18
+ Yajl::Parser.new.parse(new(io))
19
19
  end
20
20
  end
21
21
  end
@@ -4,7 +4,7 @@ module Yajl
4
4
  # === Yajl::Bzip2::StreamWriter
5
5
  class StreamWriter < ::Bzip2::Writer
6
6
  def self.encode(obj, io)
7
- Yajl::Stream.encode(obj, new(io))
7
+ Yajl::Encoder.new.encode(obj, new(io))
8
8
  end
9
9
  end
10
10
  end
data/lib/yajl/deflate.rb CHANGED
@@ -1,6 +1,6 @@
1
1
  # encoding: UTF-8
2
2
 
3
- require 'yajl' unless defined?(Yajl::Stream)
3
+ require 'yajl' unless defined?(Yajl::Parser)
4
4
  require 'zlib' unless defined?(Zlib)
5
5
  require 'yajl/deflate/stream_reader.rb'
6
6
  require 'yajl/deflate/stream_writer.rb'
@@ -19,7 +19,7 @@ module Yajl
19
19
  alias :eof? :finished?
20
20
 
21
21
  def self.parse(io, options=nil)
22
- Yajl::Stream.parse(new(io, options))
22
+ Yajl::Parser.new.parse(new(io, options))
23
23
  end
24
24
  end
25
25
  end
@@ -9,7 +9,7 @@ module Yajl
9
9
  end
10
10
 
11
11
  def self.encode(obj, io)
12
- Yajl::Stream.encode(obj, new(io))
12
+ Yajl::Encoder.new.encode(obj, new(io))
13
13
  end
14
14
  end
15
15
  end
data/lib/yajl/gzip.rb CHANGED
@@ -1,6 +1,6 @@
1
1
  # encoding: UTF-8
2
2
 
3
- require 'yajl' unless defined?(Yajl::Stream)
3
+ require 'yajl' unless defined?(Yajl::Parser)
4
4
  require 'zlib' unless defined?(Zlib)
5
5
  require 'yajl/gzip/stream_reader.rb'
6
6
  require 'yajl/gzip/stream_writer.rb'
@@ -15,7 +15,7 @@ module Yajl
15
15
  end
16
16
 
17
17
  def self.parse(io)
18
- Yajl::Stream.parse(new(io))
18
+ Yajl::Parser.new.parse(new(io))
19
19
  end
20
20
  end
21
21
  end
@@ -4,7 +4,7 @@ module Yajl
4
4
  # === Yajl::Gzip::StreamWriter
5
5
  class StreamWriter < ::Zlib::GzipWriter
6
6
  def self.encode(obj, io)
7
- Yajl::Stream.encode(obj, new(io))
7
+ Yajl::Encoder.new.encode(obj, new(io))
8
8
  end
9
9
  end
10
10
  end
@@ -1,6 +1,6 @@
1
1
  # encoding: UTF-8
2
2
  require 'socket' unless defined?(Socket)
3
- require 'yajl' unless defined?(Yajl::Stream)
3
+ require 'yajl' unless defined?(Yajl::Parser)
4
4
 
5
5
  module Yajl
6
6
  # == Yajl::HttpStream
@@ -24,7 +24,7 @@ module Yajl
24
24
  # 3. the response is read until the end of the headers
25
25
  # 4. the _socket itself_ is passed directly to Yajl, for direct parsing off the stream;
26
26
  # As it's being received over the wire!
27
- def self.get(uri, opts = {})
27
+ def self.get(uri, opts = {}, &block)
28
28
  user_agent = opts.has_key?(['User-Agent']) ? opts['User-Agent'] : "Yajl::HttpStream #{Yajl::VERSION}"
29
29
 
30
30
  socket = TCPSocket.new(uri.host, uri.port)
@@ -61,9 +61,25 @@ module Yajl
61
61
  end
62
62
  end
63
63
  end
64
-
64
+ parser = Yajl::Parser.new
65
65
  if response_head[:headers]["Transfer-Encoding"] == 'chunked'
66
- raise Exception, "Chunked responses not supported yet (I'm working on this)"
66
+ if block_given?
67
+ parser.on_parse_complete = block
68
+ chunkLeft = 0
69
+ while !socket.eof? && (size = socket.gets.hex)
70
+ next if size == 0
71
+ json = socket.read(size)
72
+ chunkLeft = size-json.size
73
+ if chunkLeft == 0
74
+ parser << json
75
+ else
76
+ # received only part of the chunk, grab the rest
77
+ parser << socket.read(chunkLeft)
78
+ end
79
+ end
80
+ else
81
+ raise Exception, "Chunked responses detected, but no block given to handle the chunks."
82
+ end
67
83
  else
68
84
  content_type = response_head[:headers]["Content-Type"].split('; ')
69
85
  content_type = content_type.first
@@ -76,7 +92,7 @@ module Yajl
76
92
  when "bzip2"
77
93
  return Yajl::Bzip2::StreamReader.parse(socket)
78
94
  else
79
- return Yajl::Stream.parse(socket)
95
+ return Yajl::Parser.new.parse(socket)
80
96
  end
81
97
  else
82
98
  raise InvalidContentType, "The response MIME type #{content_type}"
@@ -6,18 +6,23 @@ describe "Yajl JSON encoder" do
6
6
 
7
7
  FILES.each do |file|
8
8
  it "should encode #{File.basename(file)}" do
9
- input = File.new(File.expand_path(file), 'r')
10
- hash = Yajl::Stream.parse(input)
9
+ # we don't care about testing the stream subject as it has multiple JSON strings in it
10
+ if File.basename(file) != 'twitter_stream.json'
11
+ input = File.new(File.expand_path(file), 'r')
12
+ io = StringIO.new
13
+ parser = Yajl::Parser.new
14
+ encoder = Yajl::Encoder.new
11
15
 
12
- io = StringIO.new
13
- output = Yajl::Stream.encode(hash, io)
14
- io.rewind
15
- hash2 = Yajl::Stream.parse(io)
16
+ hash = parser.parse(input)
17
+ output = encoder.encode(hash, io)
18
+ io.rewind
19
+ hash2 = parser.parse(io)
16
20
 
17
- io.close
18
- input.close
21
+ io.close
22
+ input.close
19
23
 
20
- hash.should == hash2
24
+ hash.should == hash2
25
+ end
21
26
  end
22
27
  end
23
28
  end
@@ -21,7 +21,7 @@ describe "Yajl HTTP GET request" do
21
21
  @gzip = File.new(File.expand_path(File.dirname(__FILE__) + '/fixtures/http.gzip.dump'), 'r')
22
22
 
23
23
  parse_off_headers(@raw)
24
- @raw_template_hash = Yajl::Stream.parse(@raw)
24
+ @raw_template_hash = Yajl::Parser.new.parse(@raw)
25
25
  @raw.rewind
26
26
  end
27
27
 
@@ -32,10 +32,6 @@ describe "Yajl HTTP GET request" do
32
32
  @gzip.close unless @gzip.closed?
33
33
  end
34
34
 
35
- after(:each) do
36
- GC.start
37
- end
38
-
39
35
  it "should parse a raw response" do
40
36
  file = File.expand_path(File.dirname(__FILE__) + '/http/http.raw.dump')
41
37
  uri = 'file://'+file
@@ -37,14 +37,16 @@ describe "ActiveSupport test cases" do
37
37
  TESTS.each do |json, expected|
38
38
  it "should be able to parse #{json}" do
39
39
  lambda {
40
- Yajl::Stream.parse(StringIO.new(json)).should == expected
40
+ parser = Yajl::Parser.new
41
+ parser.parse(StringIO.new(json)).should == expected
41
42
  }.should_not raise_error(Yajl::ParseError)
42
43
  end
43
44
  end
44
45
 
45
46
  it "should fail parsing {: 1}" do
46
47
  lambda {
47
- Yajl::Stream.parse(StringIO.new("{: 1}"))
48
- }.should raise_error(Yajl::ParseError)
48
+ parser = Yajl::Parser.new
49
+ parser.parse(StringIO.new("{: 1}"))
50
+ }.should raise_error(Yajl::ParseError)
49
51
  end
50
52
  end
@@ -0,0 +1,72 @@
1
+ # encoding: UTF-8
2
+ require File.expand_path(File.dirname(__FILE__) + '/../spec_helper.rb')
3
+ require 'stringio'
4
+
5
+ describe "Chunked parser" do
6
+ before(:all) do
7
+ @final = [{"abc" => 123}, {"def" => 456}]
8
+ end
9
+
10
+ before(:each) do
11
+ @callback = lambda { |hash|
12
+ # no-op
13
+ }
14
+ @parser = Yajl::Parser.new
15
+ @parser.on_parse_complete = @callback
16
+ end
17
+
18
+ it "should parse a single chunk" do
19
+ @callback.should_receive(:call).with(@final)
20
+ @parser << '[{"abc": 123},{"def": 456}]'
21
+ end
22
+
23
+ it "should parse a single chunk, 3 times" do
24
+ @callback.should_receive(:call).with(@final).exactly(3).times
25
+ @parser << '[{"abc": 123},{"def": 456}]'
26
+ @parser << '[{"abc": 123},{"def": 456}]'
27
+ @parser << '[{"abc": 123},{"def": 456}]'
28
+ end
29
+
30
+ it "should parse in two chunks" do
31
+ @callback.should_receive(:call).with(@final)
32
+ @parser << '[{"abc": 123},'
33
+ @parser << '{"def": 456}]'
34
+ end
35
+
36
+ it "should parse in 2 chunks, twice" do
37
+ @callback.should_receive(:call).with(@final).exactly(2).times
38
+ @parser << '[{"abc": 123},'
39
+ @parser << '{"def": 456}]'
40
+ @parser << '[{"abc": 123},'
41
+ @parser << '{"def": 456}]'
42
+ end
43
+
44
+ it "should parse 2 JSON strings, in 3 chunks" do
45
+ @callback.should_receive(:call).with(@final).exactly(2).times
46
+ @parser << '[{"abc": 123},'
47
+ @parser << '{"def": 456}][{"abc": 123},{"def":'
48
+ @parser << ' 456}]'
49
+ end
50
+
51
+ it "should parse 2 JSON strings in 1 chunk" do
52
+ @callback.should_receive(:call).with(@final).exactly(2).times
53
+ @parser << '[{"abc": 123},{"def": 456}][{"abc": 123},{"def": 456}]'
54
+ end
55
+
56
+ it "should parse 2 JSON strings from an IO" do
57
+ @callback.should_receive(:call).with(@final).exactly(2).times
58
+ @parser.parse(StringIO.new('[{"abc": 123},{"def": 456}][{"abc": 123},{"def": 456}]'))
59
+ end
60
+
61
+ it "should parse a JSON string an IO and fire callback once" do
62
+ @callback.should_receive(:call).with(@final)
63
+ @parser.parse(StringIO.new('[{"abc": 123},{"def": 456}]'))
64
+ end
65
+
66
+ it "should parse twitter_stream.json and fire callback 430 times" do
67
+ path = File.expand_path(File.dirname(__FILE__) + '/../../benchmark/subjects/twitter_stream.json')
68
+ json = File.new(path, 'r')
69
+ @callback.should_receive(:call).exactly(430).times
70
+ @parser.parse(json)
71
+ end
72
+ end