java_bin 0.3.0 → 0.3.1
Sign up to get free protection for your applications and to get access to all the features.
- data/CHANGELOG +3 -0
- data/README.rdoc +10 -3
- data/VERSION +1 -1
- data/ext/java_bin/ext/parser.c +25 -14
- data/ext/java_bin/ext/parser.h +2 -11
- data/java_bin.gemspec +2 -2
- data/test/test_java_bin_parser.rb +8 -2
- metadata +2 -2
data/CHANGELOG
CHANGED
data/README.rdoc
CHANGED
@@ -4,7 +4,7 @@ This is an Apache Solr JavaBin format (binary format) implementation for Ruby.
|
|
4
4
|
|
5
5
|
== Features
|
6
6
|
|
7
|
-
* fast parse, and less network traffic.
|
7
|
+
* fast parse(2.5 - 5 times faster than ruby eval), and less network traffic.
|
8
8
|
MRI 1.8.7
|
9
9
|
[data1]
|
10
10
|
ruby eval parse. 5000 times. elapsed time 1.282744
|
@@ -45,8 +45,8 @@ This is an Apache Solr JavaBin format (binary format) implementation for Ruby.
|
|
45
45
|
== Requirements
|
46
46
|
|
47
47
|
* Ruby1.8.7 or later (include 1.9.x)
|
48
|
-
* (recommended) C compiler: you can also use java_bin without c extension, but 'pure' is 30 times slower than 'ext'.
|
49
|
-
* JavaBin has been tested with MRI 1.8.7, REE 1.8.7, YARV 1.9.2 pre1 on Ubuntu Linux 9.10 (32bit) and MRI 1.8.6 on Windows Vista
|
48
|
+
* (recommended) C compiler (gcc or vc++): you can also use java_bin without c extension, but 'pure' is 30 times slower than 'ext'.
|
49
|
+
* JavaBin has been tested with MRI 1.8.7, REE 1.8.7, YARV 1.9.2 pre1 on Ubuntu Linux 9.10 (32bit) and MRI 1.8.6 on Windows Vista (32bit), and Apache Solr 1.4
|
50
50
|
|
51
51
|
== Install
|
52
52
|
|
@@ -55,6 +55,11 @@ This is an Apache Solr JavaBin format (binary format) implementation for Ruby.
|
|
55
55
|
gem install java_bin
|
56
56
|
|
57
57
|
(Windows)
|
58
|
+
fix (path to)\ruby\lib\ruby\1.8\i386-mswin32\config.h
|
59
|
+
#if _MSC_VER != 1200
|
60
|
+
↓
|
61
|
+
#if _MSC_VER < 1200
|
62
|
+
|
58
63
|
start => visual studio c++ 200x xxxx edition => visual studio tools => visual studio 200x command prompt
|
59
64
|
gem sources -a http://gemcutter.org
|
60
65
|
gem install java_bin
|
@@ -86,6 +91,8 @@ This is an Apache Solr JavaBin format (binary format) implementation for Ruby.
|
|
86
91
|
|
87
92
|
* more parse speed
|
88
93
|
* license
|
94
|
+
* pure parse encoding bug (1.9.x)
|
95
|
+
* shared string over parsing
|
89
96
|
* windows build(1.9.x)
|
90
97
|
* 64bit build
|
91
98
|
* builder(writer)作成
|
data/VERSION
CHANGED
@@ -1 +1 @@
|
|
1
|
-
0.3.0
|
1
|
+
0.3.1
|
data/ext/java_bin/ext/parser.c
CHANGED
@@ -72,11 +72,11 @@ static VALUE JavaBinParser_read_small_long(JAVA_BIN_PARSER* ptr) {
|
|
72
72
|
static VALUE JavaBinParser_read_string(JAVA_BIN_PARSER* ptr) {
|
73
73
|
int size;
|
74
74
|
int i;
|
75
|
-
|
75
|
+
int start;
|
76
76
|
unsigned char b;
|
77
77
|
|
78
78
|
size = JavaBinParser_read_size(ptr);
|
79
|
-
|
79
|
+
start = ptr->current;
|
80
80
|
for (i = 0; i < size; i++) {
|
81
81
|
/* HINT. read utf-8 char */
|
82
82
|
b = _getbyte(ptr);
|
@@ -87,8 +87,7 @@ static VALUE JavaBinParser_read_string(JAVA_BIN_PARSER* ptr) {
|
|
87
87
|
_skipbytes(ptr, 2);
|
88
88
|
} /* TODO 4byte以上のケース? */
|
89
89
|
}
|
90
|
-
ptr->
|
91
|
-
return _utf8_string((const char*) &ptr->data[ptr->last_string_offset], ptr->last_string_len);
|
90
|
+
return _utf8_string((const char*) &ptr->data[start], ptr->current - start);
|
92
91
|
}
|
93
92
|
|
94
93
|
static VALUE JavaBinParser_read_byte(JAVA_BIN_PARSER* ptr) {
|
@@ -140,7 +139,7 @@ static VALUE JavaBinParser_read_double(JAVA_BIN_PARSER* ptr) {
|
|
140
139
|
}
|
141
140
|
|
142
141
|
static void JavaBinParser_extend_cache(JAVA_BIN_PARSER* ptr) {
|
143
|
-
|
142
|
+
VALUE* newP;
|
144
143
|
int next_size;
|
145
144
|
if (ptr->cache == NULL) {
|
146
145
|
next_size = 64;
|
@@ -148,13 +147,13 @@ static void JavaBinParser_extend_cache(JAVA_BIN_PARSER* ptr) {
|
|
148
147
|
next_size = ptr->cache_size * 2;
|
149
148
|
}
|
150
149
|
|
151
|
-
newP = (
|
150
|
+
newP = (VALUE*) malloc(next_size * sizeof(VALUE));
|
152
151
|
if (!newP) {
|
153
152
|
rb_raise(rb_eRuntimeError, "JavaBinParser_extend_cache - allocate error");
|
154
153
|
}
|
155
154
|
|
156
155
|
if (ptr->cache) {
|
157
|
-
memcpy(newP, ptr->cache, sizeof(
|
156
|
+
memcpy(newP, ptr->cache, sizeof(VALUE) * ptr->cache_size);
|
158
157
|
}
|
159
158
|
ptr->cache = newP;
|
160
159
|
ptr->cache_size = next_size;
|
@@ -186,17 +185,19 @@ static VALUE JavaBinParser_read_val(JAVA_BIN_PARSER* ptr) {
|
|
186
185
|
/* rubyの文字列 */
|
187
186
|
value = JavaBinParser_read_val(ptr);
|
188
187
|
|
189
|
-
/*
|
190
|
-
ptr->cache[ptr->cache_index]
|
191
|
-
ptr->cache[ptr->cache_index].len = ptr->last_string_len;
|
192
|
-
ptr->cache_index ++;
|
188
|
+
/* 参照文字列として文字列を保持 */
|
189
|
+
ptr->cache[ptr->cache_index++] = value;
|
193
190
|
/* 参照文字列用のcacheを拡張する */
|
194
191
|
if (ptr->cache_size <= ptr->cache_index) {
|
195
192
|
JavaBinParser_extend_cache(ptr);
|
196
193
|
}
|
197
194
|
return value;
|
198
195
|
} else {
|
199
|
-
return
|
196
|
+
return rb_str_new4(ptr->cache[size - 1]); // freeze共有
|
197
|
+
//return rb_str_new3(ptr->cache[size - 1]); // 共有(変更があったら分裂)
|
198
|
+
|
199
|
+
//return ptr->cache[size - 1]; // 同じ物
|
200
|
+
//return rb_str_dup(ptr->cache[size - 1]); // コピー
|
200
201
|
}
|
201
202
|
case SHIFTED_ORDERED_MAP:
|
202
203
|
case SHIFTED_NAMED_LST:
|
@@ -268,6 +269,7 @@ static VALUE JavaBinParser_read_val(JAVA_BIN_PARSER* ptr) {
|
|
268
269
|
case SOLRDOCLST:
|
269
270
|
hash = rb_hash_new();
|
270
271
|
value = JavaBinParser_read_val(ptr);
|
272
|
+
// TODO キーのfreeze
|
271
273
|
rb_hash_aset(hash, rb_str_new2("numFound"), rb_ary_entry(value, 0));
|
272
274
|
rb_hash_aset(hash, rb_str_new2("start"), rb_ary_entry(value, 1));
|
273
275
|
rb_hash_aset(hash, rb_str_new2("maxScore"), rb_ary_entry(value, 2));
|
@@ -288,8 +290,17 @@ static void JavaBinParser_free(JAVA_BIN_PARSER* ptr) {
|
|
288
290
|
}
|
289
291
|
}
|
290
292
|
|
293
|
+
static void JavaBinParser_mark(JAVA_BIN_PARSER* ptr) {
|
294
|
+
int i;
|
295
|
+
if (ptr) {
|
296
|
+
for (i = 0; i < ptr->cache_index; i++) {
|
297
|
+
rb_gc_mark_maybe(ptr->cache[i]);
|
298
|
+
}
|
299
|
+
}
|
300
|
+
}
|
301
|
+
|
291
302
|
static VALUE JavaBinParser_alloc(VALUE klass) {
|
292
|
-
return Data_Wrap_Struct(klass,
|
303
|
+
return Data_Wrap_Struct(klass, JavaBinParser_mark, JavaBinParser_free, NULL);
|
293
304
|
}
|
294
305
|
|
295
306
|
/*
|
@@ -358,7 +369,7 @@ static VALUE rb_cParser_initialize(VALUE self) {
|
|
358
369
|
}
|
359
370
|
DATA_PTR(self) = ptr;
|
360
371
|
|
361
|
-
/* 参照文字列の準備(ここでも初期化しておかないと、たまに
|
372
|
+
/* 参照文字列の準備(ここでも初期化しておかないと、たまにsegvしちゃいますruby 1.8.7) */
|
362
373
|
ptr->cache = NULL;
|
363
374
|
ptr->cache_index = 0;
|
364
375
|
|
data/ext/java_bin/ext/parser.h
CHANGED
@@ -55,14 +55,6 @@
|
|
55
55
|
#define SHIFTED_SINT (SINT >> 5)
|
56
56
|
#define SHIFTED_SLONG (SLONG >> 5)
|
57
57
|
|
58
|
-
/*
|
59
|
-
* 参照文字列情報保持用
|
60
|
-
*/
|
61
|
-
typedef struct _extern_string_info {
|
62
|
-
int offset;
|
63
|
-
int len;
|
64
|
-
} _EXTERN_STRING_INFO;
|
65
|
-
|
66
58
|
/*
|
67
59
|
* 読込処理データ保持構造体
|
68
60
|
*/
|
@@ -73,11 +65,9 @@ typedef struct java_bin_parser {
|
|
73
65
|
unsigned char tag_byte;
|
74
66
|
|
75
67
|
/* 外部文字列用 */
|
76
|
-
|
68
|
+
VALUE* cache;
|
77
69
|
int cache_size;
|
78
70
|
int cache_index;
|
79
|
-
int last_string_offset;
|
80
|
-
int last_string_len;
|
81
71
|
} JAVA_BIN_PARSER;
|
82
72
|
|
83
73
|
#ifdef HAVE_RUBY_ENCODING_H
|
@@ -137,6 +127,7 @@ static VALUE JavaBinParser_read_float(JAVA_BIN_PARSER* ptr);
|
|
137
127
|
static VALUE JavaBinParser_read_double(JAVA_BIN_PARSER* ptr);
|
138
128
|
static VALUE JavaBinParser_read_val(JAVA_BIN_PARSER* ptr);
|
139
129
|
static void JavaBinParser_free(JAVA_BIN_PARSER* ptr);
|
130
|
+
static void JavaBinParser_mark(JAVA_BIN_PARSER* ptr);
|
140
131
|
static VALUE JavaBinParser_alloc(VALUE klass);
|
141
132
|
static void JavaBinParser_extend_cache(JAVA_BIN_PARSER* ptr);
|
142
133
|
static VALUE rb_cParser_parse(VALUE self, VALUE data);
|
data/java_bin.gemspec
CHANGED
@@ -5,11 +5,11 @@
|
|
5
5
|
|
6
6
|
Gem::Specification.new do |s|
|
7
7
|
s.name = %q{java_bin}
|
8
|
-
s.version = "0.3.0"
|
8
|
+
s.version = "0.3.1"
|
9
9
|
|
10
10
|
s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
|
11
11
|
s.authors = ["kennyj"]
|
12
|
-
s.date = %q{2010-01-
|
12
|
+
s.date = %q{2010-01-22}
|
13
13
|
s.description = %q{Apache Solr JavaBin format (binary format) implementation for Ruby.}
|
14
14
|
s.email = %q{kennyj@gmail.com}
|
15
15
|
s.extensions = ["ext/java_bin/ext/extconf.rb"]
|
@@ -238,7 +238,9 @@ class TestJavaBinParser < Test::Unit::TestCase
|
|
238
238
|
|
239
239
|
def test_ordered_map
|
240
240
|
arr = [1, (5 << 5) | 2] + [(1 << 5) | 1] + "a".unpack("C*") + [3, 8] + [(1 << 5) | 1] + "b".unpack("C*") + [3, 9]
|
241
|
-
|
241
|
+
result = @parser.parse(arr.pack("C*"))
|
242
|
+
assert_equal({"a" => 8, "b" => 9}, result)
|
243
|
+
result.each { |k,v| assert k.frozen? }
|
242
244
|
end
|
243
245
|
|
244
246
|
# def test_named_lst
|
@@ -250,7 +252,11 @@ class TestJavaBinParser < Test::Unit::TestCase
|
|
250
252
|
[(7 << 5) | 0] + [(1 << 5) | 3] + "あいa".unpack("C*") +
|
251
253
|
[(1 << 5) | 1] + "b".unpack("C*") +
|
252
254
|
[(7 << 5) | 1]
|
253
|
-
|
255
|
+
result = @parser.parse(arr.pack("C*"))
|
256
|
+
assert_equal({"a" => "あいa", "b" => "あいa"}, result)
|
257
|
+
|
258
|
+
assert_equal 'UTF-8', result['a'].encoding.to_s if result['a'].respond_to? :encoding
|
259
|
+
assert_equal 'UTF-8', result['b'].encoding.to_s if result['b'].respond_to? :encoding
|
254
260
|
end
|
255
261
|
|
256
262
|
LARGE_SIZE = 1000
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: java_bin
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.3.0
|
4
|
+
version: 0.3.1
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- kennyj
|
@@ -9,7 +9,7 @@ autorequire:
|
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
11
|
|
12
|
-
date: 2010-01-
|
12
|
+
date: 2010-01-22 00:00:00 +09:00
|
13
13
|
default_executable:
|
14
14
|
dependencies: []
|
15
15
|
|