java_bin 0.3.0 → 0.3.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/CHANGELOG +3 -0
- data/README.rdoc +10 -3
- data/VERSION +1 -1
- data/ext/java_bin/ext/parser.c +25 -14
- data/ext/java_bin/ext/parser.h +2 -11
- data/java_bin.gemspec +2 -2
- data/test/test_java_bin_parser.rb +8 -2
- metadata +2 -2
data/CHANGELOG
CHANGED
data/README.rdoc
CHANGED
|
@@ -4,7 +4,7 @@ This is an Apache Solr JavaBin format (binary format) implementation for Ruby.
|
|
|
4
4
|
|
|
5
5
|
== Features
|
|
6
6
|
|
|
7
|
-
* fast parse, and less network traffic.
|
|
7
|
+
* fast parse(2.5 - 5 times faster than ruby eval), and less network traffic.
|
|
8
8
|
MRI 1.8.7
|
|
9
9
|
[data1]
|
|
10
10
|
ruby eval parse. 5000 times. elapsed time 1.282744
|
|
@@ -45,8 +45,8 @@ This is an Apache Solr JavaBin format (binary format) implementation for Ruby.
|
|
|
45
45
|
== Requirements
|
|
46
46
|
|
|
47
47
|
* Ruby1.8.7 or later (include 1.9.x)
|
|
48
|
-
* (recommended) C compiler: you can also use java_bin without c extension, but 'pure' is 30 times slower than 'ext'.
|
|
49
|
-
* JavaBin has been tested with MRI 1.8.7, REE 1.8.7, YARV 1.9.2 pre1 on Ubuntu Linux 9.10 (32bit) and MRI 1.8.6 on Windows Vista
|
|
48
|
+
* (recommended) C compiler (gcc or vc++): you can also use java_bin without c extension, but 'pure' is 30 times slower than 'ext'.
|
|
49
|
+
* JavaBin has been tested with MRI 1.8.7, REE 1.8.7, YARV 1.9.2 pre1 on Ubuntu Linux 9.10 (32bit) and MRI 1.8.6 on Windows Vista (32bit), and Apache Solr 1.4
|
|
50
50
|
|
|
51
51
|
== Install
|
|
52
52
|
|
|
@@ -55,6 +55,11 @@ This is an Apache Solr JavaBin format (binary format) implementation for Ruby.
|
|
|
55
55
|
gem install java_bin
|
|
56
56
|
|
|
57
57
|
(Windows)
|
|
58
|
+
fix (path to)\ruby\lib\ruby\1.8\i386-mswin32\config.h
|
|
59
|
+
#if _MSC_VER != 1200
|
|
60
|
+
↓
|
|
61
|
+
#if _MSC_VER < 1200
|
|
62
|
+
|
|
58
63
|
start => visual studio c++ 200x xxxx edition => visual studio tools => visual studio 200x command prompt
|
|
59
64
|
gem sources -a http://gemcutter.org
|
|
60
65
|
gem install java_bin
|
|
@@ -86,6 +91,8 @@ This is an Apache Solr JavaBin format (binary format) implementation for Ruby.
|
|
|
86
91
|
|
|
87
92
|
* more parse speed
|
|
88
93
|
* license
|
|
94
|
+
* pure parse encoding bug (1.9.x)
|
|
95
|
+
* shared string over parsing
|
|
89
96
|
* windows build(1.9.x)
|
|
90
97
|
* 64bit build
|
|
91
98
|
* builder(writer)作成
|
data/VERSION
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
0.3.0
|
|
1
|
+
0.3.1
|
data/ext/java_bin/ext/parser.c
CHANGED
|
@@ -72,11 +72,11 @@ static VALUE JavaBinParser_read_small_long(JAVA_BIN_PARSER* ptr) {
|
|
|
72
72
|
static VALUE JavaBinParser_read_string(JAVA_BIN_PARSER* ptr) {
|
|
73
73
|
int size;
|
|
74
74
|
int i;
|
|
75
|
-
|
|
75
|
+
int start;
|
|
76
76
|
unsigned char b;
|
|
77
77
|
|
|
78
78
|
size = JavaBinParser_read_size(ptr);
|
|
79
|
-
|
|
79
|
+
start = ptr->current;
|
|
80
80
|
for (i = 0; i < size; i++) {
|
|
81
81
|
/* HINT. read utf-8 char */
|
|
82
82
|
b = _getbyte(ptr);
|
|
@@ -87,8 +87,7 @@ static VALUE JavaBinParser_read_string(JAVA_BIN_PARSER* ptr) {
|
|
|
87
87
|
_skipbytes(ptr, 2);
|
|
88
88
|
} /* TODO 4byte以上のケース? */
|
|
89
89
|
}
|
|
90
|
-
ptr->
|
|
91
|
-
return _utf8_string((const char*) &ptr->data[ptr->last_string_offset], ptr->last_string_len);
|
|
90
|
+
return _utf8_string((const char*) &ptr->data[start], ptr->current - start);
|
|
92
91
|
}
|
|
93
92
|
|
|
94
93
|
static VALUE JavaBinParser_read_byte(JAVA_BIN_PARSER* ptr) {
|
|
@@ -140,7 +139,7 @@ static VALUE JavaBinParser_read_double(JAVA_BIN_PARSER* ptr) {
|
|
|
140
139
|
}
|
|
141
140
|
|
|
142
141
|
static void JavaBinParser_extend_cache(JAVA_BIN_PARSER* ptr) {
|
|
143
|
-
|
|
142
|
+
VALUE* newP;
|
|
144
143
|
int next_size;
|
|
145
144
|
if (ptr->cache == NULL) {
|
|
146
145
|
next_size = 64;
|
|
@@ -148,13 +147,13 @@ static void JavaBinParser_extend_cache(JAVA_BIN_PARSER* ptr) {
|
|
|
148
147
|
next_size = ptr->cache_size * 2;
|
|
149
148
|
}
|
|
150
149
|
|
|
151
|
-
newP = (
|
|
150
|
+
newP = (VALUE*) malloc(next_size * sizeof(VALUE));
|
|
152
151
|
if (!newP) {
|
|
153
152
|
rb_raise(rb_eRuntimeError, "JavaBinParser_extend_cache - allocate error");
|
|
154
153
|
}
|
|
155
154
|
|
|
156
155
|
if (ptr->cache) {
|
|
157
|
-
memcpy(newP, ptr->cache, sizeof(
|
|
156
|
+
memcpy(newP, ptr->cache, sizeof(VALUE) * ptr->cache_size);
|
|
158
157
|
}
|
|
159
158
|
ptr->cache = newP;
|
|
160
159
|
ptr->cache_size = next_size;
|
|
@@ -186,17 +185,19 @@ static VALUE JavaBinParser_read_val(JAVA_BIN_PARSER* ptr) {
|
|
|
186
185
|
/* rubyの文字列 */
|
|
187
186
|
value = JavaBinParser_read_val(ptr);
|
|
188
187
|
|
|
189
|
-
/*
|
|
190
|
-
ptr->cache[ptr->cache_index]
|
|
191
|
-
ptr->cache[ptr->cache_index].len = ptr->last_string_len;
|
|
192
|
-
ptr->cache_index ++;
|
|
188
|
+
/* 参照文字列として文字列を保持 */
|
|
189
|
+
ptr->cache[ptr->cache_index++] = value;
|
|
193
190
|
/* 参照文字列用のcacheを拡張する */
|
|
194
191
|
if (ptr->cache_size <= ptr->cache_index) {
|
|
195
192
|
JavaBinParser_extend_cache(ptr);
|
|
196
193
|
}
|
|
197
194
|
return value;
|
|
198
195
|
} else {
|
|
199
|
-
return
|
|
196
|
+
return rb_str_new4(ptr->cache[size - 1]); // freeze共有
|
|
197
|
+
//return rb_str_new3(ptr->cache[size - 1]); // 共有(変更があったら分裂)
|
|
198
|
+
|
|
199
|
+
//return ptr->cache[size - 1]; // 同じ物
|
|
200
|
+
//return rb_str_dup(ptr->cache[size - 1]); // コピー
|
|
200
201
|
}
|
|
201
202
|
case SHIFTED_ORDERED_MAP:
|
|
202
203
|
case SHIFTED_NAMED_LST:
|
|
@@ -268,6 +269,7 @@ static VALUE JavaBinParser_read_val(JAVA_BIN_PARSER* ptr) {
|
|
|
268
269
|
case SOLRDOCLST:
|
|
269
270
|
hash = rb_hash_new();
|
|
270
271
|
value = JavaBinParser_read_val(ptr);
|
|
272
|
+
// TODO キーのfreeze
|
|
271
273
|
rb_hash_aset(hash, rb_str_new2("numFound"), rb_ary_entry(value, 0));
|
|
272
274
|
rb_hash_aset(hash, rb_str_new2("start"), rb_ary_entry(value, 1));
|
|
273
275
|
rb_hash_aset(hash, rb_str_new2("maxScore"), rb_ary_entry(value, 2));
|
|
@@ -288,8 +290,17 @@ static void JavaBinParser_free(JAVA_BIN_PARSER* ptr) {
|
|
|
288
290
|
}
|
|
289
291
|
}
|
|
290
292
|
|
|
293
|
+
static void JavaBinParser_mark(JAVA_BIN_PARSER* ptr) {
|
|
294
|
+
int i;
|
|
295
|
+
if (ptr) {
|
|
296
|
+
for (i = 0; i < ptr->cache_index; i++) {
|
|
297
|
+
rb_gc_mark_maybe(ptr->cache[i]);
|
|
298
|
+
}
|
|
299
|
+
}
|
|
300
|
+
}
|
|
301
|
+
|
|
291
302
|
static VALUE JavaBinParser_alloc(VALUE klass) {
|
|
292
|
-
return Data_Wrap_Struct(klass,
|
|
303
|
+
return Data_Wrap_Struct(klass, JavaBinParser_mark, JavaBinParser_free, NULL);
|
|
293
304
|
}
|
|
294
305
|
|
|
295
306
|
/*
|
|
@@ -358,7 +369,7 @@ static VALUE rb_cParser_initialize(VALUE self) {
|
|
|
358
369
|
}
|
|
359
370
|
DATA_PTR(self) = ptr;
|
|
360
371
|
|
|
361
|
-
/* 参照文字列の準備(ここでも初期化しておかないと、たまに
|
|
372
|
+
/* 参照文字列の準備(ここでも初期化しておかないと、たまにsegvしちゃいますruby 1.8.7) */
|
|
362
373
|
ptr->cache = NULL;
|
|
363
374
|
ptr->cache_index = 0;
|
|
364
375
|
|
data/ext/java_bin/ext/parser.h
CHANGED
|
@@ -55,14 +55,6 @@
|
|
|
55
55
|
#define SHIFTED_SINT (SINT >> 5)
|
|
56
56
|
#define SHIFTED_SLONG (SLONG >> 5)
|
|
57
57
|
|
|
58
|
-
/*
|
|
59
|
-
* 参照文字列情報保持用
|
|
60
|
-
*/
|
|
61
|
-
typedef struct _extern_string_info {
|
|
62
|
-
int offset;
|
|
63
|
-
int len;
|
|
64
|
-
} _EXTERN_STRING_INFO;
|
|
65
|
-
|
|
66
58
|
/*
|
|
67
59
|
* 読込処理データ保持構造体
|
|
68
60
|
*/
|
|
@@ -73,11 +65,9 @@ typedef struct java_bin_parser {
|
|
|
73
65
|
unsigned char tag_byte;
|
|
74
66
|
|
|
75
67
|
/* 外部文字列用 */
|
|
76
|
-
|
|
68
|
+
VALUE* cache;
|
|
77
69
|
int cache_size;
|
|
78
70
|
int cache_index;
|
|
79
|
-
int last_string_offset;
|
|
80
|
-
int last_string_len;
|
|
81
71
|
} JAVA_BIN_PARSER;
|
|
82
72
|
|
|
83
73
|
#ifdef HAVE_RUBY_ENCODING_H
|
|
@@ -137,6 +127,7 @@ static VALUE JavaBinParser_read_float(JAVA_BIN_PARSER* ptr);
|
|
|
137
127
|
static VALUE JavaBinParser_read_double(JAVA_BIN_PARSER* ptr);
|
|
138
128
|
static VALUE JavaBinParser_read_val(JAVA_BIN_PARSER* ptr);
|
|
139
129
|
static void JavaBinParser_free(JAVA_BIN_PARSER* ptr);
|
|
130
|
+
static void JavaBinParser_mark(JAVA_BIN_PARSER* ptr);
|
|
140
131
|
static VALUE JavaBinParser_alloc(VALUE klass);
|
|
141
132
|
static void JavaBinParser_extend_cache(JAVA_BIN_PARSER* ptr);
|
|
142
133
|
static VALUE rb_cParser_parse(VALUE self, VALUE data);
|
data/java_bin.gemspec
CHANGED
|
@@ -5,11 +5,11 @@
|
|
|
5
5
|
|
|
6
6
|
Gem::Specification.new do |s|
|
|
7
7
|
s.name = %q{java_bin}
|
|
8
|
-
s.version = "0.3.0"
|
|
8
|
+
s.version = "0.3.1"
|
|
9
9
|
|
|
10
10
|
s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
|
|
11
11
|
s.authors = ["kennyj"]
|
|
12
|
-
s.date = %q{2010-01-
|
|
12
|
+
s.date = %q{2010-01-22}
|
|
13
13
|
s.description = %q{Apache Solr JavaBin format (binary format) implementation for Ruby.}
|
|
14
14
|
s.email = %q{kennyj@gmail.com}
|
|
15
15
|
s.extensions = ["ext/java_bin/ext/extconf.rb"]
|
data/test/test_java_bin_parser.rb
CHANGED
|
@@ -238,7 +238,9 @@ class TestJavaBinParser < Test::Unit::TestCase
|
|
|
238
238
|
|
|
239
239
|
def test_ordered_map
|
|
240
240
|
arr = [1, (5 << 5) | 2] + [(1 << 5) | 1] + "a".unpack("C*") + [3, 8] + [(1 << 5) | 1] + "b".unpack("C*") + [3, 9]
|
|
241
|
-
|
|
241
|
+
result = @parser.parse(arr.pack("C*"))
|
|
242
|
+
assert_equal({"a" => 8, "b" => 9}, result)
|
|
243
|
+
result.each { |k,v| assert k.frozen? }
|
|
242
244
|
end
|
|
243
245
|
|
|
244
246
|
# def test_named_lst
|
|
@@ -250,7 +252,11 @@ class TestJavaBinParser < Test::Unit::TestCase
|
|
|
250
252
|
[(7 << 5) | 0] + [(1 << 5) | 3] + "あいa".unpack("C*") +
|
|
251
253
|
[(1 << 5) | 1] + "b".unpack("C*") +
|
|
252
254
|
[(7 << 5) | 1]
|
|
253
|
-
|
|
255
|
+
result = @parser.parse(arr.pack("C*"))
|
|
256
|
+
assert_equal({"a" => "あいa", "b" => "あいa"}, result)
|
|
257
|
+
|
|
258
|
+
assert_equal 'UTF-8', result['a'].encoding.to_s if result['a'].respond_to? :encoding
|
|
259
|
+
assert_equal 'UTF-8', result['b'].encoding.to_s if result['b'].respond_to? :encoding
|
|
254
260
|
end
|
|
255
261
|
|
|
256
262
|
LARGE_SIZE = 1000
|
metadata
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
|
2
2
|
name: java_bin
|
|
3
3
|
version: !ruby/object:Gem::Version
|
|
4
|
-
version: 0.3.0
|
|
4
|
+
version: 0.3.1
|
|
5
5
|
platform: ruby
|
|
6
6
|
authors:
|
|
7
7
|
- kennyj
|
|
@@ -9,7 +9,7 @@ autorequire:
|
|
|
9
9
|
bindir: bin
|
|
10
10
|
cert_chain: []
|
|
11
11
|
|
|
12
|
-
date: 2010-01-
|
|
12
|
+
date: 2010-01-22 00:00:00 +09:00
|
|
13
13
|
default_executable:
|
|
14
14
|
dependencies: []
|
|
15
15
|
|