libcdb-ruby 0.1.1 → 0.2.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 7a350d25e790bf8b2e405067e69a154393a0749e
4
- data.tar.gz: 51c44fe36fd9ac0130e76a1c7d19de6ee4d8db94
3
+ metadata.gz: 16c0aecf11141408bf924eb8fb720274b00243ff
4
+ data.tar.gz: a18316f3428804a9e75b290343f14c58a874608f
5
5
  SHA512:
6
- metadata.gz: 73d37a569b353eac9c087b96498f4974115679824a3014b6133e8c7288675797e882442d6fb07adf6378dc02b9fb9a1bb0c0058bc551cfe2adcb1e0a09ef7759
7
- data.tar.gz: 452daa03315b780f5c2d222834982aedec3d552f942371b76c82bb3a4617f09af7a486220c19d251c08982b8371c48982652e31a023332d1e96e8bc6bba4864b
6
+ metadata.gz: a076b50bcebf0e90f5d889e14a8e615f41dd8287b2c1e699cfec742f7cf9e5596108f4bae23089551c1b583ab867976bfa993c552ce3f59f20445cf682564c8a
7
+ data.tar.gz: a776122e6ae6b44a1bba030cbb0c3073f0ea9e2636b4ebd39de9435c2c514291890bc1b4e693c4e79e97ab904c66e17da0e125d7a2386fe670c83d10f4989687
data/ChangeLog CHANGED
@@ -2,6 +2,16 @@
2
2
 
3
3
  = Revision history for libcdb-ruby
4
4
 
5
+ == 0.2.0 [2014-12-05]
6
+
7
+ * Added encoding support to LibCDB::CDB::Reader.
8
+ * Added LibCDB::CDB.load and LibCDB::CDB.load_file to create a database from a
9
+ dump.
10
+ * Added LibCDB::CDB.stats and LibCDB::CDB.print_stats to collect stats from a
11
+ database.
12
+ * Fixed that LibCDB::CDB::Reader#each_key and LibCDB::CDB::Reader#each_value
13
+ would not return an enumerator when no block was given.
14
+
5
15
  == 0.1.1 [2014-04-25]
6
16
 
7
17
  * <b>Dropped support for Ruby 1.9.2.</b>
data/README CHANGED
@@ -2,7 +2,7 @@
2
2
 
3
3
  == VERSION
4
4
 
5
- This documentation refers to libcdb-ruby version 0.1.1
5
+ This documentation refers to libcdb-ruby version 0.2.0
6
6
 
7
7
 
8
8
  == DESCRIPTION
@@ -74,7 +74,7 @@ creating and reading {constant databases}[http://cr.yp.to/cdb.html].
74
74
  == SUPPORTED PLATFORMS
75
75
 
76
76
  Linux:: MRI 1.9.3, 2.0 & 2.1 (Tested on 64-bit Ubuntu GNU/Linux
77
- with 1.9.3p488, 2.0.0p377 and 2.1.2p77)
77
+ with 1.9.3p550, 2.0.0p594 and 2.1.5p273)
78
78
  Windows:: MRI 1.9.3 (Tested on 32-bit Windows XP with 1.9.3p194)
79
79
 
80
80
 
@@ -82,7 +82,7 @@ Windows:: MRI 1.9.3 (Tested on 32-bit Windows XP with 1.9.3p194)
82
82
 
83
83
  CDB:: http://cr.yp.to/cdb.html
84
84
  TinyCDB:: http://www.corpit.ru/mjt/tinycdb.html
85
- Documentation:: https://blackwinter.github.io/libcdb-ruby/
85
+ Documentation:: https://blackwinter.github.com/libcdb-ruby
86
86
  Source code:: https://github.com/blackwinter/libcdb-ruby
87
87
  RubyGem:: https://rubygems.org/gems/libcdb-ruby
88
88
  Travis CI:: https://travis-ci.org/blackwinter/libcdb-ruby
data/Rakefile CHANGED
@@ -1,4 +1,4 @@
1
- require File.expand_path(%q{../lib/libcdb/version}, __FILE__)
1
+ require_relative 'lib/libcdb/version'
2
2
 
3
3
  begin
4
4
  require 'hen'
@@ -38,17 +38,23 @@ rcdb_##what##er_alloc(VALUE klass) {\
38
38
  rb_sys_fail(0);\
39
39
  }
40
40
 
41
- #define RCDB_RAISE_ARGS(min, max) \
41
+ #define RCDB_RAISE_ARGS0(min, max, argc) \
42
42
  rb_raise(rb_eArgError,\
43
43
  "wrong number of arguments (%d for " #min "-" #max ")", argc);
44
44
 
45
- #define RCDB_RETURN_ENUMERATOR(self, argc, argv, max) \
45
+ #define RCDB_RAISE_ARGS(min, max) RCDB_RAISE_ARGS0(min, max, argc)
46
+
47
+ #define RCDB_RETURN_ENUMERATOR0(argc, argv, max) \
46
48
  if (argc > max) {\
47
- RCDB_RAISE_ARGS(0, max)\
49
+ RCDB_RAISE_ARGS0(0, max, argc)\
48
50
  }\
49
51
  \
50
52
  RETURN_ENUMERATOR(self, argc, argv)
51
53
 
54
+ #define RCDB_RETURN_ENUMERATOR(max) RCDB_RETURN_ENUMERATOR0(argc, argv, max)
55
+
56
+ #define RCDB_RETURN_ENUMERATOR_NONE RCDB_RETURN_ENUMERATOR0(0, NULL, 0)
57
+
52
58
  #define RCDB_DEFINE_INSPECT(what) \
53
59
  static VALUE \
54
60
  rcdb_##what##er_inspect(VALUE self) {\
@@ -25,6 +25,7 @@ rcdb_reader_closed_p(VALUE self) {
25
25
  static VALUE
26
26
  rcdb_reader_initialize(VALUE self, VALUE io) {
27
27
  RCDB_INITIALIZE(read, READ, cdb, init)
28
+ rb_iv_set(self, "@encoding", rb_enc_default_external());
28
29
  return self;
29
30
  }
30
31
 
@@ -40,22 +41,20 @@ rcdb_reader_iter_push(VALUE val, VALUE ary) {
40
41
  /* Helper method */
41
42
  static VALUE
42
43
  rcdb_reader_iter_aset(VALUE pair, VALUE hash) {
43
- VALUE key = rb_ary_entry(pair, 0), val = rb_ary_entry(pair, 1), old;
44
-
45
- if (!st_lookup(RHASH_TBL(hash), key, 0)) {
46
- rb_hash_aset(hash, key, val);
47
- }
48
- else {
49
- old = rb_hash_aref(hash, key);
50
-
51
- switch (TYPE(old)) {
52
- case T_ARRAY:
53
- rb_ary_push(old, val);
54
- break;
55
- default:
56
- rb_hash_aset(hash, key, rb_ary_new3(2, old, val));
57
- break;
58
- }
44
+ VALUE key = rb_ary_entry(pair, 0);
45
+ VALUE val = rb_ary_entry(pair, 1);
46
+ VALUE old = rb_hash_aref(hash, key);
47
+
48
+ switch (TYPE(old)) {
49
+ case T_NIL:
50
+ rb_hash_aset(hash, key, val);
51
+ break;
52
+ case T_ARRAY:
53
+ rb_ary_push(old, val);
54
+ break;
55
+ default:
56
+ rb_hash_aset(hash, key, rb_ary_new3(2, old, val));
57
+ break;
59
58
  }
60
59
 
61
60
  return Qnil;
@@ -104,9 +103,9 @@ rcdb_reader_dump_pair(VALUE key, VALUE val) {
104
103
  VALUE str = rb_str_new2("");
105
104
 
106
105
  rb_str_cat2(str, "+");
107
- rb_str_append(str, rb_fix2str(LONG2NUM(RSTRING_LEN(key)), 10));
106
+ rb_str_append(str, RCDB_READER_STRING_LEN(key));
108
107
  rb_str_cat2(str, ",");
109
- rb_str_append(str, rb_fix2str(LONG2NUM(RSTRING_LEN(val)), 10));
108
+ rb_str_append(str, RCDB_READER_STRING_LEN(val));
110
109
  rb_str_cat2(str, ":");
111
110
  rb_str_append(str, key);
112
111
  rb_str_cat2(str, "->");
@@ -144,7 +143,7 @@ rcdb_reader_each(int argc, VALUE *argv, VALUE self) {
144
143
  unsigned cdbp;
145
144
  VALUE key;
146
145
 
147
- RCDB_RETURN_ENUMERATOR(self, argc, argv, 1);
146
+ RCDB_RETURN_ENUMERATOR(1);
148
147
  RCDB_READER_GET(self, cdb);
149
148
 
150
149
  if (rb_scan_args(argc, argv, "01", &key) == 1 && !NIL_P(key)) {
@@ -155,7 +154,7 @@ rcdb_reader_each(int argc, VALUE *argv, VALUE self) {
155
154
  }
156
155
 
157
156
  while (cdb_findnext(&cdbf) > 0) {
158
- rb_yield(rcdb_reader_read_data(cdb));
157
+ rb_yield(RCDB_READER_READ(data));
159
158
  }
160
159
  }
161
160
  else {
@@ -163,8 +162,8 @@ rcdb_reader_each(int argc, VALUE *argv, VALUE self) {
163
162
 
164
163
  while (cdb_seqnext(&cdbp, cdb) > 0) {
165
164
  rb_yield(rb_ary_new3(2,
166
- rcdb_reader_read_key(cdb),
167
- rcdb_reader_read_data(cdb)));
165
+ RCDB_READER_READ(key),
166
+ RCDB_READER_READ(data)));
168
167
  }
169
168
  }
170
169
 
@@ -184,9 +183,11 @@ static VALUE
184
183
  rcdb_reader_each_dump(int argc, VALUE *argv, VALUE self) {
185
184
  VALUE key;
186
185
 
187
- RCDB_RETURN_ENUMERATOR(self, argc, argv, 1);
186
+ RCDB_RETURN_ENUMERATOR(1);
188
187
 
189
188
  if (rb_scan_args(argc, argv, "01", &key) == 1 && !NIL_P(key)) {
189
+ StringValue(key);
190
+
190
191
  RCDB_READER_ITERATE0(each, yield_dump2, rb_ary_new3(1, key))
191
192
  }
192
193
  else {
@@ -210,11 +211,12 @@ rcdb_reader_each_key(VALUE self) {
210
211
  unsigned cdbp;
211
212
  VALUE key, hash = rb_hash_new();
212
213
 
214
+ RCDB_RETURN_ENUMERATOR_NONE;
213
215
  RCDB_READER_GET(self, cdb);
214
216
  cdb_seqinit(&cdbp, cdb);
215
217
 
216
218
  while (cdb_seqnext(&cdbp, cdb) > 0) {
217
- if (!st_lookup(RHASH_TBL(hash), key = rcdb_reader_read_key(cdb), 0)) {
219
+ if (NIL_P(rb_hash_lookup(hash, key = RCDB_READER_READ(key)))) {
218
220
  rb_hash_aset(hash, key, Qtrue);
219
221
  rb_yield(key);
220
222
  }
@@ -236,11 +238,12 @@ rcdb_reader_each_value(VALUE self) {
236
238
  struct cdb *cdb = NULL;
237
239
  unsigned cdbp;
238
240
 
241
+ RCDB_RETURN_ENUMERATOR_NONE;
239
242
  RCDB_READER_GET(self, cdb);
240
243
  cdb_seqinit(&cdbp, cdb);
241
244
 
242
245
  while (cdb_seqnext(&cdbp, cdb) > 0) {
243
- rb_yield(rcdb_reader_read_data(cdb));
246
+ rb_yield(RCDB_READER_READ(data));
244
247
  }
245
248
 
246
249
  return self;
@@ -274,7 +277,7 @@ rcdb_reader_fetch_first(VALUE self, VALUE key) {
274
277
  RCDB_READER_GET(self, cdb);
275
278
 
276
279
  if (cdb_find(cdb, RSTRING_PTR(key), RSTRING_LEN(key)) > 0) {
277
- val = rcdb_reader_read_data(cdb);
280
+ val = RCDB_READER_READ(data);
278
281
  }
279
282
 
280
283
  return val;
@@ -307,9 +310,7 @@ rcdb_reader_fetch_last(VALUE self, VALUE key) {
307
310
  }
308
311
 
309
312
  if (pos > 0) {
310
- val = rb_str_buf_new(len);
311
- cdb_read(cdb, RSTRING_PTR(val), len, pos);
312
- rb_str_set_len(val, len);
313
+ RCDB_READER_READ_POS(pos)
313
314
  }
314
315
 
315
316
  return val;
@@ -511,6 +512,8 @@ rcdb_init_reader(void) {
511
512
  rb_define_alloc_func(cCDBReader, rcdb_reader_alloc);
512
513
  rb_include_module(cCDBReader, rb_mEnumerable);
513
514
 
515
+ rb_define_attr(cCDBReader, "encoding", 1, 1);
516
+
514
517
  rb_define_method(cCDBReader, "close", rcdb_reader_close, 0);
515
518
  rb_define_method(cCDBReader, "closed?", rcdb_reader_closed_p, 0);
516
519
  rb_define_method(cCDBReader, "dump", rcdb_reader_dump, 0);
@@ -28,19 +28,28 @@
28
28
 
29
29
  #define RCDB_READER_DEFINE_READ(what) \
30
30
  static VALUE \
31
- rcdb_reader_read_##what(struct cdb *cdb) {\
32
- size_t len;\
33
- VALUE ret;\
31
+ rcdb_reader_read_##what(struct cdb *cdb, VALUE self) {\
32
+ size_t len = cdb_##what##len(cdb);\
33
+ VALUE val;\
34
34
  \
35
- len = cdb_##what##len(cdb);\
36
- ret = rb_str_buf_new(len);\
35
+ RCDB_READER_READ_POS(cdb_##what##pos(cdb))\
37
36
  \
38
- cdb_read(cdb, RSTRING_PTR(ret), len, cdb_##what##pos(cdb));\
39
- rb_str_set_len(ret, len);\
40
- \
41
- return ret;\
37
+ return val;\
42
38
  }
43
39
 
40
+ #define RCDB_READER_READ_POS(pos) \
41
+ val = rb_str_buf_new(len);\
42
+ \
43
+ cdb_read(cdb, RSTRING_PTR(val), len, pos);\
44
+ rb_str_set_len(val, len);\
45
+ \
46
+ rb_funcall(val, rb_intern("force_encoding"), 1, rb_iv_get(self, "@encoding"));
47
+
48
+ #define RCDB_READER_READ(what) rcdb_reader_read_##what(cdb, self)
49
+
50
+ #define RCDB_READER_STRING_LEN(str) \
51
+ rb_funcall(LONG2NUM(RSTRING_LEN(str)), rb_intern("to_s"), 0)
52
+
44
53
  extern VALUE cCDBReader;
45
54
  void rcdb_init_reader(void);
46
55
 
@@ -60,6 +60,8 @@ static void
60
60
  rcdb_writer_put_value(struct cdb_make *cdbm, VALUE key, VALUE val, enum cdb_put_mode mode) {
61
61
  long i;
62
62
 
63
+ StringValue(key);
64
+
63
65
  switch (TYPE(val)) {
64
66
  case T_ARRAY:
65
67
  switch (mode) {
@@ -132,8 +134,7 @@ rcdb_writer_put(int argc, VALUE *argv, VALUE self, enum cdb_put_mode mode) {
132
134
 
133
135
  break;
134
136
  case T_HASH:
135
- val = rb_ary_new();
136
- st_foreach(RHASH_TBL(arg), rcdb_writer_push_pair, val);
137
+ rb_hash_foreach(arg, rcdb_writer_push_pair, val = rb_ary_new());
137
138
 
138
139
  for (i = 0; i < RARRAY_LEN(val); i++) {
139
140
  tmp = rb_ary_entry(val, i);
@@ -5,8 +5,8 @@ module LibCDB
5
5
  module Version
6
6
 
7
7
  MAJOR = 0
8
- MINOR = 1
9
- TINY = 1
8
+ MINOR = 2
9
+ TINY = 0
10
10
 
11
11
  class << self
12
12
 
data/lib/libcdb.rb CHANGED
@@ -86,6 +86,116 @@ module LibCDB
86
86
  }
87
87
  end
88
88
 
89
+ # call-seq:
90
+ # CDB.load(path, dump) -> aCDB
91
+ #
92
+ # Opens +path+ for writing and loads +dump+ into the database. +dump+
93
+ # may be a string or an IO object. Returns the (unclosed) CDB object.
94
+ def load(path, dump)
95
+ require 'strscan'
96
+
97
+ s, n, e = nil, 0, lambda { |m| s.eos? ?
98
+ raise("Unexpected end of input (#{m} at #{n}).") :
99
+ raise("#{m} at #{n}:#{s.pos}: #{s.peek(16).inspect}") }
100
+
101
+ cdb = open(path, 'w+')
102
+
103
+ dump.each_line { |line|
104
+ n += 1
105
+
106
+ s = StringScanner.new(line)
107
+
108
+ e['Record identifier expected'] unless s.scan(/\+/)
109
+
110
+ e['Key length expected'] unless s.scan(/\d+/)
111
+ klen = s.matched.to_i
112
+
113
+ e['Length separator expected'] unless s.scan(/,/)
114
+
115
+ e['Value length expected'] unless s.scan(/\d+/)
116
+ vlen = s.matched.to_i
117
+
118
+ e['Key separator expected'] unless s.scan(/:/)
119
+
120
+ key = ''
121
+ klen.times { key << s.get_byte }
122
+
123
+ e['Value separator expected'] unless s.scan(/->/)
124
+
125
+ value = ''
126
+ vlen.times { value << s.get_byte }
127
+
128
+ e['Record terminator expected'] unless s.scan(/\n/)
129
+ e['Unexpected data'] unless s.eos?
130
+
131
+ cdb.store(key, value)
132
+ }
133
+
134
+ cdb
135
+ end
136
+
137
+ # call-seq:
138
+ # CDB.load_file(path, file) -> aCDB
139
+ #
140
+ # Loads the dump at +file+ into the database at +path+ (see #load).
141
+ def load_file(path, file)
142
+ File.open(file, 'rb') { |f| self.load(path, f) }
143
+ end
144
+
145
+ # call-seq:
146
+ # CDB.stats(path) -> aHash
147
+ #
148
+ # Returns a hash with the stats on +path+.
149
+ def stats(path)
150
+ {}.tap { |stats| open(path) { |cdb|
151
+ stats[:records] = cnt = cdb.total
152
+
153
+ stats[:keys] = khash = { min: Float::INFINITY, avg: 0, max: 0 }
154
+ stats[:values] = vhash = khash.dup
155
+
156
+ stats[:hash] = Hash.new(0).update(distances: Hash.new([0, 0]))
157
+
158
+ khash[:min] = vhash[:min] = 0 and break if cnt.zero?
159
+
160
+ ktot, vtot, update = 0, 0, lambda { |h, s| s.bytesize.tap { |l|
161
+ h[:min] = l if l < h[:min]
162
+ h[:max] = l if l > h[:max]
163
+ } }
164
+
165
+ cdb.each_key { |k| ktot += update[khash, k] }
166
+ cdb.each_value { |v| vtot += update[vhash, v] }
167
+
168
+ khash[:avg] = (ktot + cnt / 2) / cnt
169
+ vhash[:avg] = (vtot + cnt / 2) / cnt
170
+
171
+ # TODO: hash table stats
172
+ } }
173
+ end
174
+
175
+ # call-seq:
176
+ # CDB.print_stats(path) -> aHash
177
+ #
178
+ # Prints the #stats on +path+.
179
+ def print_stats(path)
180
+ stats(path).tap { |s|
181
+ r, k, v, h = s.values_at(:records, :keys, :values, :hash)
182
+
183
+ v1, v2 = [:min, :avg, :max], [:tables, :entries, :collisions]
184
+
185
+ puts 'number of records: %d' % r
186
+ puts 'key min/avg/max length: %d/%d/%d' % k.values_at(*v1)
187
+ puts 'val min/avg/max length: %d/%d/%d' % v.values_at(*v1)
188
+ next # TODO: hash table stats
189
+ puts 'hash tables/entries/collisions: %d/%d/%d' % h.values_at(*v2)
190
+ puts 'hash table min/avg/max length: %d/%d/%d' % h.values_at(*v1)
191
+ puts 'hash table distances:'
192
+
193
+ d = h[:distances]
194
+ 0.upto(9) { |i| puts ' d%d: %6d %2d%%' % [i, *d[i]] }
195
+ puts ' >9: %6d %2d%%' % d[-1]
196
+ }
197
+ end
198
+
89
199
  private
90
200
 
91
201
  def _open_args(path, mode)
@@ -145,11 +255,11 @@ module LibCDB
145
255
  end
146
256
 
147
257
  def_delegators :reader, :[], :dump, :each, :each_dump, :each_key,
148
- :each_value, :empty?, :fetch, :fetch_all,
149
- :fetch_first, :fetch_last, :get, :has_key?,
150
- :has_value?, :include?, :key, :key?, :keys,
151
- :length, :member?, :rget, :size, :to_a,
152
- :to_h, :total, :value?, :values, :values_at
258
+ :each_value, :empty?, :encoding, :encoding=,
259
+ :fetch, :fetch_all, :fetch_first, :fetch_last,
260
+ :get, :has_key?, :has_value?, :include?, :key,
261
+ :key?, :keys, :length, :member?, :rget, :size,
262
+ :to_a, :to_h, :total, :value?, :values, :values_at
153
263
 
154
264
  def_delegators :writer, :<<, :[]=, :add, :insert, :replace, :store
155
265
 
Binary file
File without changes
Binary file