rroonga 12.0.2 → 12.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 9e1afd17d5a479b6e5c472729d3548f3551b15cec0db30bbf46f8a43266d96a1
4
- data.tar.gz: 05b218925dfbf29f4deb9d34c5ab6408cbcecdf11de588d2c8d45efaf0e28417
3
+ metadata.gz: a85769bf192cecc677daaaddb349b4728bdeb7d240d1ecb4fc9660495c5fa2de
4
+ data.tar.gz: 05375332c1e8f93a9e1fb1b584a9e92d8923af6848ffdef223702a17d32852a8
5
5
  SHA512:
6
- metadata.gz: ce775711f0f90338164a3f5c3f5c7731b9f8d06e8563836f0adb7de325f471d43d9be5bb09a428f2598e801dca502564fbea3b9cb285bb38d8f7310cd22a7251
7
- data.tar.gz: fe208b1bf0ee5e04fbccbe6220b2b0b44e80e9825c8b9aaab86017d7d87de8185a7a51eea026532b6d13fdcd310cab7e8484702514e5b256d6957f984107f2c2
6
+ metadata.gz: 3ec5e421bb75cdf1c944fcf36f70778f59113da9a8c4c20df7318fd56e21a1f00856fe9a9f308cab584183c32cbf055758b3e6bd52d415771c3f0c28080588bf
7
+ data.tar.gz: d7d630a97a5831323c70ececc208016d4a5a02b4ddd476e043038eca71e8b6d37410d01c7eac8d5eb71b0788d7fbcc915d76737dfa38c0f3db02f1cc92b9884f
data/doc/text/news.md CHANGED
@@ -1,5 +1,21 @@
1
1
  # NEWS
2
2
 
3
+ ## 12.1.0: 2022-12-05 {#version-12-1-0}
4
+
5
+ ### Fixes
6
+
7
+ * [{Groonga::PatriciaTrie}] Fixed a bug where `Groonga::PatriciaTrie#scan` returned wrong offsets when there were many hit words. [GitHub#207][Patch by Naoya Murakami]
8
+
9
+ ### Thanks
10
+
11
+ * Naoya Murakami
12
+
13
+ ## 12.0.8: 2022-09-28 {#version-12-0-8}
14
+
15
+ ### Improvements
16
+
17
+ * Added support for creating a {Groonga::PatriciaTrie} without a database.
18
+
3
19
  ## 12.0.2: 2022-04-04 {#version-12-0-2}
4
20
 
5
21
  ### Improvements
@@ -1,7 +1,7 @@
1
1
  /* -*- coding: utf-8; mode: C; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
2
2
  /* vim: set sts=4 sw=4 ts=8 noet: */
3
3
  /*
4
- Copyright (C) 2009-2021 Sutou Kouhei <kou@clear-code.com>
4
+ Copyright (C) 2009-2022 Sutou Kouhei <kou@clear-code.com>
5
5
 
6
6
  This library is free software; you can redistribute it and/or
7
7
  modify it under the terms of the GNU Lesser General Public
@@ -129,11 +129,45 @@ VALUE rb_cGrnPatriciaTrie;
129
129
  *
130
130
  * 省略した場合はShortText型をキーとして使用する。この場合、
131
131
  * 4096バイトまで使用可能である。
132
+ *
133
+ * @option options :key_size (nil) The key size in bytes or the
134
+ * max key size in bytes. If `key_variable_size: true` is also
135
+ * specified, the size is the max key size, not the key size.
136
+ *
137
+ * This is used only when the `:context` isn't associated with a
138
+ * database. If `:context` is associated with a database, this
139
+ * value is ignored.
140
+ *
141
+ * @see :key_type
142
+ * @see :key_variable_size
143
+ * @since 12.0.8
144
+ *
145
+ * @option options :key_variable_size (false) Whether the key is
146
+ * variable size or not.
147
+ *
148
+ * This is used only when the `:context` isn't associated with a
149
+ * database. If `:context` is associated with a database, this
150
+ * value is ignored.
151
+ *
152
+ * @see :key_type
153
+ * @see :key_size
154
+ * @since 12.0.8
155
+ *
132
156
  * @option options :value_type
133
157
  * 値の型を指定する。省略すると値のための領域を確保しない。
134
158
  * 値を保存したい場合は必ず指定すること。
135
159
  *
136
160
  * 参考: {Groonga::Type.new}
161
+ *
162
+ * @option options :value_size (nil) The value size in bytes.
163
+ *
164
+ * This is used only when the `:context` isn't associated with a
165
+ * database. If `:context` is associated with a database, this
166
+ * value is ignored.
167
+ *
168
+ * @see :value_type
169
+ * @since 12.0.8
170
+ *
137
171
  * @option options :default_tokenizer
138
172
  * {Groonga::IndexColumn} で使用するトークナイザを指定する。
139
173
  * デフォルトでは何も設定されていないので、テーブルに
@@ -170,7 +204,10 @@ rb_grn_patricia_trie_s_create (int argc, VALUE *argv, VALUE klass)
170
204
  VALUE rb_table;
171
205
  VALUE options, rb_context, rb_name, rb_path, rb_persistent;
172
206
  VALUE rb_key_normalize, rb_key_with_sis, rb_key_type;
207
+ VALUE rb_key_size;
208
+ VALUE rb_key_variable_size;
173
209
  VALUE rb_value_type;
210
+ VALUE rb_value_size;
174
211
  VALUE rb_default_tokenizer;
175
212
  VALUE rb_token_filters;
176
213
  VALUE rb_sub_records;
@@ -186,7 +223,10 @@ rb_grn_patricia_trie_s_create (int argc, VALUE *argv, VALUE klass)
186
223
  "key_normalize", &rb_key_normalize,
187
224
  "key_with_sis", &rb_key_with_sis,
188
225
  "key_type", &rb_key_type,
226
+ "key_size", &rb_key_size,
227
+ "key_variable_size", &rb_key_variable_size,
189
228
  "value_type", &rb_value_type,
229
+ "value_size", &rb_value_size,
190
230
  "default_tokenizer", &rb_default_tokenizer,
191
231
  "token_filters", &rb_token_filters,
192
232
  "sub_records", &rb_sub_records,
@@ -227,10 +267,26 @@ rb_grn_patricia_trie_s_create (int argc, VALUE *argv, VALUE klass)
227
267
  if (RVAL2CBOOL(rb_sub_records))
228
268
  flags |= GRN_OBJ_WITH_SUBREC;
229
269
 
230
- table = grn_table_create(context, name, name_size, path,
231
- flags, key_type, value_type);
232
- if (!table)
233
- rb_grn_context_check(context, rb_ary_new_from_values(argc, argv));
270
+ if (grn_ctx_db(context)) {
271
+ table = grn_table_create(context, name, name_size, path,
272
+ flags, key_type, value_type);
273
+ if (!table)
274
+ rb_grn_context_check(context, rb_ary_new_from_values(argc, argv));
275
+ } else {
276
+ unsigned int key_size = NUM2UINT(rb_key_size);
277
+ unsigned int value_size =
278
+ RB_NIL_P(rb_value_size) ? 0 : NUM2UINT(rb_value_size);
279
+ if (RVAL2CBOOL(rb_key_variable_size)) {
280
+ flags |= GRN_OBJ_KEY_VAR_SIZE;
281
+ }
282
+ table = (grn_obj *)grn_pat_create(context,
283
+ path,
284
+ key_size,
285
+ value_size,
286
+ flags);
287
+ if (!table)
288
+ rb_grn_context_check(context, rb_ary_new_from_values(argc, argv));
289
+ }
234
290
  rb_table = GRNOBJECT2RVAL(klass, context, table, GRN_TRUE);
235
291
 
236
292
  if (!NIL_P(rb_default_tokenizer))
@@ -389,10 +445,14 @@ rb_grn_patricia_trie_scan (VALUE self, VALUE rb_string)
389
445
  grn_pat_scan_hit hits[1024];
390
446
  const char *string;
391
447
  long string_length;
448
+ const char *original_string;
449
+ long rest_offset;
392
450
  grn_bool block_given;
393
451
 
394
452
  string = StringValuePtr(rb_string);
395
453
  string_length = RSTRING_LEN(rb_string);
454
+ original_string = string;
455
+ rest_offset = 0;
396
456
 
397
457
  rb_grn_table_key_support_deconstruct(SELF(self), &table, &context,
398
458
  NULL, NULL, NULL,
@@ -425,7 +485,7 @@ rb_grn_patricia_trie_scan (VALUE self, VALUE rb_string)
425
485
  matched_info = rb_ary_new_from_args(4,
426
486
  record,
427
487
  term,
428
- UINT2NUM(hits[i].offset),
488
+ UINT2NUM(hits[i].offset + rest_offset),
429
489
  UINT2NUM(hits[i].length));
430
490
  if (block_given) {
431
491
  rb_yield(matched_info);
@@ -434,6 +494,7 @@ rb_grn_patricia_trie_scan (VALUE self, VALUE rb_string)
434
494
  }
435
495
  previous_offset = hits[i].offset;
436
496
  }
497
+ rest_offset = rest - original_string;
437
498
  string_length -= rest - string;
438
499
  string = rest;
439
500
  }
@@ -712,15 +712,11 @@ rb_grn_table_get_columns (int argc, VALUE *argv, VALUE self)
712
712
  {
713
713
  grn_ctx *context = NULL;
714
714
  grn_obj *table;
715
- grn_obj *columns;
716
- grn_obj *key_type;
717
- grn_rc rc;
715
+ grn_hash *columns;
718
716
  int n;
719
- grn_table_cursor *cursor;
720
717
  VALUE rb_prefix, rb_columns;
721
718
  char *prefix = NULL;
722
719
  unsigned prefix_size = 0;
723
- VALUE exception;
724
720
 
725
721
  rb_grn_table_deconstruct(SELF(self), &table, &context,
726
722
  NULL, NULL,
@@ -734,23 +730,27 @@ rb_grn_table_get_columns (int argc, VALUE *argv, VALUE self)
734
730
  prefix_size = RSTRING_LEN(rb_prefix);
735
731
  }
736
732
 
737
- key_type = grn_ctx_at(context, GRN_DB_SHORT_TEXT);
738
- columns = grn_table_create(context, NULL, 0, NULL, GRN_TABLE_HASH_KEY,
739
- key_type, 0);
733
+ columns = grn_hash_create(context,
734
+ NULL,
735
+ sizeof(grn_id),
736
+ 0,
737
+ GRN_OBJ_TABLE_HASH_KEY|GRN_HASH_TINY);
740
738
  rb_grn_context_check(context, self);
741
- n = grn_table_columns(context, table, prefix, prefix_size, columns);
739
+ n = grn_table_columns(context,
740
+ table,
741
+ prefix,
742
+ prefix_size,
743
+ (grn_obj *)columns);
742
744
  rb_grn_context_check(context, self);
743
745
 
744
746
  rb_columns = rb_ary_new2(n);
745
747
  if (n == 0) {
746
- grn_obj_unlink(context, columns);
748
+ grn_hash_close(context, columns);
747
749
  return rb_columns;
748
750
  }
749
751
 
750
- cursor = grn_table_cursor_open(context, columns, NULL, 0, NULL, 0,
751
- 0, -1, GRN_CURSOR_ASCENDING);
752
- rb_grn_context_check(context, self);
753
- while (grn_table_cursor_next(context, cursor) != GRN_ID_NIL) {
752
+ VALUE exception = RUBY_Qnil;
753
+ GRN_HASH_EACH_BEGIN(context, columns, cursor, id) {
754
754
  void *key;
755
755
  grn_id *column_id;
756
756
  grn_obj *column;
@@ -758,14 +758,12 @@ rb_grn_table_get_columns (int argc, VALUE *argv, VALUE self)
758
758
  grn_user_data *user_data;
759
759
  grn_bool need_to_set_name = GRN_FALSE;
760
760
 
761
- grn_table_cursor_get_key(context, cursor, &key);
761
+ grn_hash_cursor_get_key(context, cursor, &key);
762
762
  column_id = key;
763
763
  column = grn_ctx_at(context, *column_id);
764
764
  exception = rb_grn_context_to_exception(context, self);
765
- if (!NIL_P(exception)) {
766
- grn_table_cursor_close(context, cursor);
767
- grn_obj_unlink(context, columns);
768
- rb_exc_raise(exception);
765
+ if (!RB_NIL_P(exception)) {
766
+ break;
769
767
  }
770
768
 
771
769
  user_data = grn_obj_user_data(context, column);
@@ -784,13 +782,12 @@ rb_grn_table_get_columns (int argc, VALUE *argv, VALUE self)
784
782
  }
785
783
 
786
784
  rb_ary_push(rb_columns, rb_column);
785
+ } GRN_HASH_EACH_END(context, cursor);
786
+ grn_hash_close(context, columns);
787
+ if (!RB_NIL_P(exception)) {
788
+ rb_exc_raise(exception);
787
789
  }
788
- rc = grn_table_cursor_close(context, cursor);
789
- grn_obj_unlink(context, columns);
790
- if (rc != GRN_SUCCESS) {
791
- rb_grn_context_check(context, self);
792
- rb_grn_rc_check(rc, self);
793
- }
790
+ rb_grn_context_check(context, self);
794
791
 
795
792
  return rb_columns;
796
793
  }
data/ext/groonga/rb-grn.h CHANGED
@@ -92,8 +92,8 @@ RB_GRN_BEGIN_DECLS
92
92
  #define RB_GRN_HAVE_FLOAT32 GRN_VERSION_OR_LATER(10, 0, 2)
93
93
 
94
94
  #define RB_GRN_MAJOR_VERSION 12
95
- #define RB_GRN_MINOR_VERSION 0
96
- #define RB_GRN_MICRO_VERSION 2
95
+ #define RB_GRN_MINOR_VERSION 1
96
+ #define RB_GRN_MICRO_VERSION 0
97
97
 
98
98
  #define RB_GRN_OBJECT(object) ((RbGrnObject *)(object))
99
99
  #define RB_GRN_NAMED_OBJECT(object) ((RbGrnNamedObject *)(object))
@@ -1,6 +1,4 @@
1
- # -*- coding: utf-8 -*-
2
- #
3
- # Copyright (C) 2009-2014 Kouhei Sutou <kou@clear-code.com>
1
+ # Copyright (C) 2009-2022 Sutou Kouhei <kou@clear-code.com>
4
2
  #
5
3
  # This library is free software; you can redistribute it and/or
6
4
  # modify it under the terms of the GNU Lesser General Public
@@ -156,6 +154,44 @@ class PatriciaTrieTest < Test::Unit::TestCase
156
154
  words.scan('muTEki リンクの冒険 ミリバール アルパカ ガッ'))
157
155
  end
158
156
 
157
+ def test_scan_for_many_words
158
+ Groonga::Context.default_options = {:encoding => "utf-8"}
159
+ words = Groonga::PatriciaTrie.create(:key_type => "ShortText",
160
+ :key_normalize => true)
161
+ words.add("x")
162
+ dot = words.add(".")
163
+ longtext = ""
164
+ scanned = []
165
+
166
+ 1025.times.each do |i|
167
+ longtext += "."
168
+ scanned.push([dot, ".", i, 1])
169
+ end
170
+ assert_equal(scanned,
171
+ words.scan(longtext))
172
+ end
173
+
174
+ def test_scan_no_database
175
+ Groonga::Context.open(encoding: "utf-8") do |context|
176
+ Groonga::PatriciaTrie.create(context: context,
177
+ key_size: 4096,
178
+ key_variable_size: true) do |words|
179
+ words.add("リンク")
180
+ arupaka = words.add("アルパカ")
181
+ words.add("アルパカ(生物)")
182
+ adventure_of_link = words.add('リンクの冒険')
183
+ words.add('冒険')
184
+ words.add('ガッ')
185
+ words.add('MUTEKI')
186
+ assert_equal([
187
+ [adventure_of_link, "リンクの冒険", 7, 18],
188
+ [arupaka, "アルパカ", 42, 12],
189
+ ],
190
+ words.scan('muTEki リンクの冒険 ミリバール アルパカ ガッ'))
191
+ end
192
+ end
193
+ end
194
+
159
195
  def test_tag_keys
160
196
  Groonga::Context.default_options = {:encoding => "utf-8"}
161
197
  words = Groonga::PatriciaTrie.create(:key_type => "ShortText",
data/test/test-remote.rb CHANGED
@@ -1,4 +1,4 @@
1
- # Copyright (C) 2009-2019 Kouhei Sutou <kou@clear-code.com>
1
+ # Copyright (C) 2009-2022 Sutou Kouhei <kou@clear-code.com>
2
2
  #
3
3
  # This library is free software; you can redistribute it and/or
4
4
  # modify it under the terms of the GNU Lesser General Public
@@ -100,6 +100,7 @@ class RemoteTest < Test::Unit::TestCase
100
100
  "command_version",
101
101
  "default_command_version",
102
102
  "max_command_version",
103
+ "memory_map_size",
103
104
  "n_jobs",
104
105
  "n_queries",
105
106
  "start_time",
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: rroonga
3
3
  version: !ruby/object:Gem::Version
4
- version: 12.0.2
4
+ version: 12.1.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Kouhei Sutou
@@ -9,10 +9,10 @@ authors:
9
9
  - daijiro
10
10
  - Yuto HAYAMIZU
11
11
  - SHIDARA Yoji
12
- autorequire:
12
+ autorequire:
13
13
  bindir: bin
14
14
  cert_chain: []
15
- date: 2022-04-04 00:00:00.000000000 Z
15
+ date: 2022-12-06 00:00:00.000000000 Z
16
16
  dependencies:
17
17
  - !ruby/object:Gem::Dependency
18
18
  name: groonga-client
@@ -371,7 +371,7 @@ licenses:
371
371
  - LGPL-2.1
372
372
  metadata:
373
373
  msys2_mingw_dependencies: groonga>=12.0.2
374
- post_install_message:
374
+ post_install_message:
375
375
  rdoc_options: []
376
376
  require_paths:
377
377
  - lib
@@ -387,8 +387,8 @@ required_rubygems_version: !ruby/object:Gem::Requirement
387
387
  - !ruby/object:Gem::Version
388
388
  version: '0'
389
389
  requirements: []
390
- rubygems_version: 3.4.0.dev
391
- signing_key:
390
+ rubygems_version: 3.3.24
391
+ signing_key:
392
392
  specification_version: 4
393
393
  summary: Ruby bindings for Groonga that provide full text search and column store
394
394
  features.