isomorfeus-ferret 0.17.3 → 0.17.5

Sign up to get free protection for your applications and to get access to all the features.
Files changed (33) hide show
  1. checksums.yaml +4 -4
  2. data/ext/isomorfeus_ferret_ext/frb_index.c +48 -67
  3. data/ext/isomorfeus_ferret_ext/frb_search.c +47 -47
  4. data/ext/isomorfeus_ferret_ext/frt_document.h +3 -6
  5. data/ext/isomorfeus_ferret_ext/frt_field_index.c +1 -1
  6. data/ext/isomorfeus_ferret_ext/frt_filter.c +2 -2
  7. data/ext/isomorfeus_ferret_ext/frt_ind.c +2 -2
  8. data/ext/isomorfeus_ferret_ext/frt_ind.h +1 -1
  9. data/ext/isomorfeus_ferret_ext/frt_index.c +46 -62
  10. data/ext/isomorfeus_ferret_ext/frt_index.h +3 -3
  11. data/ext/isomorfeus_ferret_ext/frt_q_boolean.c +48 -48
  12. data/ext/isomorfeus_ferret_ext/frt_q_const_score.c +2 -2
  13. data/ext/isomorfeus_ferret_ext/frt_q_filtered_query.c +4 -4
  14. data/ext/isomorfeus_ferret_ext/frt_q_match_all.c +10 -10
  15. data/ext/isomorfeus_ferret_ext/frt_q_multi_term.c +26 -26
  16. data/ext/isomorfeus_ferret_ext/frt_q_phrase.c +12 -12
  17. data/ext/isomorfeus_ferret_ext/frt_q_range.c +2 -2
  18. data/ext/isomorfeus_ferret_ext/frt_q_span.c +144 -145
  19. data/ext/isomorfeus_ferret_ext/frt_q_term.c +9 -9
  20. data/ext/isomorfeus_ferret_ext/frt_search.c +31 -31
  21. data/ext/isomorfeus_ferret_ext/frt_search.h +6 -6
  22. data/ext/isomorfeus_ferret_ext/frt_similarity.c +1 -1
  23. data/ext/isomorfeus_ferret_ext/frt_sort.c +20 -20
  24. data/ext/isomorfeus_ferret_ext/test.c +1 -1
  25. data/ext/isomorfeus_ferret_ext/test_file_deleter.c +1 -2
  26. data/ext/isomorfeus_ferret_ext/test_filter.c +5 -6
  27. data/ext/isomorfeus_ferret_ext/test_index.c +30 -32
  28. data/ext/isomorfeus_ferret_ext/test_search.c +7 -7
  29. data/ext/isomorfeus_ferret_ext/test_sort.c +3 -3
  30. data/ext/isomorfeus_ferret_ext/test_threading.c +1 -1
  31. data/lib/isomorfeus/ferret/index/index.rb +7 -7
  32. data/lib/isomorfeus/ferret/version.rb +1 -1
  33. metadata +11 -19
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: fb825b23a8f4d4422995bd20a52c24fd554c552352ba47e1af7059af60cacd33
4
- data.tar.gz: 565707f28c6ea121d2c37b77c95d9a63f4c9a6ec663c5c0c96f59c499ca26f84
3
+ metadata.gz: 5aa89943933ef18acbb6c1c12e6da3ef85e037cc41740f506c320510406e765f
4
+ data.tar.gz: 7ade42fb44635b0b4dadc40b4376142597849eb76abf474b807a54f7f97d92c5
5
5
  SHA512:
6
- metadata.gz: 3f907b655cdb94f7b69422da855b0972229cf37170e4ede27ded73ae46c77f7e16745e5d1a86856e40bc4e73dda042dccd921fd3f2757e76b8ffbedffa33bbdb
7
- data.tar.gz: ab559729203ecc983ab1fb32e8efa584110e4d5ea30f0ddacead13613138453287370244a4845dc01df222c5e0685537468f16a90a3c271e28b69fb8b95e5f58
6
+ metadata.gz: d412050da5b223cd394378bd7fcca7daa4e9ebcf41f51264fc3f48930ff70ffe013731c2192ff4750f4236f30dd77432c9735dc1d19b03c3e220f9a5a611a3c5
7
+ data.tar.gz: 5bf17019a13fafcdf14124cc47587ca720dd908840f12fc90306899e2728b35d1a493ef683f2a5638105cd5d2c8bf840beaf552a7d75fa724f0ae14199ae5b42
@@ -653,21 +653,14 @@ static VALUE frb_tde_seek_te(VALUE self, VALUE rterm_enum) {
653
653
  return self;
654
654
  }
655
655
 
656
- /*
657
- * call-seq:
658
- * term_doc_enum.doc -> doc_id
659
- *
660
- * Returns the current document number pointed to by the +term_doc_enum+.
656
+ /* Returns the current document number pointed to by the +term_doc_enum+.
661
657
  */
662
- static VALUE frb_tde_doc(VALUE self) {
658
+ static VALUE frb_tde_doc_num(VALUE self) {
663
659
  FrtTermDocEnum *tde = (FrtTermDocEnum *)DATA_PTR(self);
664
660
  return INT2FIX(tde->doc_num(tde));
665
661
  }
666
662
 
667
663
  /*
668
- * call-seq:
669
- * term_doc_enum.doc -> doc_id
670
- *
671
664
  * Returns the frequency of the current document pointed to by the
672
665
  * +term_doc_enum+.
673
666
  */
@@ -676,11 +669,7 @@ static VALUE frb_tde_freq(VALUE self) {
676
669
  return INT2FIX(tde->freq(tde));
677
670
  }
678
671
 
679
- /*
680
- * call-seq:
681
- * term_doc_enum.doc -> doc_id
682
- *
683
- * Move forward to the next document in the enumeration. Returns +true+ if
672
+ /* Move forward to the next document in the enumeration. Returns +true+ if
684
673
  * there is another document or +false+ otherwise.
685
674
  */
686
675
  static VALUE frb_tde_next(VALUE self) {
@@ -688,11 +677,7 @@ static VALUE frb_tde_next(VALUE self) {
688
677
  return tde->next(tde) ? Qtrue : Qfalse;
689
678
  }
690
679
 
691
- /*
692
- * call-seq:
693
- * term_doc_enum.doc -> doc_id
694
- *
695
- * Move forward to the next document in the enumeration. Returns +true+ if
680
+ /* Move forward to the next document in the enumeration. Returns +true+ if
696
681
  * there is another document or +false+ otherwise.
697
682
  */
698
683
  static VALUE frb_tde_next_position(VALUE self) {
@@ -709,7 +694,7 @@ static VALUE frb_tde_next_position(VALUE self) {
709
694
 
710
695
  /*
711
696
  * call-seq:
712
- * term_doc_enum.each {|doc_id, freq| do_something() } -> doc_count
697
+ * term_doc_enum.each {|doc_num, freq| do_something() } -> doc_count
713
698
  *
714
699
  * Iterate through the documents and document frequencies in the
715
700
  * +term_doc_enum+.
@@ -821,8 +806,8 @@ static VALUE frb_tde_to_json(int argc, VALUE *argv, VALUE self) {
821
806
  * used within the each method. For example, to print the term's documents and
822
807
  * positions;
823
808
  *
824
- * tde.each do |doc_id, freq|
825
- * puts "term appeared #{freq} times in document #{doc_id}:"
809
+ * tde.each do |doc_num, freq|
810
+ * puts "term appeared #{freq} times in document #{doc_num}:"
826
811
  * positions = []
827
812
  * tde.each_position {|pos| positions << pos}
828
813
  * puts " #{positions.join(', ')}"
@@ -1796,20 +1781,20 @@ static VALUE frb_ir_init(VALUE self, VALUE rdir) {
1796
1781
 
1797
1782
  /*
1798
1783
  * call-seq:
1799
- * index_reader.set_norm(doc_id, field, val)
1784
+ * index_reader.set_norm(doc_num, field, val)
1800
1785
  *
1801
- * Expert: change the boost value for a +field+ in document at +doc_id+.
1786
+ * Expert: change the boost value for a +field+ in document at +doc_num+.
1802
1787
  * +val+ should be an integer in the range 0..255 which corresponds to an
1803
1788
  * encoded float value.
1804
1789
  */
1805
1790
  static VALUE
1806
- frb_ir_set_norm(VALUE self, VALUE rdoc_id, VALUE rfield, VALUE rval) {
1791
+ frb_ir_set_norm(VALUE self, VALUE rdoc_num, VALUE rfield, VALUE rval) {
1807
1792
  int ex_code = 0;
1808
1793
  const char *msg = NULL;
1809
1794
  FrtIndexReader *ir = (FrtIndexReader *)DATA_PTR(self);
1810
1795
 
1811
1796
  FRT_TRY
1812
- frt_ir_set_norm(ir, FIX2INT(rdoc_id), frb_field(rfield), (frt_uchar)NUM2CHR(rval));
1797
+ frt_ir_set_norm(ir, FIX2INT(rdoc_num), frb_field(rfield), (frt_uchar)NUM2CHR(rval));
1813
1798
  FRT_XCATCHALL
1814
1799
  ex_code = xcontext.excode;
1815
1800
  msg = xcontext.msg;
@@ -1837,7 +1822,7 @@ frb_ir_norms(VALUE self, VALUE rfield) {
1837
1822
  frt_uchar *norms;
1838
1823
  norms = frt_ir_get_norms(ir, frb_field(rfield));
1839
1824
  if (norms) {
1840
- return rb_str_new((char *)norms, ir->max_doc(ir));
1825
+ return rb_str_new((char *)norms, ir->max_doc_num(ir));
1841
1826
  } else {
1842
1827
  return Qnil;
1843
1828
  }
@@ -1855,11 +1840,11 @@ frb_ir_get_norms_into(VALUE self, VALUE rfield, VALUE rnorms, VALUE roffset) {
1855
1840
  int offset;
1856
1841
  offset = FIX2INT(roffset);
1857
1842
  Check_Type(rnorms, T_STRING);
1858
- if (RSTRING_LEN(rnorms) < offset + ir->max_doc(ir)) {
1843
+ if (RSTRING_LEN(rnorms) < offset + ir->max_doc_num(ir)) {
1859
1844
  rb_raise(rb_eArgError, "supplied a string of length:%ld to "
1860
1845
  "IndexReader#get_norms_into but needed a string of length "
1861
1846
  "offset:%d + maxdoc:%d",
1862
- RSTRING_LEN(rnorms), offset, ir->max_doc(ir));
1847
+ RSTRING_LEN(rnorms), offset, ir->max_doc_num(ir));
1863
1848
  }
1864
1849
 
1865
1850
  frt_ir_get_norms_into(ir, frb_field(rfield),
@@ -1927,22 +1912,21 @@ frb_ir_has_deletions(VALUE self) {
1927
1912
  return ir->has_deletions(ir) ? Qtrue : Qfalse;
1928
1913
  }
1929
1914
 
1930
- /*
1931
- * call-seq:
1932
- * index_reader.delete(doc_id) -> index_reader
1915
+ /* call-seq:
1916
+ * index_reader.delete(doc_num) -> index_reader
1933
1917
  *
1934
- * Delete document referenced internally by document id +doc_id+. The
1918
+ * Delete document referenced internally by document id +doc_num+. The
1935
1919
  * document_id is the number used to reference documents in the index and is
1936
1920
  * returned by search methods.
1937
1921
  */
1938
1922
  static VALUE
1939
- frb_ir_delete(VALUE self, VALUE rdoc_id) {
1923
+ frb_ir_delete(VALUE self, VALUE rdoc_num) {
1940
1924
  int ex_code = 0;
1941
1925
  const char *msg = NULL;
1942
1926
  FrtIndexReader *ir = (FrtIndexReader *)DATA_PTR(self);
1943
1927
 
1944
1928
  FRT_TRY
1945
- frt_ir_delete_doc(ir, FIX2INT(rdoc_id));
1929
+ frt_ir_delete_doc(ir, FIX2INT(rdoc_num));
1946
1930
  FRT_XCATCHALL
1947
1931
  ex_code = xcontext.excode;
1948
1932
  msg = xcontext.msg;
@@ -1956,31 +1940,30 @@ frb_ir_delete(VALUE self, VALUE rdoc_id) {
1956
1940
  return self;
1957
1941
  }
1958
1942
 
1959
- /*
1960
- * call-seq:
1961
- * index_reader.deleted?(doc_id) -> bool
1943
+ /* call-seq:
1944
+ * index_reader.deleted?(doc_num) -> bool
1962
1945
  *
1963
- * Returns true if the document at +doc_id+ has been deleted.
1946
+ * Returns true if the document at +doc_num+ has been deleted.
1964
1947
  */
1965
1948
  static VALUE
1966
- frb_ir_is_deleted(VALUE self, VALUE rdoc_id) {
1949
+ frb_ir_is_deleted(VALUE self, VALUE rdoc_num) {
1967
1950
  FrtIndexReader *ir = (FrtIndexReader *)DATA_PTR(self);
1968
- return ir->is_deleted(ir, FIX2INT(rdoc_id)) ? Qtrue : Qfalse;
1951
+ return ir->is_deleted(ir, FIX2INT(rdoc_num)) ? Qtrue : Qfalse;
1969
1952
  }
1970
1953
 
1971
- /*
1972
- * call-seq:
1954
+ /* call-seq:
1973
1955
  * index_reader.max_doc -> number
1974
1956
  *
1975
1957
  * Returns 1 + the maximum document id in the index. It is the
1976
1958
  * document_id that will be used by the next document added to the index. If
1977
1959
  * there are no deletions, this number also refers to the number of documents
1978
1960
  * in the index.
1961
+ * TODO: Rename to next_doc_num?
1979
1962
  */
1980
1963
  static VALUE
1981
- frb_ir_max_doc(VALUE self) {
1964
+ frb_ir_max_doc_num(VALUE self) {
1982
1965
  FrtIndexReader *ir = (FrtIndexReader *)DATA_PTR(self);
1983
- return INT2FIX(ir->max_doc(ir));
1966
+ return INT2FIX(ir->max_doc_num(ir));
1984
1967
  }
1985
1968
 
1986
1969
  /*
@@ -2042,8 +2025,8 @@ frb_get_doc_range(FrtIndexReader *ir, int pos, int len, int max) {
2042
2025
 
2043
2026
  /*
2044
2027
  * call-seq:
2045
- * index_reader.get_document(doc_id) -> LazyDoc
2046
- * index_reader[doc_id] -> LazyDoc
2028
+ * index_reader.get_document(doc_num) -> LazyDoc
2029
+ * index_reader[doc_num] -> LazyDoc
2047
2030
  *
2048
2031
  * Retrieve a document from the index. See LazyDoc for more details on the
2049
2032
  * document returned. Documents are referenced internally by document ids
@@ -2054,7 +2037,7 @@ frb_ir_get_doc(int argc, VALUE *argv, VALUE self) {
2054
2037
  FrtIndexReader *ir = (FrtIndexReader *)DATA_PTR(self);
2055
2038
  VALUE arg1, arg2;
2056
2039
  long pos, len;
2057
- long max = ir->max_doc(ir);
2040
+ long max = ir->max_doc_num(ir);
2058
2041
  rb_scan_args(argc, argv, "11", &arg1, &arg2);
2059
2042
  if (argc == 1) {
2060
2043
  if (FIXNUM_P(arg1)) {
@@ -2117,19 +2100,18 @@ frb_ir_is_latest(VALUE self) {
2117
2100
  return frt_ir_is_latest(ir) ? Qtrue : Qfalse;
2118
2101
  }
2119
2102
 
2120
- /*
2121
- * call-seq:
2122
- * index_reader.term_vector(doc_id, field) -> TermVector
2103
+ /* call-seq:
2104
+ * index_reader.term_vector(doc_num, field) -> TermVector
2123
2105
  *
2124
- * Return the TermVector for the field +field+ in the document at +doc_id+ in
2106
+ * Return the TermVector for the field +field+ in the document at +doc_num+ in
2125
2107
  * the index. Return nil if no such term_vector exists. See TermVector.
2126
2108
  */
2127
2109
  static VALUE
2128
- frb_ir_term_vector(VALUE self, VALUE rdoc_id, VALUE rfield) {
2110
+ frb_ir_term_vector(VALUE self, VALUE rdoc_num, VALUE rfield) {
2129
2111
  FrtIndexReader *ir = (FrtIndexReader *)DATA_PTR(self);
2130
2112
  FrtTermVector *tv;
2131
2113
  VALUE rtv;
2132
- tv = ir->term_vector(ir, FIX2INT(rdoc_id), frb_field(rfield));
2114
+ tv = ir->term_vector(ir, FIX2INT(rdoc_num), frb_field(rfield));
2133
2115
  if (tv) {
2134
2116
  rtv = frb_get_tv(tv);
2135
2117
  frt_tv_destroy(tv);
@@ -2144,18 +2126,17 @@ frb_add_each_tv(void *key, void *value, void *rtvs) {
2144
2126
  rb_hash_aset((VALUE)rtvs, ID2SYM((ID)key), frb_get_tv(value));
2145
2127
  }
2146
2128
 
2147
- /*
2148
- * call-seq:
2149
- * index_reader.term_vectors(doc_id) -> hash of TermVector
2129
+ /* call-seq:
2130
+ * index_reader.term_vectors(doc_num) -> hash of TermVector
2150
2131
  *
2151
- * Return the TermVectors for the document at +doc_id+ in the index. The
2132
+ * Return the TermVectors for the document at +doc_num+ in the index. The
2152
2133
  * value returned is a hash of the TermVectors for each field in the document
2153
2134
  * and they are referenced by field names (as symbols).
2154
2135
  */
2155
2136
  static VALUE
2156
- frb_ir_term_vectors(VALUE self, VALUE rdoc_id) {
2137
+ frb_ir_term_vectors(VALUE self, VALUE rdoc_num) {
2157
2138
  FrtIndexReader *ir = (FrtIndexReader *)DATA_PTR(self);
2158
- FrtHash *tvs = ir->term_vectors(ir, FIX2INT(rdoc_id));
2139
+ FrtHash *tvs = ir->term_vectors(ir, FIX2INT(rdoc_num));
2159
2140
  VALUE rtvs = rb_hash_new();
2160
2141
  frt_h_each(tvs, &frb_add_each_tv, (void *)rtvs);
2161
2142
  frt_h_destroy(tvs);
@@ -2359,11 +2340,11 @@ static VALUE frb_ir_each(VALUE self) {
2359
2340
  int ex_code = 0;
2360
2341
  const char *msg = NULL;
2361
2342
  long i;
2362
- long max_doc = ir->max_doc(ir);
2343
+ int max_doc_num = ir->max_doc_num(ir);
2363
2344
  VALUE rld;
2364
2345
 
2365
2346
  FRT_TRY
2366
- for (i = 0; i < max_doc; i++) {
2347
+ for (i = 0; i < max_doc_num; i++) {
2367
2348
  if (ir->is_deleted(ir, i)) continue;
2368
2349
  rld = frb_get_lazy_doc(ir->get_lazy_doc(ir, i));
2369
2350
  rb_yield(rld);
@@ -2504,8 +2485,8 @@ Init_TermEnum(void) {
2504
2485
  *
2505
2486
  * tde = index_reader.term_docs_for(:content, "fox")
2506
2487
  *
2507
- * tde.each do |doc_id, freq|
2508
- * puts "fox appeared #{freq} times in document #{doc_id}:"
2488
+ * tde.each do |doc_num, freq|
2489
+ * puts "fox appeared #{freq} times in document #{doc_num}:"
2509
2490
  * positions = []
2510
2491
  * tde.each_position {|pos| positions << pos}
2511
2492
  * puts " #{positions.join(', ')}"
@@ -2530,7 +2511,7 @@ static void Init_TermDocEnum(void) {
2530
2511
  rb_define_alloc_func(cTermDocEnum, frb_tde_alloc);
2531
2512
  rb_define_method(cTermDocEnum, "seek", frb_tde_seek, 2);
2532
2513
  rb_define_method(cTermDocEnum, "seek_term_enum", frb_tde_seek_te, 1);
2533
- rb_define_method(cTermDocEnum, "doc", frb_tde_doc, 0);
2514
+ rb_define_method(cTermDocEnum, "doc_num", frb_tde_doc_num, 0);
2534
2515
  rb_define_method(cTermDocEnum, "freq", frb_tde_freq, 0);
2535
2516
  rb_define_method(cTermDocEnum, "next?", frb_tde_next, 0);
2536
2517
  rb_define_method(cTermDocEnum, "next_position", frb_tde_next_position, 0);
@@ -2599,7 +2580,7 @@ static void Init_TVTerm(void) {
2599
2580
  *
2600
2581
  * == Example
2601
2582
  *
2602
- * tv = index_reader.term_vector(doc_id, :content)
2583
+ * tv = index_reader.term_vector(doc_num, :content)
2603
2584
  * tv_term = tv.find {|tvt| tvt.term == "fox"}
2604
2585
  *
2605
2586
  * # get the term frequency
@@ -2825,7 +2806,7 @@ void Init_IndexReader(void) {
2825
2806
  rb_define_method(cIndexReader, "has_deletions?", frb_ir_has_deletions, 0);
2826
2807
  rb_define_method(cIndexReader, "delete", frb_ir_delete, 1);
2827
2808
  rb_define_method(cIndexReader, "deleted?", frb_ir_is_deleted, 1);
2828
- rb_define_method(cIndexReader, "max_doc", frb_ir_max_doc, 0);
2809
+ rb_define_method(cIndexReader, "max_doc_num", frb_ir_max_doc_num, 0);
2829
2810
  rb_define_method(cIndexReader, "num_docs", frb_ir_num_docs, 0);
2830
2811
  rb_define_method(cIndexReader, "undelete_all", frb_ir_undelete_all, 0);
2831
2812
  rb_define_method(cIndexReader, "latest?", frb_ir_is_latest, 0);
@@ -93,7 +93,7 @@ static VALUE sym_integer;
93
93
  static VALUE sym_float;
94
94
  static VALUE sym_string;
95
95
  static VALUE sym_auto;
96
- static VALUE sym_doc_id;
96
+ static VALUE sym_doc_num;
97
97
  static VALUE sym_score;
98
98
  static VALUE sym_byte;
99
99
 
@@ -146,7 +146,7 @@ extern VALUE frb_get_lazy_doc(FrtLazyDoc *lazy_doc);
146
146
  ****************************************************************************/
147
147
 
148
148
  static VALUE frb_get_hit(FrtHit *hit) {
149
- return rb_struct_new(cHit, INT2FIX(hit->doc), rb_float_new((double)hit->score), NULL);
149
+ return rb_struct_new(cHit, INT2FIX(hit->doc_num), rb_float_new((double)hit->score), NULL);
150
150
  }
151
151
 
152
152
  /****************************************************************************
@@ -198,10 +198,10 @@ static VALUE frb_td_to_s(int argc, VALUE *argv, VALUE self) {
198
198
 
199
199
  for (i = 0; i < len; i++) {
200
200
  VALUE rhit = RARRAY_PTR(rhits)[i];
201
- int doc_id = FIX2INT(rb_funcall(rhit, id_doc, 0));
201
+ int doc_num = FIX2INT(rb_funcall(rhit, id_doc, 0));
202
202
  const char *value = "";
203
203
  size_t value_len = 0;
204
- FrtLazyDoc *lzd = sea->get_lazy_doc(sea, doc_id);
204
+ FrtLazyDoc *lzd = sea->get_lazy_doc(sea, doc_num);
205
205
  FrtLazyDocField *lzdf = frt_lazy_doc_get(lzd, field);
206
206
  if (NULL != lzdf) {
207
207
  value = frt_lazy_df_get_data(lzdf, 0);
@@ -212,7 +212,7 @@ static VALUE frb_td_to_s(int argc, VALUE *argv, VALUE self) {
212
212
  FRT_REALLOC_N(str, char, capa);
213
213
  }
214
214
 
215
- sprintf(str + p, "\t%d \"%s\": %0.5f\n", doc_id, value,
215
+ sprintf(str + p, "\t%d \"%s\": %0.5f\n", doc_num, value,
216
216
  NUM2DBL(rb_funcall(rhit, id_score, 0)));
217
217
  p += strlen(str + p);
218
218
  frt_lazy_doc_close(lzd);
@@ -279,7 +279,7 @@ static VALUE frb_td_to_json(VALUE self) {
279
279
  FrtLazyDoc *lzd;
280
280
  FrtSearcher *sea = (FrtSearcher *)DATA_PTR(rb_funcall(self, id_searcher, 0));
281
281
  const int num_hits = RARRAY_LEN(rhits);
282
- int doc_id;
282
+ int doc_num;
283
283
  int len = 32768;
284
284
  char *str = FRT_ALLOC_N(char, len);
285
285
  char *s = str;
@@ -290,8 +290,8 @@ static VALUE frb_td_to_json(VALUE self) {
290
290
  if (i) *(s++) = ',';
291
291
  *(s++) = '{';
292
292
  rhit = RARRAY_PTR(rhits)[i];
293
- doc_id = FIX2INT(rb_funcall(rhit, id_doc, 0));
294
- lzd = sea->get_lazy_doc(sea, doc_id);
293
+ doc_num = FIX2INT(rb_funcall(rhit, id_doc, 0));
294
+ lzd = sea->get_lazy_doc(sea, doc_num);
295
295
  s = frb_lzd_load_to_json(lzd, &str, s, &len);
296
296
  frt_lazy_doc_close(lzd);
297
297
  *(s++) = '}';
@@ -2692,7 +2692,7 @@ static int get_sort_type(VALUE rtype) {
2692
2692
  return FRT_SORT_TYPE_STRING;
2693
2693
  } else if (rtype == sym_score) {
2694
2694
  return FRT_SORT_TYPE_SCORE;
2695
- } else if (rtype == sym_doc_id) {
2695
+ } else if (rtype == sym_doc_num) {
2696
2696
  return FRT_SORT_TYPE_DOC;
2697
2697
  } else if (rtype == sym_float) {
2698
2698
  return FRT_SORT_TYPE_FLOAT;
@@ -2700,7 +2700,7 @@ static int get_sort_type(VALUE rtype) {
2700
2700
  return FRT_SORT_TYPE_AUTO;
2701
2701
  } else {
2702
2702
  rb_raise(rb_eArgError, ":%s is an unknown sort-type. Please choose "
2703
- "from [:integer, :float, :string, :auto, :score, :doc_id]",
2703
+ "from [:integer, :float, :string, :auto, :score, :doc_num]",
2704
2704
  rb_id2name(SYM2ID(rtype)));
2705
2705
  }
2706
2706
  return FRT_SORT_TYPE_DOC;
@@ -2717,7 +2717,7 @@ static int get_sort_type(VALUE rtype) {
2717
2717
  *
2718
2718
  * :type:: Default: +:auto+. Specifies how a field should be sorted.
2719
2719
  * Choose from one of; +:auto+, +:integer+, +:float+,
2720
- * +:string+, +:byte+, +:doc_id+ or +:score+. +:auto+ will
2720
+ * +:string+, +:byte+, +:doc_num+ or +:score+. +:auto+ will
2721
2721
  * check the datatype of the field by trying to parse it into
2722
2722
  * either a number or a float before settling on a string
2723
2723
  * sort. String sort is locale dependent and works for
@@ -2786,7 +2786,7 @@ static VALUE frb_sf_get_name(VALUE self) {
2786
2786
  * sort_field.type -> symbol
2787
2787
  *
2788
2788
  * Return the type of sort. Should be one of; +:auto+, +:integer+, +:float+,
2789
- * +:string+, +:byte+, +:doc_id+ or +:score+.
2789
+ * +:string+, +:byte+, +:doc_num+ or +:score+.
2790
2790
  */
2791
2791
  static VALUE frb_sf_get_type(VALUE self) {
2792
2792
  GET_SF();
@@ -2796,7 +2796,7 @@ static VALUE frb_sf_get_type(VALUE self) {
2796
2796
  case FRT_SORT_TYPE_FLOAT: return sym_float;
2797
2797
  case FRT_SORT_TYPE_STRING: return sym_string;
2798
2798
  case FRT_SORT_TYPE_AUTO: return sym_auto;
2799
- case FRT_SORT_TYPE_DOC: return sym_doc_id;
2799
+ case FRT_SORT_TYPE_DOC: return sym_doc_num;
2800
2800
  case FRT_SORT_TYPE_SCORE: return sym_score;
2801
2801
  }
2802
2802
  return Qnil;
@@ -2900,7 +2900,7 @@ static void frb_parse_sort_str(FrtSort *sort, char *xsort_str) {
2900
2900
 
2901
2901
  if (strcmp("SCORE", s) == 0) {
2902
2902
  sf = frt_sort_field_score_new(reverse);
2903
- } else if (strcmp("DOC_ID", s) == 0) {
2903
+ } else if (strcmp("DOC_NUM", s) == 0) {
2904
2904
  sf = frt_sort_field_doc_new(reverse);
2905
2905
  } else {
2906
2906
  sf = frt_sort_field_auto_new(rb_intern(s), reverse);
@@ -2939,7 +2939,7 @@ static void frb_sort_add(FrtSort *sort, VALUE rsf, bool reverse) {
2939
2939
  #define GET_SORT() FrtSort *sort = (FrtSort *)DATA_PTR(self)
2940
2940
  /*
2941
2941
  * call-seq:
2942
- * Sort.new(sort_fields = [SortField::SCORE, SortField::DOC_ID], reverse = false) -> Sort
2942
+ * Sort.new(sort_fields = [SortField::SCORE, SortField::DOC_NUM], reverse = false) -> Sort
2943
2943
  *
2944
2944
  * Create a new Sort object. If +reverse+ is true, all sort_fields will be
2945
2945
  * reversed so if any of them are already reversed they will be turned back
@@ -3061,21 +3061,21 @@ static VALUE frb_sea_doc_freq(VALUE self, VALUE rfield, VALUE rterm) {
3061
3061
 
3062
3062
  /*
3063
3063
  * call-seq:
3064
- * searcher.get_document(doc_id) -> LazyDoc
3065
- * searcher[doc_id] -> LazyDoc
3064
+ * searcher.get_document(doc_num) -> LazyDoc
3065
+ * searcher[doc_num] -> LazyDoc
3066
3066
  *
3067
3067
  * Retrieve a document from the index. See LazyDoc for more details on the
3068
3068
  * document returned. Documents are referenced internally by document ids
3069
3069
  * which are returned by the Searchers search methods.
3070
3070
  */
3071
- static VALUE frb_sea_doc(VALUE self, VALUE rdoc_id) {
3071
+ static VALUE frb_sea_doc(VALUE self, VALUE rdoc_num) {
3072
3072
  int ex_code = 0;
3073
3073
  const char *msg = NULL;
3074
3074
  GET_SEA();
3075
3075
  VALUE ld = Qnil;
3076
3076
 
3077
3077
  FRT_TRY
3078
- ld = frb_get_lazy_doc(sea->get_lazy_doc(sea, FIX2INT(rdoc_id)));
3078
+ ld = frb_get_lazy_doc(sea->get_lazy_doc(sea, FIX2INT(rdoc_num)));
3079
3079
  FRT_XCATCHALL
3080
3080
  ex_code = xcontext.excode;
3081
3081
  msg = xcontext.msg;
@@ -3098,13 +3098,13 @@ static VALUE frb_sea_doc(VALUE self, VALUE rdoc_id) {
3098
3098
  * there are no deletions, this number also refers to the number of documents
3099
3099
  * in the index.
3100
3100
  */
3101
- static VALUE frb_sea_max_doc(VALUE self) {
3101
+ static VALUE frb_sea_max_doc_num(VALUE self) {
3102
3102
  GET_SEA();
3103
- return INT2FIX(sea->max_doc(sea));
3103
+ return INT2FIX(sea->max_doc_num(sea));
3104
3104
  }
3105
3105
 
3106
- static float call_filter_proc(int doc_id, float score, FrtSearcher *sea, void *arg) {
3107
- VALUE val = rb_funcall((VALUE)arg, id_call, 3, INT2FIX(doc_id), rb_float_new((double)score), sea->rsea);
3106
+ static float call_filter_proc(int doc_num, float score, FrtSearcher *sea, void *arg) {
3107
+ VALUE val = rb_funcall((VALUE)arg, id_call, 3, INT2FIX(doc_num), rb_float_new((double)score), sea->rsea);
3108
3108
  switch (TYPE(val)) {
3109
3109
  case T_NIL:
3110
3110
  case T_FALSE:
@@ -3261,7 +3261,7 @@ static FrtTopDocs *frb_sea_search_internal(FrtQuery *query, VALUE roptions, FrtS
3261
3261
  * to specify a field's type to sort it correctly. For more
3262
3262
  * on this, see the documentation for SortField
3263
3263
  * :filter:: a Filter object to filter the search results with
3264
- * :filter_proc:: a filter Proc is a Proc which takes the doc_id, the score
3264
+ * :filter_proc:: a filter Proc is a Proc which takes the doc_num, the score
3265
3265
  * and the Searcher object as its parameters and returns
3266
3266
  * either a Boolean value specifying whether the result
3267
3267
  * should be included in the result set, or a Float between 0
@@ -3280,13 +3280,13 @@ static VALUE frb_sea_search(int argc, VALUE *argv, VALUE self) {
3280
3280
 
3281
3281
  /*
3282
3282
  * call-seq:
3283
- * searcher.search_each(query, options = {}) {|doc_id, score| do_something}
3283
+ * searcher.search_each(query, options = {}) {|doc_num, score| do_something}
3284
3284
  * -> total_hits
3285
3285
  *
3286
3286
  * Run a query through the Searcher on the index. A TopDocs object is
3287
3287
  * returned with the relevant results. The +query+ is a Query object. The
3288
3288
  * Searcher#search_each method yields the internal document id (used to
3289
- * reference documents in the Searcher object like this; +searcher[doc_id]+)
3289
+ * reference documents in the Searcher object like this; +searcher[doc_num]+)
3290
3290
  * and the search score for that document. It is possible for the score to be
3291
3291
  * greater than 1.0 for some queries and taking boosts into account. This
3292
3292
  * method will also normalize scores to the range 0.0..1.0 when the max-score
@@ -3314,7 +3314,7 @@ static VALUE frb_sea_search(int argc, VALUE *argv, VALUE self) {
3314
3314
  * to specify a field's type to sort it correctly. For more
3315
3315
  * on this, see the documentation for SortField
3316
3316
  * :filter:: a Filter object to filter the search results with
3317
- * :filter_proc:: a filter Proc is a Proc which takes the doc_id, the score
3317
+ * :filter_proc:: a filter Proc is a Proc which takes the doc_num, the score
3318
3318
  * and the Searcher object as its parameters and returns a
3319
3319
  * Boolean value specifying whether the result should be
3320
3320
  * included in the result set.
@@ -3335,7 +3335,7 @@ static VALUE frb_sea_search_each(int argc, VALUE *argv, VALUE self) {
3335
3335
 
3336
3336
  /* yield normalized scores */
3337
3337
  for (i = 0; i < td->size; i++) {
3338
- rb_yield_values(2, INT2FIX(td->hits[i]->doc), rb_float_new((double)(td->hits[i]->score/max_score)));
3338
+ rb_yield_values(2, INT2FIX(td->hits[i]->doc_num), rb_float_new((double)(td->hits[i]->score/max_score)));
3339
3339
  }
3340
3340
 
3341
3341
  rtotal_hits = INT2FIX(td->total_hits);
@@ -3430,14 +3430,14 @@ static VALUE frb_sea_scan(int argc, VALUE *argv, VALUE self) {
3430
3430
 
3431
3431
  /*
3432
3432
  * call-seq:
3433
- * searcher.explain(query, doc_id) -> Explanation
3433
+ * searcher.explain(query, doc_num) -> Explanation
3434
3434
  *
3435
3435
  * Create an explanation object to explain the score returned for a
3436
- * particular document at +doc_id+ in the index for the query +query+.
3436
+ * particular document at +doc_num+ in the index for the query +query+.
3437
3437
  *
3438
3438
  * Usually used like this;
3439
3439
  *
3440
- * puts searcher.explain(query, doc_id).to_s
3440
+ * puts searcher.explain(query, doc_num).to_s
3441
3441
  */
3442
3442
 
3443
3443
  static size_t frb_explanation_size(const void *p) {
@@ -3468,17 +3468,17 @@ static VALUE frb_expl_alloc(VALUE rclass) {
3468
3468
  return TypedData_Wrap_Struct(rclass, &frb_explanation_t, e);
3469
3469
  }
3470
3470
 
3471
- static VALUE frb_sea_explain(VALUE self, VALUE rquery, VALUE rdoc_id) {
3471
+ static VALUE frb_sea_explain(VALUE self, VALUE rquery, VALUE rdoc_num) {
3472
3472
  GET_SEA();
3473
3473
  FrtQuery *query = DATA_PTR(rquery);
3474
3474
  FrtExplanation *expl;
3475
- expl = sea->explain(sea, query, FIX2INT(rdoc_id));
3475
+ expl = sea->explain(sea, query, FIX2INT(rdoc_num));
3476
3476
  return TypedData_Wrap_Struct(cExplanation, &frb_explanation_t, expl);
3477
3477
  }
3478
3478
 
3479
3479
  /*
3480
3480
  * call-seq:
3481
- * searcher.highlight(query, doc_id, field, options = {}) -> Array
3481
+ * searcher.highlight(query, doc_num, field, options = {}) -> Array
3482
3482
  *
3483
3483
  * Returns an array of strings with the matches highlighted.
3484
3484
  *
@@ -3500,7 +3500,7 @@ static VALUE frb_sea_explain(VALUE self, VALUE rquery, VALUE rdoc_id) {
3500
3500
  */
3501
3501
  static VALUE frb_sea_highlight(int argc, VALUE *argv, VALUE self) {
3502
3502
  GET_SEA();
3503
- VALUE rquery, rdoc_id, rfield, roptions, v;
3503
+ VALUE rquery, rdoc_num, rfield, roptions, v;
3504
3504
  int excerpt_length = 150;
3505
3505
  int num_excerpts = 2;
3506
3506
  const char *pre_tag = "<b>";
@@ -3508,7 +3508,7 @@ static VALUE frb_sea_highlight(int argc, VALUE *argv, VALUE self) {
3508
3508
  const char *ellipsis = "...";
3509
3509
  char **excerpts;
3510
3510
 
3511
- rb_scan_args(argc, argv, "31", &rquery, &rdoc_id, &rfield, &roptions);
3511
+ rb_scan_args(argc, argv, "31", &rquery, &rdoc_num, &rfield, &roptions);
3512
3512
  FrtQuery *query = DATA_PTR(rquery);
3513
3513
  if (argc > 3) {
3514
3514
  if (TYPE(roptions) != T_HASH) {
@@ -3538,7 +3538,7 @@ static VALUE frb_sea_highlight(int argc, VALUE *argv, VALUE self) {
3538
3538
 
3539
3539
  if ((excerpts = frt_searcher_highlight(sea,
3540
3540
  query,
3541
- FIX2INT(rdoc_id),
3541
+ FIX2INT(rdoc_num),
3542
3542
  frb_field(rfield),
3543
3543
  excerpt_length,
3544
3544
  num_excerpts,
@@ -3869,7 +3869,7 @@ static void Init_TopDocs(void) {
3869
3869
  *
3870
3870
  * == Example
3871
3871
  *
3872
- * puts searcher.explain(query, doc_id).to_s
3872
+ * puts searcher.explain(query, doc_num).to_s
3873
3873
  */
3874
3874
  static void Init_Explanation(void) {
3875
3875
  cExplanation = rb_define_class_under(mSearch, "Explanation", rb_cObject);
@@ -4748,7 +4748,7 @@ static void Init_Filter(void) {
4748
4748
  * * :float
4749
4749
  * * :string
4750
4750
  * * :byte
4751
- * * :doc_id
4751
+ * * :doc_num
4752
4752
  * * :score
4753
4753
  *
4754
4754
  * The type of the SortField is set by passing it as a parameter to the
@@ -4784,7 +4784,7 @@ static void Init_SortField(void) {
4784
4784
  sym_float = ID2SYM(rb_intern("float"));
4785
4785
  sym_string = ID2SYM(rb_intern("string"));
4786
4786
  sym_auto = ID2SYM(rb_intern("auto"));
4787
- sym_doc_id = ID2SYM(rb_intern("doc_id"));
4787
+ sym_doc_num = ID2SYM(rb_intern("doc_num"));
4788
4788
  sym_score = ID2SYM(rb_intern("score"));
4789
4789
  sym_byte = ID2SYM(rb_intern("byte"));
4790
4790
 
@@ -4804,12 +4804,12 @@ static void Init_SortField(void) {
4804
4804
  rb_define_const(cSortField, "SCORE_REV", TypedData_Wrap_Struct(cSortField, &frb_sort_field_t, FRT_SORT_FIELD_SCORE_REV));
4805
4805
  FRT_SORT_FIELD_SCORE_REV->rfield = rb_const_get(cSortField, rb_intern("SCORE_REV"));
4806
4806
 
4807
- rb_define_const(cSortField, "DOC_ID", TypedData_Wrap_Struct(cSortField, &frb_sort_field_t, FRT_SORT_FIELD_DOC));
4808
- oSORT_FIELD_DOC = rb_const_get(cSortField, rb_intern("DOC_ID"));
4807
+ rb_define_const(cSortField, "DOC_NUM", TypedData_Wrap_Struct(cSortField, &frb_sort_field_t, FRT_SORT_FIELD_DOC));
4808
+ oSORT_FIELD_DOC = rb_const_get(cSortField, rb_intern("DOC_NUM"));
4809
4809
  FRT_SORT_FIELD_DOC->rfield = oSORT_FIELD_DOC;
4810
4810
 
4811
- rb_define_const(cSortField, "DOC_ID_REV", TypedData_Wrap_Struct(cSortField, &frb_sort_field_t, FRT_SORT_FIELD_DOC_REV));
4812
- FRT_SORT_FIELD_DOC_REV->rfield = rb_const_get(cSortField, rb_intern("DOC_ID_REV"));
4811
+ rb_define_const(cSortField, "DOC_NUM_REV", TypedData_Wrap_Struct(cSortField, &frb_sort_field_t, FRT_SORT_FIELD_DOC_REV));
4812
+ FRT_SORT_FIELD_DOC_REV->rfield = rb_const_get(cSortField, rb_intern("DOC_NUM_REV"));
4813
4813
  }
4814
4814
 
4815
4815
  /*
@@ -4871,8 +4871,8 @@ static void Init_Sort(void) {
4871
4871
  *
4872
4872
  * searcher.search_each(TermQuery.new(:content, "ferret"),
4873
4873
  * :filter => RangeFilter.new(:date, :< => "2006"),
4874
- * :sort => "date DESC, title") do |doc_id, score|
4875
- * puts "#{searcher[doc_id][title] scored #{score}"
4874
+ * :sort => "date DESC, title") do |doc_num, score|
4875
+ * puts "#{searcher[doc_num][:title]} scored #{score}"
4876
4876
  * end
4877
4877
  */
4878
4878
  static void Init_Searcher(void) {
@@ -4902,7 +4902,7 @@ static void Init_Searcher(void) {
4902
4902
  rb_define_method(cSearcher, "doc_freq", frb_sea_doc_freq, 2);
4903
4903
  rb_define_method(cSearcher, "get_document", frb_sea_doc, 1);
4904
4904
  rb_define_method(cSearcher, "[]", frb_sea_doc, 1);
4905
- rb_define_method(cSearcher, "max_doc", frb_sea_max_doc, 0);
4905
+ rb_define_method(cSearcher, "max_doc_num", frb_sea_max_doc_num, 0);
4906
4906
  rb_define_method(cSearcher, "search", frb_sea_search, -1);
4907
4907
  rb_define_method(cSearcher, "search_each", frb_sea_search_each, -1);
4908
4908
  rb_define_method(cSearcher, "scan", frb_sea_scan, -1);
@@ -5,14 +5,11 @@
5
5
  #include "frt_doc_field.h"
6
6
  #include <ruby/encoding.h>
7
7
 
8
- /****************************************************************************
9
- *
10
- * FrtDocument
11
- *
12
- ****************************************************************************/
13
-
14
8
  #define FRT_DOC_INIT_CAPA 8
9
+
15
10
  typedef struct FrtDocument {
11
+ // frt_uchar ulid[16];
12
+ // char *ulid_c;
16
13
  FrtHash *field_dict;
17
14
  int field_count;
18
15
  int capa;
@@ -60,7 +60,7 @@ FrtFieldIndex *frt_field_index_get(FrtIndexReader *ir, ID field, const FrtFieldI
60
60
  * just use the field_infos field symbol */
61
61
  self->field = fi->name;
62
62
 
63
- length = ir->max_doc(ir);
63
+ length = ir->max_doc_num(ir);
64
64
  if (length > 0) {
65
65
  FRT_TRY
66
66
  {