isomorfeus-ferret 0.17.3 → 0.17.4
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/ext/isomorfeus_ferret_ext/frb_index.c +48 -67
- data/ext/isomorfeus_ferret_ext/frb_search.c +47 -47
- data/ext/isomorfeus_ferret_ext/frt_document.h +3 -6
- data/ext/isomorfeus_ferret_ext/frt_field_index.c +1 -1
- data/ext/isomorfeus_ferret_ext/frt_filter.c +2 -2
- data/ext/isomorfeus_ferret_ext/frt_ind.c +2 -2
- data/ext/isomorfeus_ferret_ext/frt_ind.h +1 -1
- data/ext/isomorfeus_ferret_ext/frt_index.c +46 -62
- data/ext/isomorfeus_ferret_ext/frt_index.h +3 -3
- data/ext/isomorfeus_ferret_ext/frt_q_boolean.c +48 -48
- data/ext/isomorfeus_ferret_ext/frt_q_const_score.c +2 -2
- data/ext/isomorfeus_ferret_ext/frt_q_filtered_query.c +4 -4
- data/ext/isomorfeus_ferret_ext/frt_q_match_all.c +10 -10
- data/ext/isomorfeus_ferret_ext/frt_q_multi_term.c +26 -26
- data/ext/isomorfeus_ferret_ext/frt_q_phrase.c +12 -12
- data/ext/isomorfeus_ferret_ext/frt_q_range.c +2 -2
- data/ext/isomorfeus_ferret_ext/frt_q_span.c +144 -145
- data/ext/isomorfeus_ferret_ext/frt_q_term.c +9 -9
- data/ext/isomorfeus_ferret_ext/frt_search.c +31 -31
- data/ext/isomorfeus_ferret_ext/frt_search.h +6 -6
- data/ext/isomorfeus_ferret_ext/frt_similarity.c +1 -1
- data/ext/isomorfeus_ferret_ext/frt_sort.c +20 -20
- data/ext/isomorfeus_ferret_ext/test.c +1 -1
- data/ext/isomorfeus_ferret_ext/test_file_deleter.c +1 -2
- data/ext/isomorfeus_ferret_ext/test_filter.c +5 -6
- data/ext/isomorfeus_ferret_ext/test_index.c +30 -32
- data/ext/isomorfeus_ferret_ext/test_search.c +7 -7
- data/ext/isomorfeus_ferret_ext/test_sort.c +3 -3
- data/ext/isomorfeus_ferret_ext/test_threading.c +1 -1
- data/lib/isomorfeus/ferret/index/index.rb +7 -7
- data/lib/isomorfeus/ferret/version.rb +1 -1
- metadata +12 -6
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: dd91e44772caeec702203193eba91cf212c4363375807333fa1baccfc49e9d4f
|
4
|
+
data.tar.gz: 1f97573ef9b7c5da1951f00b1c1152fdf197a8c68293e775ab4ab101a7f23e44
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: d6612d311dd577feb8b1444a4c215ff9024b60251c2ff42e2c04a56a2ab7d2ea969f98a2ac590bfb92b936dc68917f4d50d4a6ef4c358415b9013dfe5ef6c2c7
|
7
|
+
data.tar.gz: 8cb37a11451cde28ec0d7f678f86f73be522331663034e5331470c181fc618b26ddac98a72ea79a619dde60183de54cc257f44bef410c16a03caf73859ea5ced
|
@@ -653,21 +653,14 @@ static VALUE frb_tde_seek_te(VALUE self, VALUE rterm_enum) {
|
|
653
653
|
return self;
|
654
654
|
}
|
655
655
|
|
656
|
-
/*
|
657
|
-
* call-seq:
|
658
|
-
* term_doc_enum.doc -> doc_id
|
659
|
-
*
|
660
|
-
* Returns the current document number pointed to by the +term_doc_enum+.
|
656
|
+
/* Returns the current document number pointed to by the +term_doc_enum+.
|
661
657
|
*/
|
662
|
-
static VALUE
|
658
|
+
static VALUE frb_tde_doc_num(VALUE self) {
|
663
659
|
FrtTermDocEnum *tde = (FrtTermDocEnum *)DATA_PTR(self);
|
664
660
|
return INT2FIX(tde->doc_num(tde));
|
665
661
|
}
|
666
662
|
|
667
663
|
/*
|
668
|
-
* call-seq:
|
669
|
-
* term_doc_enum.doc -> doc_id
|
670
|
-
*
|
671
664
|
* Returns the frequency of the current document pointed to by the
|
672
665
|
* +term_doc_enum+.
|
673
666
|
*/
|
@@ -676,11 +669,7 @@ static VALUE frb_tde_freq(VALUE self) {
|
|
676
669
|
return INT2FIX(tde->freq(tde));
|
677
670
|
}
|
678
671
|
|
679
|
-
/*
|
680
|
-
* call-seq:
|
681
|
-
* term_doc_enum.doc -> doc_id
|
682
|
-
*
|
683
|
-
* Move forward to the next document in the enumeration. Returns +true+ if
|
672
|
+
/* Move forward to the next document in the enumeration. Returns +true+ if
|
684
673
|
* there is another document or +false+ otherwise.
|
685
674
|
*/
|
686
675
|
static VALUE frb_tde_next(VALUE self) {
|
@@ -688,11 +677,7 @@ static VALUE frb_tde_next(VALUE self) {
|
|
688
677
|
return tde->next(tde) ? Qtrue : Qfalse;
|
689
678
|
}
|
690
679
|
|
691
|
-
/*
|
692
|
-
* call-seq:
|
693
|
-
* term_doc_enum.doc -> doc_id
|
694
|
-
*
|
695
|
-
* Move forward to the next document in the enumeration. Returns +true+ if
|
680
|
+
/* Move forward to the next document in the enumeration. Returns +true+ if
|
696
681
|
* there is another document or +false+ otherwise.
|
697
682
|
*/
|
698
683
|
static VALUE frb_tde_next_position(VALUE self) {
|
@@ -709,7 +694,7 @@ static VALUE frb_tde_next_position(VALUE self) {
|
|
709
694
|
|
710
695
|
/*
|
711
696
|
* call-seq:
|
712
|
-
* term_doc_enum.each {|
|
697
|
+
* term_doc_enum.each {|doc_num, freq| do_something() } -> doc_count
|
713
698
|
*
|
714
699
|
* Iterate through the documents and document frequencies in the
|
715
700
|
* +term_doc_enum+.
|
@@ -821,8 +806,8 @@ static VALUE frb_tde_to_json(int argc, VALUE *argv, VALUE self) {
|
|
821
806
|
* used within the each method. For example, to print the term's documents and
|
822
807
|
* positions;
|
823
808
|
*
|
824
|
-
* tde.each do |
|
825
|
-
* puts "term appeared #{freq} times in document #{
|
809
|
+
* tde.each do |doc_num, freq|
|
810
|
+
* puts "term appeared #{freq} times in document #{doc_num}:"
|
826
811
|
* positions = []
|
827
812
|
* tde.each_position {|pos| positions << pos}
|
828
813
|
* puts " #{positions.join(', ')}"
|
@@ -1796,20 +1781,20 @@ static VALUE frb_ir_init(VALUE self, VALUE rdir) {
|
|
1796
1781
|
|
1797
1782
|
/*
|
1798
1783
|
* call-seq:
|
1799
|
-
* index_reader.set_norm(
|
1784
|
+
* index_reader.set_norm(doc_num, field, val)
|
1800
1785
|
*
|
1801
|
-
* Expert: change the boost value for a +field+ in document at +
|
1786
|
+
* Expert: change the boost value for a +field+ in document at +doc_num+.
|
1802
1787
|
* +val+ should be an integer in the range 0..255 which corresponds to an
|
1803
1788
|
* encoded float value.
|
1804
1789
|
*/
|
1805
1790
|
static VALUE
|
1806
|
-
frb_ir_set_norm(VALUE self, VALUE
|
1791
|
+
frb_ir_set_norm(VALUE self, VALUE rdoc_num, VALUE rfield, VALUE rval) {
|
1807
1792
|
int ex_code = 0;
|
1808
1793
|
const char *msg = NULL;
|
1809
1794
|
FrtIndexReader *ir = (FrtIndexReader *)DATA_PTR(self);
|
1810
1795
|
|
1811
1796
|
FRT_TRY
|
1812
|
-
frt_ir_set_norm(ir, FIX2INT(
|
1797
|
+
frt_ir_set_norm(ir, FIX2INT(rdoc_num), frb_field(rfield), (frt_uchar)NUM2CHR(rval));
|
1813
1798
|
FRT_XCATCHALL
|
1814
1799
|
ex_code = xcontext.excode;
|
1815
1800
|
msg = xcontext.msg;
|
@@ -1837,7 +1822,7 @@ frb_ir_norms(VALUE self, VALUE rfield) {
|
|
1837
1822
|
frt_uchar *norms;
|
1838
1823
|
norms = frt_ir_get_norms(ir, frb_field(rfield));
|
1839
1824
|
if (norms) {
|
1840
|
-
return rb_str_new((char *)norms, ir->
|
1825
|
+
return rb_str_new((char *)norms, ir->max_doc_num(ir));
|
1841
1826
|
} else {
|
1842
1827
|
return Qnil;
|
1843
1828
|
}
|
@@ -1855,11 +1840,11 @@ frb_ir_get_norms_into(VALUE self, VALUE rfield, VALUE rnorms, VALUE roffset) {
|
|
1855
1840
|
int offset;
|
1856
1841
|
offset = FIX2INT(roffset);
|
1857
1842
|
Check_Type(rnorms, T_STRING);
|
1858
|
-
if (RSTRING_LEN(rnorms) < offset + ir->
|
1843
|
+
if (RSTRING_LEN(rnorms) < offset + ir->max_doc_num(ir)) {
|
1859
1844
|
rb_raise(rb_eArgError, "supplied a string of length:%ld to "
|
1860
1845
|
"IndexReader#get_norms_into but needed a string of length "
|
1861
1846
|
"offset:%d + maxdoc:%d",
|
1862
|
-
RSTRING_LEN(rnorms), offset, ir->
|
1847
|
+
RSTRING_LEN(rnorms), offset, ir->max_doc_num(ir));
|
1863
1848
|
}
|
1864
1849
|
|
1865
1850
|
frt_ir_get_norms_into(ir, frb_field(rfield),
|
@@ -1927,22 +1912,21 @@ frb_ir_has_deletions(VALUE self) {
|
|
1927
1912
|
return ir->has_deletions(ir) ? Qtrue : Qfalse;
|
1928
1913
|
}
|
1929
1914
|
|
1930
|
-
/*
|
1931
|
-
*
|
1932
|
-
* index_reader.delete(doc_id) -> index_reader
|
1915
|
+
/* call-seq:
|
1916
|
+
* index_reader.delete(doc_num) -> index_reader
|
1933
1917
|
*
|
1934
|
-
* Delete document referenced internally by document id +
|
1918
|
+
* Delete document referenced internally by document id +doc_num+. The
|
1935
1919
|
* document_id is the number used to reference documents in the index and is
|
1936
1920
|
* returned by search methods.
|
1937
1921
|
*/
|
1938
1922
|
static VALUE
|
1939
|
-
frb_ir_delete(VALUE self, VALUE
|
1923
|
+
frb_ir_delete(VALUE self, VALUE rdoc_num) {
|
1940
1924
|
int ex_code = 0;
|
1941
1925
|
const char *msg = NULL;
|
1942
1926
|
FrtIndexReader *ir = (FrtIndexReader *)DATA_PTR(self);
|
1943
1927
|
|
1944
1928
|
FRT_TRY
|
1945
|
-
frt_ir_delete_doc(ir, FIX2INT(
|
1929
|
+
frt_ir_delete_doc(ir, FIX2INT(rdoc_num));
|
1946
1930
|
FRT_XCATCHALL
|
1947
1931
|
ex_code = xcontext.excode;
|
1948
1932
|
msg = xcontext.msg;
|
@@ -1956,31 +1940,30 @@ frb_ir_delete(VALUE self, VALUE rdoc_id) {
|
|
1956
1940
|
return self;
|
1957
1941
|
}
|
1958
1942
|
|
1959
|
-
/*
|
1960
|
-
*
|
1961
|
-
* index_reader.deleted?(doc_id) -> bool
|
1943
|
+
/* call-seq:
|
1944
|
+
* index_reader.deleted?(doc_num) -> bool
|
1962
1945
|
*
|
1963
|
-
* Returns true if the document at +
|
1946
|
+
* Returns true if the document at +doc_num+ has been deleted.
|
1964
1947
|
*/
|
1965
1948
|
static VALUE
|
1966
|
-
frb_ir_is_deleted(VALUE self, VALUE
|
1949
|
+
frb_ir_is_deleted(VALUE self, VALUE rdoc_num) {
|
1967
1950
|
FrtIndexReader *ir = (FrtIndexReader *)DATA_PTR(self);
|
1968
|
-
return ir->is_deleted(ir, FIX2INT(
|
1951
|
+
return ir->is_deleted(ir, FIX2INT(rdoc_num)) ? Qtrue : Qfalse;
|
1969
1952
|
}
|
1970
1953
|
|
1971
|
-
/*
|
1972
|
-
* call-seq:
|
1954
|
+
/* call-seq:
|
1973
1955
|
* index_reader.max_doc -> number
|
1974
1956
|
*
|
1975
1957
|
* Returns 1 + the maximum document id in the index. It is the
|
1976
1958
|
* document_id that will be used by the next document added to the index. If
|
1977
1959
|
* there are no deletions, this number also refers to the number of documents
|
1978
1960
|
* in the index.
|
1961
|
+
* TODO: Rename to next_doc_num?
|
1979
1962
|
*/
|
1980
1963
|
static VALUE
|
1981
|
-
|
1964
|
+
frb_ir_max_doc_num(VALUE self) {
|
1982
1965
|
FrtIndexReader *ir = (FrtIndexReader *)DATA_PTR(self);
|
1983
|
-
return INT2FIX(ir->
|
1966
|
+
return INT2FIX(ir->max_doc_num(ir));
|
1984
1967
|
}
|
1985
1968
|
|
1986
1969
|
/*
|
@@ -2042,8 +2025,8 @@ frb_get_doc_range(FrtIndexReader *ir, int pos, int len, int max) {
|
|
2042
2025
|
|
2043
2026
|
/*
|
2044
2027
|
* call-seq:
|
2045
|
-
* index_reader.get_document(
|
2046
|
-
* index_reader[
|
2028
|
+
* index_reader.get_document(doc_num) -> LazyDoc
|
2029
|
+
* index_reader[doc_num] -> LazyDoc
|
2047
2030
|
*
|
2048
2031
|
* Retrieve a document from the index. See LazyDoc for more details on the
|
2049
2032
|
* document returned. Documents are referenced internally by document ids
|
@@ -2054,7 +2037,7 @@ frb_ir_get_doc(int argc, VALUE *argv, VALUE self) {
|
|
2054
2037
|
FrtIndexReader *ir = (FrtIndexReader *)DATA_PTR(self);
|
2055
2038
|
VALUE arg1, arg2;
|
2056
2039
|
long pos, len;
|
2057
|
-
long max = ir->
|
2040
|
+
long max = ir->max_doc_num(ir);
|
2058
2041
|
rb_scan_args(argc, argv, "11", &arg1, &arg2);
|
2059
2042
|
if (argc == 1) {
|
2060
2043
|
if (FIXNUM_P(arg1)) {
|
@@ -2117,19 +2100,18 @@ frb_ir_is_latest(VALUE self) {
|
|
2117
2100
|
return frt_ir_is_latest(ir) ? Qtrue : Qfalse;
|
2118
2101
|
}
|
2119
2102
|
|
2120
|
-
/*
|
2121
|
-
*
|
2122
|
-
* index_reader.term_vector(doc_id, field) -> TermVector
|
2103
|
+
/* call-seq:
|
2104
|
+
* index_reader.term_vector(doc_num, field) -> TermVector
|
2123
2105
|
*
|
2124
|
-
* Return the TermVector for the field +field+ in the document at +
|
2106
|
+
* Return the TermVector for the field +field+ in the document at +doc_num+ in
|
2125
2107
|
* the index. Return nil if no such term_vector exists. See TermVector.
|
2126
2108
|
*/
|
2127
2109
|
static VALUE
|
2128
|
-
frb_ir_term_vector(VALUE self, VALUE
|
2110
|
+
frb_ir_term_vector(VALUE self, VALUE rdoc_num, VALUE rfield) {
|
2129
2111
|
FrtIndexReader *ir = (FrtIndexReader *)DATA_PTR(self);
|
2130
2112
|
FrtTermVector *tv;
|
2131
2113
|
VALUE rtv;
|
2132
|
-
tv = ir->term_vector(ir, FIX2INT(
|
2114
|
+
tv = ir->term_vector(ir, FIX2INT(rdoc_num), frb_field(rfield));
|
2133
2115
|
if (tv) {
|
2134
2116
|
rtv = frb_get_tv(tv);
|
2135
2117
|
frt_tv_destroy(tv);
|
@@ -2144,18 +2126,17 @@ frb_add_each_tv(void *key, void *value, void *rtvs) {
|
|
2144
2126
|
rb_hash_aset((VALUE)rtvs, ID2SYM((ID)key), frb_get_tv(value));
|
2145
2127
|
}
|
2146
2128
|
|
2147
|
-
/*
|
2148
|
-
*
|
2149
|
-
* index_reader.term_vectors(doc_id) -> hash of TermVector
|
2129
|
+
/* call-seq:
|
2130
|
+
* index_reader.term_vectors(doc_num) -> hash of TermVector
|
2150
2131
|
*
|
2151
|
-
* Return the TermVectors for the document at +
|
2132
|
+
* Return the TermVectors for the document at +doc_num+ in the index. The
|
2152
2133
|
* value returned is a hash of the TermVectors for each field in the document
|
2153
2134
|
* and they are referenced by field names (as symbols).
|
2154
2135
|
*/
|
2155
2136
|
static VALUE
|
2156
|
-
frb_ir_term_vectors(VALUE self, VALUE
|
2137
|
+
frb_ir_term_vectors(VALUE self, VALUE rdoc_num) {
|
2157
2138
|
FrtIndexReader *ir = (FrtIndexReader *)DATA_PTR(self);
|
2158
|
-
FrtHash *tvs = ir->term_vectors(ir, FIX2INT(
|
2139
|
+
FrtHash *tvs = ir->term_vectors(ir, FIX2INT(rdoc_num));
|
2159
2140
|
VALUE rtvs = rb_hash_new();
|
2160
2141
|
frt_h_each(tvs, &frb_add_each_tv, (void *)rtvs);
|
2161
2142
|
frt_h_destroy(tvs);
|
@@ -2359,11 +2340,11 @@ static VALUE frb_ir_each(VALUE self) {
|
|
2359
2340
|
int ex_code = 0;
|
2360
2341
|
const char *msg = NULL;
|
2361
2342
|
long i;
|
2362
|
-
|
2343
|
+
int max_doc_num = ir->max_doc_num(ir);
|
2363
2344
|
VALUE rld;
|
2364
2345
|
|
2365
2346
|
FRT_TRY
|
2366
|
-
for (i = 0; i <
|
2347
|
+
for (i = 0; i < max_doc_num; i++) {
|
2367
2348
|
if (ir->is_deleted(ir, i)) continue;
|
2368
2349
|
rld = frb_get_lazy_doc(ir->get_lazy_doc(ir, i));
|
2369
2350
|
rb_yield(rld);
|
@@ -2504,8 +2485,8 @@ Init_TermEnum(void) {
|
|
2504
2485
|
*
|
2505
2486
|
* tde = index_reader.term_docs_for(:content, "fox")
|
2506
2487
|
*
|
2507
|
-
* tde.each do |
|
2508
|
-
* puts "fox appeared #{freq} times in document #{
|
2488
|
+
* tde.each do |doc_num, freq|
|
2489
|
+
* puts "fox appeared #{freq} times in document #{doc_num}:"
|
2509
2490
|
* positions = []
|
2510
2491
|
* tde.each_position {|pos| positions << pos}
|
2511
2492
|
* puts " #{positions.join(', ')}"
|
@@ -2530,7 +2511,7 @@ static void Init_TermDocEnum(void) {
|
|
2530
2511
|
rb_define_alloc_func(cTermDocEnum, frb_tde_alloc);
|
2531
2512
|
rb_define_method(cTermDocEnum, "seek", frb_tde_seek, 2);
|
2532
2513
|
rb_define_method(cTermDocEnum, "seek_term_enum", frb_tde_seek_te, 1);
|
2533
|
-
rb_define_method(cTermDocEnum, "
|
2514
|
+
rb_define_method(cTermDocEnum, "doc_num", frb_tde_doc_num, 0);
|
2534
2515
|
rb_define_method(cTermDocEnum, "freq", frb_tde_freq, 0);
|
2535
2516
|
rb_define_method(cTermDocEnum, "next?", frb_tde_next, 0);
|
2536
2517
|
rb_define_method(cTermDocEnum, "next_position", frb_tde_next_position, 0);
|
@@ -2599,7 +2580,7 @@ static void Init_TVTerm(void) {
|
|
2599
2580
|
*
|
2600
2581
|
* == Example
|
2601
2582
|
*
|
2602
|
-
* tv = index_reader.term_vector(
|
2583
|
+
* tv = index_reader.term_vector(doc_num, :content)
|
2603
2584
|
* tv_term = tv.find {|tvt| tvt.term == "fox"}
|
2604
2585
|
*
|
2605
2586
|
* # get the term frequency
|
@@ -2825,7 +2806,7 @@ void Init_IndexReader(void) {
|
|
2825
2806
|
rb_define_method(cIndexReader, "has_deletions?", frb_ir_has_deletions, 0);
|
2826
2807
|
rb_define_method(cIndexReader, "delete", frb_ir_delete, 1);
|
2827
2808
|
rb_define_method(cIndexReader, "deleted?", frb_ir_is_deleted, 1);
|
2828
|
-
rb_define_method(cIndexReader, "
|
2809
|
+
rb_define_method(cIndexReader, "max_doc_num", frb_ir_max_doc_num, 0);
|
2829
2810
|
rb_define_method(cIndexReader, "num_docs", frb_ir_num_docs, 0);
|
2830
2811
|
rb_define_method(cIndexReader, "undelete_all", frb_ir_undelete_all, 0);
|
2831
2812
|
rb_define_method(cIndexReader, "latest?", frb_ir_is_latest, 0);
|
@@ -93,7 +93,7 @@ static VALUE sym_integer;
|
|
93
93
|
static VALUE sym_float;
|
94
94
|
static VALUE sym_string;
|
95
95
|
static VALUE sym_auto;
|
96
|
-
static VALUE
|
96
|
+
static VALUE sym_doc_num;
|
97
97
|
static VALUE sym_score;
|
98
98
|
static VALUE sym_byte;
|
99
99
|
|
@@ -146,7 +146,7 @@ extern VALUE frb_get_lazy_doc(FrtLazyDoc *lazy_doc);
|
|
146
146
|
****************************************************************************/
|
147
147
|
|
148
148
|
static VALUE frb_get_hit(FrtHit *hit) {
|
149
|
-
return rb_struct_new(cHit, INT2FIX(hit->
|
149
|
+
return rb_struct_new(cHit, INT2FIX(hit->doc_num), rb_float_new((double)hit->score), NULL);
|
150
150
|
}
|
151
151
|
|
152
152
|
/****************************************************************************
|
@@ -198,10 +198,10 @@ static VALUE frb_td_to_s(int argc, VALUE *argv, VALUE self) {
|
|
198
198
|
|
199
199
|
for (i = 0; i < len; i++) {
|
200
200
|
VALUE rhit = RARRAY_PTR(rhits)[i];
|
201
|
-
int
|
201
|
+
int doc_num = FIX2INT(rb_funcall(rhit, id_doc, 0));
|
202
202
|
const char *value = "";
|
203
203
|
size_t value_len = 0;
|
204
|
-
FrtLazyDoc *lzd = sea->get_lazy_doc(sea,
|
204
|
+
FrtLazyDoc *lzd = sea->get_lazy_doc(sea, doc_num);
|
205
205
|
FrtLazyDocField *lzdf = frt_lazy_doc_get(lzd, field);
|
206
206
|
if (NULL != lzdf) {
|
207
207
|
value = frt_lazy_df_get_data(lzdf, 0);
|
@@ -212,7 +212,7 @@ static VALUE frb_td_to_s(int argc, VALUE *argv, VALUE self) {
|
|
212
212
|
FRT_REALLOC_N(str, char, capa);
|
213
213
|
}
|
214
214
|
|
215
|
-
sprintf(str + p, "\t%d \"%s\": %0.5f\n",
|
215
|
+
sprintf(str + p, "\t%d \"%s\": %0.5f\n", doc_num, value,
|
216
216
|
NUM2DBL(rb_funcall(rhit, id_score, 0)));
|
217
217
|
p += strlen(str + p);
|
218
218
|
frt_lazy_doc_close(lzd);
|
@@ -279,7 +279,7 @@ static VALUE frb_td_to_json(VALUE self) {
|
|
279
279
|
FrtLazyDoc *lzd;
|
280
280
|
FrtSearcher *sea = (FrtSearcher *)DATA_PTR(rb_funcall(self, id_searcher, 0));
|
281
281
|
const int num_hits = RARRAY_LEN(rhits);
|
282
|
-
int
|
282
|
+
int doc_num;
|
283
283
|
int len = 32768;
|
284
284
|
char *str = FRT_ALLOC_N(char, len);
|
285
285
|
char *s = str;
|
@@ -290,8 +290,8 @@ static VALUE frb_td_to_json(VALUE self) {
|
|
290
290
|
if (i) *(s++) = ',';
|
291
291
|
*(s++) = '{';
|
292
292
|
rhit = RARRAY_PTR(rhits)[i];
|
293
|
-
|
294
|
-
lzd = sea->get_lazy_doc(sea,
|
293
|
+
doc_num = FIX2INT(rb_funcall(rhit, id_doc, 0));
|
294
|
+
lzd = sea->get_lazy_doc(sea, doc_num);
|
295
295
|
s = frb_lzd_load_to_json(lzd, &str, s, &len);
|
296
296
|
frt_lazy_doc_close(lzd);
|
297
297
|
*(s++) = '}';
|
@@ -2692,7 +2692,7 @@ static int get_sort_type(VALUE rtype) {
|
|
2692
2692
|
return FRT_SORT_TYPE_STRING;
|
2693
2693
|
} else if (rtype == sym_score) {
|
2694
2694
|
return FRT_SORT_TYPE_SCORE;
|
2695
|
-
} else if (rtype ==
|
2695
|
+
} else if (rtype == sym_doc_num) {
|
2696
2696
|
return FRT_SORT_TYPE_DOC;
|
2697
2697
|
} else if (rtype == sym_float) {
|
2698
2698
|
return FRT_SORT_TYPE_FLOAT;
|
@@ -2700,7 +2700,7 @@ static int get_sort_type(VALUE rtype) {
|
|
2700
2700
|
return FRT_SORT_TYPE_AUTO;
|
2701
2701
|
} else {
|
2702
2702
|
rb_raise(rb_eArgError, ":%s is an unknown sort-type. Please choose "
|
2703
|
-
"from [:integer, :float, :string, :auto, :score, :
|
2703
|
+
"from [:integer, :float, :string, :auto, :score, :doc_num]",
|
2704
2704
|
rb_id2name(SYM2ID(rtype)));
|
2705
2705
|
}
|
2706
2706
|
return FRT_SORT_TYPE_DOC;
|
@@ -2717,7 +2717,7 @@ static int get_sort_type(VALUE rtype) {
|
|
2717
2717
|
*
|
2718
2718
|
* :type:: Default: +:auto+. Specifies how a field should be sorted.
|
2719
2719
|
* Choose from one of; +:auto+, +:integer+, +:float+,
|
2720
|
-
* +:string+, +:byte+, +:
|
2720
|
+
* +:string+, +:byte+, +:doc_num+ or +:score+. +:auto+ will
|
2721
2721
|
* check the datatype of the field by trying to parse it into
|
2722
2722
|
* either a number or a float before settling on a string
|
2723
2723
|
* sort. String sort is locale dependent and works for
|
@@ -2786,7 +2786,7 @@ static VALUE frb_sf_get_name(VALUE self) {
|
|
2786
2786
|
* sort_field.type -> symbol
|
2787
2787
|
*
|
2788
2788
|
* Return the type of sort. Should be one of; +:auto+, +:integer+, +:float+,
|
2789
|
-
* +:string+, +:byte+, +:
|
2789
|
+
* +:string+, +:byte+, +:doc_num+ or +:score+.
|
2790
2790
|
*/
|
2791
2791
|
static VALUE frb_sf_get_type(VALUE self) {
|
2792
2792
|
GET_SF();
|
@@ -2796,7 +2796,7 @@ static VALUE frb_sf_get_type(VALUE self) {
|
|
2796
2796
|
case FRT_SORT_TYPE_FLOAT: return sym_float;
|
2797
2797
|
case FRT_SORT_TYPE_STRING: return sym_string;
|
2798
2798
|
case FRT_SORT_TYPE_AUTO: return sym_auto;
|
2799
|
-
case FRT_SORT_TYPE_DOC: return
|
2799
|
+
case FRT_SORT_TYPE_DOC: return sym_doc_num;
|
2800
2800
|
case FRT_SORT_TYPE_SCORE: return sym_score;
|
2801
2801
|
}
|
2802
2802
|
return Qnil;
|
@@ -2900,7 +2900,7 @@ static void frb_parse_sort_str(FrtSort *sort, char *xsort_str) {
|
|
2900
2900
|
|
2901
2901
|
if (strcmp("SCORE", s) == 0) {
|
2902
2902
|
sf = frt_sort_field_score_new(reverse);
|
2903
|
-
} else if (strcmp("
|
2903
|
+
} else if (strcmp("DOC_NUM", s) == 0) {
|
2904
2904
|
sf = frt_sort_field_doc_new(reverse);
|
2905
2905
|
} else {
|
2906
2906
|
sf = frt_sort_field_auto_new(rb_intern(s), reverse);
|
@@ -2939,7 +2939,7 @@ static void frb_sort_add(FrtSort *sort, VALUE rsf, bool reverse) {
|
|
2939
2939
|
#define GET_SORT() FrtSort *sort = (FrtSort *)DATA_PTR(self)
|
2940
2940
|
/*
|
2941
2941
|
* call-seq:
|
2942
|
-
* Sort.new(sort_fields = [SortField::SCORE, SortField::
|
2942
|
+
* Sort.new(sort_fields = [SortField::SCORE, SortField::DOC_NUM], reverse = false) -> Sort
|
2943
2943
|
*
|
2944
2944
|
* Create a new Sort object. If +reverse+ is true, all sort_fields will be
|
2945
2945
|
* reversed so if any of them are already reversed they will be turned back
|
@@ -3061,21 +3061,21 @@ static VALUE frb_sea_doc_freq(VALUE self, VALUE rfield, VALUE rterm) {
|
|
3061
3061
|
|
3062
3062
|
/*
|
3063
3063
|
* call-seq:
|
3064
|
-
* searcher.get_document(
|
3065
|
-
* searcher[
|
3064
|
+
* searcher.get_document(doc_num) -> LazyDoc
|
3065
|
+
* searcher[doc_num] -> LazyDoc
|
3066
3066
|
*
|
3067
3067
|
* Retrieve a document from the index. See LazyDoc for more details on the
|
3068
3068
|
* document returned. Documents are referenced internally by document ids
|
3069
3069
|
* which are returned by the Searchers search methods.
|
3070
3070
|
*/
|
3071
|
-
static VALUE frb_sea_doc(VALUE self, VALUE
|
3071
|
+
static VALUE frb_sea_doc(VALUE self, VALUE rdoc_num) {
|
3072
3072
|
int ex_code = 0;
|
3073
3073
|
const char *msg = NULL;
|
3074
3074
|
GET_SEA();
|
3075
3075
|
VALUE ld = Qnil;
|
3076
3076
|
|
3077
3077
|
FRT_TRY
|
3078
|
-
ld = frb_get_lazy_doc(sea->get_lazy_doc(sea, FIX2INT(
|
3078
|
+
ld = frb_get_lazy_doc(sea->get_lazy_doc(sea, FIX2INT(rdoc_num)));
|
3079
3079
|
FRT_XCATCHALL
|
3080
3080
|
ex_code = xcontext.excode;
|
3081
3081
|
msg = xcontext.msg;
|
@@ -3098,13 +3098,13 @@ static VALUE frb_sea_doc(VALUE self, VALUE rdoc_id) {
|
|
3098
3098
|
* there are no deletions, this number also refers to the number of documents
|
3099
3099
|
* in the index.
|
3100
3100
|
*/
|
3101
|
-
static VALUE
|
3101
|
+
static VALUE frb_sea_max_doc_num(VALUE self) {
|
3102
3102
|
GET_SEA();
|
3103
|
-
return INT2FIX(sea->
|
3103
|
+
return INT2FIX(sea->max_doc_num(sea));
|
3104
3104
|
}
|
3105
3105
|
|
3106
|
-
static float call_filter_proc(int
|
3107
|
-
VALUE val = rb_funcall((VALUE)arg, id_call, 3, INT2FIX(
|
3106
|
+
static float call_filter_proc(int doc_num, float score, FrtSearcher *sea, void *arg) {
|
3107
|
+
VALUE val = rb_funcall((VALUE)arg, id_call, 3, INT2FIX(doc_num), rb_float_new((double)score), sea->rsea);
|
3108
3108
|
switch (TYPE(val)) {
|
3109
3109
|
case T_NIL:
|
3110
3110
|
case T_FALSE:
|
@@ -3261,7 +3261,7 @@ static FrtTopDocs *frb_sea_search_internal(FrtQuery *query, VALUE roptions, FrtS
|
|
3261
3261
|
* to specify a field's type to sort it correctly. For more
|
3262
3262
|
* on this, see the documentation for SortField
|
3263
3263
|
* :filter:: a Filter object to filter the search results with
|
3264
|
-
* :filter_proc:: a filter Proc is a Proc which takes the
|
3264
|
+
* :filter_proc:: a filter Proc is a Proc which takes the doc_num, the score
|
3265
3265
|
* and the Searcher object as its parameters and returns
|
3266
3266
|
* either a Boolean value specifying whether the result
|
3267
3267
|
* should be included in the result set, or a Float between 0
|
@@ -3280,13 +3280,13 @@ static VALUE frb_sea_search(int argc, VALUE *argv, VALUE self) {
|
|
3280
3280
|
|
3281
3281
|
/*
|
3282
3282
|
* call-seq:
|
3283
|
-
* searcher.search_each(query, options = {}) {|
|
3283
|
+
* searcher.search_each(query, options = {}) {|doc_num, score| do_something}
|
3284
3284
|
* -> total_hits
|
3285
3285
|
*
|
3286
3286
|
* Run a query through the Searcher on the index. A TopDocs object is
|
3287
3287
|
* returned with the relevant results. The +query+ is a Query object. The
|
3288
3288
|
* Searcher#search_each method yields the internal document id (used to
|
3289
|
-
* reference documents in the Searcher object like this; +searcher[
|
3289
|
+
* reference documents in the Searcher object like this; +searcher[doc_num]+)
|
3290
3290
|
* and the search score for that document. It is possible for the score to be
|
3291
3291
|
* greater than 1.0 for some queries and taking boosts into account. This
|
3292
3292
|
* method will also normalize scores to the range 0.0..1.0 when the max-score
|
@@ -3314,7 +3314,7 @@ static VALUE frb_sea_search(int argc, VALUE *argv, VALUE self) {
|
|
3314
3314
|
* to specify a field's type to sort it correctly. For more
|
3315
3315
|
* on this, see the documentation for SortField
|
3316
3316
|
* :filter:: a Filter object to filter the search results with
|
3317
|
-
* :filter_proc:: a filter Proc is a Proc which takes the
|
3317
|
+
* :filter_proc:: a filter Proc is a Proc which takes the doc_num, the score
|
3318
3318
|
* and the Searcher object as its parameters and returns a
|
3319
3319
|
* Boolean value specifying whether the result should be
|
3320
3320
|
* included in the result set.
|
@@ -3335,7 +3335,7 @@ static VALUE frb_sea_search_each(int argc, VALUE *argv, VALUE self) {
|
|
3335
3335
|
|
3336
3336
|
/* yield normalized scores */
|
3337
3337
|
for (i = 0; i < td->size; i++) {
|
3338
|
-
rb_yield_values(2, INT2FIX(td->hits[i]->
|
3338
|
+
rb_yield_values(2, INT2FIX(td->hits[i]->doc_num), rb_float_new((double)(td->hits[i]->score/max_score)));
|
3339
3339
|
}
|
3340
3340
|
|
3341
3341
|
rtotal_hits = INT2FIX(td->total_hits);
|
@@ -3430,14 +3430,14 @@ static VALUE frb_sea_scan(int argc, VALUE *argv, VALUE self) {
|
|
3430
3430
|
|
3431
3431
|
/*
|
3432
3432
|
* call-seq:
|
3433
|
-
* searcher.explain(query,
|
3433
|
+
* searcher.explain(query, doc_num) -> Explanation
|
3434
3434
|
*
|
3435
3435
|
* Create an explanation object to explain the score returned for a
|
3436
|
-
* particular document at +
|
3436
|
+
* particular document at +doc_num+ in the index for the query +query+.
|
3437
3437
|
*
|
3438
3438
|
* Usually used like this;
|
3439
3439
|
*
|
3440
|
-
* puts searcher.explain(query,
|
3440
|
+
* puts searcher.explain(query, doc_num).to_s
|
3441
3441
|
*/
|
3442
3442
|
|
3443
3443
|
static size_t frb_explanation_size(const void *p) {
|
@@ -3468,17 +3468,17 @@ static VALUE frb_expl_alloc(VALUE rclass) {
|
|
3468
3468
|
return TypedData_Wrap_Struct(rclass, &frb_explanation_t, e);
|
3469
3469
|
}
|
3470
3470
|
|
3471
|
-
static VALUE frb_sea_explain(VALUE self, VALUE rquery, VALUE
|
3471
|
+
static VALUE frb_sea_explain(VALUE self, VALUE rquery, VALUE rdoc_num) {
|
3472
3472
|
GET_SEA();
|
3473
3473
|
FrtQuery *query = DATA_PTR(rquery);
|
3474
3474
|
FrtExplanation *expl;
|
3475
|
-
expl = sea->explain(sea, query, FIX2INT(
|
3475
|
+
expl = sea->explain(sea, query, FIX2INT(rdoc_num));
|
3476
3476
|
return TypedData_Wrap_Struct(cExplanation, &frb_explanation_t, expl);
|
3477
3477
|
}
|
3478
3478
|
|
3479
3479
|
/*
|
3480
3480
|
* call-seq:
|
3481
|
-
* searcher.highlight(query,
|
3481
|
+
* searcher.highlight(query, doc_num, field, options = {}) -> Array
|
3482
3482
|
*
|
3483
3483
|
* Returns an array of strings with the matches highlighted.
|
3484
3484
|
*
|
@@ -3500,7 +3500,7 @@ static VALUE frb_sea_explain(VALUE self, VALUE rquery, VALUE rdoc_id) {
|
|
3500
3500
|
*/
|
3501
3501
|
static VALUE frb_sea_highlight(int argc, VALUE *argv, VALUE self) {
|
3502
3502
|
GET_SEA();
|
3503
|
-
VALUE rquery,
|
3503
|
+
VALUE rquery, rdoc_num, rfield, roptions, v;
|
3504
3504
|
int excerpt_length = 150;
|
3505
3505
|
int num_excerpts = 2;
|
3506
3506
|
const char *pre_tag = "<b>";
|
@@ -3508,7 +3508,7 @@ static VALUE frb_sea_highlight(int argc, VALUE *argv, VALUE self) {
|
|
3508
3508
|
const char *ellipsis = "...";
|
3509
3509
|
char **excerpts;
|
3510
3510
|
|
3511
|
-
rb_scan_args(argc, argv, "31", &rquery, &
|
3511
|
+
rb_scan_args(argc, argv, "31", &rquery, &rdoc_num, &rfield, &roptions);
|
3512
3512
|
FrtQuery *query = DATA_PTR(rquery);
|
3513
3513
|
if (argc > 3) {
|
3514
3514
|
if (TYPE(roptions) != T_HASH) {
|
@@ -3538,7 +3538,7 @@ static VALUE frb_sea_highlight(int argc, VALUE *argv, VALUE self) {
|
|
3538
3538
|
|
3539
3539
|
if ((excerpts = frt_searcher_highlight(sea,
|
3540
3540
|
query,
|
3541
|
-
FIX2INT(
|
3541
|
+
FIX2INT(rdoc_num),
|
3542
3542
|
frb_field(rfield),
|
3543
3543
|
excerpt_length,
|
3544
3544
|
num_excerpts,
|
@@ -3869,7 +3869,7 @@ static void Init_TopDocs(void) {
|
|
3869
3869
|
*
|
3870
3870
|
* == Example
|
3871
3871
|
*
|
3872
|
-
* puts searcher.explain(query,
|
3872
|
+
* puts searcher.explain(query, doc_num).to_s
|
3873
3873
|
*/
|
3874
3874
|
static void Init_Explanation(void) {
|
3875
3875
|
cExplanation = rb_define_class_under(mSearch, "Explanation", rb_cObject);
|
@@ -4748,7 +4748,7 @@ static void Init_Filter(void) {
|
|
4748
4748
|
* * :float
|
4749
4749
|
* * :string
|
4750
4750
|
* * :byte
|
4751
|
-
* * :
|
4751
|
+
* * :doc_num
|
4752
4752
|
* * :score
|
4753
4753
|
*
|
4754
4754
|
* The type of the SortField is set by passing it as a parameter to the
|
@@ -4784,7 +4784,7 @@ static void Init_SortField(void) {
|
|
4784
4784
|
sym_float = ID2SYM(rb_intern("float"));
|
4785
4785
|
sym_string = ID2SYM(rb_intern("string"));
|
4786
4786
|
sym_auto = ID2SYM(rb_intern("auto"));
|
4787
|
-
|
4787
|
+
sym_doc_num = ID2SYM(rb_intern("doc_num"));
|
4788
4788
|
sym_score = ID2SYM(rb_intern("score"));
|
4789
4789
|
sym_byte = ID2SYM(rb_intern("byte"));
|
4790
4790
|
|
@@ -4804,12 +4804,12 @@ static void Init_SortField(void) {
|
|
4804
4804
|
rb_define_const(cSortField, "SCORE_REV", TypedData_Wrap_Struct(cSortField, &frb_sort_field_t, FRT_SORT_FIELD_SCORE_REV));
|
4805
4805
|
FRT_SORT_FIELD_SCORE_REV->rfield = rb_const_get(cSortField, rb_intern("SCORE_REV"));
|
4806
4806
|
|
4807
|
-
rb_define_const(cSortField, "
|
4808
|
-
oSORT_FIELD_DOC = rb_const_get(cSortField, rb_intern("
|
4807
|
+
rb_define_const(cSortField, "DOC_NUM", TypedData_Wrap_Struct(cSortField, &frb_sort_field_t, FRT_SORT_FIELD_DOC));
|
4808
|
+
oSORT_FIELD_DOC = rb_const_get(cSortField, rb_intern("DOC_NUM"));
|
4809
4809
|
FRT_SORT_FIELD_DOC->rfield = oSORT_FIELD_DOC;
|
4810
4810
|
|
4811
|
-
rb_define_const(cSortField, "
|
4812
|
-
FRT_SORT_FIELD_DOC_REV->rfield = rb_const_get(cSortField, rb_intern("
|
4811
|
+
rb_define_const(cSortField, "DOC_NUM_REV", TypedData_Wrap_Struct(cSortField, &frb_sort_field_t, FRT_SORT_FIELD_DOC_REV));
|
4812
|
+
FRT_SORT_FIELD_DOC_REV->rfield = rb_const_get(cSortField, rb_intern("DOC_NUM_REV"));
|
4813
4813
|
}
|
4814
4814
|
|
4815
4815
|
/*
|
@@ -4871,8 +4871,8 @@ static void Init_Sort(void) {
|
|
4871
4871
|
*
|
4872
4872
|
* searcher.search_each(TermQuery.new(:content, "ferret")
|
4873
4873
|
* :filter => RangeFilter.new(:date, :< => "2006"),
|
4874
|
-
* :sort => "date DESC, title") do |
|
4875
|
-
* puts "#{searcher[
|
4874
|
+
* :sort => "date DESC, title") do |doc_num, score|
|
4875
|
+
* puts "#{searcher[doc_num][:title]} scored #{score}"
|
4876
4876
|
* end
|
4877
4877
|
*/
|
4878
4878
|
static void Init_Searcher(void) {
|
@@ -4902,7 +4902,7 @@ static void Init_Searcher(void) {
|
|
4902
4902
|
rb_define_method(cSearcher, "doc_freq", frb_sea_doc_freq, 2);
|
4903
4903
|
rb_define_method(cSearcher, "get_document", frb_sea_doc, 1);
|
4904
4904
|
rb_define_method(cSearcher, "[]", frb_sea_doc, 1);
|
4905
|
-
rb_define_method(cSearcher, "
|
4905
|
+
rb_define_method(cSearcher, "max_doc_num", frb_sea_max_doc_num, 0);
|
4906
4906
|
rb_define_method(cSearcher, "search", frb_sea_search, -1);
|
4907
4907
|
rb_define_method(cSearcher, "search_each", frb_sea_search_each, -1);
|
4908
4908
|
rb_define_method(cSearcher, "scan", frb_sea_scan, -1);
|
@@ -5,14 +5,11 @@
|
|
5
5
|
#include "frt_doc_field.h"
|
6
6
|
#include <ruby/encoding.h>
|
7
7
|
|
8
|
-
/****************************************************************************
|
9
|
-
*
|
10
|
-
* FrtDocument
|
11
|
-
*
|
12
|
-
****************************************************************************/
|
13
|
-
|
14
8
|
#define FRT_DOC_INIT_CAPA 8
|
9
|
+
|
15
10
|
typedef struct FrtDocument {
|
11
|
+
// frt_uchar ulid[16];
|
12
|
+
// char *ulid_c;
|
16
13
|
FrtHash *field_dict;
|
17
14
|
int field_count;
|
18
15
|
int capa;
|