ferret 0.10.7 → 0.10.8
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/ext/q_multi_term.c +3 -3
- data/ext/q_parser.c +18 -1
- data/ext/r_analysis.c +2 -3
- data/ext/r_search.c +32 -4
- data/ext/search.h +1 -0
- data/lib/ferret.rb +1 -0
- data/lib/ferret/field_infos.rb +42 -0
- data/lib/ferret/index.rb +4 -0
- data/lib/ferret_version.rb +1 -1
- data/test/unit/search/tm_searcher.rb +10 -10
- metadata +3 -2
data/ext/q_multi_term.c
CHANGED
@@ -524,7 +524,7 @@ static char *multi_tq_to_s(Query *self, const char *curr_field)
|
|
524
524
|
bptr += flen + 1;
|
525
525
|
}
|
526
526
|
|
527
|
-
*(bptr++) = '
|
527
|
+
*(bptr++) = '"';
|
528
528
|
bt_pq_clone = pq_clone(boosted_terms);
|
529
529
|
while ((bt = (BoostedTerm *)pq_pop(bt_pq_clone)) != NULL) {
|
530
530
|
sprintf(bptr, "%s", bt->term);
|
@@ -540,10 +540,10 @@ static char *multi_tq_to_s(Query *self, const char *curr_field)
|
|
540
540
|
}
|
541
541
|
pq_destroy(bt_pq_clone);
|
542
542
|
|
543
|
-
if (bptr[-1] == '
|
543
|
+
if (bptr[-1] == '"') {
|
544
544
|
bptr++; /* handle zero term case */
|
545
545
|
}
|
546
|
-
bptr[-1] = '
|
546
|
+
bptr[-1] = '"'; /* delete last '|' char */
|
547
547
|
bptr[ 0] = '\0';
|
548
548
|
|
549
549
|
if (self->boost != 1.0) {
|
data/ext/q_parser.c
CHANGED
@@ -1754,6 +1754,11 @@ static int get_word(YYSTYPE *lvalp, QParser *qp)
|
|
1754
1754
|
char *bufp = buf;
|
1755
1755
|
qp->buf_index = (qp->buf_index + 1) % QP_CONC_WORDS;
|
1756
1756
|
|
1757
|
+
if (qp->dynbuf) {
|
1758
|
+
free(qp->dynbuf);
|
1759
|
+
qp->dynbuf = NULL;
|
1760
|
+
}
|
1761
|
+
|
1757
1762
|
qp->qstrp--; /* need to back up one character */
|
1758
1763
|
|
1759
1764
|
while (!strchr(not_word, (c=*qp->qstrp++))) {
|
@@ -1773,6 +1778,14 @@ static int get_word(YYSTYPE *lvalp, QParser *qp)
|
|
1773
1778
|
default:
|
1774
1779
|
*bufp++ = c;
|
1775
1780
|
}
|
1781
|
+
/* we've exceeded the static buffer. switch to the dynamic
|
1782
|
+
one. */
|
1783
|
+
if (!qp->dynbuf && ((bufp - buf) == MAX_WORD_SIZE)) {
|
1784
|
+
qp->dynbuf = ALLOC_AND_ZERO_N(char, strlen(qp->qstr) + 1);
|
1785
|
+
strncpy(qp->dynbuf, buf, MAX_WORD_SIZE);
|
1786
|
+
buf = qp->dynbuf;
|
1787
|
+
bufp = buf + MAX_WORD_SIZE;
|
1788
|
+
}
|
1776
1789
|
}
|
1777
1790
|
qp->qstrp--;
|
1778
1791
|
/* check for keywords. There are only four so we have a bit of a hack which
|
@@ -1843,7 +1856,7 @@ static int yyerror(QParser *qp, char const *msg)
|
|
1843
1856
|
}
|
1844
1857
|
mutex_unlock(&qp->mutex);
|
1845
1858
|
RAISE(PARSE_ERROR, "couldn't parse query ``%s''. Error message "
|
1846
|
-
" was %
|
1859
|
+
" was %s", buf, (char *)msg);
|
1847
1860
|
}
|
1848
1861
|
return 0;
|
1849
1862
|
}
|
@@ -2288,6 +2301,9 @@ void qp_destroy(QParser *self)
|
|
2288
2301
|
if (self->tokenized_fields) {
|
2289
2302
|
hs_destroy(self->tokenized_fields);
|
2290
2303
|
}
|
2304
|
+
if (self->dynbuf) {
|
2305
|
+
free(self->dynbuf);
|
2306
|
+
}
|
2291
2307
|
hs_destroy(self->all_fields);
|
2292
2308
|
hs_destroy(self->fields_buf);
|
2293
2309
|
h_destroy(self->field_cache);
|
@@ -2335,6 +2351,7 @@ QParser *qp_new(HashSet *all_fields, HashSet *def_fields,
|
|
2335
2351
|
self->analyzer = analyzer;
|
2336
2352
|
self->ts_cache = h_new_str(&free, (free_ft)&ts_deref);
|
2337
2353
|
self->buf_index = 0;
|
2354
|
+
self->dynbuf = 0;
|
2338
2355
|
self->non_tokenizer = non_tokenizer_new();
|
2339
2356
|
mutex_init(&self->mutex, NULL);
|
2340
2357
|
return self;
|
data/ext/r_analysis.c
CHANGED
@@ -1040,7 +1040,7 @@ static TokenStream *
|
|
1040
1040
|
cwa_get_ts(Analyzer *a, char *field, char *text)
|
1041
1041
|
{
|
1042
1042
|
VALUE rts = rb_funcall(CWA(a)->ranalyzer, id_token_stream, 2,
|
1043
|
-
rb_intern(field), rb_str_new2(text));
|
1043
|
+
ID2SYM(rb_intern(field)), rb_str_new2(text));
|
1044
1044
|
return frt_get_cwrapped_rts(rts);
|
1045
1045
|
}
|
1046
1046
|
|
@@ -1104,10 +1104,9 @@ frt_analyzer_token_stream(VALUE self, VALUE rfield, VALUE rstring)
|
|
1104
1104
|
Analyzer *a;
|
1105
1105
|
GET_A(a, self);
|
1106
1106
|
|
1107
|
-
StringValue(rfield);
|
1108
1107
|
StringValue(rstring);
|
1109
1108
|
|
1110
|
-
ts = a_get_ts(a,
|
1109
|
+
ts = a_get_ts(a, frt_field(rfield), RSTRING(rstring)->ptr);
|
1111
1110
|
|
1112
1111
|
/* Make sure that there is no entry already */
|
1113
1112
|
object_set(&ts->text, rstring);
|
data/ext/r_search.c
CHANGED
@@ -57,6 +57,8 @@ static VALUE sym_upper;
|
|
57
57
|
static VALUE sym_lower;
|
58
58
|
static VALUE sym_include_upper;
|
59
59
|
static VALUE sym_include_lower;
|
60
|
+
static VALUE sym_upper_exclusive;
|
61
|
+
static VALUE sym_lower_exclusive;
|
60
62
|
|
61
63
|
static VALUE sym_less_than;
|
62
64
|
static VALUE sym_less_than_or_equal_to;
|
@@ -170,6 +172,12 @@ frt_get_td(TopDocs *td)
|
|
170
172
|
return rtop_docs;
|
171
173
|
}
|
172
174
|
|
175
|
+
/*
|
176
|
+
* call-seq:
|
177
|
+
* top_doc.to_s -> string
|
178
|
+
*
|
179
|
+
* Returns a string represention of the top_doc in readable format.
|
180
|
+
*/
|
173
181
|
static VALUE
|
174
182
|
frt_td_to_s(VALUE self)
|
175
183
|
{
|
@@ -180,7 +188,7 @@ frt_td_to_s(VALUE self)
|
|
180
188
|
char *s = str;
|
181
189
|
VALUE rstr;
|
182
190
|
|
183
|
-
sprintf(s, "TopDocs:
|
191
|
+
sprintf(s, "TopDocs: total_hits = %d, max_score = %f [\n",
|
184
192
|
FIX2INT(rb_funcall(self, id_total_hits, 0)),
|
185
193
|
NUM2DBL(rb_funcall(self, id_max_score, 0)));
|
186
194
|
s += strlen(s);
|
@@ -900,6 +908,14 @@ get_range_params(VALUE roptions, char **lterm, char **uterm,
|
|
900
908
|
*uterm = StringValuePtr(v);
|
901
909
|
*include_upper = true;
|
902
910
|
}
|
911
|
+
if (Qnil != (v = rb_hash_aref(roptions, sym_lower_exclusive))) {
|
912
|
+
*lterm = StringValuePtr(v);
|
913
|
+
*include_lower = false;
|
914
|
+
}
|
915
|
+
if (Qnil != (v = rb_hash_aref(roptions, sym_upper_exclusive))) {
|
916
|
+
*uterm = StringValuePtr(v);
|
917
|
+
*include_upper = false;
|
918
|
+
}
|
903
919
|
if (Qnil != (v = rb_hash_aref(roptions, sym_include_lower))) {
|
904
920
|
*include_lower = RTEST(v);
|
905
921
|
}
|
@@ -956,6 +972,8 @@ get_range_params(VALUE roptions, char **lterm, char **uterm,
|
|
956
972
|
* q = RangeQuery.new(:date, :lower => "200501", :include_lower => false)
|
957
973
|
* # is equivalent to
|
958
974
|
* q = RangeQuery.new(:date, :< => "200501")
|
975
|
+
* # is equivalent to
|
976
|
+
* q = RangeQuery.new(:date, :lower_exclusive => "200501")
|
959
977
|
*
|
960
978
|
* q = RangeQuery.new(:date, :lower => "200501", :upper => 200502)
|
961
979
|
* # is equivalent to
|
@@ -993,9 +1011,15 @@ frt_rq_init(VALUE self, VALUE rfield, VALUE roptions)
|
|
993
1011
|
* the query it will do anything of value. See PhraseQuery#add_term.
|
994
1012
|
*/
|
995
1013
|
static VALUE
|
996
|
-
frt_phq_init(VALUE
|
1014
|
+
frt_phq_init(int argc, VALUE *argv, VALUE self)
|
997
1015
|
{
|
998
|
-
|
1016
|
+
VALUE rfield, rslop;
|
1017
|
+
Query *q;
|
1018
|
+
rb_scan_args(argc, argv, "11", &rfield, &rslop);
|
1019
|
+
q = phq_new(frt_field(rfield));
|
1020
|
+
if (argc == 2) {
|
1021
|
+
((PhraseQuery *)q)->slop = FIX2INT(rslop);
|
1022
|
+
}
|
999
1023
|
Frt_Wrap_Struct(self, NULL, &frt_q_free, q);
|
1000
1024
|
object_add(q, self);
|
1001
1025
|
return self;
|
@@ -1715,6 +1739,8 @@ frt_f_to_s(VALUE self)
|
|
1715
1739
|
* f = RangeFilter.new(:date, :lower => "200501", :include_lower => false)
|
1716
1740
|
* # is equivalent to
|
1717
1741
|
* f = RangeFilter.new(:date, :< => "200501")
|
1742
|
+
* # is equivalent to
|
1743
|
+
* f = RangeFilter.new(:date, :lower_exclusive => "200501")
|
1718
1744
|
*
|
1719
1745
|
* f = RangeFilter.new(:date, :lower => "200501", :upper => 200502)
|
1720
1746
|
* # is equivalent to
|
@@ -2991,6 +3017,8 @@ Init_RangeQuery(void)
|
|
2991
3017
|
{
|
2992
3018
|
sym_upper = ID2SYM(rb_intern("upper"));
|
2993
3019
|
sym_lower = ID2SYM(rb_intern("lower"));
|
3020
|
+
sym_upper_exclusive = ID2SYM(rb_intern("upper_exclusive"));
|
3021
|
+
sym_lower_exclusive = ID2SYM(rb_intern("lower_exclusive"));
|
2994
3022
|
sym_include_upper = ID2SYM(rb_intern("include_upper"));
|
2995
3023
|
sym_include_lower = ID2SYM(rb_intern("include_lower"));
|
2996
3024
|
|
@@ -3074,7 +3102,7 @@ Init_PhraseQuery(void)
|
|
3074
3102
|
cPhraseQuery = rb_define_class_under(mSearch, "PhraseQuery", cQuery);
|
3075
3103
|
rb_define_alloc_func(cPhraseQuery, frt_data_alloc);
|
3076
3104
|
|
3077
|
-
rb_define_method(cPhraseQuery, "initialize", frt_phq_init, 1);
|
3105
|
+
rb_define_method(cPhraseQuery, "initialize", frt_phq_init, -1);
|
3078
3106
|
rb_define_method(cPhraseQuery, "add_term", frt_phq_add, -1);
|
3079
3107
|
rb_define_method(cPhraseQuery, "<<", frt_phq_add, -1);
|
3080
3108
|
rb_define_method(cPhraseQuery, "slop", frt_phq_get_slop, 0);
|
data/ext/search.h
CHANGED
data/lib/ferret.rb
CHANGED
data/lib/ferret/field_infos.rb
ADDED
@@ -0,0 +1,42 @@
|
|
1
|
+
require 'yaml'
|
2
|
+
|
3
|
+
class Ferret::Index::FieldInfos
|
4
|
+
# Load FieldInfos from a YAML file. The YAML file should look something like
|
5
|
+
# this:
|
6
|
+
# default:
|
7
|
+
# store: :yes
|
8
|
+
# index: :yes
|
9
|
+
# term_vector: :no
|
10
|
+
#
|
11
|
+
# fields:
|
12
|
+
# id:
|
13
|
+
# index: :untokenized
|
14
|
+
# term_vector: :no
|
15
|
+
#
|
16
|
+
# title:
|
17
|
+
# boost: 20.0
|
18
|
+
# term_vector: :no
|
19
|
+
#
|
20
|
+
# content:
|
21
|
+
# term_vector: :with_positions_offsets
|
22
|
+
#
|
23
|
+
def self.load(yaml_str)
|
24
|
+
info = YAML.load(yaml_str)
|
25
|
+
convert_strings_to_symbols(info)
|
26
|
+
fis = FieldInfos.new(info[:default])
|
27
|
+
fields = info[:fields]
|
28
|
+
fields.keys.each {|key| fis.add_field(key, fields[key])} if fields
|
29
|
+
fis
|
30
|
+
end
|
31
|
+
|
32
|
+
private
|
33
|
+
def self.convert_strings_to_symbols(hash)
|
34
|
+
hash.keys.each do |key|
|
35
|
+
convert_strings_to_symbols(hash[key]) if hash[key].is_a?(Hash)
|
36
|
+
if key.is_a?(String)
|
37
|
+
hash[key.intern] = hash[key]
|
38
|
+
hash.delete(key)
|
39
|
+
end
|
40
|
+
end
|
41
|
+
end
|
42
|
+
end
|
data/lib/ferret/index.rb
CHANGED
@@ -90,6 +90,10 @@ module Ferret::Index
|
|
90
90
|
end
|
91
91
|
end
|
92
92
|
|
93
|
+
if (fi = options[:field_infos]).is_a?(String)
|
94
|
+
options[:field_infos] = FieldInfos.load(options[:field_infos])
|
95
|
+
end
|
96
|
+
|
93
97
|
@close_dir = options[:close_dir]
|
94
98
|
if options[:dir].is_a?(String)
|
95
99
|
options[:path] = options[:dir]
|
data/lib/ferret_version.rb
CHANGED
data/test/unit/search/tm_searcher.rb
CHANGED
@@ -59,14 +59,14 @@ module SearcherTests
|
|
59
59
|
def test_multi_term_query
|
60
60
|
mtq = MultiTermQuery.new(:field, :max_terms => 4, :min_score => 0.5)
|
61
61
|
check_hits(mtq, [])
|
62
|
-
assert_equal("
|
63
|
-
assert_equal(
|
62
|
+
assert_equal('""', mtq.to_s(:field))
|
63
|
+
assert_equal('field:""', mtq.to_s)
|
64
64
|
|
65
65
|
[
|
66
|
-
["brown", 1.0, "
|
67
|
-
["fox", 0.1, "
|
68
|
-
["fox", 0.6, "
|
69
|
-
["fast", 50.0, "
|
66
|
+
["brown", 1.0, '"brown"'],
|
67
|
+
["fox", 0.1, '"brown"'],
|
68
|
+
["fox", 0.6, '"fox^0.6|brown"'],
|
69
|
+
["fast", 50.0, '"fox^0.6|brown|fast^50.0"']
|
70
70
|
].each do |term, boost, str|
|
71
71
|
mtq.add_term(term, boost)
|
72
72
|
assert_equal(str, mtq.to_s(:field))
|
@@ -74,13 +74,13 @@ module SearcherTests
|
|
74
74
|
end
|
75
75
|
|
76
76
|
mtq.boost = 80.1
|
77
|
-
assert_equal("
|
77
|
+
assert_equal('field:"fox^0.6|brown|fast^50.0"^80.1', mtq.to_s())
|
78
78
|
mtq << "word1"
|
79
|
-
assert_equal("
|
79
|
+
assert_equal('field:"fox^0.6|brown|word1|fast^50.0"^80.1', mtq.to_s())
|
80
80
|
mtq << "word2"
|
81
|
-
assert_equal("
|
81
|
+
assert_equal('field:"brown|word1|word2|fast^50.0"^80.1', mtq.to_s())
|
82
82
|
mtq << "word3"
|
83
|
-
assert_equal("
|
83
|
+
assert_equal('field:"brown|word1|word2|fast^50.0"^80.1', mtq.to_s())
|
84
84
|
|
85
85
|
terms = mtq.terms(@searcher)
|
86
86
|
assert(terms.index(Ferret::Term.new(:field, "brown")))
|
metadata
CHANGED
@@ -3,8 +3,8 @@ rubygems_version: 0.9.0
|
|
3
3
|
specification_version: 1
|
4
4
|
name: ferret
|
5
5
|
version: !ruby/object:Gem::Version
|
6
|
-
version: 0.10.
|
7
|
-
date: 2006-09-
|
6
|
+
version: 0.10.8
|
7
|
+
date: 2006-09-25 00:00:00 +09:00
|
8
8
|
summary: Ruby indexing library.
|
9
9
|
require_paths:
|
10
10
|
- lib
|
@@ -165,6 +165,7 @@ files:
|
|
165
165
|
- lib/ferret/number_tools.rb
|
166
166
|
- lib/ferret/index.rb
|
167
167
|
- lib/ferret/document.rb
|
168
|
+
- lib/ferret/field_infos.rb
|
168
169
|
- test/test_all.rb
|
169
170
|
- test/test_helper.rb
|
170
171
|
- test/unit/ts_analysis.rb
|