ferret 0.10.7 → 0.10.8
Sign up to get free protection for your applications and to get access to all the features.
- data/ext/q_multi_term.c +3 -3
- data/ext/q_parser.c +18 -1
- data/ext/r_analysis.c +2 -3
- data/ext/r_search.c +32 -4
- data/ext/search.h +1 -0
- data/lib/ferret.rb +1 -0
- data/lib/ferret/field_infos.rb +42 -0
- data/lib/ferret/index.rb +4 -0
- data/lib/ferret_version.rb +1 -1
- data/test/unit/search/tm_searcher.rb +10 -10
- metadata +3 -2
data/ext/q_multi_term.c
CHANGED
@@ -524,7 +524,7 @@ static char *multi_tq_to_s(Query *self, const char *curr_field)
|
|
524
524
|
bptr += flen + 1;
|
525
525
|
}
|
526
526
|
|
527
|
-
*(bptr++) = '
|
527
|
+
*(bptr++) = '"';
|
528
528
|
bt_pq_clone = pq_clone(boosted_terms);
|
529
529
|
while ((bt = (BoostedTerm *)pq_pop(bt_pq_clone)) != NULL) {
|
530
530
|
sprintf(bptr, "%s", bt->term);
|
@@ -540,10 +540,10 @@ static char *multi_tq_to_s(Query *self, const char *curr_field)
|
|
540
540
|
}
|
541
541
|
pq_destroy(bt_pq_clone);
|
542
542
|
|
543
|
-
if (bptr[-1] == '
|
543
|
+
if (bptr[-1] == '"') {
|
544
544
|
bptr++; /* handle zero term case */
|
545
545
|
}
|
546
|
-
bptr[-1] = '
|
546
|
+
bptr[-1] = '"'; /* delete last '|' char */
|
547
547
|
bptr[ 0] = '\0';
|
548
548
|
|
549
549
|
if (self->boost != 1.0) {
|
data/ext/q_parser.c
CHANGED
@@ -1754,6 +1754,11 @@ static int get_word(YYSTYPE *lvalp, QParser *qp)
|
|
1754
1754
|
char *bufp = buf;
|
1755
1755
|
qp->buf_index = (qp->buf_index + 1) % QP_CONC_WORDS;
|
1756
1756
|
|
1757
|
+
if (qp->dynbuf) {
|
1758
|
+
free(qp->dynbuf);
|
1759
|
+
qp->dynbuf = NULL;
|
1760
|
+
}
|
1761
|
+
|
1757
1762
|
qp->qstrp--; /* need to back up one character */
|
1758
1763
|
|
1759
1764
|
while (!strchr(not_word, (c=*qp->qstrp++))) {
|
@@ -1773,6 +1778,14 @@ static int get_word(YYSTYPE *lvalp, QParser *qp)
|
|
1773
1778
|
default:
|
1774
1779
|
*bufp++ = c;
|
1775
1780
|
}
|
1781
|
+
/* we've exceeded the static buffer. switch to the dynamic
|
1782
|
+
one. */
|
1783
|
+
if (!qp->dynbuf && ((bufp - buf) == MAX_WORD_SIZE)) {
|
1784
|
+
qp->dynbuf = ALLOC_AND_ZERO_N(char, strlen(qp->qstr) + 1);
|
1785
|
+
strncpy(qp->dynbuf, buf, MAX_WORD_SIZE);
|
1786
|
+
buf = qp->dynbuf;
|
1787
|
+
bufp = buf + MAX_WORD_SIZE;
|
1788
|
+
}
|
1776
1789
|
}
|
1777
1790
|
qp->qstrp--;
|
1778
1791
|
/* check for keywords. There are only four so we have a bit of a hack which
|
@@ -1843,7 +1856,7 @@ static int yyerror(QParser *qp, char const *msg)
|
|
1843
1856
|
}
|
1844
1857
|
mutex_unlock(&qp->mutex);
|
1845
1858
|
RAISE(PARSE_ERROR, "couldn't parse query ``%s''. Error message "
|
1846
|
-
" was %
|
1859
|
+
" was %s", buf, (char *)msg);
|
1847
1860
|
}
|
1848
1861
|
return 0;
|
1849
1862
|
}
|
@@ -2288,6 +2301,9 @@ void qp_destroy(QParser *self)
|
|
2288
2301
|
if (self->tokenized_fields) {
|
2289
2302
|
hs_destroy(self->tokenized_fields);
|
2290
2303
|
}
|
2304
|
+
if (self->dynbuf) {
|
2305
|
+
free(self->dynbuf);
|
2306
|
+
}
|
2291
2307
|
hs_destroy(self->all_fields);
|
2292
2308
|
hs_destroy(self->fields_buf);
|
2293
2309
|
h_destroy(self->field_cache);
|
@@ -2335,6 +2351,7 @@ QParser *qp_new(HashSet *all_fields, HashSet *def_fields,
|
|
2335
2351
|
self->analyzer = analyzer;
|
2336
2352
|
self->ts_cache = h_new_str(&free, (free_ft)&ts_deref);
|
2337
2353
|
self->buf_index = 0;
|
2354
|
+
self->dynbuf = 0;
|
2338
2355
|
self->non_tokenizer = non_tokenizer_new();
|
2339
2356
|
mutex_init(&self->mutex, NULL);
|
2340
2357
|
return self;
|
data/ext/r_analysis.c
CHANGED
@@ -1040,7 +1040,7 @@ static TokenStream *
|
|
1040
1040
|
cwa_get_ts(Analyzer *a, char *field, char *text)
|
1041
1041
|
{
|
1042
1042
|
VALUE rts = rb_funcall(CWA(a)->ranalyzer, id_token_stream, 2,
|
1043
|
-
rb_intern(field), rb_str_new2(text));
|
1043
|
+
ID2SYM(rb_intern(field)), rb_str_new2(text));
|
1044
1044
|
return frt_get_cwrapped_rts(rts);
|
1045
1045
|
}
|
1046
1046
|
|
@@ -1104,10 +1104,9 @@ frt_analyzer_token_stream(VALUE self, VALUE rfield, VALUE rstring)
|
|
1104
1104
|
Analyzer *a;
|
1105
1105
|
GET_A(a, self);
|
1106
1106
|
|
1107
|
-
StringValue(rfield);
|
1108
1107
|
StringValue(rstring);
|
1109
1108
|
|
1110
|
-
ts = a_get_ts(a,
|
1109
|
+
ts = a_get_ts(a, frt_field(rfield), RSTRING(rstring)->ptr);
|
1111
1110
|
|
1112
1111
|
/* Make sure that there is no entry already */
|
1113
1112
|
object_set(&ts->text, rstring);
|
data/ext/r_search.c
CHANGED
@@ -57,6 +57,8 @@ static VALUE sym_upper;
|
|
57
57
|
static VALUE sym_lower;
|
58
58
|
static VALUE sym_include_upper;
|
59
59
|
static VALUE sym_include_lower;
|
60
|
+
static VALUE sym_upper_exclusive;
|
61
|
+
static VALUE sym_lower_exclusive;
|
60
62
|
|
61
63
|
static VALUE sym_less_than;
|
62
64
|
static VALUE sym_less_than_or_equal_to;
|
@@ -170,6 +172,12 @@ frt_get_td(TopDocs *td)
|
|
170
172
|
return rtop_docs;
|
171
173
|
}
|
172
174
|
|
175
|
+
/*
|
176
|
+
* call-seq:
|
177
|
+
* top_doc.to_s -> string
|
178
|
+
*
|
179
|
+
* Returns a string representation of the top_doc in readable format.
|
180
|
+
*/
|
173
181
|
static VALUE
|
174
182
|
frt_td_to_s(VALUE self)
|
175
183
|
{
|
@@ -180,7 +188,7 @@ frt_td_to_s(VALUE self)
|
|
180
188
|
char *s = str;
|
181
189
|
VALUE rstr;
|
182
190
|
|
183
|
-
sprintf(s, "TopDocs:
|
191
|
+
sprintf(s, "TopDocs: total_hits = %d, max_score = %f [\n",
|
184
192
|
FIX2INT(rb_funcall(self, id_total_hits, 0)),
|
185
193
|
NUM2DBL(rb_funcall(self, id_max_score, 0)));
|
186
194
|
s += strlen(s);
|
@@ -900,6 +908,14 @@ get_range_params(VALUE roptions, char **lterm, char **uterm,
|
|
900
908
|
*uterm = StringValuePtr(v);
|
901
909
|
*include_upper = true;
|
902
910
|
}
|
911
|
+
if (Qnil != (v = rb_hash_aref(roptions, sym_lower_exclusive))) {
|
912
|
+
*lterm = StringValuePtr(v);
|
913
|
+
*include_lower = false;
|
914
|
+
}
|
915
|
+
if (Qnil != (v = rb_hash_aref(roptions, sym_upper_exclusive))) {
|
916
|
+
*uterm = StringValuePtr(v);
|
917
|
+
*include_upper = false;
|
918
|
+
}
|
903
919
|
if (Qnil != (v = rb_hash_aref(roptions, sym_include_lower))) {
|
904
920
|
*include_lower = RTEST(v);
|
905
921
|
}
|
@@ -956,6 +972,8 @@ get_range_params(VALUE roptions, char **lterm, char **uterm,
|
|
956
972
|
* q = RangeQuery.new(:date, :lower => "200501", :include_lower => false)
|
957
973
|
* # is equivalent to
|
958
974
|
* q = RangeQuery.new(:date, :< => "200501")
|
975
|
+
* # is equivalent to
|
976
|
+
* q = RangeQuery.new(:date, :lower_exclusive => "200501")
|
959
977
|
*
|
960
978
|
* q = RangeQuery.new(:date, :lower => "200501", :upper => 200502)
|
961
979
|
* # is equivalent to
|
@@ -993,9 +1011,15 @@ frt_rq_init(VALUE self, VALUE rfield, VALUE roptions)
|
|
993
1011
|
* the query it will do anything of value. See PhraseQuery#add_term.
|
994
1012
|
*/
|
995
1013
|
static VALUE
|
996
|
-
frt_phq_init(VALUE self, VALUE rfield)
|
1014
|
+
frt_phq_init(int argc, VALUE *argv, VALUE self)
|
997
1015
|
{
|
998
|
-
|
1016
|
+
VALUE rfield, rslop;
|
1017
|
+
Query *q;
|
1018
|
+
rb_scan_args(argc, argv, "11", &rfield, &rslop);
|
1019
|
+
q = phq_new(frt_field(rfield));
|
1020
|
+
if (argc == 2) {
|
1021
|
+
((PhraseQuery *)q)->slop = FIX2INT(rslop);
|
1022
|
+
}
|
999
1023
|
Frt_Wrap_Struct(self, NULL, &frt_q_free, q);
|
1000
1024
|
object_add(q, self);
|
1001
1025
|
return self;
|
@@ -1715,6 +1739,8 @@ frt_f_to_s(VALUE self)
|
|
1715
1739
|
* f = RangeFilter.new(:date, :lower => "200501", :include_lower => false)
|
1716
1740
|
* # is equivalent to
|
1717
1741
|
* f = RangeFilter.new(:date, :< => "200501")
|
1742
|
+
* # is equivalent to
|
1743
|
+
* f = RangeFilter.new(:date, :lower_exclusive => "200501")
|
1718
1744
|
*
|
1719
1745
|
* f = RangeFilter.new(:date, :lower => "200501", :upper => 200502)
|
1720
1746
|
* # is equivalent to
|
@@ -2991,6 +3017,8 @@ Init_RangeQuery(void)
|
|
2991
3017
|
{
|
2992
3018
|
sym_upper = ID2SYM(rb_intern("upper"));
|
2993
3019
|
sym_lower = ID2SYM(rb_intern("lower"));
|
3020
|
+
sym_upper_exclusive = ID2SYM(rb_intern("upper_exclusive"));
|
3021
|
+
sym_lower_exclusive = ID2SYM(rb_intern("lower_exclusive"));
|
2994
3022
|
sym_include_upper = ID2SYM(rb_intern("include_upper"));
|
2995
3023
|
sym_include_lower = ID2SYM(rb_intern("include_lower"));
|
2996
3024
|
|
@@ -3074,7 +3102,7 @@ Init_PhraseQuery(void)
|
|
3074
3102
|
cPhraseQuery = rb_define_class_under(mSearch, "PhraseQuery", cQuery);
|
3075
3103
|
rb_define_alloc_func(cPhraseQuery, frt_data_alloc);
|
3076
3104
|
|
3077
|
-
rb_define_method(cPhraseQuery, "initialize", frt_phq_init, 1);
|
3105
|
+
rb_define_method(cPhraseQuery, "initialize", frt_phq_init, -1);
|
3078
3106
|
rb_define_method(cPhraseQuery, "add_term", frt_phq_add, -1);
|
3079
3107
|
rb_define_method(cPhraseQuery, "<<", frt_phq_add, -1);
|
3080
3108
|
rb_define_method(cPhraseQuery, "slop", frt_phq_get_slop, 0);
|
data/ext/search.h
CHANGED
data/lib/ferret.rb
CHANGED
data/lib/ferret/field_infos.rb
ADDED
@@ -0,0 +1,42 @@
|
|
1
|
+
require 'yaml'
|
2
|
+
|
3
|
+
class Ferret::Index::FieldInfos
|
4
|
+
# Load FieldInfos from a YAML file. The YAML file should look something like
|
5
|
+
# this:
|
6
|
+
# default:
|
7
|
+
# store: :yes
|
8
|
+
# index: :yes
|
9
|
+
# term_vector: :no
|
10
|
+
#
|
11
|
+
# fields:
|
12
|
+
# id:
|
13
|
+
# index: :untokenized
|
14
|
+
# term_vector: :no
|
15
|
+
#
|
16
|
+
# title:
|
17
|
+
# boost: 20.0
|
18
|
+
# term_vector: :no
|
19
|
+
#
|
20
|
+
# content:
|
21
|
+
# term_vector: :with_positions_offsets
|
22
|
+
#
|
23
|
+
def self.load(yaml_str)
|
24
|
+
info = YAML.load(yaml_str)
|
25
|
+
convert_strings_to_symbols(info)
|
26
|
+
fis = FieldInfos.new(info[:default])
|
27
|
+
fields = info[:fields]
|
28
|
+
fields.keys.each {|key| fis.add_field(key, fields[key])} if fields
|
29
|
+
fis
|
30
|
+
end
|
31
|
+
|
32
|
+
private
|
33
|
+
def self.convert_strings_to_symbols(hash)
|
34
|
+
hash.keys.each do |key|
|
35
|
+
convert_strings_to_symbols(hash[key]) if hash[key].is_a?(Hash)
|
36
|
+
if key.is_a?(String)
|
37
|
+
hash[key.intern] = hash[key]
|
38
|
+
hash.delete(key)
|
39
|
+
end
|
40
|
+
end
|
41
|
+
end
|
42
|
+
end
|
data/lib/ferret/index.rb
CHANGED
@@ -90,6 +90,10 @@ module Ferret::Index
|
|
90
90
|
end
|
91
91
|
end
|
92
92
|
|
93
|
+
if (fi = options[:field_infos]).is_a?(String)
|
94
|
+
options[:field_infos] = FieldInfos.load(options[:field_infos])
|
95
|
+
end
|
96
|
+
|
93
97
|
@close_dir = options[:close_dir]
|
94
98
|
if options[:dir].is_a?(String)
|
95
99
|
options[:path] = options[:dir]
|
data/lib/ferret_version.rb
CHANGED
data/test/unit/search/tm_searcher.rb
CHANGED
@@ -59,14 +59,14 @@ module SearcherTests
|
|
59
59
|
def test_multi_term_query
|
60
60
|
mtq = MultiTermQuery.new(:field, :max_terms => 4, :min_score => 0.5)
|
61
61
|
check_hits(mtq, [])
|
62
|
-
assert_equal("
|
63
|
-
assert_equal(
|
62
|
+
assert_equal('""', mtq.to_s(:field))
|
63
|
+
assert_equal('field:""', mtq.to_s)
|
64
64
|
|
65
65
|
[
|
66
|
-
["brown", 1.0, "
|
67
|
-
["fox", 0.1, "
|
68
|
-
["fox", 0.6, "
|
69
|
-
["fast", 50.0, "
|
66
|
+
["brown", 1.0, '"brown"'],
|
67
|
+
["fox", 0.1, '"brown"'],
|
68
|
+
["fox", 0.6, '"fox^0.6|brown"'],
|
69
|
+
["fast", 50.0, '"fox^0.6|brown|fast^50.0"']
|
70
70
|
].each do |term, boost, str|
|
71
71
|
mtq.add_term(term, boost)
|
72
72
|
assert_equal(str, mtq.to_s(:field))
|
@@ -74,13 +74,13 @@ module SearcherTests
|
|
74
74
|
end
|
75
75
|
|
76
76
|
mtq.boost = 80.1
|
77
|
-
assert_equal("
|
77
|
+
assert_equal('field:"fox^0.6|brown|fast^50.0"^80.1', mtq.to_s())
|
78
78
|
mtq << "word1"
|
79
|
-
assert_equal("
|
79
|
+
assert_equal('field:"fox^0.6|brown|word1|fast^50.0"^80.1', mtq.to_s())
|
80
80
|
mtq << "word2"
|
81
|
-
assert_equal("
|
81
|
+
assert_equal('field:"brown|word1|word2|fast^50.0"^80.1', mtq.to_s())
|
82
82
|
mtq << "word3"
|
83
|
-
assert_equal("
|
83
|
+
assert_equal('field:"brown|word1|word2|fast^50.0"^80.1', mtq.to_s())
|
84
84
|
|
85
85
|
terms = mtq.terms(@searcher)
|
86
86
|
assert(terms.index(Ferret::Term.new(:field, "brown")))
|
metadata
CHANGED
@@ -3,8 +3,8 @@ rubygems_version: 0.9.0
|
|
3
3
|
specification_version: 1
|
4
4
|
name: ferret
|
5
5
|
version: !ruby/object:Gem::Version
|
6
|
-
version: 0.10.7
|
7
|
-
date: 2006-09-
|
6
|
+
version: 0.10.8
|
7
|
+
date: 2006-09-25 00:00:00 +09:00
|
8
8
|
summary: Ruby indexing library.
|
9
9
|
require_paths:
|
10
10
|
- lib
|
@@ -165,6 +165,7 @@ files:
|
|
165
165
|
- lib/ferret/number_tools.rb
|
166
166
|
- lib/ferret/index.rb
|
167
167
|
- lib/ferret/document.rb
|
168
|
+
- lib/ferret/field_infos.rb
|
168
169
|
- test/test_all.rb
|
169
170
|
- test/test_helper.rb
|
170
171
|
- test/unit/ts_analysis.rb
|