isomorfeus-ferret 0.14.3 → 0.15.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/LICENSE +56 -6
- data/ext/isomorfeus_ferret_ext/extconf.rb +19 -1
- data/ext/isomorfeus_ferret_ext/frb_analysis.c +0 -4
- data/ext/isomorfeus_ferret_ext/frb_field_info.c +65 -55
- data/ext/isomorfeus_ferret_ext/frb_index.c +21 -23
- data/ext/isomorfeus_ferret_ext/frb_lazy_doc.c +4 -0
- data/ext/isomorfeus_ferret_ext/frb_qparser.c +1 -6
- data/ext/isomorfeus_ferret_ext/frb_search.c +0 -5
- data/ext/isomorfeus_ferret_ext/frb_store.c +66 -4
- data/ext/isomorfeus_ferret_ext/frb_utils.c +0 -4
- data/ext/isomorfeus_ferret_ext/frt_compound_io.c +4 -4
- data/ext/isomorfeus_ferret_ext/frt_except.c +11 -11
- data/ext/isomorfeus_ferret_ext/frt_fs_store.c +62 -88
- data/ext/isomorfeus_ferret_ext/frt_hash.c +32 -70
- data/ext/isomorfeus_ferret_ext/frt_ind.c +21 -21
- data/ext/isomorfeus_ferret_ext/frt_ind.h +1 -1
- data/ext/isomorfeus_ferret_ext/frt_index.c +144 -223
- data/ext/isomorfeus_ferret_ext/frt_index.h +35 -58
- data/ext/isomorfeus_ferret_ext/frt_mdbx_store.c +691 -0
- data/ext/isomorfeus_ferret_ext/frt_q_parser.c +4 -4
- data/ext/isomorfeus_ferret_ext/frt_q_span.c +2 -2
- data/ext/isomorfeus_ferret_ext/frt_ram_store.c +9 -9
- data/ext/isomorfeus_ferret_ext/frt_search.c +0 -3
- data/ext/isomorfeus_ferret_ext/frt_search.h +1 -1
- data/ext/isomorfeus_ferret_ext/frt_sort.c +2 -2
- data/ext/isomorfeus_ferret_ext/frt_store.c +6 -8
- data/ext/isomorfeus_ferret_ext/frt_store.h +43 -21
- data/ext/isomorfeus_ferret_ext/frt_threading.h +0 -16
- data/ext/isomorfeus_ferret_ext/isomorfeus_ferret.c +0 -9
- data/ext/isomorfeus_ferret_ext/mdbx.c +33632 -0
- data/ext/isomorfeus_ferret_ext/mdbx.h +5495 -0
- data/ext/isomorfeus_ferret_ext/test.c +20 -18
- data/ext/isomorfeus_ferret_ext/test_1710.c +1 -1
- data/ext/isomorfeus_ferret_ext/test_fields.c +39 -39
- data/ext/isomorfeus_ferret_ext/test_file_deleter.c +1 -1
- data/ext/isomorfeus_ferret_ext/test_filter.c +1 -1
- data/ext/isomorfeus_ferret_ext/test_highlighter.c +1 -2
- data/ext/isomorfeus_ferret_ext/test_index.c +36 -36
- data/ext/isomorfeus_ferret_ext/test_mdbx_store.c +19 -0
- data/ext/isomorfeus_ferret_ext/test_q_fuzzy.c +2 -2
- data/ext/isomorfeus_ferret_ext/test_q_span.c +1 -1
- data/ext/isomorfeus_ferret_ext/test_search.c +4 -4
- data/ext/isomorfeus_ferret_ext/test_segments.c +1 -4
- data/ext/isomorfeus_ferret_ext/test_sort.c +2 -2
- data/ext/isomorfeus_ferret_ext/test_store.c +4 -8
- data/ext/isomorfeus_ferret_ext/test_term_vectors.c +6 -6
- data/ext/isomorfeus_ferret_ext/test_threading.c +4 -4
- data/ext/isomorfeus_ferret_ext/tests_all.h +2 -0
- data/lib/isomorfeus/ferret/version.rb +1 -1
- metadata +9 -5
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: dc82aa85a233a1eb67ae971fbd6e5704b1f6b14660ed5673f9292a3021f34837
|
4
|
+
data.tar.gz: dea91e29a2a3c14381067d05177c455d8953b74f0187e9b7610997c4a1bc68ae
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: '064940571edfbb380795a71031274f6489010cdc28da4c26a64c769eee924593df069d72939a4f6e5e56b8b94398108e6e039c00526a250dbb5f51068a10880e'
|
7
|
+
data.tar.gz: 76edc3b5f07bd779498fe92d8630ec5f651e05eae82982fcedc853b9086fd7af84e827407ee0b2316456dc07bb7f508b63ded855278ff4ca38af18398ee7cc1e
|
data/LICENSE
CHANGED
@@ -28,14 +28,14 @@ Copyright (c) 2005-2006 David Balmain
|
|
28
28
|
MIT License as above
|
29
29
|
|
30
30
|
|
31
|
-
brotli_* files in ext/
|
31
|
+
brotli_* files in ext/isomorfeus_ferret_ext originally taken from https://github.com/google/brotli:
|
32
32
|
|
33
33
|
Copyright (c) 2009, 2010, 2013-2016 by the Brotli Authors.
|
34
34
|
|
35
35
|
MIT License as above
|
36
36
|
|
37
37
|
|
38
|
-
bzlib* files in ext/
|
38
|
+
bzlib* files in ext/isomorfeus_ferret_ext originally taken from git://sourceware.org/git/bzip2.git:
|
39
39
|
|
40
40
|
This program, "bzip2", the associated library "libbzip2", and all
|
41
41
|
documentation, are copyright (C) 1996-2019 Julian R Seward. All
|
@@ -76,7 +76,7 @@ Julian Seward, jseward@acm.org
|
|
76
76
|
bzip2/libbzip2 version 1.0.8 of 13 July 2019
|
77
77
|
|
78
78
|
|
79
|
-
lz4* files in ext/
|
79
|
+
lz4* files in ext/isomorfeus_ferret_ext originally taken from https://github.com/lz4/lz4/tree/dev/lib:
|
80
80
|
|
81
81
|
Copyright (C) 2011-2020, Yann Collet.
|
82
82
|
|
@@ -107,7 +107,7 @@ You can contact the author at:
|
|
107
107
|
- LZ4 source repository : https://github.com/lz4/lz4
|
108
108
|
|
109
109
|
|
110
|
-
stem* and libstemmer* files in ext/
|
110
|
+
stem* and libstemmer* files in ext/isomorfeus_ferret_ext originally taken from https://snowballstem.org/:
|
111
111
|
|
112
112
|
Copyright (c) 2001, Dr Martin Porter
|
113
113
|
Copyright (c) 2004,2005, Richard Boulton
|
@@ -139,11 +139,62 @@ ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
|
139
139
|
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
140
140
|
|
141
141
|
|
142
|
+
mdbx* files in ext/isomorfeus_ferret_ext originally taken from https://gitflic.ru/project/erthink/libmdbx:
|
143
|
+
|
144
|
+
The OpenLDAP Public License
|
145
|
+
Version 2.8, 17 August 2003
|
146
|
+
|
147
|
+
Redistribution and use of this software and associated documentation
|
148
|
+
("Software"), with or without modification, are permitted provided
|
149
|
+
that the following conditions are met:
|
150
|
+
|
151
|
+
1. Redistributions in source form must retain copyright statements
|
152
|
+
and notices,
|
153
|
+
|
154
|
+
2. Redistributions in binary form must reproduce applicable copyright
|
155
|
+
statements and notices, this list of conditions, and the following
|
156
|
+
disclaimer in the documentation and/or other materials provided
|
157
|
+
with the distribution, and
|
158
|
+
|
159
|
+
3. Redistributions must contain a verbatim copy of this document.
|
160
|
+
|
161
|
+
The OpenLDAP Foundation may revise this license from time to time.
|
162
|
+
Each revision is distinguished by a version number. You may use
|
163
|
+
this Software under terms of this license revision or under the
|
164
|
+
terms of any subsequent revision of the license.
|
165
|
+
|
166
|
+
THIS SOFTWARE IS PROVIDED BY THE OPENLDAP FOUNDATION AND ITS
|
167
|
+
CONTRIBUTORS ``AS IS'' AND ANY EXPRESSED OR IMPLIED WARRANTIES,
|
168
|
+
INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY
|
169
|
+
AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT
|
170
|
+
SHALL THE OPENLDAP FOUNDATION, ITS CONTRIBUTORS, OR THE AUTHOR(S)
|
171
|
+
OR OWNER(S) OF THE SOFTWARE BE LIABLE FOR ANY DIRECT, INDIRECT,
|
172
|
+
INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
|
173
|
+
BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
|
174
|
+
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
175
|
+
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
176
|
+
LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
|
177
|
+
ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
178
|
+
POSSIBILITY OF SUCH DAMAGE.
|
179
|
+
|
180
|
+
The names of the authors and copyright holders must not be used in
|
181
|
+
advertising or otherwise to promote the sale, use or other dealing
|
182
|
+
in this Software without specific, written prior permission. Title
|
183
|
+
to copyright in this Software shall at all times remain with copyright
|
184
|
+
holders.
|
185
|
+
|
186
|
+
OpenLDAP is a registered trademark of the OpenLDAP Foundation.
|
187
|
+
|
188
|
+
Copyright 1999-2003 The OpenLDAP Foundation, Redwood City,
|
189
|
+
California, USA. All Rights Reserved. Permission to copy and
|
190
|
+
distribute verbatim copies of this document is granted.
|
191
|
+
|
192
|
+
|
142
193
|
The following licenses apply to files, which are distributed within the repo
|
143
194
|
but not distributed with the gem and not used at runtime:
|
144
195
|
|
145
196
|
|
146
|
-
For the
|
197
|
+
For the Reuters-21578 files in the misc/ferret_vs_others directory (reuters-corpus, etc.),
|
147
198
|
used for research for developing search engine technology:
|
148
199
|
|
149
200
|
The copyright for the text of newswire articles and Reuters
|
@@ -158,7 +209,6 @@ the data set (see "Availability & Questions").
|
|
158
209
|
|
159
210
|
Apache Lucene jars in the misc/ferret_vs_others directory:
|
160
211
|
|
161
|
-
|
162
212
|
Apache License
|
163
213
|
Version 2.0, January 2004
|
164
214
|
http://www.apache.org/licenses/
|
@@ -1,5 +1,23 @@
|
|
1
1
|
require 'mkmf'
|
2
2
|
|
3
|
-
|
3
|
+
WIN_PATTERNS = [
|
4
|
+
/bccwin/i,
|
5
|
+
/cygwin/i,
|
6
|
+
/djgpp/i,
|
7
|
+
/mingw/i,
|
8
|
+
/mswin/i,
|
9
|
+
/wince/i,
|
10
|
+
].freeze
|
11
|
+
|
12
|
+
def win_platform?
|
13
|
+
ruby_platform = RbConfig::CONFIG['host_os']
|
14
|
+
!!WIN_PATTERNS.find {|r| ruby_platform =~ r }
|
15
|
+
end
|
16
|
+
|
17
|
+
$CFLAGS << ' -O2 -DMDBX_BUILD_FLAGS=\"-O2\" -Wall -Wno-sizeof-pointer-div'
|
18
|
+
|
19
|
+
if win_platform?
|
20
|
+
$LDFLAGS << ' -lntdll '
|
21
|
+
end
|
4
22
|
|
5
23
|
create_makefile('isomorfeus_ferret_ext')
|
@@ -2075,10 +2075,6 @@ static void Init_RegExpAnalyzer(void) {
|
|
2075
2075
|
rb_define_method(cRegExpAnalyzer, "token_stream", frb_re_analyzer_token_stream, 2);
|
2076
2076
|
}
|
2077
2077
|
|
2078
|
-
/* rdoc hack
|
2079
|
-
extern VALUE mFerret = rb_define_module("Ferret");
|
2080
|
-
*/
|
2081
|
-
|
2082
2078
|
/*
|
2083
2079
|
* Document-module: Ferret::Analysis
|
2084
2080
|
*
|
@@ -19,57 +19,62 @@ static VALUE sym_with_positions_offsets;
|
|
19
19
|
|
20
20
|
extern VALUE sym_boost;
|
21
21
|
|
22
|
-
void frb_fi_get_params(VALUE roptions,
|
22
|
+
void frb_fi_get_params(VALUE roptions, unsigned int *bits, float *boost) {
|
23
23
|
VALUE v;
|
24
24
|
Check_Type(roptions, T_HASH);
|
25
25
|
v = rb_hash_aref(roptions, sym_boost);
|
26
|
-
if (Qnil != v)
|
27
|
-
|
28
|
-
|
29
|
-
*boost = 1.0f;
|
30
|
-
}
|
26
|
+
if (Qnil != v) *boost = (float)NUM2DBL(v);
|
27
|
+
else *boost = 1.0f;
|
28
|
+
|
31
29
|
v = rb_hash_aref(roptions, sym_store);
|
32
30
|
if (Qnil != v) Check_Type(v, T_SYMBOL);
|
33
31
|
if (v == sym_no || v == sym_false || v == Qfalse) {
|
34
|
-
*
|
32
|
+
*bits &= ~FRT_FI_IS_STORED_BM;
|
35
33
|
} else if (v == sym_yes || v == sym_true || v == Qtrue) {
|
36
|
-
*
|
34
|
+
*bits |= FRT_FI_IS_STORED_BM;
|
37
35
|
} else if (v == Qnil) {
|
38
36
|
/* leave as default */
|
39
37
|
} else {
|
40
|
-
rb_raise(rb_eArgError, ":%s isn't a valid argument for :store. Please choose from [:yes, :no]",
|
41
|
-
rb_id2name(SYM2ID(v)));
|
38
|
+
rb_raise(rb_eArgError, ":%s isn't a valid argument for :store. Please choose from [:yes, :no]", rb_id2name(SYM2ID(v)));
|
42
39
|
}
|
43
40
|
|
44
41
|
v = rb_hash_aref(roptions, sym_compression);
|
45
42
|
if (Qnil != v) Check_Type(v, T_SYMBOL);
|
46
43
|
if (v == sym_no || v == sym_false || v == Qfalse) {
|
47
|
-
*
|
44
|
+
*bits &= ~FRT_FI_IS_COMPRESSED_BM;
|
45
|
+
*bits &= ~FRT_FI_COMPRESSION_BROTLI_BM;
|
46
|
+
*bits &= ~FRT_FI_COMPRESSION_BZ2_BM;
|
47
|
+
*bits &= ~FRT_FI_COMPRESSION_LZ4_BM;
|
48
48
|
} else if (v == sym_yes || v == sym_true || v == Qtrue || v == sym_brotli) {
|
49
|
-
*
|
49
|
+
*bits |= FRT_FI_IS_COMPRESSED_BM | FRT_FI_COMPRESSION_BROTLI_BM;
|
50
50
|
} else if (v == sym_bz2) {
|
51
|
-
*
|
51
|
+
*bits |= FRT_FI_IS_COMPRESSED_BM | FRT_FI_COMPRESSION_BZ2_BM;
|
52
52
|
} else if (v == sym_lz4) {
|
53
|
-
*
|
53
|
+
*bits |= FRT_FI_IS_COMPRESSED_BM | FRT_FI_COMPRESSION_LZ4_BM;
|
54
54
|
} else if (v == Qnil) {
|
55
55
|
/* leave as default */
|
56
56
|
} else {
|
57
|
-
rb_raise(rb_eArgError, ":%s isn't a valid argument for :compression. Please choose from [:yes, :no, :brotli, :bz2, :lz4]",
|
58
|
-
rb_id2name(SYM2ID(v)));
|
57
|
+
rb_raise(rb_eArgError, ":%s isn't a valid argument for :compression. Please choose from [:yes, :no, :brotli, :bz2, :lz4]", rb_id2name(SYM2ID(v)));
|
59
58
|
}
|
60
59
|
|
61
60
|
v = rb_hash_aref(roptions, sym_index);
|
62
61
|
if (Qnil != v) Check_Type(v, T_SYMBOL);
|
63
62
|
if (v == sym_no || v == sym_false || v == Qfalse) {
|
64
|
-
*
|
63
|
+
*bits &= ~FRT_FI_IS_INDEXED_BM;
|
64
|
+
*bits &= ~FRT_FI_IS_TOKENIZED_BM;
|
65
|
+
*bits &= ~FRT_FI_OMIT_NORMS_BM;
|
65
66
|
} else if (v == sym_yes || v == sym_true || v == Qtrue) {
|
66
|
-
*
|
67
|
+
*bits |= FRT_FI_IS_INDEXED_BM | FRT_FI_IS_TOKENIZED_BM;
|
68
|
+
*bits &= ~FRT_FI_OMIT_NORMS_BM;
|
67
69
|
} else if (v == sym_untokenized) {
|
68
|
-
*
|
70
|
+
*bits |= FRT_FI_IS_INDEXED_BM;
|
71
|
+
*bits &= ~FRT_FI_IS_TOKENIZED_BM;
|
72
|
+
*bits &= ~FRT_FI_OMIT_NORMS_BM;
|
69
73
|
} else if (v == sym_omit_norms) {
|
70
|
-
*
|
74
|
+
*bits |= FRT_FI_IS_INDEXED_BM | FRT_FI_IS_TOKENIZED_BM | FRT_FI_OMIT_NORMS_BM;
|
71
75
|
} else if (v == sym_untokenized_omit_norms) {
|
72
|
-
*
|
76
|
+
*bits |= FRT_FI_IS_INDEXED_BM | FRT_FI_OMIT_NORMS_BM;
|
77
|
+
*bits &= ~FRT_FI_IS_TOKENIZED_BM;
|
73
78
|
} else if (v == Qnil) {
|
74
79
|
/* leave as default */
|
75
80
|
} else {
|
@@ -80,18 +85,28 @@ void frb_fi_get_params(VALUE roptions, FrtStoreValue *store, FrtCompressionType
|
|
80
85
|
v = rb_hash_aref(roptions, sym_term_vector);
|
81
86
|
if (Qnil != v) Check_Type(v, T_SYMBOL);
|
82
87
|
if (v == sym_no || v == sym_false || v == Qfalse) {
|
83
|
-
*
|
88
|
+
*bits &= ~FRT_FI_STORE_TERM_VECTOR_BM;
|
89
|
+
*bits &= ~FRT_FI_STORE_POSITIONS_BM;
|
90
|
+
*bits &= ~FRT_FI_STORE_OFFSETS_BM;
|
84
91
|
} else if (v == sym_yes || v == sym_true || v == Qtrue) {
|
85
|
-
*
|
92
|
+
*bits |= FRT_FI_STORE_TERM_VECTOR_BM;
|
93
|
+
*bits &= ~FRT_FI_STORE_POSITIONS_BM;
|
94
|
+
*bits &= ~FRT_FI_STORE_OFFSETS_BM;
|
86
95
|
} else if (v == sym_with_positions) {
|
87
|
-
*
|
96
|
+
*bits |= FRT_FI_STORE_TERM_VECTOR_BM | FRT_FI_STORE_POSITIONS_BM;
|
97
|
+
*bits &= ~FRT_FI_STORE_OFFSETS_BM;
|
88
98
|
} else if (v == sym_with_offsets) {
|
89
|
-
*
|
99
|
+
*bits |= FRT_FI_STORE_TERM_VECTOR_BM | FRT_FI_STORE_OFFSETS_BM;
|
100
|
+
*bits &= ~FRT_FI_STORE_POSITIONS_BM;
|
90
101
|
} else if (v == sym_with_positions_offsets) {
|
91
|
-
*
|
102
|
+
*bits |= FRT_FI_STORE_TERM_VECTOR_BM | FRT_FI_STORE_POSITIONS_BM | FRT_FI_STORE_OFFSETS_BM;
|
92
103
|
} else if (v == Qnil) {
|
93
104
|
/* leave as default */
|
94
|
-
if (*
|
105
|
+
if ((*bits & FRT_FI_IS_INDEXED_BM) == 0) {
|
106
|
+
*bits &= ~FRT_FI_STORE_TERM_VECTOR_BM;
|
107
|
+
*bits &= ~FRT_FI_STORE_POSITIONS_BM;
|
108
|
+
*bits &= ~FRT_FI_STORE_OFFSETS_BM;
|
109
|
+
}
|
95
110
|
} else {
|
96
111
|
rb_raise(rb_eArgError, ":%s isn't a valid argument for :term_vector. Please choose from [:no, :yes, "
|
97
112
|
":with_positions, :with_offsets, :with_positions_offsets]", rb_id2name(SYM2ID(v)));
|
@@ -150,17 +165,12 @@ static VALUE frb_fi_init(int argc, VALUE *argv, VALUE self) {
|
|
150
165
|
VALUE roptions, rname;
|
151
166
|
FrtFieldInfo *fi;
|
152
167
|
TypedData_Get_Struct(self, FrtFieldInfo, &frb_field_info_t, fi);
|
153
|
-
|
154
|
-
FrtCompressionType compression = FRT_COMPRESSION_NONE;
|
155
|
-
FrtIndexValue index = FRT_INDEX_YES;
|
156
|
-
FrtTermVectorValue term_vector = FRT_TERM_VECTOR_WITH_POSITIONS_OFFSETS;
|
168
|
+
unsigned int bits = FRT_FI_DEFAULTS_BM;
|
157
169
|
float boost = 1.0f;
|
158
170
|
|
159
171
|
rb_scan_args(argc, argv, "11", &rname, &roptions);
|
160
|
-
if (argc > 1)
|
161
|
-
|
162
|
-
}
|
163
|
-
fi = frt_fi_init(fi, frb_field(rname), store, compression, index, term_vector);
|
172
|
+
if (argc > 1) frb_fi_get_params(roptions, &bits, &boost);
|
173
|
+
fi = frt_fi_init(fi, frb_field(rname), bits);
|
164
174
|
fi->boost = boost;
|
165
175
|
fi->rfi = self;
|
166
176
|
return self;
|
@@ -185,7 +195,7 @@ static VALUE frb_fi_name(VALUE self) {
|
|
185
195
|
*/
|
186
196
|
static VALUE frb_fi_is_stored(VALUE self) {
|
187
197
|
FrtFieldInfo *fi = (FrtFieldInfo *)DATA_PTR(self);
|
188
|
-
return
|
198
|
+
return bits_is_stored(fi->bits) ? Qtrue : Qfalse;
|
189
199
|
}
|
190
200
|
|
191
201
|
/*
|
@@ -196,7 +206,7 @@ static VALUE frb_fi_is_stored(VALUE self) {
|
|
196
206
|
*/
|
197
207
|
static VALUE frb_fi_is_compressed(VALUE self) {
|
198
208
|
FrtFieldInfo *fi = (FrtFieldInfo *)DATA_PTR(self);
|
199
|
-
return
|
209
|
+
return bits_is_compressed(fi->bits) ? Qtrue : Qfalse;
|
200
210
|
}
|
201
211
|
|
202
212
|
/*
|
@@ -207,7 +217,7 @@ static VALUE frb_fi_is_compressed(VALUE self) {
|
|
207
217
|
*/
|
208
218
|
static VALUE frb_fi_is_indexed(VALUE self) {
|
209
219
|
FrtFieldInfo *fi = (FrtFieldInfo *)DATA_PTR(self);
|
210
|
-
return
|
220
|
+
return bits_is_indexed(fi->bits) ? Qtrue : Qfalse;
|
211
221
|
}
|
212
222
|
|
213
223
|
/*
|
@@ -223,7 +233,7 @@ static VALUE frb_fi_is_indexed(VALUE self) {
|
|
223
233
|
*/
|
224
234
|
static VALUE frb_fi_is_tokenized(VALUE self) {
|
225
235
|
FrtFieldInfo *fi = (FrtFieldInfo *)DATA_PTR(self);
|
226
|
-
return
|
236
|
+
return bits_is_tokenized(fi->bits) ? Qtrue : Qfalse;
|
227
237
|
}
|
228
238
|
|
229
239
|
/*
|
@@ -239,7 +249,7 @@ static VALUE frb_fi_is_tokenized(VALUE self) {
|
|
239
249
|
*/
|
240
250
|
static VALUE frb_fi_omit_norms(VALUE self) {
|
241
251
|
FrtFieldInfo *fi = (FrtFieldInfo *)DATA_PTR(self);
|
242
|
-
return
|
252
|
+
return bits_omit_norms(fi->bits) ? Qtrue : Qfalse;
|
243
253
|
}
|
244
254
|
|
245
255
|
/*
|
@@ -250,7 +260,7 @@ static VALUE frb_fi_omit_norms(VALUE self) {
|
|
250
260
|
*/
|
251
261
|
static VALUE frb_fi_store_term_vector(VALUE self) {
|
252
262
|
FrtFieldInfo *fi = (FrtFieldInfo *)DATA_PTR(self);
|
253
|
-
return
|
263
|
+
return bits_store_term_vector(fi->bits) ? Qtrue : Qfalse;
|
254
264
|
}
|
255
265
|
|
256
266
|
/*
|
@@ -261,7 +271,7 @@ static VALUE frb_fi_store_term_vector(VALUE self) {
|
|
261
271
|
*/
|
262
272
|
static VALUE frb_fi_store_positions(VALUE self) {
|
263
273
|
FrtFieldInfo *fi = (FrtFieldInfo *)DATA_PTR(self);
|
264
|
-
return
|
274
|
+
return bits_store_positions(fi->bits) ? Qtrue : Qfalse;
|
265
275
|
}
|
266
276
|
|
267
277
|
/*
|
@@ -272,7 +282,7 @@ static VALUE frb_fi_store_positions(VALUE self) {
|
|
272
282
|
*/
|
273
283
|
static VALUE frb_fi_store_offsets(VALUE self) {
|
274
284
|
FrtFieldInfo *fi = (FrtFieldInfo *)DATA_PTR(self);
|
275
|
-
return
|
285
|
+
return bits_store_offsets(fi->bits) ? Qtrue : Qfalse;
|
276
286
|
}
|
277
287
|
|
278
288
|
/*
|
@@ -285,7 +295,7 @@ static VALUE frb_fi_store_offsets(VALUE self) {
|
|
285
295
|
*/
|
286
296
|
static VALUE frb_fi_has_norms(VALUE self) {
|
287
297
|
FrtFieldInfo *fi = (FrtFieldInfo *)DATA_PTR(self);
|
288
|
-
return
|
298
|
+
return bits_has_norms(fi->bits) ? Qtrue : Qfalse;
|
289
299
|
}
|
290
300
|
|
291
301
|
/*
|
@@ -326,10 +336,10 @@ static VALUE frb_fi_to_h(VALUE self) {
|
|
326
336
|
bool o;
|
327
337
|
|
328
338
|
// :index
|
329
|
-
if (!
|
339
|
+
if (!bits_is_indexed(fi->bits)) val = sym_no;
|
330
340
|
else {
|
331
|
-
bool t =
|
332
|
-
o =
|
341
|
+
bool t = bits_is_tokenized(fi->bits);
|
342
|
+
o = bits_omit_norms(fi->bits);
|
333
343
|
if (!t && o) val = sym_untokenized_omit_norms;
|
334
344
|
else if (t && o) val = sym_omit_norms;
|
335
345
|
else if (!t && !o) val = sym_untokenized;
|
@@ -338,23 +348,23 @@ static VALUE frb_fi_to_h(VALUE self) {
|
|
338
348
|
rb_hash_aset(hash, sym_index, val);
|
339
349
|
|
340
350
|
// :store
|
341
|
-
rb_hash_aset(hash, sym_store,
|
351
|
+
rb_hash_aset(hash, sym_store, bits_is_stored(fi->bits) ? sym_yes : sym_no);
|
342
352
|
|
343
353
|
// :compress
|
344
|
-
if (!
|
354
|
+
if (!bits_is_compressed(fi->bits)) val = sym_no;
|
345
355
|
else {
|
346
|
-
if (
|
347
|
-
else if (
|
348
|
-
else if (
|
356
|
+
if (bits_is_compressed_brotli(fi->bits)) val = sym_brotli;
|
357
|
+
else if (bits_is_compressed_bz2(fi->bits)) val = sym_bz2;
|
358
|
+
else if (bits_is_compressed_lz4(fi->bits)) val = sym_lz4;
|
349
359
|
else val = sym_yes;
|
350
360
|
}
|
351
361
|
rb_hash_aset(hash, sym_compression, val);
|
352
362
|
|
353
363
|
// :term_vector
|
354
|
-
if (!
|
364
|
+
if (!bits_store_term_vector(fi->bits)) val = sym_no;
|
355
365
|
else {
|
356
|
-
bool p =
|
357
|
-
o =
|
366
|
+
bool p = bits_store_positions(fi->bits);
|
367
|
+
o = bits_store_offsets(fi->bits);
|
358
368
|
if (p && o) val = sym_with_positions_offsets;
|
359
369
|
else if (o) val = sym_with_offsets;
|
360
370
|
else if (p) val = sym_with_positions;
|
@@ -40,7 +40,7 @@ static ID id_boost;
|
|
40
40
|
|
41
41
|
extern VALUE sym_each;
|
42
42
|
extern rb_encoding *utf8_encoding;
|
43
|
-
extern void frb_fi_get_params(VALUE roptions,
|
43
|
+
extern void frb_fi_get_params(VALUE roptions, unsigned int *bits, float *boost);
|
44
44
|
extern FrtAnalyzer *frb_get_cwrapped_analyzer(VALUE ranalyzer);
|
45
45
|
extern VALUE frb_get_analyzer(FrtAnalyzer *a);
|
46
46
|
extern VALUE frb_get_field_info(FrtFieldInfo *fi);
|
@@ -119,17 +119,12 @@ static VALUE frb_fis_init(int argc, VALUE *argv, VALUE self) {
|
|
119
119
|
VALUE roptions;
|
120
120
|
FrtFieldInfos *fis;
|
121
121
|
TypedData_Get_Struct(self, FrtFieldInfos, &frb_field_infos_t, fis);
|
122
|
-
|
123
|
-
FrtCompressionType compression = FRT_COMPRESSION_NONE;
|
124
|
-
FrtIndexValue index = FRT_INDEX_YES;
|
125
|
-
FrtTermVectorValue term_vector = FRT_TERM_VECTOR_WITH_POSITIONS_OFFSETS;
|
122
|
+
unsigned int bits = FRT_FI_DEFAULTS_BM;
|
126
123
|
float boost;
|
127
124
|
|
128
125
|
rb_scan_args(argc, argv, "01", &roptions);
|
129
|
-
if (argc > 0)
|
130
|
-
|
131
|
-
}
|
132
|
-
fis = frt_fis_init(fis, store, compression, index, term_vector);
|
126
|
+
if (argc > 0) frb_fi_get_params(roptions, &bits, &boost);
|
127
|
+
fis = frt_fis_init(fis, bits);
|
133
128
|
fis->rfis = self;
|
134
129
|
return self;
|
135
130
|
}
|
@@ -218,18 +213,15 @@ frb_fis_add_field(int argc, VALUE *argv, VALUE self)
|
|
218
213
|
{
|
219
214
|
FrtFieldInfos *fis = (FrtFieldInfos *)DATA_PTR(self);
|
220
215
|
FrtFieldInfo *fi;
|
221
|
-
|
222
|
-
FrtCompressionType compression = fis->compression;
|
223
|
-
FrtIndexValue index = fis->index;
|
224
|
-
FrtTermVectorValue term_vector = fis->term_vector;
|
216
|
+
unsigned int bits = fis->bits;
|
225
217
|
float boost = 1.0f;
|
226
218
|
VALUE rname, roptions;
|
227
219
|
|
228
220
|
rb_scan_args(argc, argv, "11", &rname, &roptions);
|
229
221
|
if (argc > 1) {
|
230
|
-
frb_fi_get_params(roptions, &
|
222
|
+
frb_fi_get_params(roptions, &bits, &boost);
|
231
223
|
}
|
232
|
-
fi = frt_fi_new(frb_field(rname),
|
224
|
+
fi = frt_fi_new(frb_field(rname), bits);
|
233
225
|
fi->boost = boost;
|
234
226
|
frt_fis_add_field(fis, fi);
|
235
227
|
return self;
|
@@ -340,7 +332,7 @@ frb_fis_get_tk_fields(VALUE self)
|
|
340
332
|
VALUE rfield_names = rb_ary_new();
|
341
333
|
int i;
|
342
334
|
for (i = 0; i < fis->size; i++) {
|
343
|
-
if (!
|
335
|
+
if (!bits_is_tokenized(fis->fields[i]->bits)) continue;
|
344
336
|
rb_ary_push(rfield_names, ID2SYM(fis->fields[i]->name));
|
345
337
|
}
|
346
338
|
return rfield_names;
|
@@ -1082,7 +1074,7 @@ static VALUE frb_iw_init(int argc, VALUE *argv, VALUE self) {
|
|
1082
1074
|
TypedData_Get_Struct(rval, FrtFieldInfos, &frb_field_infos_t, fis);
|
1083
1075
|
frt_index_create(store, fis);
|
1084
1076
|
} else {
|
1085
|
-
fis = frt_fis_new(
|
1077
|
+
fis = frt_fis_new(FRT_FI_DEFAULTS_BM);
|
1086
1078
|
frt_index_create(store, fis);
|
1087
1079
|
frt_fis_deref(fis);
|
1088
1080
|
}
|
@@ -1132,6 +1124,8 @@ frb_hash_to_doc_i(VALUE key, VALUE value, VALUE arg)
|
|
1132
1124
|
if (key == Qundef) {
|
1133
1125
|
return ST_CONTINUE;
|
1134
1126
|
} else {
|
1127
|
+
int ex_code = 0;
|
1128
|
+
const char *msg = NULL;
|
1135
1129
|
FrtDocument *doc = (FrtDocument *)arg;
|
1136
1130
|
ID field = frb_field(key);
|
1137
1131
|
VALUE val;
|
@@ -1162,7 +1156,15 @@ frb_hash_to_doc_i(VALUE key, VALUE value, VALUE arg)
|
|
1162
1156
|
frt_df_add_data_len(df, rstrdup(val), RSTRING_LEN(val), rb_enc_get(val));
|
1163
1157
|
break;
|
1164
1158
|
}
|
1165
|
-
|
1159
|
+
FRT_TRY
|
1160
|
+
frt_doc_add_field(doc, df);
|
1161
|
+
FRT_XCATCHALL
|
1162
|
+
ex_code = xcontext.excode;
|
1163
|
+
msg = xcontext.msg;
|
1164
|
+
FRT_HANDLED();
|
1165
|
+
FRT_XENDTRY
|
1166
|
+
|
1167
|
+
if (ex_code && msg) { frb_raise(ex_code, msg); }
|
1166
1168
|
}
|
1167
1169
|
return ST_CONTINUE;
|
1168
1170
|
}
|
@@ -2293,7 +2295,7 @@ frb_ir_tk_fields(VALUE self)
|
|
2293
2295
|
VALUE rfield_names = rb_ary_new();
|
2294
2296
|
int i;
|
2295
2297
|
for (i = 0; i < fis->size; i++) {
|
2296
|
-
if (!
|
2298
|
+
if (!bits_is_tokenized(fis->fields[i]->bits)) continue;
|
2297
2299
|
rb_ary_push(rfield_names, rb_str_new_cstr(rb_id2name(fis->fields[i]->name)));
|
2298
2300
|
}
|
2299
2301
|
return rfield_names;
|
@@ -2809,10 +2811,6 @@ void Init_IndexReader(void) {
|
|
2809
2811
|
rb_define_method(cIndexReader, "to_enum", frb_ir_to_enum, 0);
|
2810
2812
|
}
|
2811
2813
|
|
2812
|
-
/* rdoc hack
|
2813
|
-
extern VALUE mFerret = rb_define_module("Ferret");
|
2814
|
-
*/
|
2815
|
-
|
2816
2814
|
/*
|
2817
2815
|
* Document-module: Ferret::Index
|
2818
2816
|
*
|
@@ -177,6 +177,7 @@ static VALUE frb_ld_lt(VALUE self, VALUE other) {
|
|
177
177
|
rLazyDoc *other_rld;
|
178
178
|
TypedData_Get_Struct(other, rLazyDoc, &frb_ld_t, other_rld);
|
179
179
|
other_h = frb_ld_to_h(other);
|
180
|
+
(void)other_rld;
|
180
181
|
}
|
181
182
|
VALUE self_h = frb_ld_to_h(self);
|
182
183
|
return rb_funcall(self_h, id_lt, 1, other_h);
|
@@ -190,6 +191,7 @@ static VALUE frb_ld_le(VALUE self, VALUE other) {
|
|
190
191
|
rLazyDoc *other_rld;
|
191
192
|
TypedData_Get_Struct(other, rLazyDoc, &frb_ld_t, other_rld);
|
192
193
|
other_h = frb_ld_to_h(other);
|
194
|
+
(void)other_rld;
|
193
195
|
}
|
194
196
|
VALUE self_h = frb_ld_to_h(self);
|
195
197
|
return rb_funcall(self_h, id_le, 1, other_h);
|
@@ -223,6 +225,7 @@ static VALUE frb_ld_gt(VALUE self, VALUE other) {
|
|
223
225
|
rLazyDoc *other_rld;
|
224
226
|
TypedData_Get_Struct(other, rLazyDoc, &frb_ld_t, other_rld);
|
225
227
|
other_h = frb_ld_to_h(other);
|
228
|
+
(void)other_rld;
|
226
229
|
}
|
227
230
|
VALUE self_h = frb_ld_to_h(self);
|
228
231
|
return rb_funcall(self_h, id_gt, 1, other_h);
|
@@ -236,6 +239,7 @@ static VALUE frb_ld_ge(VALUE self, VALUE other) {
|
|
236
239
|
rLazyDoc *other_rld;
|
237
240
|
TypedData_Get_Struct(other, rLazyDoc, &frb_ld_t, other_rld);
|
238
241
|
other_h = frb_ld_to_h(other);
|
242
|
+
(void)other_rld;
|
239
243
|
}
|
240
244
|
VALUE self_h = frb_ld_to_h(self);
|
241
245
|
return rb_funcall(self_h, id_ge, 1, other_h);
|
@@ -374,11 +374,6 @@ static VALUE frb_qp_set_tkz_fields(VALUE self, VALUE rfields) {
|
|
374
374
|
*
|
375
375
|
****************************************************************************/
|
376
376
|
|
377
|
-
/* rdoc hack
|
378
|
-
extern VALUE mFerret = rb_define_module("Ferret");
|
379
|
-
extern VALUE cQueryParser = rb_define_module_under(mFerret, "QueryParser");
|
380
|
-
*/
|
381
|
-
|
382
377
|
/*
|
383
378
|
* Document-class: Ferret::QueryParser::QueryParseException
|
384
379
|
*
|
@@ -410,7 +405,7 @@ void Init_QueryParseException(void) {
|
|
410
405
|
*
|
411
406
|
* If you want to use one of these characters in one of your terms you need
|
412
407
|
* to escape it with a \ character. \ escapes itself. The exception to this
|
413
|
-
* rule is within Phrases which
|
408
|
+
* rule is within Phrases which are strings surrounded by double quotes (and
|
414
409
|
* will be explained further below in the section on PhraseQueries). In
|
415
410
|
* Phrases, only ", | and <> have special meaning and need to be escaped if
|
416
411
|
* you want the literal value. <> is escaped \<\>.
|
@@ -4593,11 +4593,6 @@ static void Init_SpanNotQuery(void) {
|
|
4593
4593
|
rb_define_method(cSpanNotQuery, "initialize", frb_spanxq_init, 2);
|
4594
4594
|
}
|
4595
4595
|
|
4596
|
-
/* rdoc hack
|
4597
|
-
extern VALUE mFerret = rb_define_module("Ferret");
|
4598
|
-
extern VALUE mSearch = rb_define_module_under(mFerret, "Search");
|
4599
|
-
*/
|
4600
|
-
|
4601
4596
|
/*
|
4602
4597
|
* Document-module: Ferret::Search::Spans
|
4603
4598
|
*
|