ferret 0.9.0 → 0.9.1
Sign up to get free protection for your applications and to get access to all the features.
- data/Rakefile +23 -5
- data/TODO +2 -1
- data/ext/analysis.c +838 -177
- data/ext/analysis.h +55 -7
- data/ext/api.c +69 -0
- data/ext/api.h +27 -0
- data/ext/array.c +8 -5
- data/ext/compound_io.c +132 -96
- data/ext/document.c +58 -28
- data/ext/except.c +59 -0
- data/ext/except.h +88 -0
- data/ext/ferret.c +47 -3
- data/ext/ferret.h +3 -0
- data/ext/field.c +15 -9
- data/ext/filter.c +1 -1
- data/ext/fs_store.c +215 -34
- data/ext/global.c +72 -3
- data/ext/global.h +4 -3
- data/ext/hash.c +44 -3
- data/ext/hash.h +9 -0
- data/ext/header.h +58 -0
- data/ext/inc/except.h +88 -0
- data/ext/inc/lang.h +23 -13
- data/ext/ind.c +16 -10
- data/ext/index.h +2 -22
- data/ext/index_io.c +3 -11
- data/ext/index_rw.c +245 -193
- data/ext/lang.h +23 -13
- data/ext/libstemmer.c +92 -0
- data/ext/libstemmer.h +79 -0
- data/ext/modules.h +162 -0
- data/ext/q_boolean.c +34 -21
- data/ext/q_const_score.c +6 -12
- data/ext/q_filtered_query.c +206 -0
- data/ext/q_fuzzy.c +18 -15
- data/ext/q_match_all.c +3 -7
- data/ext/q_multi_phrase.c +10 -14
- data/ext/q_parser.c +29 -2
- data/ext/q_phrase.c +14 -21
- data/ext/q_prefix.c +15 -12
- data/ext/q_range.c +30 -28
- data/ext/q_span.c +13 -21
- data/ext/q_term.c +17 -26
- data/ext/r_analysis.c +693 -21
- data/ext/r_doc.c +11 -12
- data/ext/r_index_io.c +4 -1
- data/ext/r_qparser.c +21 -2
- data/ext/r_search.c +285 -18
- data/ext/ram_store.c +5 -2
- data/ext/search.c +11 -17
- data/ext/search.h +21 -45
- data/ext/similarity.h +67 -0
- data/ext/sort.c +30 -25
- data/ext/stem_ISO_8859_1_danish.c +338 -0
- data/ext/stem_ISO_8859_1_danish.h +16 -0
- data/ext/stem_ISO_8859_1_dutch.c +635 -0
- data/ext/stem_ISO_8859_1_dutch.h +16 -0
- data/ext/stem_ISO_8859_1_english.c +1156 -0
- data/ext/stem_ISO_8859_1_english.h +16 -0
- data/ext/stem_ISO_8859_1_finnish.c +792 -0
- data/ext/stem_ISO_8859_1_finnish.h +16 -0
- data/ext/stem_ISO_8859_1_french.c +1276 -0
- data/ext/stem_ISO_8859_1_french.h +16 -0
- data/ext/stem_ISO_8859_1_german.c +512 -0
- data/ext/stem_ISO_8859_1_german.h +16 -0
- data/ext/stem_ISO_8859_1_italian.c +1091 -0
- data/ext/stem_ISO_8859_1_italian.h +16 -0
- data/ext/stem_ISO_8859_1_norwegian.c +296 -0
- data/ext/stem_ISO_8859_1_norwegian.h +16 -0
- data/ext/stem_ISO_8859_1_porter.c +776 -0
- data/ext/stem_ISO_8859_1_porter.h +16 -0
- data/ext/stem_ISO_8859_1_portuguese.c +1035 -0
- data/ext/stem_ISO_8859_1_portuguese.h +16 -0
- data/ext/stem_ISO_8859_1_spanish.c +1119 -0
- data/ext/stem_ISO_8859_1_spanish.h +16 -0
- data/ext/stem_ISO_8859_1_swedish.c +307 -0
- data/ext/stem_ISO_8859_1_swedish.h +16 -0
- data/ext/stem_KOI8_R_russian.c +701 -0
- data/ext/stem_KOI8_R_russian.h +16 -0
- data/ext/stem_UTF_8_danish.c +344 -0
- data/ext/stem_UTF_8_danish.h +16 -0
- data/ext/stem_UTF_8_dutch.c +653 -0
- data/ext/stem_UTF_8_dutch.h +16 -0
- data/ext/stem_UTF_8_english.c +1176 -0
- data/ext/stem_UTF_8_english.h +16 -0
- data/ext/stem_UTF_8_finnish.c +808 -0
- data/ext/stem_UTF_8_finnish.h +16 -0
- data/ext/stem_UTF_8_french.c +1296 -0
- data/ext/stem_UTF_8_french.h +16 -0
- data/ext/stem_UTF_8_german.c +526 -0
- data/ext/stem_UTF_8_german.h +16 -0
- data/ext/stem_UTF_8_italian.c +1113 -0
- data/ext/stem_UTF_8_italian.h +16 -0
- data/ext/stem_UTF_8_norwegian.c +302 -0
- data/ext/stem_UTF_8_norwegian.h +16 -0
- data/ext/stem_UTF_8_porter.c +794 -0
- data/ext/stem_UTF_8_porter.h +16 -0
- data/ext/stem_UTF_8_portuguese.c +1055 -0
- data/ext/stem_UTF_8_portuguese.h +16 -0
- data/ext/stem_UTF_8_russian.c +709 -0
- data/ext/stem_UTF_8_russian.h +16 -0
- data/ext/stem_UTF_8_spanish.c +1137 -0
- data/ext/stem_UTF_8_spanish.h +16 -0
- data/ext/stem_UTF_8_swedish.c +313 -0
- data/ext/stem_UTF_8_swedish.h +16 -0
- data/ext/stopwords.c +325 -0
- data/ext/store.c +34 -2
- data/ext/tags +2953 -0
- data/ext/term.c +21 -15
- data/ext/termdocs.c +5 -3
- data/ext/utilities.c +446 -0
- data/ext/vector.c +27 -13
- data/lib/ferret/document/document.rb +1 -1
- data/lib/ferret/index/index.rb +44 -6
- data/lib/ferret/query_parser/query_parser.tab.rb +7 -3
- data/lib/rferret.rb +2 -1
- data/test/test_helper.rb +2 -2
- data/test/unit/analysis/ctc_analyzer.rb +401 -0
- data/test/unit/analysis/ctc_tokenstream.rb +423 -0
- data/test/unit/analysis/{tc_letter_tokenizer.rb → rtc_letter_tokenizer.rb} +0 -0
- data/test/unit/analysis/{tc_lower_case_filter.rb → rtc_lower_case_filter.rb} +0 -0
- data/test/unit/analysis/{tc_lower_case_tokenizer.rb → rtc_lower_case_tokenizer.rb} +0 -0
- data/test/unit/analysis/{tc_per_field_analyzer_wrapper.rb → rtc_per_field_analyzer_wrapper.rb} +0 -0
- data/test/unit/analysis/{tc_porter_stem_filter.rb → rtc_porter_stem_filter.rb} +0 -0
- data/test/unit/analysis/{tc_standard_analyzer.rb → rtc_standard_analyzer.rb} +0 -0
- data/test/unit/analysis/{tc_standard_tokenizer.rb → rtc_standard_tokenizer.rb} +0 -0
- data/test/unit/analysis/{tc_stop_analyzer.rb → rtc_stop_analyzer.rb} +0 -0
- data/test/unit/analysis/{tc_stop_filter.rb → rtc_stop_filter.rb} +0 -0
- data/test/unit/analysis/{tc_white_space_analyzer.rb → rtc_white_space_analyzer.rb} +0 -0
- data/test/unit/analysis/{tc_white_space_tokenizer.rb → rtc_white_space_tokenizer.rb} +0 -0
- data/test/unit/analysis/{tc_word_list_loader.rb → rtc_word_list_loader.rb} +0 -0
- data/test/unit/analysis/tc_analyzer.rb +1 -2
- data/test/unit/analysis/{c_token.rb → tc_token.rb} +0 -0
- data/test/unit/document/rtc_field.rb +28 -0
- data/test/unit/document/{c_document.rb → tc_document.rb} +0 -0
- data/test/unit/document/tc_field.rb +82 -12
- data/test/unit/index/{tc_compound_file_io.rb → rtc_compound_file_io.rb} +0 -0
- data/test/unit/index/{tc_field_infos.rb → rtc_field_infos.rb} +0 -0
- data/test/unit/index/{tc_fields_io.rb → rtc_fields_io.rb} +0 -0
- data/test/unit/index/{tc_multiple_term_doc_pos_enum.rb → rtc_multiple_term_doc_pos_enum.rb} +0 -0
- data/test/unit/index/{tc_segment_infos.rb → rtc_segment_infos.rb} +0 -0
- data/test/unit/index/{tc_segment_term_docs.rb → rtc_segment_term_docs.rb} +0 -0
- data/test/unit/index/{tc_segment_term_enum.rb → rtc_segment_term_enum.rb} +0 -0
- data/test/unit/index/{tc_segment_term_vector.rb → rtc_segment_term_vector.rb} +0 -0
- data/test/unit/index/{tc_term_buffer.rb → rtc_term_buffer.rb} +0 -0
- data/test/unit/index/{tc_term_info.rb → rtc_term_info.rb} +0 -0
- data/test/unit/index/{tc_term_infos_io.rb → rtc_term_infos_io.rb} +0 -0
- data/test/unit/index/{tc_term_vectors_io.rb → rtc_term_vectors_io.rb} +0 -0
- data/test/unit/index/{c_index.rb → tc_index.rb} +26 -6
- data/test/unit/index/{c_index_reader.rb → tc_index_reader.rb} +0 -0
- data/test/unit/index/{c_index_writer.rb → tc_index_writer.rb} +0 -0
- data/test/unit/index/{c_term.rb → tc_term.rb} +0 -0
- data/test/unit/index/{c_term_voi.rb → tc_term_voi.rb} +0 -0
- data/test/unit/query_parser/{c_query_parser.rb → rtc_query_parser.rb} +14 -14
- data/test/unit/query_parser/tc_query_parser.rb +24 -16
- data/test/unit/search/{tc_similarity.rb → rtc_similarity.rb} +0 -0
- data/test/unit/search/rtc_sort_field.rb +14 -0
- data/test/unit/search/{c_filter.rb → tc_filter.rb} +11 -11
- data/test/unit/search/{c_fuzzy_query.rb → tc_fuzzy_query.rb} +0 -0
- data/test/unit/search/{c_index_searcher.rb → tc_index_searcher.rb} +0 -0
- data/test/unit/search/{c_search_and_sort.rb → tc_search_and_sort.rb} +0 -0
- data/test/unit/search/{c_sort.rb → tc_sort.rb} +0 -0
- data/test/unit/search/tc_sort_field.rb +20 -7
- data/test/unit/search/{c_spans.rb → tc_spans.rb} +0 -0
- data/test/unit/store/rtc_fs_store.rb +62 -0
- data/test/unit/store/rtc_ram_store.rb +15 -0
- data/test/unit/store/rtm_store.rb +150 -0
- data/test/unit/store/rtm_store_lock.rb +2 -0
- data/test/unit/store/tc_fs_store.rb +54 -40
- data/test/unit/store/tc_ram_store.rb +20 -0
- data/test/unit/store/tm_store.rb +30 -146
- data/test/unit/store/tm_store_lock.rb +66 -0
- data/test/unit/utils/{tc_bit_vector.rb → rtc_bit_vector.rb} +0 -0
- data/test/unit/utils/{tc_date_tools.rb → rtc_date_tools.rb} +0 -0
- data/test/unit/utils/{tc_number_tools.rb → rtc_number_tools.rb} +0 -0
- data/test/unit/utils/{tc_parameter.rb → rtc_parameter.rb} +0 -0
- data/test/unit/utils/{tc_priority_queue.rb → rtc_priority_queue.rb} +0 -0
- data/test/unit/utils/{tc_string_helper.rb → rtc_string_helper.rb} +0 -0
- data/test/unit/utils/{tc_thread.rb → rtc_thread.rb} +0 -0
- data/test/unit/utils/{tc_weak_key_hash.rb → rtc_weak_key_hash.rb} +0 -0
- metadata +360 -289
- data/test/unit/document/c_field.rb +0 -98
- data/test/unit/search/c_sort_field.rb +0 -27
- data/test/unit/store/c_fs_store.rb +0 -76
- data/test/unit/store/c_ram_store.rb +0 -35
- data/test/unit/store/m_store.rb +0 -34
- data/test/unit/store/m_store_lock.rb +0 -68
data/ext/ram_store.c
CHANGED
@@ -1,6 +1,9 @@
|
|
1
1
|
#include <string.h>
|
2
2
|
#include <store.h>
|
3
3
|
|
4
|
+
static char * const RENAME_ERROR_MSG = "tried to rename a file that doesn't exist";
|
5
|
+
static char * const MISSING_RAMFILE_ERROR_MSG ="Couldn't open the ram file to read";
|
6
|
+
|
4
7
|
typedef struct RamFile {
|
5
8
|
char *name;
|
6
9
|
uchar **buffers;
|
@@ -74,7 +77,7 @@ int ram_rename(Store *store, char *from, char *to)
|
|
74
77
|
{
|
75
78
|
RamFile *rf = (RamFile *)h_rem(store->dir.ht, from, false);
|
76
79
|
if (rf == NULL)
|
77
|
-
|
80
|
+
RAISE(IO_ERROR, RENAME_ERROR_MSG);
|
78
81
|
|
79
82
|
free(rf->name);
|
80
83
|
|
@@ -335,7 +338,7 @@ InStream *ram_open_input(Store *store, const char *filename)
|
|
335
338
|
{
|
336
339
|
RamFile *rf = (RamFile *)h_get(store->dir.ht, filename);
|
337
340
|
if (rf == NULL) {
|
338
|
-
|
341
|
+
RAISE(IO_ERROR, MISSING_RAMFILE_ERROR_MSG);
|
339
342
|
}
|
340
343
|
rf->refcnt++;
|
341
344
|
InStream *is = is_create();
|
data/ext/search.c
CHANGED
@@ -1,6 +1,9 @@
|
|
1
1
|
#include <string.h>
|
2
2
|
#include "search.h"
|
3
3
|
|
4
|
+
static char * const NUM_DOCS_ARG_ERROR_MSG = "num_docs must be > 0 to run a search";
|
5
|
+
static char * const FIRST_DOC_ARG_ERROR_MSG = "first_doc must be >= 0 to run a search";
|
6
|
+
|
4
7
|
/***************************************************************************
|
5
8
|
*
|
6
9
|
* Explanation
|
@@ -44,15 +47,11 @@ Explanation *expl_add_detail(Explanation *self, Explanation *detail)
|
|
44
47
|
char *expl_to_s(Explanation *self, int depth)
|
45
48
|
{
|
46
49
|
int i;
|
47
|
-
char dbuf[32];
|
48
50
|
char *buffer = ALLOC_N(char, depth * 2 + 1);
|
49
51
|
memset(buffer, ' ', sizeof(char) * depth * 2);
|
50
52
|
buffer[depth*2] = 0;
|
51
53
|
|
52
|
-
|
53
|
-
buffer = estrcat(buffer, epstrdup("%s = %s\n",
|
54
|
-
strlen(dbuf) + strlen(self->description),
|
55
|
-
dbuf, self->description));
|
54
|
+
buffer = estrcat(buffer, strfmt("%f = %s\n", self->value, self->description));
|
56
55
|
for (i = 0; i < self->dcnt; i++) {
|
57
56
|
buffer = estrcat(buffer, expl_to_s(self->details[i], depth + 1));
|
58
57
|
}
|
@@ -63,12 +62,8 @@ char *expl_to_s(Explanation *self, int depth)
|
|
63
62
|
char *expl_to_html(Explanation *self)
|
64
63
|
{
|
65
64
|
int i;
|
66
|
-
char dbuf[32];
|
67
65
|
char *buffer;
|
68
|
-
|
69
|
-
buffer = epstrdup("<ul>\n<li>%s = %s</li>\n",
|
70
|
-
strlen(dbuf) + strlen(self->description),
|
71
|
-
dbuf, self->description);
|
66
|
+
buffer = strfmt("<ul>\n<li>%f = %s</li>\n", self->value, self->description);
|
72
67
|
|
73
68
|
for (i = 0; i < self->dcnt; i++) {
|
74
69
|
estrcat(buffer, expl_to_html(self->details[i]));
|
@@ -193,13 +188,11 @@ void td_destroy(void *p)
|
|
193
188
|
char *td_to_s(TopDocs *td)
|
194
189
|
{
|
195
190
|
int i;
|
196
|
-
char dbuf[32];
|
197
191
|
Hit *hit;
|
198
|
-
char *buffer =
|
192
|
+
char *buffer = strfmt("%d hits sorted by <score, doc_num>\n", td->total_hits);
|
199
193
|
for (i = 0; i < td->size; i++) {
|
200
194
|
hit = td->hits[i];
|
201
|
-
|
202
|
-
estrcat(buffer, epstrdup("\t%d:%s\n", 52, hit->doc, dbuf));
|
195
|
+
estrcat(buffer, strfmt("\t%d:%f\n", hit->doc, hit->score));
|
203
196
|
}
|
204
197
|
return buffer;
|
205
198
|
}
|
@@ -389,10 +382,10 @@ TopDocs *sea_search(Searcher *self, Query *query, int first_doc,
|
|
389
382
|
|
390
383
|
|
391
384
|
if (num_docs <= 0)
|
392
|
-
|
385
|
+
RAISE(ARG_ERROR, NUM_DOCS_ARG_ERROR_MSG);
|
393
386
|
|
394
387
|
if (first_doc < 0)
|
395
|
-
|
388
|
+
RAISE(ARG_ERROR, FIRST_DOC_ARG_ERROR_MSG);
|
396
389
|
|
397
390
|
weight = q_weight(query, self);
|
398
391
|
scorer = weight->scorer(weight, self->ir);
|
@@ -498,7 +491,7 @@ Similarity *sea_get_similarity(Searcher *self)
|
|
498
491
|
|
499
492
|
void sea_close(Searcher *self)
|
500
493
|
{
|
501
|
-
if (self->ir)
|
494
|
+
if (self->ir && self->close_ir)
|
502
495
|
ir_close(self->ir);
|
503
496
|
free(self);
|
504
497
|
}
|
@@ -507,6 +500,7 @@ Searcher *sea_create(IndexReader *ir)
|
|
507
500
|
{
|
508
501
|
Searcher *self = ALLOC(Searcher);
|
509
502
|
self->ir = ir;
|
503
|
+
self->close_ir = true;
|
510
504
|
self->similarity = sim_create_default();
|
511
505
|
self->doc_freq = &sea_doc_freq;
|
512
506
|
self->doc_freqs = &sea_doc_freqs;
|
data/ext/search.h
CHANGED
@@ -1,53 +1,13 @@
|
|
1
1
|
#ifndef FRT_SEARCH_H
|
2
2
|
#define FRT_SEARCH_H
|
3
3
|
|
4
|
-
typedef struct Similarity Similarity;
|
5
4
|
typedef struct Query Query;
|
6
5
|
typedef struct Weight Weight;
|
7
6
|
typedef struct Scorer Scorer;
|
8
|
-
typedef struct Searcher Searcher;
|
9
7
|
|
10
8
|
#include "index.h"
|
11
9
|
#include "bitvector.h"
|
12
|
-
|
13
|
-
/***************************************************************************
|
14
|
-
*
|
15
|
-
* Similarity
|
16
|
-
*
|
17
|
-
***************************************************************************/
|
18
|
-
|
19
|
-
struct Similarity {
|
20
|
-
void *data;
|
21
|
-
float norm_table[256];
|
22
|
-
float (*length_norm)(Similarity *self, char *field, int num_terms);
|
23
|
-
float (*query_norm)(Similarity *self, float sum_of_squared_weights);
|
24
|
-
float (*tf)(Similarity *self, float freq);
|
25
|
-
float (*sloppy_freq)(Similarity *self, int distance);
|
26
|
-
float (*idf_term)(Similarity *self, Term *term, Searcher *searcher);
|
27
|
-
float (*idf_phrase)(Similarity *self, Term **terms, int tcnt, Searcher *searcher);
|
28
|
-
float (*idf)(Similarity *self, int doc_freq, int num_docs);
|
29
|
-
float (*coord)(Similarity *self, int overlap, int max_overlap);
|
30
|
-
float (*decode_norm)(Similarity *self, uchar b);
|
31
|
-
float (*encode_norm)(Similarity *self, float f);
|
32
|
-
void (*destroy)(void *p);
|
33
|
-
};
|
34
|
-
|
35
|
-
#define sim_length_norm(msim, field, num_terms) msim->length_norm(msim, field, num_terms)
|
36
|
-
#define sim_query_norm(msim, sosw) msim->query_norm(msim, sosw)
|
37
|
-
#define sim_tf(msim, freq) msim->tf(msim, freq)
|
38
|
-
#define sim_sloppy_freq(msim, distance) msim->sloppy_freq(msim, distance)
|
39
|
-
#define sim_idf_term(msim, term, searcher) msim->idf_term(msim, term, searcher)
|
40
|
-
#define sim_idf_phrase(msim, terms, tcnt, searcher) msim->idf_phrase(msim, terms, tcnt, searcher)
|
41
|
-
#define sim_idf(msim, doc_freq, num_docs) msim->idf(msim, doc_freq, num_docs)
|
42
|
-
#define sim_coord(msim, overlap, max_overlap) msim->coord(msim, overlap, max_overlap)
|
43
|
-
#define sim_decode_norm(msim, b) msim->decode_norm(msim, b)
|
44
|
-
#define sim_encode_norm(msim, f) msim->encode_norm(msim, f)
|
45
|
-
#define sim_destroy(msim) msim->destroy(msim)
|
46
|
-
|
47
|
-
float byte_to_float(uchar b);
|
48
|
-
uchar float_to_byte(float f);
|
49
|
-
|
50
|
-
Similarity *sim_create_default();
|
10
|
+
#include "similarity.h"
|
51
11
|
|
52
12
|
/***************************************************************************
|
53
13
|
*
|
@@ -235,6 +195,7 @@ enum QUERY_TYPE {
|
|
235
195
|
PHRASE_QUERY,
|
236
196
|
MULTI_PHRASE_QUERY,
|
237
197
|
CONSTANT_QUERY,
|
198
|
+
FILTERED_QUERY,
|
238
199
|
MATCH_ALL_QUERY,
|
239
200
|
RANGE_QUERY,
|
240
201
|
WILD_CARD_QUERY,
|
@@ -306,6 +267,7 @@ typedef struct BooleanClause {
|
|
306
267
|
} BooleanClause;
|
307
268
|
|
308
269
|
BooleanClause *bc_create(Query *query, unsigned int occur);
|
270
|
+
void bc_destroy(BooleanClause *self);
|
309
271
|
void bc_set_occur(BooleanClause *self, unsigned int occur);
|
310
272
|
|
311
273
|
/***************************************************************************
|
@@ -326,7 +288,8 @@ typedef struct BooleanQuery {
|
|
326
288
|
} BooleanQuery;
|
327
289
|
|
328
290
|
Query *bq_create(bool coord_disabled);
|
329
|
-
|
291
|
+
BooleanClause *bq_add_query(Query *self, Query *sub_query, unsigned int occur);
|
292
|
+
BooleanClause *bq_add_clause(Query *self, BooleanClause *bc);
|
330
293
|
|
331
294
|
/***************************************************************************
|
332
295
|
*
|
@@ -420,15 +383,15 @@ Query *csq_create(Filter *filter);
|
|
420
383
|
|
421
384
|
/***************************************************************************
|
422
385
|
*
|
423
|
-
*
|
386
|
+
* FilteredQueryQuery
|
424
387
|
*
|
425
388
|
***************************************************************************/
|
426
389
|
|
427
|
-
Query *
|
390
|
+
Query *fq_create(Query *query, Filter *filter);
|
428
391
|
|
429
392
|
/***************************************************************************
|
430
393
|
*
|
431
|
-
*
|
394
|
+
* MatchAllQuery
|
432
395
|
*
|
433
396
|
***************************************************************************/
|
434
397
|
|
@@ -453,6 +416,17 @@ Query *rq_create(const char *field, char *lower_term, char *upper_term,
|
|
453
416
|
Query *rq_create_less(const char *field, char *upper_term, bool include_upper);
|
454
417
|
Query *rq_create_more(const char *field, char *lower_term, bool include_lower);
|
455
418
|
|
419
|
+
/***************************************************************************
|
420
|
+
*
|
421
|
+
* FilteredQuery
|
422
|
+
*
|
423
|
+
***************************************************************************/
|
424
|
+
|
425
|
+
typedef struct FilteredQuery {
|
426
|
+
Query *query;
|
427
|
+
Filter *filter;
|
428
|
+
} FilteredQuery;
|
429
|
+
|
456
430
|
/***************************************************************************
|
457
431
|
*
|
458
432
|
* SpanQuery
|
@@ -949,6 +923,7 @@ PriorityQueue *fshq_pq_create(int size, Sort *sort, IndexReader *ir);
|
|
949
923
|
struct Searcher {
|
950
924
|
IndexReader *ir;
|
951
925
|
Similarity *similarity;
|
926
|
+
bool close_ir : 1;
|
952
927
|
int (*doc_freq)(Searcher *self, Term *term);
|
953
928
|
int *(*doc_freqs)(Searcher *self, Term **terms, int tcnt);
|
954
929
|
Document *(*get_doc)(Searcher *self, int doc_num);
|
@@ -1032,6 +1007,7 @@ typedef struct Index {
|
|
1032
1007
|
bool use_compound_file : 1;
|
1033
1008
|
bool auto_flush : 1;
|
1034
1009
|
bool has_writes : 1;
|
1010
|
+
bool check_latest : 1;
|
1035
1011
|
} Index;
|
1036
1012
|
|
1037
1013
|
Index *index_create(Store *store, Analyzer *analyzer, HashSet *def_fields,
|
data/ext/similarity.h
ADDED
@@ -0,0 +1,67 @@
|
|
1
|
+
#ifndef FRT_SIMILARITY_H
|
2
|
+
#define FRT_SIMILARITY_H
|
3
|
+
|
4
|
+
typedef struct Searcher Searcher;
|
5
|
+
|
6
|
+
/****************************************************************************
|
7
|
+
*
|
8
|
+
* Term
|
9
|
+
*
|
10
|
+
****************************************************************************/
|
11
|
+
|
12
|
+
typedef struct Term {
|
13
|
+
char *field;
|
14
|
+
char *text;
|
15
|
+
} Term;
|
16
|
+
|
17
|
+
Term *term_clone(Term *term);
|
18
|
+
Term *term_create(const char *field, char *text);
|
19
|
+
void term_destroy(void *p);
|
20
|
+
int term_cmp(void *t1, void *t2);
|
21
|
+
int term_eq(const void *t1, const void *t2);
|
22
|
+
unsigned int term_hash(const void *t);
|
23
|
+
char *term_to_s(Term *term);
|
24
|
+
|
25
|
+
/***************************************************************************
|
26
|
+
*
|
27
|
+
* Similarity
|
28
|
+
*
|
29
|
+
***************************************************************************/
|
30
|
+
|
31
|
+
typedef struct Similarity Similarity;
|
32
|
+
|
33
|
+
struct Similarity {
|
34
|
+
void *data;
|
35
|
+
float norm_table[256];
|
36
|
+
float (*length_norm)(Similarity *self, char *field, int num_terms);
|
37
|
+
float (*query_norm)(Similarity *self, float sum_of_squared_weights);
|
38
|
+
float (*tf)(Similarity *self, float freq);
|
39
|
+
float (*sloppy_freq)(Similarity *self, int distance);
|
40
|
+
float (*idf_term)(Similarity *self, Term *term, Searcher *searcher);
|
41
|
+
float (*idf_phrase)(Similarity *self, Term **terms,
|
42
|
+
int tcnt, Searcher *searcher);
|
43
|
+
float (*idf)(Similarity *self, int doc_freq, int num_docs);
|
44
|
+
float (*coord)(Similarity *self, int overlap, int max_overlap);
|
45
|
+
float (*decode_norm)(Similarity *self, uchar b);
|
46
|
+
float (*encode_norm)(Similarity *self, float f);
|
47
|
+
void (*destroy)(void *p);
|
48
|
+
};
|
49
|
+
|
50
|
+
#define sim_length_norm(msim, field, num_terms) msim->length_norm(msim, field, num_terms)
|
51
|
+
#define sim_query_norm(msim, sosw) msim->query_norm(msim, sosw)
|
52
|
+
#define sim_tf(msim, freq) msim->tf(msim, freq)
|
53
|
+
#define sim_sloppy_freq(msim, distance) msim->sloppy_freq(msim, distance)
|
54
|
+
#define sim_idf_term(msim, term, searcher) msim->idf_term(msim, term, searcher)
|
55
|
+
#define sim_idf_phrase(msim, terms, tcnt, searcher) msim->idf_phrase(msim, terms, tcnt, searcher)
|
56
|
+
#define sim_idf(msim, doc_freq, num_docs) msim->idf(msim, doc_freq, num_docs)
|
57
|
+
#define sim_coord(msim, overlap, max_overlap) msim->coord(msim, overlap, max_overlap)
|
58
|
+
#define sim_decode_norm(msim, b) msim->decode_norm(msim, b)
|
59
|
+
#define sim_encode_norm(msim, f) msim->encode_norm(msim, f)
|
60
|
+
#define sim_destroy(msim) msim->destroy(msim)
|
61
|
+
|
62
|
+
float byte_to_float(uchar b);
|
63
|
+
uchar float_to_byte(float f);
|
64
|
+
|
65
|
+
Similarity *sim_create_default();
|
66
|
+
|
67
|
+
#endif
|
data/ext/sort.c
CHANGED
@@ -2,6 +2,8 @@
|
|
2
2
|
#include "search.h"
|
3
3
|
#include "index.h"
|
4
4
|
|
5
|
+
static char * const NO_TERM_ERROR_MSG = "no terms in field to sort by";
|
6
|
+
|
5
7
|
/***************************************************************************
|
6
8
|
*
|
7
9
|
* SortField
|
@@ -378,8 +380,8 @@ void *field_cache_get_index(IndexReader *ir, SortField *sf)
|
|
378
380
|
int length = 0;
|
379
381
|
Term term;
|
380
382
|
TermBuffer *tb;
|
381
|
-
TermEnum *te;
|
382
|
-
TermDocEnum *tde;
|
383
|
+
TermEnum *volatile te = NULL;
|
384
|
+
TermDocEnum *volatile tde = NULL;
|
383
385
|
char *field = sf->field;
|
384
386
|
SortField *sf_clone;
|
385
387
|
|
@@ -393,29 +395,32 @@ void *field_cache_get_index(IndexReader *ir, SortField *sf)
|
|
393
395
|
if (index == NULL) {
|
394
396
|
length = ir->max_doc(ir);
|
395
397
|
if (length > 0) {
|
396
|
-
|
397
|
-
|
398
|
-
|
399
|
-
|
400
|
-
|
401
|
-
|
402
|
-
|
403
|
-
|
404
|
-
|
405
|
-
|
406
|
-
|
407
|
-
|
408
|
-
|
409
|
-
|
410
|
-
|
411
|
-
|
412
|
-
|
413
|
-
|
414
|
-
|
415
|
-
|
416
|
-
|
417
|
-
|
418
|
-
|
398
|
+
TRY
|
399
|
+
tde = ir->term_docs(ir);
|
400
|
+
term.field = field;
|
401
|
+
term.text = "";
|
402
|
+
te = ir->terms_from(ir, &term);
|
403
|
+
if (te->tb_curr == NULL) {
|
404
|
+
RAISE(ARG_ERROR, NO_TERM_ERROR_MSG);
|
405
|
+
}
|
406
|
+
|
407
|
+
if (sf->type == SORT_TYPE_AUTO) {
|
408
|
+
sort_field_auto_evaluate(sf, te->tb_curr->text);
|
409
|
+
}
|
410
|
+
|
411
|
+
index = sf->create_index(length);
|
412
|
+
|
413
|
+
do {
|
414
|
+
tb = te->tb_curr;
|
415
|
+
if (strcmp(tb->field, field) != 0) break;
|
416
|
+
term.text = tb->text;
|
417
|
+
tde->seek(tde, &term);
|
418
|
+
sf->handle_term(index, tde, tb->text);
|
419
|
+
} while (te->next(te));
|
420
|
+
XFINALLY
|
421
|
+
tde->close(tde);
|
422
|
+
te->close(te);
|
423
|
+
XENDTRY
|
419
424
|
}
|
420
425
|
sf_clone = sort_field_clone(sf);
|
421
426
|
sf_clone->index = index;
|
@@ -0,0 +1,338 @@
|
|
1
|
+
|
2
|
+
/* This file was generated automatically by the Snowball to ANSI C compiler */
|
3
|
+
|
4
|
+
#include "header.h"
|
5
|
+
|
6
|
+
extern int danish_ISO_8859_1_stem(struct SN_env * z);
|
7
|
+
static int r_undouble(struct SN_env * z);
|
8
|
+
static int r_other_suffix(struct SN_env * z);
|
9
|
+
static int r_consonant_pair(struct SN_env * z);
|
10
|
+
static int r_main_suffix(struct SN_env * z);
|
11
|
+
static int r_mark_regions(struct SN_env * z);
|
12
|
+
|
13
|
+
extern struct SN_env * danish_ISO_8859_1_create_env(void);
|
14
|
+
extern void danish_ISO_8859_1_close_env(struct SN_env * z);
|
15
|
+
|
16
|
+
static symbol s_0_0[3] = { 'h', 'e', 'd' };
|
17
|
+
static symbol s_0_1[5] = { 'e', 't', 'h', 'e', 'd' };
|
18
|
+
static symbol s_0_2[4] = { 'e', 'r', 'e', 'd' };
|
19
|
+
static symbol s_0_3[1] = { 'e' };
|
20
|
+
static symbol s_0_4[5] = { 'e', 'r', 'e', 'd', 'e' };
|
21
|
+
static symbol s_0_5[4] = { 'e', 'n', 'd', 'e' };
|
22
|
+
static symbol s_0_6[6] = { 'e', 'r', 'e', 'n', 'd', 'e' };
|
23
|
+
static symbol s_0_7[3] = { 'e', 'n', 'e' };
|
24
|
+
static symbol s_0_8[4] = { 'e', 'r', 'n', 'e' };
|
25
|
+
static symbol s_0_9[3] = { 'e', 'r', 'e' };
|
26
|
+
static symbol s_0_10[2] = { 'e', 'n' };
|
27
|
+
static symbol s_0_11[5] = { 'h', 'e', 'd', 'e', 'n' };
|
28
|
+
static symbol s_0_12[4] = { 'e', 'r', 'e', 'n' };
|
29
|
+
static symbol s_0_13[2] = { 'e', 'r' };
|
30
|
+
static symbol s_0_14[5] = { 'h', 'e', 'd', 'e', 'r' };
|
31
|
+
static symbol s_0_15[4] = { 'e', 'r', 'e', 'r' };
|
32
|
+
static symbol s_0_16[1] = { 's' };
|
33
|
+
static symbol s_0_17[4] = { 'h', 'e', 'd', 's' };
|
34
|
+
static symbol s_0_18[2] = { 'e', 's' };
|
35
|
+
static symbol s_0_19[5] = { 'e', 'n', 'd', 'e', 's' };
|
36
|
+
static symbol s_0_20[7] = { 'e', 'r', 'e', 'n', 'd', 'e', 's' };
|
37
|
+
static symbol s_0_21[4] = { 'e', 'n', 'e', 's' };
|
38
|
+
static symbol s_0_22[5] = { 'e', 'r', 'n', 'e', 's' };
|
39
|
+
static symbol s_0_23[4] = { 'e', 'r', 'e', 's' };
|
40
|
+
static symbol s_0_24[3] = { 'e', 'n', 's' };
|
41
|
+
static symbol s_0_25[6] = { 'h', 'e', 'd', 'e', 'n', 's' };
|
42
|
+
static symbol s_0_26[5] = { 'e', 'r', 'e', 'n', 's' };
|
43
|
+
static symbol s_0_27[3] = { 'e', 'r', 's' };
|
44
|
+
static symbol s_0_28[3] = { 'e', 't', 's' };
|
45
|
+
static symbol s_0_29[5] = { 'e', 'r', 'e', 't', 's' };
|
46
|
+
static symbol s_0_30[2] = { 'e', 't' };
|
47
|
+
static symbol s_0_31[4] = { 'e', 'r', 'e', 't' };
|
48
|
+
|
49
|
+
static struct among a_0[32] =
|
50
|
+
{
|
51
|
+
/* 0 */ { 3, s_0_0, -1, 1, 0},
|
52
|
+
/* 1 */ { 5, s_0_1, 0, 1, 0},
|
53
|
+
/* 2 */ { 4, s_0_2, -1, 1, 0},
|
54
|
+
/* 3 */ { 1, s_0_3, -1, 1, 0},
|
55
|
+
/* 4 */ { 5, s_0_4, 3, 1, 0},
|
56
|
+
/* 5 */ { 4, s_0_5, 3, 1, 0},
|
57
|
+
/* 6 */ { 6, s_0_6, 5, 1, 0},
|
58
|
+
/* 7 */ { 3, s_0_7, 3, 1, 0},
|
59
|
+
/* 8 */ { 4, s_0_8, 3, 1, 0},
|
60
|
+
/* 9 */ { 3, s_0_9, 3, 1, 0},
|
61
|
+
/* 10 */ { 2, s_0_10, -1, 1, 0},
|
62
|
+
/* 11 */ { 5, s_0_11, 10, 1, 0},
|
63
|
+
/* 12 */ { 4, s_0_12, 10, 1, 0},
|
64
|
+
/* 13 */ { 2, s_0_13, -1, 1, 0},
|
65
|
+
/* 14 */ { 5, s_0_14, 13, 1, 0},
|
66
|
+
/* 15 */ { 4, s_0_15, 13, 1, 0},
|
67
|
+
/* 16 */ { 1, s_0_16, -1, 2, 0},
|
68
|
+
/* 17 */ { 4, s_0_17, 16, 1, 0},
|
69
|
+
/* 18 */ { 2, s_0_18, 16, 1, 0},
|
70
|
+
/* 19 */ { 5, s_0_19, 18, 1, 0},
|
71
|
+
/* 20 */ { 7, s_0_20, 19, 1, 0},
|
72
|
+
/* 21 */ { 4, s_0_21, 18, 1, 0},
|
73
|
+
/* 22 */ { 5, s_0_22, 18, 1, 0},
|
74
|
+
/* 23 */ { 4, s_0_23, 18, 1, 0},
|
75
|
+
/* 24 */ { 3, s_0_24, 16, 1, 0},
|
76
|
+
/* 25 */ { 6, s_0_25, 24, 1, 0},
|
77
|
+
/* 26 */ { 5, s_0_26, 24, 1, 0},
|
78
|
+
/* 27 */ { 3, s_0_27, 16, 1, 0},
|
79
|
+
/* 28 */ { 3, s_0_28, 16, 1, 0},
|
80
|
+
/* 29 */ { 5, s_0_29, 28, 1, 0},
|
81
|
+
/* 30 */ { 2, s_0_30, -1, 1, 0},
|
82
|
+
/* 31 */ { 4, s_0_31, 30, 1, 0}
|
83
|
+
};
|
84
|
+
|
85
|
+
static symbol s_1_0[2] = { 'g', 'd' };
|
86
|
+
static symbol s_1_1[2] = { 'd', 't' };
|
87
|
+
static symbol s_1_2[2] = { 'g', 't' };
|
88
|
+
static symbol s_1_3[2] = { 'k', 't' };
|
89
|
+
|
90
|
+
static struct among a_1[4] =
|
91
|
+
{
|
92
|
+
/* 0 */ { 2, s_1_0, -1, -1, 0},
|
93
|
+
/* 1 */ { 2, s_1_1, -1, -1, 0},
|
94
|
+
/* 2 */ { 2, s_1_2, -1, -1, 0},
|
95
|
+
/* 3 */ { 2, s_1_3, -1, -1, 0}
|
96
|
+
};
|
97
|
+
|
98
|
+
static symbol s_2_0[2] = { 'i', 'g' };
|
99
|
+
static symbol s_2_1[3] = { 'l', 'i', 'g' };
|
100
|
+
static symbol s_2_2[4] = { 'e', 'l', 'i', 'g' };
|
101
|
+
static symbol s_2_3[3] = { 'e', 'l', 's' };
|
102
|
+
static symbol s_2_4[4] = { 'l', 0xF8, 's', 't' };
|
103
|
+
|
104
|
+
static struct among a_2[5] =
|
105
|
+
{
|
106
|
+
/* 0 */ { 2, s_2_0, -1, 1, 0},
|
107
|
+
/* 1 */ { 3, s_2_1, 0, 1, 0},
|
108
|
+
/* 2 */ { 4, s_2_2, 1, 1, 0},
|
109
|
+
/* 3 */ { 3, s_2_3, -1, 1, 0},
|
110
|
+
/* 4 */ { 4, s_2_4, -1, 2, 0}
|
111
|
+
};
|
112
|
+
|
113
|
+
static unsigned char g_v[] = { 17, 65, 16, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 48, 0, 128 };
|
114
|
+
|
115
|
+
static unsigned char g_s_ending[] = { 239, 254, 42, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 16 };
|
116
|
+
|
117
|
+
static symbol s_0[] = { 's', 't' };
|
118
|
+
static symbol s_1[] = { 'i', 'g' };
|
119
|
+
static symbol s_2[] = { 'l', 0xF8, 's' };
|
120
|
+
|
121
|
+
static int r_mark_regions(struct SN_env * z) {
|
122
|
+
z->I[0] = z->l;
|
123
|
+
{ int c_test = z->c; /* test, line 33 */
|
124
|
+
{ int c = z->c + 3;
|
125
|
+
if (0 > c || c > z->l) return 0;
|
126
|
+
z->c = c; /* hop, line 33 */
|
127
|
+
}
|
128
|
+
z->I[1] = z->c; /* setmark x, line 33 */
|
129
|
+
z->c = c_test;
|
130
|
+
}
|
131
|
+
while(1) { /* goto, line 34 */
|
132
|
+
int c = z->c;
|
133
|
+
if (!(in_grouping(z, g_v, 97, 248))) goto lab0;
|
134
|
+
z->c = c;
|
135
|
+
break;
|
136
|
+
lab0:
|
137
|
+
z->c = c;
|
138
|
+
if (z->c >= z->l) return 0;
|
139
|
+
z->c++; /* goto, line 34 */
|
140
|
+
}
|
141
|
+
while(1) { /* gopast, line 34 */
|
142
|
+
if (!(out_grouping(z, g_v, 97, 248))) goto lab1;
|
143
|
+
break;
|
144
|
+
lab1:
|
145
|
+
if (z->c >= z->l) return 0;
|
146
|
+
z->c++; /* gopast, line 34 */
|
147
|
+
}
|
148
|
+
z->I[0] = z->c; /* setmark p1, line 34 */
|
149
|
+
/* try, line 35 */
|
150
|
+
if (!(z->I[0] < z->I[1])) goto lab2;
|
151
|
+
z->I[0] = z->I[1];
|
152
|
+
lab2:
|
153
|
+
return 1;
|
154
|
+
}
|
155
|
+
|
156
|
+
static int r_main_suffix(struct SN_env * z) {
|
157
|
+
int among_var;
|
158
|
+
{ int m3; /* setlimit, line 41 */
|
159
|
+
int m = z->l - z->c; (void) m;
|
160
|
+
if (z->c < z->I[0]) return 0;
|
161
|
+
z->c = z->I[0]; /* tomark, line 41 */
|
162
|
+
m3 = z->lb; z->lb = z->c;
|
163
|
+
z->c = z->l - m;
|
164
|
+
z->ket = z->c; /* [, line 41 */
|
165
|
+
among_var = find_among_b(z, a_0, 32); /* substring, line 41 */
|
166
|
+
if (!(among_var)) { z->lb = m3; return 0; }
|
167
|
+
z->bra = z->c; /* ], line 41 */
|
168
|
+
z->lb = m3;
|
169
|
+
}
|
170
|
+
switch(among_var) {
|
171
|
+
case 0: return 0;
|
172
|
+
case 1:
|
173
|
+
{ int ret;
|
174
|
+
ret = slice_del(z); /* delete, line 48 */
|
175
|
+
if (ret < 0) return ret;
|
176
|
+
}
|
177
|
+
break;
|
178
|
+
case 2:
|
179
|
+
if (!(in_grouping_b(z, g_s_ending, 97, 229))) return 0;
|
180
|
+
{ int ret;
|
181
|
+
ret = slice_del(z); /* delete, line 50 */
|
182
|
+
if (ret < 0) return ret;
|
183
|
+
}
|
184
|
+
break;
|
185
|
+
}
|
186
|
+
return 1;
|
187
|
+
}
|
188
|
+
|
189
|
+
static int r_consonant_pair(struct SN_env * z) {
|
190
|
+
{ int m_test = z->l - z->c; /* test, line 55 */
|
191
|
+
{ int m3; /* setlimit, line 56 */
|
192
|
+
int m = z->l - z->c; (void) m;
|
193
|
+
if (z->c < z->I[0]) return 0;
|
194
|
+
z->c = z->I[0]; /* tomark, line 56 */
|
195
|
+
m3 = z->lb; z->lb = z->c;
|
196
|
+
z->c = z->l - m;
|
197
|
+
z->ket = z->c; /* [, line 56 */
|
198
|
+
if (!(find_among_b(z, a_1, 4))) { z->lb = m3; return 0; } /* substring, line 56 */
|
199
|
+
z->bra = z->c; /* ], line 56 */
|
200
|
+
z->lb = m3;
|
201
|
+
}
|
202
|
+
z->c = z->l - m_test;
|
203
|
+
}
|
204
|
+
if (z->c <= z->lb) return 0;
|
205
|
+
z->c--; /* next, line 62 */
|
206
|
+
z->bra = z->c; /* ], line 62 */
|
207
|
+
{ int ret;
|
208
|
+
ret = slice_del(z); /* delete, line 62 */
|
209
|
+
if (ret < 0) return ret;
|
210
|
+
}
|
211
|
+
return 1;
|
212
|
+
}
|
213
|
+
|
214
|
+
static int r_other_suffix(struct SN_env * z) {
|
215
|
+
int among_var;
|
216
|
+
{ int m = z->l - z->c; (void) m; /* do, line 66 */
|
217
|
+
z->ket = z->c; /* [, line 66 */
|
218
|
+
if (!(eq_s_b(z, 2, s_0))) goto lab0;
|
219
|
+
z->bra = z->c; /* ], line 66 */
|
220
|
+
if (!(eq_s_b(z, 2, s_1))) goto lab0;
|
221
|
+
{ int ret;
|
222
|
+
ret = slice_del(z); /* delete, line 66 */
|
223
|
+
if (ret < 0) return ret;
|
224
|
+
}
|
225
|
+
lab0:
|
226
|
+
z->c = z->l - m;
|
227
|
+
}
|
228
|
+
{ int m3; /* setlimit, line 67 */
|
229
|
+
int m = z->l - z->c; (void) m;
|
230
|
+
if (z->c < z->I[0]) return 0;
|
231
|
+
z->c = z->I[0]; /* tomark, line 67 */
|
232
|
+
m3 = z->lb; z->lb = z->c;
|
233
|
+
z->c = z->l - m;
|
234
|
+
z->ket = z->c; /* [, line 67 */
|
235
|
+
among_var = find_among_b(z, a_2, 5); /* substring, line 67 */
|
236
|
+
if (!(among_var)) { z->lb = m3; return 0; }
|
237
|
+
z->bra = z->c; /* ], line 67 */
|
238
|
+
z->lb = m3;
|
239
|
+
}
|
240
|
+
switch(among_var) {
|
241
|
+
case 0: return 0;
|
242
|
+
case 1:
|
243
|
+
{ int ret;
|
244
|
+
ret = slice_del(z); /* delete, line 70 */
|
245
|
+
if (ret < 0) return ret;
|
246
|
+
}
|
247
|
+
{ int m = z->l - z->c; (void) m; /* do, line 70 */
|
248
|
+
{ int ret = r_consonant_pair(z);
|
249
|
+
if (ret == 0) goto lab1; /* call consonant_pair, line 70 */
|
250
|
+
if (ret < 0) return ret;
|
251
|
+
}
|
252
|
+
lab1:
|
253
|
+
z->c = z->l - m;
|
254
|
+
}
|
255
|
+
break;
|
256
|
+
case 2:
|
257
|
+
{ int ret;
|
258
|
+
ret = slice_from_s(z, 3, s_2); /* <-, line 72 */
|
259
|
+
if (ret < 0) return ret;
|
260
|
+
}
|
261
|
+
break;
|
262
|
+
}
|
263
|
+
return 1;
|
264
|
+
}
|
265
|
+
|
266
|
+
static int r_undouble(struct SN_env * z) {
|
267
|
+
{ int m3; /* setlimit, line 76 */
|
268
|
+
int m = z->l - z->c; (void) m;
|
269
|
+
if (z->c < z->I[0]) return 0;
|
270
|
+
z->c = z->I[0]; /* tomark, line 76 */
|
271
|
+
m3 = z->lb; z->lb = z->c;
|
272
|
+
z->c = z->l - m;
|
273
|
+
z->ket = z->c; /* [, line 76 */
|
274
|
+
if (!(out_grouping_b(z, g_v, 97, 248))) { z->lb = m3; return 0; }
|
275
|
+
z->bra = z->c; /* ], line 76 */
|
276
|
+
z->S[0] = slice_to(z, z->S[0]); /* -> ch, line 76 */
|
277
|
+
if (z->S[0] == 0) return -1; /* -> ch, line 76 */
|
278
|
+
z->lb = m3;
|
279
|
+
}
|
280
|
+
if (!(eq_v_b(z, z->S[0]))) return 0; /* name ch, line 77 */
|
281
|
+
{ int ret;
|
282
|
+
ret = slice_del(z); /* delete, line 78 */
|
283
|
+
if (ret < 0) return ret;
|
284
|
+
}
|
285
|
+
return 1;
|
286
|
+
}
|
287
|
+
|
288
|
+
extern int danish_ISO_8859_1_stem(struct SN_env * z) {
|
289
|
+
{ int c = z->c; /* do, line 84 */
|
290
|
+
{ int ret = r_mark_regions(z);
|
291
|
+
if (ret == 0) goto lab0; /* call mark_regions, line 84 */
|
292
|
+
if (ret < 0) return ret;
|
293
|
+
}
|
294
|
+
lab0:
|
295
|
+
z->c = c;
|
296
|
+
}
|
297
|
+
z->lb = z->c; z->c = z->l; /* backwards, line 85 */
|
298
|
+
|
299
|
+
{ int m = z->l - z->c; (void) m; /* do, line 86 */
|
300
|
+
{ int ret = r_main_suffix(z);
|
301
|
+
if (ret == 0) goto lab1; /* call main_suffix, line 86 */
|
302
|
+
if (ret < 0) return ret;
|
303
|
+
}
|
304
|
+
lab1:
|
305
|
+
z->c = z->l - m;
|
306
|
+
}
|
307
|
+
{ int m = z->l - z->c; (void) m; /* do, line 87 */
|
308
|
+
{ int ret = r_consonant_pair(z);
|
309
|
+
if (ret == 0) goto lab2; /* call consonant_pair, line 87 */
|
310
|
+
if (ret < 0) return ret;
|
311
|
+
}
|
312
|
+
lab2:
|
313
|
+
z->c = z->l - m;
|
314
|
+
}
|
315
|
+
{ int m = z->l - z->c; (void) m; /* do, line 88 */
|
316
|
+
{ int ret = r_other_suffix(z);
|
317
|
+
if (ret == 0) goto lab3; /* call other_suffix, line 88 */
|
318
|
+
if (ret < 0) return ret;
|
319
|
+
}
|
320
|
+
lab3:
|
321
|
+
z->c = z->l - m;
|
322
|
+
}
|
323
|
+
{ int m = z->l - z->c; (void) m; /* do, line 89 */
|
324
|
+
{ int ret = r_undouble(z);
|
325
|
+
if (ret == 0) goto lab4; /* call undouble, line 89 */
|
326
|
+
if (ret < 0) return ret;
|
327
|
+
}
|
328
|
+
lab4:
|
329
|
+
z->c = z->l - m;
|
330
|
+
}
|
331
|
+
z->c = z->lb;
|
332
|
+
return 1;
|
333
|
+
}
|
334
|
+
|
335
|
+
/*
 * Creates the environment used by danish_ISO_8859_1_stem by calling
 * SN_create_env(1, 2, 0) and returning its result unchanged.
 * NOTE(review): the arguments presumably size the S, I and B arrays of
 * struct SN_env (the visible routines use S[0] and I[0]) -- confirm against
 * the runtime's api.h.
 */
extern struct SN_env * danish_ISO_8859_1_create_env(void) {
    struct SN_env * env = SN_create_env(1, 2, 0);
    return env;
}
|
336
|
+
|
337
|
+
/*
 * Disposes of an environment obtained from danish_ISO_8859_1_create_env by
 * handing it to SN_close_env.
 */
extern void danish_ISO_8859_1_close_env(struct SN_env * z) {
    SN_close_env(z);
}
|
338
|
+
|