isomorfeus-ferret 0.17.2 → 0.17.4

Sign up to get free protection for your applications and to get access to all the features.
Files changed (133) hide show
  1. checksums.yaml +4 -4
  2. data/ext/isomorfeus_ferret_ext/benchmark.c +9 -20
  3. data/ext/isomorfeus_ferret_ext/benchmarks_all.h +1 -2
  4. data/ext/isomorfeus_ferret_ext/bm_hash.c +1 -2
  5. data/ext/isomorfeus_ferret_ext/brotli_dec_decode.c +4 -2
  6. data/ext/isomorfeus_ferret_ext/brotli_enc_encode.c +3 -2
  7. data/ext/isomorfeus_ferret_ext/frb_analysis.c +4 -5
  8. data/ext/isomorfeus_ferret_ext/frb_field_info.c +3 -4
  9. data/ext/isomorfeus_ferret_ext/frb_index.c +161 -187
  10. data/ext/isomorfeus_ferret_ext/frb_lazy_doc.c +14 -16
  11. data/ext/isomorfeus_ferret_ext/frb_search.c +77 -69
  12. data/ext/isomorfeus_ferret_ext/frb_store.c +27 -13
  13. data/ext/isomorfeus_ferret_ext/frb_utils.c +3 -6
  14. data/ext/isomorfeus_ferret_ext/frt_analysis.c +39 -46
  15. data/ext/isomorfeus_ferret_ext/frt_analysis.h +9 -9
  16. data/ext/isomorfeus_ferret_ext/frt_array.c +11 -22
  17. data/ext/isomorfeus_ferret_ext/frt_bitvector.h +3 -6
  18. data/ext/isomorfeus_ferret_ext/frt_doc_field.c +87 -0
  19. data/ext/isomorfeus_ferret_ext/frt_doc_field.h +26 -0
  20. data/ext/isomorfeus_ferret_ext/frt_document.c +4 -97
  21. data/ext/isomorfeus_ferret_ext/frt_document.h +5 -33
  22. data/ext/isomorfeus_ferret_ext/frt_except.c +8 -6
  23. data/ext/isomorfeus_ferret_ext/frt_except.h +1 -2
  24. data/ext/isomorfeus_ferret_ext/frt_field_index.c +14 -33
  25. data/ext/isomorfeus_ferret_ext/frt_field_index.h +0 -6
  26. data/ext/isomorfeus_ferret_ext/frt_field_info.c +69 -0
  27. data/ext/isomorfeus_ferret_ext/frt_field_info.h +49 -0
  28. data/ext/isomorfeus_ferret_ext/frt_field_infos.c +196 -0
  29. data/ext/isomorfeus_ferret_ext/frt_field_infos.h +35 -0
  30. data/ext/isomorfeus_ferret_ext/frt_filter.c +2 -2
  31. data/ext/isomorfeus_ferret_ext/frt_global.c +10 -4
  32. data/ext/isomorfeus_ferret_ext/frt_global.h +11 -15
  33. data/ext/isomorfeus_ferret_ext/frt_hash.c +8 -8
  34. data/ext/isomorfeus_ferret_ext/frt_hash.h +1 -2
  35. data/ext/isomorfeus_ferret_ext/frt_hashset.c +20 -40
  36. data/ext/isomorfeus_ferret_ext/frt_hashset.h +1 -2
  37. data/ext/isomorfeus_ferret_ext/frt_helper.c +7 -15
  38. data/ext/isomorfeus_ferret_ext/frt_in_stream.c +35 -45
  39. data/ext/isomorfeus_ferret_ext/frt_in_stream.h +3 -2
  40. data/ext/isomorfeus_ferret_ext/frt_ind.c +21 -39
  41. data/ext/isomorfeus_ferret_ext/frt_ind.h +1 -1
  42. data/ext/isomorfeus_ferret_ext/frt_index.c +334 -848
  43. data/ext/isomorfeus_ferret_ext/frt_index.h +4 -105
  44. data/ext/isomorfeus_ferret_ext/frt_lang.c +5 -10
  45. data/ext/isomorfeus_ferret_ext/frt_lazy_doc.c +3 -3
  46. data/ext/isomorfeus_ferret_ext/frt_lazy_doc.h +1 -1
  47. data/ext/isomorfeus_ferret_ext/frt_lazy_doc_field.c +18 -25
  48. data/ext/isomorfeus_ferret_ext/frt_lazy_doc_field.h +5 -5
  49. data/ext/isomorfeus_ferret_ext/frt_mdbx_store.c +102 -70
  50. data/ext/isomorfeus_ferret_ext/frt_mempool.c +8 -16
  51. data/ext/isomorfeus_ferret_ext/frt_multimapper.c +23 -46
  52. data/ext/isomorfeus_ferret_ext/frt_multimapper.h +4 -8
  53. data/ext/isomorfeus_ferret_ext/frt_out_stream.c +31 -43
  54. data/ext/isomorfeus_ferret_ext/frt_out_stream.h +2 -2
  55. data/ext/isomorfeus_ferret_ext/frt_posh.c +6 -819
  56. data/ext/isomorfeus_ferret_ext/frt_posh.h +0 -57
  57. data/ext/isomorfeus_ferret_ext/frt_priorityqueue.c +11 -22
  58. data/ext/isomorfeus_ferret_ext/frt_priorityqueue.h +1 -2
  59. data/ext/isomorfeus_ferret_ext/frt_q_boolean.c +131 -217
  60. data/ext/isomorfeus_ferret_ext/frt_q_const_score.c +2 -2
  61. data/ext/isomorfeus_ferret_ext/frt_q_filtered_query.c +4 -4
  62. data/ext/isomorfeus_ferret_ext/frt_q_match_all.c +18 -26
  63. data/ext/isomorfeus_ferret_ext/frt_q_multi_term.c +27 -28
  64. data/ext/isomorfeus_ferret_ext/frt_q_parser.c +49 -98
  65. data/ext/isomorfeus_ferret_ext/frt_q_phrase.c +64 -116
  66. data/ext/isomorfeus_ferret_ext/frt_q_range.c +8 -14
  67. data/ext/isomorfeus_ferret_ext/frt_q_span.c +251 -365
  68. data/ext/isomorfeus_ferret_ext/frt_q_term.c +9 -9
  69. data/ext/isomorfeus_ferret_ext/frt_q_wildcard.c +1 -2
  70. data/ext/isomorfeus_ferret_ext/frt_ram_store.c +134 -85
  71. data/ext/isomorfeus_ferret_ext/frt_search.c +109 -191
  72. data/ext/isomorfeus_ferret_ext/frt_search.h +6 -6
  73. data/ext/isomorfeus_ferret_ext/frt_similarity.c +12 -23
  74. data/ext/isomorfeus_ferret_ext/frt_similarity.h +1 -2
  75. data/ext/isomorfeus_ferret_ext/frt_sort.c +20 -20
  76. data/ext/isomorfeus_ferret_ext/frt_store.c +13 -25
  77. data/ext/isomorfeus_ferret_ext/frt_store.h +86 -52
  78. data/ext/isomorfeus_ferret_ext/frt_term_vectors.c +8 -16
  79. data/ext/isomorfeus_ferret_ext/frt_win32.h +5 -10
  80. data/ext/isomorfeus_ferret_ext/isomorfeus_ferret.c +12 -11
  81. data/ext/isomorfeus_ferret_ext/isomorfeus_ferret.h +11 -13
  82. data/ext/isomorfeus_ferret_ext/lz4.c +422 -195
  83. data/ext/isomorfeus_ferret_ext/lz4.h +114 -46
  84. data/ext/isomorfeus_ferret_ext/lz4frame.c +421 -242
  85. data/ext/isomorfeus_ferret_ext/lz4frame.h +122 -53
  86. data/ext/isomorfeus_ferret_ext/lz4hc.c +127 -111
  87. data/ext/isomorfeus_ferret_ext/lz4hc.h +14 -14
  88. data/ext/isomorfeus_ferret_ext/lz4xxhash.h +1 -1
  89. data/ext/isomorfeus_ferret_ext/mdbx.c +3762 -2526
  90. data/ext/isomorfeus_ferret_ext/mdbx.h +115 -70
  91. data/ext/isomorfeus_ferret_ext/test.c +41 -88
  92. data/ext/isomorfeus_ferret_ext/test.h +3 -6
  93. data/ext/isomorfeus_ferret_ext/test_1710.c +11 -13
  94. data/ext/isomorfeus_ferret_ext/test_analysis.c +32 -64
  95. data/ext/isomorfeus_ferret_ext/test_array.c +6 -12
  96. data/ext/isomorfeus_ferret_ext/test_bitvector.c +12 -24
  97. data/ext/isomorfeus_ferret_ext/test_document.c +23 -33
  98. data/ext/isomorfeus_ferret_ext/test_except.c +10 -21
  99. data/ext/isomorfeus_ferret_ext/test_fields.c +62 -68
  100. data/ext/isomorfeus_ferret_ext/test_file_deleter.c +16 -25
  101. data/ext/isomorfeus_ferret_ext/test_filter.c +22 -33
  102. data/ext/isomorfeus_ferret_ext/test_global.c +14 -29
  103. data/ext/isomorfeus_ferret_ext/test_hash.c +19 -38
  104. data/ext/isomorfeus_ferret_ext/test_hashset.c +8 -16
  105. data/ext/isomorfeus_ferret_ext/test_helper.c +4 -8
  106. data/ext/isomorfeus_ferret_ext/test_highlighter.c +16 -28
  107. data/ext/isomorfeus_ferret_ext/test_index.c +307 -519
  108. data/ext/isomorfeus_ferret_ext/test_lang.c +7 -14
  109. data/ext/isomorfeus_ferret_ext/test_mdbx_store.c +2 -5
  110. data/ext/isomorfeus_ferret_ext/test_mempool.c +5 -10
  111. data/ext/isomorfeus_ferret_ext/test_multimapper.c +3 -6
  112. data/ext/isomorfeus_ferret_ext/test_priorityqueue.c +9 -18
  113. data/ext/isomorfeus_ferret_ext/test_q_const_score.c +4 -6
  114. data/ext/isomorfeus_ferret_ext/test_q_filtered.c +3 -4
  115. data/ext/isomorfeus_ferret_ext/test_q_fuzzy.c +9 -15
  116. data/ext/isomorfeus_ferret_ext/test_q_parser.c +8 -16
  117. data/ext/isomorfeus_ferret_ext/test_q_span.c +19 -35
  118. data/ext/isomorfeus_ferret_ext/test_ram_store.c +14 -13
  119. data/ext/isomorfeus_ferret_ext/test_search.c +66 -115
  120. data/ext/isomorfeus_ferret_ext/test_segments.c +8 -13
  121. data/ext/isomorfeus_ferret_ext/test_similarity.c +2 -4
  122. data/ext/isomorfeus_ferret_ext/test_sort.c +17 -27
  123. data/ext/isomorfeus_ferret_ext/test_store.c +96 -115
  124. data/ext/isomorfeus_ferret_ext/test_term.c +9 -15
  125. data/ext/isomorfeus_ferret_ext/test_term_vectors.c +9 -14
  126. data/ext/isomorfeus_ferret_ext/test_test.c +4 -8
  127. data/ext/isomorfeus_ferret_ext/test_threading.c +15 -21
  128. data/ext/isomorfeus_ferret_ext/testhelper.c +11 -21
  129. data/ext/isomorfeus_ferret_ext/testhelper.h +1 -1
  130. data/ext/isomorfeus_ferret_ext/tests_all.h +1 -2
  131. data/lib/isomorfeus/ferret/index/index.rb +8 -8
  132. data/lib/isomorfeus/ferret/version.rb +1 -1
  133. metadata +32 -6
@@ -0,0 +1,26 @@
1
+ #ifndef FRT_DOC_FIELD_H
2
+ #define FRT_DOC_FIELD_H
3
+
4
+ #include <ruby/encoding.h>
5
+ #include "frt_hash.h"
6
+
7
+ #define FRT_DF_INIT_CAPA 1
8
+
9
+ typedef struct FrtDocField {
10
+ ID name;
11
+ int size;
12
+ int capa;
13
+ int *lengths;
14
+ rb_encoding **encodings; /* used for processing */
15
+ const char **data;
16
+ float boost;
17
+ FrtCompressionType compression_type;
18
+ } FrtDocField;
19
+
20
+ extern FrtDocField *frt_df_new(ID name);
21
+ extern FrtDocField *frt_df_add_data(FrtDocField *df, const char *data, rb_encoding *encoding);
22
+ extern FrtDocField *frt_df_add_data_len(FrtDocField *df, const char *data, int len, rb_encoding *encoding);
23
+ extern void frt_df_destroy(FrtDocField *df);
24
+ extern char *frt_df_to_s(FrtDocField *df);
25
+
26
+ #endif
@@ -1,103 +1,10 @@
1
1
  #include "frt_document.h"
2
2
  #include <string.h>
3
3
 
4
- /****************************************************************************
5
- *
6
- * FrtDocField
7
- *
8
- ****************************************************************************/
9
-
10
- FrtDocField *frt_df_new(ID name) {
11
- FrtDocField *df = FRT_ALLOC(FrtDocField);
12
- df->name = name;
13
- df->size = 0;
14
- df->capa = FRT_DF_INIT_CAPA;
15
- df->data = FRT_ALLOC_N(char *, df->capa);
16
- df->lengths = FRT_ALLOC_N(int, df->capa);
17
- df->encodings = FRT_ALLOC_N(rb_encoding *, df->capa);
18
- df->destroy_data = false;
19
- df->boost = 1.0f;
20
- return df;
21
- }
22
-
23
- FrtDocField *frt_df_add_data_len(FrtDocField *df, char *data, int len, rb_encoding *encoding) {
24
- if (df->size >= df->capa) {
25
- df->capa <<= 2;
26
- FRT_REALLOC_N(df->data, char *, df->capa);
27
- FRT_REALLOC_N(df->lengths, int, df->capa);
28
- FRT_REALLOC_N(df->encodings, rb_encoding *, df->capa);
29
- }
30
- df->data[df->size] = data;
31
- df->lengths[df->size] = len;
32
- df->encodings[df->size] = encoding;
33
- df->size++;
34
- return df;
35
- }
36
-
37
- FrtDocField *frt_df_add_data(FrtDocField *df, char *data, rb_encoding *encoding) {
38
- return frt_df_add_data_len(df, data, strlen(data), encoding);
39
- }
40
-
41
- void frt_df_destroy(FrtDocField *df) {
42
- if (df->destroy_data) {
43
- int i;
44
- for (i = 0; i < df->size; i++) {
45
- free(df->data[i]);
46
- }
47
- }
48
- free(df->data);
49
- free(df->lengths);
50
- free(df->encodings);
51
- free(df);
52
- }
53
-
54
- /*
55
- * Format for one item is: name: "data"
56
- * for more items : name: ["data", "data", "data"]
57
- * internally used for testing, thus encoding can be ignored
58
- */
59
- char *frt_df_to_s(FrtDocField *df) {
60
- const char *df_name = rb_id2name(df->name);
61
- int i, len = 0, namelen = strlen(df_name);
62
- char *str, *s;
63
- for (i = 0; i < df->size; i++) {
64
- len += df->lengths[i] + 4;
65
- }
66
- s = str = FRT_ALLOC_N(char, namelen + len + 5);
67
- memcpy(s, df_name, namelen);
68
- s += namelen;
69
- s = frt_strapp(s, ": ");
70
-
71
- if (df->size > 1) {
72
- s = frt_strapp(s, "[");
73
- }
74
- for (i = 0; i < df->size; i++) {
75
- if (i != 0) {
76
- s = frt_strapp(s, ", ");
77
- }
78
- s = frt_strapp(s, "\"");
79
- memcpy(s, df->data[i], df->lengths[i]);
80
- s += df->lengths[i];
81
- s = frt_strapp(s, "\"");
82
- }
83
-
84
- if (df->size > 1) {
85
- s = frt_strapp(s, "]");
86
- }
87
- *s = 0;
88
- return str;
89
- }
90
-
91
- /****************************************************************************
92
- *
93
- * FrtDocument
94
- *
95
- ****************************************************************************/
96
-
97
4
  FrtDocument *frt_doc_new(void) {
98
5
  FrtDocument *doc = FRT_ALLOC(FrtDocument);
99
6
  doc->field_dict = frt_h_new_ptr((frt_free_ft)&frt_df_destroy);
100
- doc->size = 0;
7
+ doc->field_count = 0;
101
8
  doc->capa = FRT_DOC_INIT_CAPA;
102
9
  doc->fields = FRT_ALLOC_N(FrtDocField *, doc->capa);
103
10
  doc->boost = 1.0f;
@@ -109,12 +16,12 @@ FrtDocField *frt_doc_add_field(FrtDocument *doc, FrtDocField *df) {
109
16
  FRT_RAISE(FRT_EXCEPTION, "tried to add %s field which alread existed\n",
110
17
  rb_id2name(df->name));
111
18
  }
112
- if (doc->size >= doc->capa) {
19
+ if (doc->field_count >= doc->capa) {
113
20
  doc->capa <<= 1;
114
21
  FRT_REALLOC_N(doc->fields, FrtDocField *, doc->capa);
115
22
  }
116
- doc->fields[doc->size] = df;
117
- doc->size++;
23
+ doc->fields[doc->field_count] = df;
24
+ doc->field_count++;
118
25
  return df;
119
26
  }
120
27
 
@@ -2,44 +2,16 @@
2
2
  #define FRT_DOCUMENT_H
3
3
 
4
4
  #include "frt_global.h"
5
- #include "frt_hash.h"
5
+ #include "frt_doc_field.h"
6
6
  #include <ruby/encoding.h>
7
7
 
8
- /****************************************************************************
9
- *
10
- * FrtDocField
11
- *
12
- ****************************************************************************/
13
-
14
- #define FRT_DF_INIT_CAPA 1
15
- typedef struct FrtDocField {
16
- ID name;
17
- int size;
18
- int capa;
19
- int *lengths;
20
- rb_encoding **encodings; /* used for processing */
21
- char **data;
22
- float boost;
23
- FrtCompressionType compression;
24
- bool destroy_data : 1;
25
- } FrtDocField;
26
-
27
- extern FrtDocField *frt_df_new(ID name);
28
- extern FrtDocField *frt_df_add_data(FrtDocField *df, char *data, rb_encoding *encoding);
29
- extern FrtDocField *frt_df_add_data_len(FrtDocField *df, char *data, int len, rb_encoding *encoding);
30
- extern void frt_df_destroy(FrtDocField *df);
31
- extern char *frt_df_to_s(FrtDocField *df);
32
-
33
- /****************************************************************************
34
- *
35
- * FrtDocument
36
- *
37
- ****************************************************************************/
38
-
39
8
  #define FRT_DOC_INIT_CAPA 8
9
+
40
10
  typedef struct FrtDocument {
11
+ // frt_uchar ulid[16];
12
+ // char *ulid_c;
41
13
  FrtHash *field_dict;
42
- int size;
14
+ int field_count;
43
15
  int capa;
44
16
  FrtDocField **fields;
45
17
  float boost;
@@ -1,3 +1,8 @@
1
+ /* prevent warning: #warning Please include winsock2.h before windows.h [-Wcpp] */
2
+ #ifdef _WIN32
3
+ # include <winsock2.h>
4
+ #endif
5
+
1
6
  #include <stdarg.h>
2
7
  #include "bzlib.h"
3
8
  #include "frt_global.h"
@@ -61,11 +66,9 @@ void frt_xraise(int excode, const char *const msg) {
61
66
 
62
67
  if (!top_context) {
63
68
  FRT_XEXIT(ERROR_TYPES[excode], msg);
64
- }
65
- else if (!top_context->in_finally) {
69
+ } else if (!top_context->in_finally) {
66
70
  frt_xraise_context(top_context, excode, msg);
67
- }
68
- else if (top_context->handled) {
71
+ } else if (top_context->handled) {
69
72
  top_context->msg = msg;
70
73
  top_context->excode = excode;
71
74
  top_context->handled = false;
@@ -122,8 +125,7 @@ void frt_xpop_context(void) {
122
125
  if (!top_cxt->handled) {
123
126
  if (context) {
124
127
  frt_xraise_context(context, top_cxt->excode, top_cxt->msg);
125
- }
126
- else {
128
+ } else {
127
129
  FRT_XEXIT(ERROR_TYPES[top_cxt->excode], top_cxt->msg);
128
130
  }
129
131
  }
@@ -112,8 +112,7 @@ extern const char *frt_err_code_to_type(const int err_code);
112
112
 
113
113
  extern void frb_rb_raise(const char *file, int line_num, const char *func, const char *err_type, const char *fmt, ...);
114
114
 
115
- typedef struct frt_xcontext_t
116
- {
115
+ typedef struct frt_xcontext_t {
117
116
  jmp_buf jbuf;
118
117
  struct frt_xcontext_t *next;
119
118
  const char *msg;
@@ -9,22 +9,19 @@
9
9
  *
10
10
  ***************************************************************************/
11
11
 
12
- static unsigned long long field_index_hash(const void *p)
13
- {
12
+ static unsigned long field_index_hash(const void *p) {
14
13
  FrtFieldIndex *self = (FrtFieldIndex *)p;
15
14
  return frt_str_hash(rb_id2name(self->field)) ^ (unsigned long long)(self->klass);
16
15
  }
17
16
 
18
- static int field_index_eq(const void *p1, const void *p2)
19
- {
17
+ static int field_index_eq(const void *p1, const void *p2) {
20
18
  FrtFieldIndex *fi1 = (FrtFieldIndex *)p1;
21
19
  FrtFieldIndex *fi2 = (FrtFieldIndex *)p2;
22
20
  return (fi1->field == fi2->field) &&
23
21
  (fi1->klass->type == fi2->klass->type);
24
22
  }
25
23
 
26
- static void field_index_destroy(void *p)
27
- {
24
+ static void field_index_destroy(void *p) {
28
25
  FrtFieldIndex *self = (FrtFieldIndex *)p;
29
26
  if (self->index) {
30
27
  self->klass->destroy_index(self->index);
@@ -63,7 +60,7 @@ FrtFieldIndex *frt_field_index_get(FrtIndexReader *ir, ID field, const FrtFieldI
63
60
  * just use the field_infos field symbol */
64
61
  self->field = fi->name;
65
62
 
66
- length = ir->max_doc(ir);
63
+ length = ir->max_doc_num(ir);
67
64
  if (length > 0) {
68
65
  FRT_TRY
69
66
  {
@@ -95,10 +92,7 @@ FrtFieldIndex *frt_field_index_get(FrtIndexReader *ir, ID field, const FrtFieldI
95
92
  * index should only be used for sorting and not as a field cache of the
96
93
  * column's value.
97
94
  ******************************************************************************/
98
- static void byte_handle_term(void *index_ptr,
99
- FrtTermDocEnum *tde,
100
- const char *text)
101
- {
95
+ static void byte_handle_term(void *index_ptr, FrtTermDocEnum *tde, const char *text) {
102
96
  long *index = (long *)index_ptr;
103
97
  long val = index[-1]++;
104
98
  (void)text;
@@ -107,15 +101,13 @@ static void byte_handle_term(void *index_ptr,
107
101
  }
108
102
  }
109
103
 
110
- static void *byte_create_index(int size)
111
- {
104
+ static void *byte_create_index(int size) {
112
105
  long *index = FRT_ALLOC_AND_ZERO_N(long, size + 1);
113
106
  index[0] = 1;
114
107
  return &index[1];
115
108
  }
116
109
 
117
- static void byte_destroy_index(void *p)
118
- {
110
+ static void byte_destroy_index(void *p) {
119
111
  long *index = (long *)p;
120
112
  free(&index[-1]);
121
113
  }
@@ -130,15 +122,11 @@ const FrtFieldIndexClass FRT_BYTE_FIELD_INDEX_CLASS = {
130
122
  /******************************************************************************
131
123
  * IntegerFieldIndex < FieldIndex
132
124
  ******************************************************************************/
133
- static void *integer_create_index(int size)
134
- {
125
+ static void *integer_create_index(int size) {
135
126
  return FRT_ALLOC_AND_ZERO_N(long, size);
136
127
  }
137
128
 
138
- static void integer_handle_term(void *index_ptr,
139
- FrtTermDocEnum *tde,
140
- const char *text)
141
- {
129
+ static void integer_handle_term(void *index_ptr, FrtTermDocEnum *tde, const char *text) {
142
130
  long *index = (long *)index_ptr;
143
131
  long val;
144
132
  sscanf(text, "%ld", &val);
@@ -158,15 +146,11 @@ const FrtFieldIndexClass FRT_INTEGER_FIELD_INDEX_CLASS = {
158
146
  * FloatFieldIndex < FieldIndex
159
147
  ******************************************************************************/
160
148
  #define VALUES_ARRAY_START_SIZE 8
161
- static void *float_create_index(int size)
162
- {
149
+ static void *float_create_index(int size) {
163
150
  return FRT_ALLOC_AND_ZERO_N(float, size);
164
151
  }
165
152
 
166
- static void float_handle_term(void *index_ptr,
167
- FrtTermDocEnum *tde,
168
- const char *text)
169
- {
153
+ static void float_handle_term(void *index_ptr, FrtTermDocEnum *tde, const char *text) {
170
154
  float *index = (float *)index_ptr;
171
155
  float val;
172
156
  sscanf(text, "%g", &val);
@@ -186,8 +170,7 @@ const FrtFieldIndexClass FRT_FLOAT_FIELD_INDEX_CLASS = {
186
170
  * StringFieldIndex < FieldIndex
187
171
  ******************************************************************************/
188
172
 
189
- static void *string_create_index(int size)
190
- {
173
+ static void *string_create_index(int size) {
191
174
  FrtStringIndex *self = FRT_ALLOC_AND_ZERO(FrtStringIndex);
192
175
  self->size = size;
193
176
  self->index = FRT_ALLOC_AND_ZERO_N(long, size);
@@ -197,8 +180,7 @@ static void *string_create_index(int size)
197
180
  return self;
198
181
  }
199
182
 
200
- static void string_destroy_index(void *p)
201
- {
183
+ static void string_destroy_index(void *p) {
202
184
  FrtStringIndex *self = (FrtStringIndex *)p;
203
185
  int i;
204
186
  free(self->index);
@@ -211,8 +193,7 @@ static void string_destroy_index(void *p)
211
193
 
212
194
  static void string_handle_term(void *index_ptr,
213
195
  FrtTermDocEnum *tde,
214
- const char *text)
215
- {
196
+ const char *text) {
216
197
  FrtStringIndex *index = (FrtStringIndex *)index_ptr;
217
198
  if (index->v_size >= index->v_capa) {
218
199
  index->v_capa *= 2;
@@ -3,12 +3,6 @@
3
3
 
4
4
  #include "frt_index.h"
5
5
 
6
- /***************************************************************************
7
- *
8
- * FrtFieldIndex
9
- *
10
- ***************************************************************************/
11
-
12
6
  typedef struct FrtStringIndex {
13
7
  int size;
14
8
  long *index;
@@ -0,0 +1,69 @@
1
+ #include "frt_field_info.h"
2
+
3
+ FrtFieldInfo *frt_fi_alloc(void) {
4
+ return FRT_ALLOC(FrtFieldInfo);
5
+ }
6
+
7
+ FrtFieldInfo *frt_fi_init(FrtFieldInfo *fi, ID name, unsigned int bits) {
8
+ assert(NULL != name);
9
+ bits_check(bits);
10
+ fi->name = name;
11
+ fi->boost = 1.0f;
12
+ fi->bits = bits;
13
+ fi->number = 0;
14
+ fi->ref_cnt = 1;
15
+ fi->rfi = Qnil;
16
+ return fi;
17
+ }
18
+
19
+ FrtFieldInfo *frt_fi_new(ID name, unsigned int bits) {
20
+ FrtFieldInfo *fi = frt_fi_alloc();
21
+ return frt_fi_init(fi, name, bits);
22
+ }
23
+
24
+ void frt_fi_deref(FrtFieldInfo *fi) {
25
+ if (FRT_DEREF(fi) == 0) free(fi);
26
+ }
27
+
28
+ void bits_check(unsigned int bits) {
29
+ if (!bits_is_indexed(bits) && bits_store_term_vector(bits)) {
30
+ FRT_RAISE(FRT_ARG_ERROR, "You can't store the term vectors of an unindexed field.");
31
+ }
32
+ if (bits_is_compressed(bits) && !bits_is_stored(bits)) {
33
+ FRT_RAISE(FRT_ARG_ERROR, "Field must be stored for compression to be useful.");
34
+ }
35
+ }
36
+
37
+ FrtCompressionType bits_get_compression_type(unsigned int bits) {
38
+ if (bits_is_compressed_brotli(bits)) {
39
+ return FRT_COMPRESSION_BROTLI;
40
+ } else if (bits_is_compressed_bz2(bits)) {
41
+ return FRT_COMPRESSION_BZ2;
42
+ } else if (bits_is_compressed_lz4(bits)) {
43
+ return FRT_COMPRESSION_LZ4;
44
+ } else {
45
+ return FRT_COMPRESSION_NONE;
46
+ }
47
+ }
48
+
49
+ char *frt_fi_to_s(FrtFieldInfo *fi) {
50
+ const char *fi_name = rb_id2name(fi->name);
51
+ char *str = FRT_ALLOC_N(char, strlen(fi_name) + 200);
52
+ char *s = str;
53
+ s += sprintf(str, "[\"%s\":(%s%s%s%s%s%s%s%s", fi_name,
54
+ bits_is_stored(fi->bits) ? "is_stored, " : "",
55
+ bits_is_compressed(fi->bits) ? "is_compressed, " : "",
56
+ bits_is_indexed(fi->bits) ? "is_indexed, " : "",
57
+ bits_is_tokenized(fi->bits) ? "is_tokenized, " : "",
58
+ bits_omit_norms(fi->bits) ? "omit_norms, " : "",
59
+ bits_store_term_vector(fi->bits) ? "store_term_vector, " : "",
60
+ bits_store_positions(fi->bits) ? "store_positions, " : "",
61
+ bits_store_offsets(fi->bits) ? "store_offsets, " : "");
62
+ s -= 2;
63
+ if (*s != ',') {
64
+ s += 2;
65
+ }
66
+
67
+ sprintf(s, ")]");
68
+ return str;
69
+ }
@@ -0,0 +1,49 @@
1
+ #ifndef FRT_FIELD_INFO_H
2
+ #define FRT_FIELD_INFO_H
3
+
4
+ #include "frt_global.h"
5
+ #include <ruby.h>
6
+
7
+ #define FRT_FI_DEFAULTS_BM FRT_FI_IS_STORED_BM | FRT_FI_IS_INDEXED_BM | FRT_FI_IS_TOKENIZED_BM | FRT_FI_STORE_TERM_VECTOR_BM | FRT_FI_STORE_POSITIONS_BM | FRT_FI_STORE_OFFSETS_BM
8
+ #define FRT_FI_IS_STORED_BM 0x001
9
+ #define FRT_FI_IS_INDEXED_BM 0x002
10
+ #define FRT_FI_IS_TOKENIZED_BM 0x004
11
+ #define FRT_FI_OMIT_NORMS_BM 0x008
12
+ #define FRT_FI_STORE_TERM_VECTOR_BM 0x010
13
+ #define FRT_FI_STORE_POSITIONS_BM 0x020
14
+ #define FRT_FI_STORE_OFFSETS_BM 0x040
15
+ #define FRT_FI_COMPRESSION_BROTLI_BM 0x080
16
+ #define FRT_FI_COMPRESSION_BZ2_BM 0x100
17
+ #define FRT_FI_COMPRESSION_LZ4_BM 0x200
18
+
19
+ typedef struct FrtFieldInfo {
20
+ ID name;
21
+ float boost;
22
+ unsigned int bits;
23
+ int number;
24
+ _Atomic unsigned int ref_cnt;
25
+ VALUE rfi;
26
+ } FrtFieldInfo;
27
+
28
+ extern FrtFieldInfo *frt_fi_alloc();
29
+ extern FrtFieldInfo *frt_fi_init(FrtFieldInfo *fi, ID name, unsigned int bits);
30
+ extern FrtFieldInfo *frt_fi_new(ID name, unsigned int bits);
31
+ extern char *frt_fi_to_s(FrtFieldInfo *fi);
32
+ extern void frt_fi_deref(FrtFieldInfo *fi);
33
+
34
+ extern void bits_check(unsigned int bits);
35
+ extern FrtCompressionType bits_get_compression_type(unsigned int bits);
36
+ #define bits_is_stored(bits) ((bits & FRT_FI_IS_STORED_BM) != 0)
37
+ #define bits_is_indexed(bits) ((bits & FRT_FI_IS_INDEXED_BM) != 0)
38
+ #define bits_is_tokenized(bits) ((bits & FRT_FI_IS_TOKENIZED_BM) != 0)
39
+ #define bits_omit_norms(bits) ((bits & FRT_FI_OMIT_NORMS_BM) != 0)
40
+ #define bits_store_term_vector(bits) ((bits & FRT_FI_STORE_TERM_VECTOR_BM) != 0)
41
+ #define bits_store_positions(bits) ((bits & FRT_FI_STORE_POSITIONS_BM) != 0)
42
+ #define bits_store_offsets(bits) ((bits & FRT_FI_STORE_OFFSETS_BM) != 0)
43
+ #define bits_has_norms(bits) ((bits & (FRT_FI_OMIT_NORMS_BM|FRT_FI_IS_INDEXED_BM)) == FRT_FI_IS_INDEXED_BM)
44
+ #define bits_is_compressed_brotli(bits) ((bits & FRT_FI_COMPRESSION_BROTLI_BM) != 0)
45
+ #define bits_is_compressed_bz2(bits) ((bits & FRT_FI_COMPRESSION_BZ2_BM) != 0)
46
+ #define bits_is_compressed_lz4(bits) ((bits & FRT_FI_COMPRESSION_LZ4_BM) != 0)
47
+ #define bits_is_compressed(bits) (bits_is_compressed_brotli(bits) || bits_is_compressed_bz2(bits) || bits_is_compressed_lz4(bits))
48
+
49
+ #endif
@@ -0,0 +1,196 @@
1
+ #include "frt_field_infos.h"
2
+ #include "frt_except.h"
3
+
4
+ FrtFieldInfos *frt_fis_alloc(void) {
5
+ return FRT_ALLOC(FrtFieldInfos);
6
+ }
7
+
8
+ FrtFieldInfos *frt_fis_init(FrtFieldInfos *fis, unsigned int bits) {
9
+ bits_check(bits);
10
+ fis->field_dict = frt_h_new_ptr((frt_free_ft)&frt_fi_deref);
11
+ fis->size = 0;
12
+ fis->capa = FIELD_INFOS_INIT_CAPA;
13
+ fis->fields = FRT_ALLOC_N(FrtFieldInfo *, fis->capa);
14
+ fis->bits = bits;
15
+ fis->ref_cnt = 1;
16
+ fis->rfis = Qnil;
17
+ return fis;
18
+ }
19
+
20
+ FrtFieldInfos *frt_fis_new(unsigned int bits) {
21
+ FrtFieldInfos *fis = frt_fis_alloc();
22
+ return frt_fis_init(fis, bits);
23
+ }
24
+
25
+ FrtFieldInfo *frt_fis_add_field(FrtFieldInfos *fis, FrtFieldInfo *fi) {
26
+ if (fis->size == fis->capa) {
27
+ fis->capa <<= 1;
28
+ FRT_REALLOC_N(fis->fields, FrtFieldInfo *, fis->capa);
29
+ }
30
+ if (!frt_h_set_safe(fis->field_dict, (void *)fi->name, fi)) {
31
+ FRT_RAISE(FRT_ARG_ERROR, "Field :%s already exists", rb_id2name(fi->name));
32
+ }
33
+ FRT_REF(fi);
34
+ fi->number = fis->size;
35
+ fis->fields[fis->size] = fi;
36
+ fis->size++;
37
+ return fi;
38
+ }
39
+
40
+ FrtFieldInfo *frt_fis_get_field(FrtFieldInfos *fis, ID name) {
41
+ return (FrtFieldInfo *)frt_h_get(fis->field_dict, (void *)name);
42
+ }
43
+
44
+ int frt_fis_get_field_num(FrtFieldInfos *fis, ID name) {
45
+ FrtFieldInfo *fi = (FrtFieldInfo *)frt_h_get(fis->field_dict, (void *)name);
46
+ if (fi) { return fi->number; }
47
+ else { return -1; }
48
+ }
49
+
50
+ FrtFieldInfo *frt_fis_get_or_add_field(FrtFieldInfos *fis, ID name) {
51
+ FrtFieldInfo *fi = (FrtFieldInfo *)frt_h_get(fis->field_dict, (void *)name);
52
+ if (!fi) {
53
+ fi = (FrtFieldInfo*)frt_fi_new(name, fis->bits);
54
+ frt_fis_add_field(fis, fi);
55
+ }
56
+ return fi;
57
+ }
58
+
59
+ bool frt_fis_has_vectors(FrtFieldInfos *fis) {
60
+ int i;
61
+ const int fis_size = fis->size;
62
+
63
+ for (i = 0; i < fis_size; i++) {
64
+ if (bits_store_term_vector(fis->fields[i]->bits)) {
65
+ return true;
66
+ }
67
+ }
68
+ return false;
69
+ }
70
+
71
+ FrtFieldInfos *frt_fis_read(FrtInStream *is) {
72
+ FrtFieldInfos *volatile fis = NULL;
73
+ char *field_name;
74
+ FRT_TRY
75
+ do {
76
+ volatile int i;
77
+ union { frt_u32 i; float f; } tmp;
78
+ FrtFieldInfo *volatile fi;
79
+ fis = frt_fis_new(frt_is_read_vint(is));
80
+ for (i = frt_is_read_vint(is); i > 0; i--) {
81
+ fi = FRT_ALLOC_AND_ZERO(FrtFieldInfo);
82
+ FRT_TRY
83
+ field_name = frt_is_read_string_safe(is);
84
+ fi->name = rb_intern(field_name);
85
+ free(field_name);
86
+ tmp.i = frt_is_read_u32(is);
87
+ fi->boost = tmp.f;
88
+ fi->bits = frt_is_read_vint(is);
89
+ FRT_XCATCHALL
90
+ free(fi);
91
+ FRT_XENDTRY
92
+ frt_fis_add_field(fis, fi);
93
+ fi->ref_cnt = 1;
94
+ }
95
+ } while (0);
96
+ FRT_XCATCHALL
97
+ frt_fis_deref(fis);
98
+ FRT_XENDTRY
99
+ return fis;
100
+ }
101
+
102
+ void frt_fis_write(FrtFieldInfos *fis, FrtOutStream *os) {
103
+ int i;
104
+ union { frt_u32 i; float f; } tmp;
105
+ FrtFieldInfo *fi;
106
+ const int fis_size = fis->size;
107
+
108
+ frt_os_write_vint(os, fis->bits);
109
+ frt_os_write_vint(os, fis->size);
110
+
111
+ for (i = 0; i < fis_size; i++) {
112
+ fi = fis->fields[i];
113
+
114
+ frt_os_write_string(os, rb_id2name(fi->name));
115
+ tmp.f = fi->boost;
116
+ frt_os_write_u32(os, tmp.i);
117
+ frt_os_write_vint(os, fi->bits);
118
+ }
119
+ }
120
+
121
+ static const char *store_str[] = {
122
+ ":no",
123
+ ":yes",
124
+ ":compressed"
125
+ };
126
+
127
+ static const char *fi_store_str(FrtFieldInfo *fi) {
128
+ return store_str[bits_is_compressed(fi->bits) ? 2 : fi->bits & 0x1];
129
+ }
130
+
131
+ static const char *index_str[] = {
132
+ ":no",
133
+ ":untokenized",
134
+ "",
135
+ ":yes",
136
+ "",
137
+ ":untokenized_omit_norms",
138
+ "",
139
+ ":omit_norms"
140
+ };
141
+
142
+ static const char *fi_index_str(FrtFieldInfo *fi) {
143
+ return index_str[(fi->bits >> 1) & 0x7];
144
+ }
145
+
146
+ static const char *term_vector_str[] = {
147
+ ":no",
148
+ ":yes",
149
+ "",
150
+ ":with_positions",
151
+ "",
152
+ ":with_offsets",
153
+ "",
154
+ ":with_positions_offsets"
155
+ };
156
+
157
+ static const char *fi_term_vector_str(FrtFieldInfo *fi) {
158
+ return term_vector_str[(fi->bits >> 4) & 0x7];
159
+ }
160
+
161
+ char *frt_fis_to_s(FrtFieldInfos *fis) {
162
+ int i, pos, capa = 200 + fis->size * 120;
163
+ char *buf = FRT_ALLOC_N(char, capa);
164
+ FrtFieldInfo *fi;
165
+ const int fis_size = fis->size;
166
+
167
+ pos = sprintf(buf,
168
+ "default:\n"
169
+ " store: %s\n"
170
+ " index: %s\n"
171
+ " term_vector: %s\n"
172
+ "fields:\n",
173
+ store_str[bits_is_compressed(fis->bits) ? 2 : fis->bits & 0x1],
174
+ index_str[(fis->bits >> 1) & 0x7],
175
+ term_vector_str[(fis->bits >> 4) & 0x7]);
176
+ for (i = 0; i < fis_size; i++) {
177
+ fi = fis->fields[i];
178
+ pos += sprintf(buf + pos,
179
+ " %s:\n"
180
+ " boost: %f\n"
181
+ " store: %s\n"
182
+ " index: %s\n"
183
+ " term_vector: %s\n",
184
+ rb_id2name(fi->name), fi->boost, fi_store_str(fi),
185
+ fi_index_str(fi), fi_term_vector_str(fi));
186
+ }
187
+ return buf;
188
+ }
189
+
190
+ void frt_fis_deref(FrtFieldInfos *fis) {
191
+ if (FRT_DEREF(fis) == 0) {
192
+ frt_h_destroy(fis->field_dict);
193
+ free(fis->fields);
194
+ free(fis);
195
+ }
196
+ }