isomorfeus-ferret 0.12.7 → 0.13.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (164) hide show
  1. checksums.yaml +4 -4
  2. data/LICENSE +101 -19
  3. data/README.md +54 -1
  4. data/ext/isomorfeus_ferret_ext/bm_bitvector.c +22 -30
  5. data/ext/isomorfeus_ferret_ext/bm_hash.c +6 -12
  6. data/ext/isomorfeus_ferret_ext/bm_micro_string.c +3 -6
  7. data/ext/isomorfeus_ferret_ext/bm_store.c +11 -22
  8. data/ext/isomorfeus_ferret_ext/brotli_common_dictionary.c +1 -1
  9. data/ext/isomorfeus_ferret_ext/brotli_dec_decode.c +1 -1
  10. data/ext/isomorfeus_ferret_ext/bzip_blocksort.c +1094 -0
  11. data/ext/isomorfeus_ferret_ext/bzip_huffman.c +205 -0
  12. data/ext/isomorfeus_ferret_ext/bzlib.c +1572 -0
  13. data/ext/isomorfeus_ferret_ext/bzlib.h +282 -0
  14. data/ext/isomorfeus_ferret_ext/bzlib_compress.c +672 -0
  15. data/ext/isomorfeus_ferret_ext/bzlib_crctable.c +104 -0
  16. data/ext/isomorfeus_ferret_ext/bzlib_decompress.c +652 -0
  17. data/ext/isomorfeus_ferret_ext/bzlib_private.h +509 -0
  18. data/ext/isomorfeus_ferret_ext/bzlib_randtable.c +84 -0
  19. data/ext/isomorfeus_ferret_ext/fio_tmpfile.h +53 -53
  20. data/ext/isomorfeus_ferret_ext/frb_analysis.c +785 -1192
  21. data/ext/isomorfeus_ferret_ext/frb_index.c +492 -474
  22. data/ext/isomorfeus_ferret_ext/frb_qparser.c +48 -60
  23. data/ext/isomorfeus_ferret_ext/frb_search.c +1520 -1002
  24. data/ext/isomorfeus_ferret_ext/frb_store.c +96 -96
  25. data/ext/isomorfeus_ferret_ext/frb_threading.h +0 -1
  26. data/ext/isomorfeus_ferret_ext/frb_utils.c +147 -196
  27. data/ext/isomorfeus_ferret_ext/frt_analysis.c +695 -1090
  28. data/ext/isomorfeus_ferret_ext/frt_analysis.h +174 -170
  29. data/ext/isomorfeus_ferret_ext/frt_array.c +2 -4
  30. data/ext/isomorfeus_ferret_ext/frt_bitvector.c +9 -16
  31. data/ext/isomorfeus_ferret_ext/frt_bitvector.h +32 -81
  32. data/ext/isomorfeus_ferret_ext/frt_document.c +15 -20
  33. data/ext/isomorfeus_ferret_ext/frt_document.h +10 -10
  34. data/ext/isomorfeus_ferret_ext/frt_except.c +5 -12
  35. data/ext/isomorfeus_ferret_ext/frt_field_index.c +3 -3
  36. data/ext/isomorfeus_ferret_ext/frt_field_index.h +6 -7
  37. data/ext/isomorfeus_ferret_ext/frt_filter.c +35 -46
  38. data/ext/isomorfeus_ferret_ext/frt_fs_store.c +1 -0
  39. data/ext/isomorfeus_ferret_ext/frt_global.c +105 -63
  40. data/ext/isomorfeus_ferret_ext/frt_global.h +7 -3
  41. data/ext/isomorfeus_ferret_ext/frt_hash.c +1 -2
  42. data/ext/isomorfeus_ferret_ext/frt_ind.c +32 -35
  43. data/ext/isomorfeus_ferret_ext/frt_ind.h +9 -9
  44. data/ext/isomorfeus_ferret_ext/frt_index.c +580 -399
  45. data/ext/isomorfeus_ferret_ext/frt_index.h +272 -291
  46. data/ext/isomorfeus_ferret_ext/frt_mempool.c +1 -2
  47. data/ext/isomorfeus_ferret_ext/frt_multimapper.c +4 -7
  48. data/ext/isomorfeus_ferret_ext/frt_q_boolean.c +67 -91
  49. data/ext/isomorfeus_ferret_ext/frt_q_const_score.c +35 -38
  50. data/ext/isomorfeus_ferret_ext/frt_q_filtered_query.c +53 -72
  51. data/ext/isomorfeus_ferret_ext/frt_q_fuzzy.c +25 -32
  52. data/ext/isomorfeus_ferret_ext/frt_q_match_all.c +21 -23
  53. data/ext/isomorfeus_ferret_ext/frt_q_multi_term.c +66 -103
  54. data/ext/isomorfeus_ferret_ext/frt_q_parser.c +207 -195
  55. data/ext/isomorfeus_ferret_ext/frt_q_phrase.c +20 -16
  56. data/ext/isomorfeus_ferret_ext/frt_q_prefix.c +17 -14
  57. data/ext/isomorfeus_ferret_ext/frt_q_range.c +102 -131
  58. data/ext/isomorfeus_ferret_ext/frt_q_span.c +179 -178
  59. data/ext/isomorfeus_ferret_ext/frt_q_term.c +47 -60
  60. data/ext/isomorfeus_ferret_ext/frt_q_wildcard.c +18 -16
  61. data/ext/isomorfeus_ferret_ext/frt_ram_store.c +45 -84
  62. data/ext/isomorfeus_ferret_ext/frt_search.c +105 -146
  63. data/ext/isomorfeus_ferret_ext/frt_search.h +331 -320
  64. data/ext/isomorfeus_ferret_ext/frt_similarity.c +5 -13
  65. data/ext/isomorfeus_ferret_ext/frt_similarity.h +7 -12
  66. data/ext/isomorfeus_ferret_ext/frt_sort.c +105 -149
  67. data/ext/isomorfeus_ferret_ext/frt_store.c +13 -7
  68. data/ext/isomorfeus_ferret_ext/frt_store.h +10 -2
  69. data/ext/isomorfeus_ferret_ext/frt_threading.h +0 -1
  70. data/ext/isomorfeus_ferret_ext/isomorfeus_ferret.c +21 -109
  71. data/ext/isomorfeus_ferret_ext/isomorfeus_ferret.h +2 -32
  72. data/ext/isomorfeus_ferret_ext/lz4.c +2495 -0
  73. data/ext/isomorfeus_ferret_ext/lz4.h +774 -0
  74. data/ext/isomorfeus_ferret_ext/lz4frame.c +1899 -0
  75. data/ext/isomorfeus_ferret_ext/lz4frame.h +623 -0
  76. data/ext/isomorfeus_ferret_ext/lz4hc.c +1615 -0
  77. data/ext/isomorfeus_ferret_ext/lz4hc.h +413 -0
  78. data/ext/isomorfeus_ferret_ext/lz4xxhash.c +1030 -0
  79. data/ext/isomorfeus_ferret_ext/lz4xxhash.h +328 -0
  80. data/ext/isomorfeus_ferret_ext/stem_modules.h +0 -86
  81. data/ext/isomorfeus_ferret_ext/test.c +1 -2
  82. data/ext/isomorfeus_ferret_ext/test_1710.c +11 -12
  83. data/ext/isomorfeus_ferret_ext/test_analysis.c +590 -583
  84. data/ext/isomorfeus_ferret_ext/test_compound_io.c +1 -1
  85. data/ext/isomorfeus_ferret_ext/test_document.c +19 -15
  86. data/ext/isomorfeus_ferret_ext/test_except.c +1 -2
  87. data/ext/isomorfeus_ferret_ext/test_fields.c +59 -60
  88. data/ext/isomorfeus_ferret_ext/test_file_deleter.c +10 -27
  89. data/ext/isomorfeus_ferret_ext/test_filter.c +11 -8
  90. data/ext/isomorfeus_ferret_ext/test_hash.c +2 -2
  91. data/ext/isomorfeus_ferret_ext/test_hashset.c +1 -1
  92. data/ext/isomorfeus_ferret_ext/test_highlighter.c +15 -11
  93. data/ext/isomorfeus_ferret_ext/test_index.c +372 -365
  94. data/ext/isomorfeus_ferret_ext/test_q_const_score.c +5 -3
  95. data/ext/isomorfeus_ferret_ext/test_q_filtered.c +5 -3
  96. data/ext/isomorfeus_ferret_ext/test_q_fuzzy.c +13 -10
  97. data/ext/isomorfeus_ferret_ext/test_q_parser.c +45 -7
  98. data/ext/isomorfeus_ferret_ext/test_q_span.c +15 -12
  99. data/ext/isomorfeus_ferret_ext/test_ram_store.c +3 -3
  100. data/ext/isomorfeus_ferret_ext/test_search.c +60 -62
  101. data/ext/isomorfeus_ferret_ext/test_segments.c +5 -4
  102. data/ext/isomorfeus_ferret_ext/test_sort.c +17 -14
  103. data/ext/isomorfeus_ferret_ext/test_store.c +2 -0
  104. data/ext/isomorfeus_ferret_ext/test_term.c +3 -1
  105. data/ext/isomorfeus_ferret_ext/test_term_vectors.c +9 -10
  106. data/ext/isomorfeus_ferret_ext/test_test.c +1 -2
  107. data/ext/isomorfeus_ferret_ext/test_threading.c +9 -10
  108. data/ext/isomorfeus_ferret_ext/testhelper.c +1 -2
  109. data/lib/isomorfeus/ferret/version.rb +1 -1
  110. metadata +27 -57
  111. data/ext/isomorfeus_ferret_ext/email.rl +0 -21
  112. data/ext/isomorfeus_ferret_ext/frt_scanner.c +0 -900
  113. data/ext/isomorfeus_ferret_ext/frt_scanner.h +0 -28
  114. data/ext/isomorfeus_ferret_ext/frt_scanner_mb.c +0 -6706
  115. data/ext/isomorfeus_ferret_ext/frt_scanner_utf8.c +0 -4420
  116. data/ext/isomorfeus_ferret_ext/scanner.h +0 -28
  117. data/ext/isomorfeus_ferret_ext/scanner.in +0 -43
  118. data/ext/isomorfeus_ferret_ext/scanner.rl +0 -84
  119. data/ext/isomorfeus_ferret_ext/scanner_mb.rl +0 -200
  120. data/ext/isomorfeus_ferret_ext/scanner_utf8.rl +0 -85
  121. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_basque.c +0 -1167
  122. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_basque.h +0 -6
  123. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_catalan.c +0 -1433
  124. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_catalan.h +0 -6
  125. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_danish.c +0 -301
  126. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_danish.h +0 -6
  127. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_dutch.c +0 -590
  128. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_dutch.h +0 -6
  129. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_english.c +0 -1049
  130. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_english.h +0 -6
  131. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_finnish.c +0 -705
  132. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_finnish.h +0 -6
  133. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_french.c +0 -1239
  134. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_french.h +0 -6
  135. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_german.c +0 -477
  136. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_german.h +0 -6
  137. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_hungarian.c +0 -1217
  138. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_hungarian.h +0 -7
  139. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_indonesian.c +0 -394
  140. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_indonesian.h +0 -6
  141. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_irish.c +0 -457
  142. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_irish.h +0 -6
  143. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_italian.c +0 -1009
  144. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_italian.h +0 -6
  145. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_norwegian.c +0 -259
  146. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_norwegian.h +0 -6
  147. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_porter.c +0 -704
  148. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_porter.h +0 -6
  149. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_portuguese.c +0 -948
  150. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_portuguese.h +0 -6
  151. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_spanish.c +0 -1028
  152. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_spanish.h +0 -6
  153. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_swedish.c +0 -275
  154. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_swedish.h +0 -6
  155. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_2_hungarian.c +0 -849
  156. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_2_hungarian.h +0 -6
  157. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_2_romanian.c +0 -952
  158. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_2_romanian.h +0 -6
  159. data/ext/isomorfeus_ferret_ext/stem_KOI8_R_russian.c +0 -669
  160. data/ext/isomorfeus_ferret_ext/stem_KOI8_R_russian.h +0 -6
  161. data/ext/isomorfeus_ferret_ext/stem_modules.txt +0 -63
  162. data/ext/isomorfeus_ferret_ext/uchar-ucs4.rl +0 -1854
  163. data/ext/isomorfeus_ferret_ext/uchar-utf8.rl +0 -1999
  164. data/ext/isomorfeus_ferret_ext/url.rl +0 -27
@@ -8,21 +8,18 @@
8
8
  *
9
9
  ***************************************************************************/
10
10
 
11
- void frt_filt_destroy_i(FrtFilter *filt)
12
- {
11
+ void frt_filt_destroy_i(FrtFilter *filt) {
13
12
  frt_h_destroy(filt->cache);
14
13
  free(filt);
15
14
  }
16
15
 
17
- void frt_filt_deref(FrtFilter *filt)
18
- {
16
+ void frt_filt_deref(FrtFilter *filt) {
19
17
  if (--(filt->ref_cnt) == 0) {
20
18
  filt->destroy_i(filt);
21
19
  }
22
20
  }
23
21
 
24
- FrtBitVector *frt_filt_get_bv(FrtFilter *filt, FrtIndexReader *ir)
25
- {
22
+ FrtBitVector *frt_filt_get_bv(FrtFilter *filt, FrtIndexReader *ir) {
26
23
  FrtCacheObject *co = (FrtCacheObject *)frt_h_get(filt->cache, ir);
27
24
 
28
25
  if (!co) {
@@ -37,26 +34,22 @@ FrtBitVector *frt_filt_get_bv(FrtFilter *filt, FrtIndexReader *ir)
37
34
  return (FrtBitVector *)co->obj;
38
35
  }
39
36
 
40
- static char *filt_to_s_i(FrtFilter *filt)
41
- {
37
+ static char *filt_to_s_i(FrtFilter *filt) {
42
38
  return frt_estrdup(rb_id2name(filt->name));
43
39
  }
44
40
 
45
- static unsigned long long frt_filt_hash_default(FrtFilter *filt)
46
- {
41
+ static unsigned long long frt_filt_hash_default(FrtFilter *filt) {
47
42
  (void)filt;
48
43
  return 0;
49
44
  }
50
45
 
51
- static int frt_filt_eq_default(FrtFilter *filt, FrtFilter *o)
52
- {
46
+ static int frt_filt_eq_default(FrtFilter *filt, FrtFilter *o) {
53
47
  (void)filt; (void)o;
54
48
  return false;
55
49
  }
56
50
 
57
- FrtFilter *frt_filt_create(size_t size, FrtSymbol name)
58
- {
59
- FrtFilter *filt = (FrtFilter *)frt_emalloc(size);
51
+ FrtFilter *frt_filt_create(size_t size, ID name) {
52
+ FrtFilter *filt = (FrtFilter *)frt_emalloc(size);
60
53
  filt->cache = frt_co_hash_create();
61
54
  filt->name = name;
62
55
  filt->to_s = &filt_to_s_i;
@@ -64,46 +57,35 @@ FrtFilter *frt_filt_create(size_t size, FrtSymbol name)
64
57
  filt->eq = &frt_filt_eq_default;
65
58
  filt->destroy_i = &frt_filt_destroy_i;
66
59
  filt->ref_cnt = 1;
60
+ filt->rfilter = Qnil;
67
61
  return filt;
68
62
  }
69
63
 
70
- unsigned long long frt_filt_hash(FrtFilter *filt)
71
- {
64
+ unsigned long long frt_filt_hash(FrtFilter *filt) {
72
65
  return frt_str_hash(rb_id2name(filt->name)) ^ filt->hash(filt);
73
66
  }
74
67
 
75
- int frt_filt_eq(FrtFilter *filt, FrtFilter *o)
76
- {
77
- return ((filt == o)
78
- || ((filt->name == o->name)
79
- && (filt->eq == o->eq)
80
- && (filt->eq(filt, o))));
68
+ int frt_filt_eq(FrtFilter *filt, FrtFilter *o) {
69
+ return ((filt == o) || ((filt->name == o->name) && (filt->eq == o->eq) && (filt->eq(filt, o))));
81
70
  }
82
71
 
83
72
  /***************************************************************************
84
73
  *
85
- * QueryFilter
74
+ * FrtQueryFilter
86
75
  *
87
76
  ***************************************************************************/
88
77
 
89
- #define QF(filt) ((QueryFilter *)(filt))
90
- typedef struct QueryFilter
91
- {
92
- FrtFilter super;
93
- FrtQuery *query;
94
- } QueryFilter;
78
+ #define QF(filt) ((FrtQueryFilter *)(filt))
95
79
 
96
- static char *qfilt_to_s(FrtFilter *filt)
97
- {
80
+ static char *qfilt_to_s(FrtFilter *filt) {
98
81
  FrtQuery *query = QF(filt)->query;
99
- char *query_str = query->to_s(query, (FrtSymbol)NULL);
82
+ char *query_str = query->to_s(query, (ID)NULL);
100
83
  char *filter_str = frt_strfmt("QueryFilter< %s >", query_str);
101
84
  free(query_str);
102
85
  return filter_str;
103
86
  }
104
87
 
105
- static FrtBitVector *qfilt_get_bv_i(FrtFilter *filt, FrtIndexReader *ir)
106
- {
88
+ static FrtBitVector *qfilt_get_bv_i(FrtFilter *filt, FrtIndexReader *ir) {
107
89
  FrtBitVector *bv = frt_bv_new_capa(ir->max_doc(ir));
108
90
  FrtSearcher *sea = frt_isea_new(ir);
109
91
  FrtWeight *weight = frt_q_weight(QF(filt)->query, sea);
@@ -119,27 +101,25 @@ static FrtBitVector *qfilt_get_bv_i(FrtFilter *filt, FrtIndexReader *ir)
119
101
  return bv;
120
102
  }
121
103
 
122
- static unsigned long long qfilt_hash(FrtFilter *filt)
123
- {
104
+ static unsigned long long qfilt_hash(FrtFilter *filt) {
124
105
  return frt_q_hash(QF(filt)->query);
125
106
  }
126
107
 
127
- static int qfilt_eq(FrtFilter *filt, FrtFilter *o)
128
- {
108
+ static int qfilt_eq(FrtFilter *filt, FrtFilter *o) {
129
109
  return frt_q_eq(QF(filt)->query, QF(o)->query);
130
110
  }
131
111
 
132
- static void qfilt_destroy_i(FrtFilter *filt)
133
- {
112
+ static void qfilt_destroy_i(FrtFilter *filt) {
134
113
  FrtQuery *query = QF(filt)->query;
135
114
  frt_q_deref(query);
136
115
  frt_filt_destroy_i(filt);
137
116
  }
138
117
 
139
- FrtFilter *frt_qfilt_new_nr(FrtQuery *query)
140
- {
141
- FrtFilter *filt = filt_new(QueryFilter);
118
+ FrtFilter *frt_qfilt_alloc(void) {
119
+ return filt_new(FrtQueryFilter);
120
+ }
142
121
 
122
+ FrtFilter *frt_qfilt_init_nr(FrtFilter *filt, FrtQuery *query) {
143
123
  QF(filt)->query = query;
144
124
 
145
125
  filt->get_bv_i = &qfilt_get_bv_i;
@@ -150,8 +130,17 @@ FrtFilter *frt_qfilt_new_nr(FrtQuery *query)
150
130
  return filt;
151
131
  }
152
132
 
153
- FrtFilter *frt_qfilt_new(FrtQuery *query)
154
- {
133
+ FrtFilter *frt_qfilt_new_nr(FrtQuery *query) {
134
+ FrtFilter *filt = frt_qfilt_alloc();
135
+ return frt_qfilt_init_nr(filt, query);
136
+ }
137
+
138
+ FrtFilter *frt_qfilt_init(FrtFilter *filt, FrtQuery *query) {
139
+ FRT_REF(query);
140
+ return frt_qfilt_init_nr(filt, query);
141
+ }
142
+
143
+ FrtFilter *frt_qfilt_new(FrtQuery *query) {
155
144
  FRT_REF(query);
156
145
  return frt_qfilt_new_nr(query);
157
146
  }
@@ -405,6 +405,7 @@ static FrtLock *fs_open_lock_i(FrtStore *store, const char *lockname)
405
405
  lock->obtain = &fs_lock_obtain;
406
406
  lock->release = &fs_lock_release;
407
407
  lock->is_locked = &fs_lock_is_locked;
408
+ lock->rlock = Qnil;
408
409
  return lock;
409
410
  }
410
411
 
@@ -1,5 +1,6 @@
1
1
  #include "frt_global.h"
2
2
  #include "frt_hash.h"
3
+ #include "frt_search.h"
3
4
  #include <stdarg.h>
4
5
  #include <stdio.h>
5
6
  #include <stdlib.h>
@@ -11,21 +12,33 @@
11
12
  #include <sys/types.h>
12
13
  #include <unistd.h>
13
14
  #include <signal.h>
15
+ #include <ruby/encoding.h>
14
16
 
15
17
  const char *FRT_EMPTY_STRING = "";
16
18
 
17
- int frt_scmp(const void *p1, const void *p2)
18
- {
19
+ rb_encoding *utf8_encoding;
20
+ int utf8_mbmaxlen;
21
+ OnigCodePoint cp_apostrophe;
22
+ OnigCodePoint cp_dot;
23
+ OnigCodePoint cp_comma;
24
+ OnigCodePoint cp_backslash;
25
+ OnigCodePoint cp_slash;
26
+ OnigCodePoint cp_underscore;
27
+ OnigCodePoint cp_dash;
28
+ OnigCodePoint cp_hyphen;
29
+ OnigCodePoint cp_at;
30
+ OnigCodePoint cp_ampersand;
31
+ OnigCodePoint cp_colon;
32
+
33
+ int frt_scmp(const void *p1, const void *p2) {
19
34
  return strcmp(*(char **) p1, *(char **) p2);
20
35
  }
21
36
 
22
- void frt_strsort(char **str_array, int size)
23
- {
37
+ void frt_strsort(char **str_array, int size) {
24
38
  qsort(str_array, size, sizeof(char *), &frt_scmp);
25
39
  }
26
40
 
27
- int frt_icmp(const void *p1, const void *p2)
28
- {
41
+ int frt_icmp(const void *p1, const void *p2) {
29
42
  int i1 = *(int *) p1;
30
43
  int i2 = *(int *) p2;
31
44
 
@@ -38,42 +51,36 @@ int frt_icmp(const void *p1, const void *p2)
38
51
  return 0;
39
52
  }
40
53
 
41
- int frt_icmp_risky(const void *p1, const void *p2)
42
- {
54
+ int frt_icmp_risky(const void *p1, const void *p2) {
43
55
  return (*(int *)p1) - *((int *)p2);
44
56
  }
45
57
 
46
- unsigned int *frt_imalloc(unsigned int value)
47
- {
58
+ unsigned int *frt_imalloc(unsigned int value) {
48
59
  unsigned int *p = FRT_ALLOC(unsigned int);
49
60
  *p = value;
50
61
  return p;
51
62
  }
52
63
 
53
- unsigned long *frt_lmalloc(unsigned long value)
54
- {
64
+ unsigned long *frt_lmalloc(unsigned long value) {
55
65
  unsigned long *p = FRT_ALLOC(unsigned long);
56
66
  *p = value;
57
67
  return p;
58
68
  }
59
69
 
60
- frt_u32 *frt_u32malloc(frt_u32 value)
61
- {
70
+ frt_u32 *frt_u32malloc(frt_u32 value) {
62
71
  frt_u32 *p = FRT_ALLOC(frt_u32);
63
72
  *p = value;
64
73
  return p;
65
74
  }
66
75
 
67
- frt_u64 *frt_u64malloc(frt_u64 value)
68
- {
76
+ frt_u64 *frt_u64malloc(frt_u64 value) {
69
77
  frt_u64 *p = FRT_ALLOC(frt_u64);
70
78
  *p = value;
71
79
  return p;
72
80
  }
73
81
 
74
82
  /* concatenate two strings freeing the second */
75
- char *frt_estrcat(char *str1, char *str2)
76
- {
83
+ char *frt_estrcat(char *str1, char *str2) {
77
84
  size_t len1 = strlen(str1);
78
85
  size_t len2 = strlen(str2);
79
86
  FRT_REALLOC_N(str1, char, len1 + len2 + 3); /* leave room for <CR> */
@@ -83,8 +90,7 @@ char *frt_estrcat(char *str1, char *str2)
83
90
  }
84
91
 
85
92
  /* epstrdup: duplicate a string with a format, report if error */
86
- char *frt_epstrdup(const char *fmt, int len, ...)
87
- {
93
+ char *frt_epstrdup(const char *fmt, int len, ...) {
88
94
  char *string;
89
95
  va_list args;
90
96
  len += (int) strlen(fmt);
@@ -98,8 +104,7 @@ char *frt_epstrdup(const char *fmt, int len, ...)
98
104
  }
99
105
 
100
106
  /* frt_estrdup: duplicate a string, report if error */
101
- char *frt_estrdup(const char *s)
102
- {
107
+ char *frt_estrdup(const char *s) {
103
108
  char *t = FRT_ALLOC_N(char, strlen(s) + 1);
104
109
  strcpy(t, s);
105
110
  return t;
@@ -108,15 +113,13 @@ char *frt_estrdup(const char *s)
108
113
  /* Pretty print a float to the buffer. The buffer should have at least 32
109
114
  * bytes available.
110
115
  */
111
- char *frt_dbl_to_s(char *buf, double num)
112
- {
116
+ char *frt_dbl_to_s(char *buf, double num) {
113
117
  char *p, *e;
114
118
 
115
119
  #ifdef FRT_IS_C99
116
120
  if (isinf(num)) {
117
121
  return frt_estrdup(num < 0 ? "-Infinity" : "Infinity");
118
- }
119
- else if (isnan(num)) {
122
+ } else if (isnan(num)) {
120
123
  return frt_estrdup("NaN");
121
124
  }
122
125
  #endif
@@ -154,8 +157,7 @@ char *frt_strapp(char *dst, const char *src) {
154
157
  }
155
158
 
156
159
  /* strfmt: like sprintf except that it allocates memory for the string */
157
- char *frt_vstrfmt(const char *fmt, va_list args)
158
- {
160
+ char *frt_vstrfmt(const char *fmt, va_list args) {
159
161
  char *string;
160
162
  char *p = (char *) fmt, *q;
161
163
  int len = (int) strlen(fmt) + 1;
@@ -217,8 +219,7 @@ char *frt_vstrfmt(const char *fmt, va_list args)
217
219
  return string;
218
220
  }
219
221
 
220
- char *frt_strfmt(const char *fmt, ...)
221
- {
222
+ char *frt_strfmt(const char *fmt, ...) {
222
223
  va_list args;
223
224
  char *str;
224
225
  va_start(args, fmt);
@@ -227,8 +228,7 @@ char *frt_strfmt(const char *fmt, ...)
227
228
  return str;
228
229
  }
229
230
 
230
- void frt_dummy_free(void *p)
231
- {
231
+ void frt_dummy_free(void *p) {
232
232
  (void)p; /* suppress unused argument warning */
233
233
  }
234
234
 
@@ -236,8 +236,7 @@ void frt_dummy_free(void *p)
236
236
  #define CMD_BUF_SIZE (128 + FILENAME_MAX)
237
237
  /* need to declare this as it is masked by default in linux */
238
238
 
239
- static char *build_shell_command()
240
- {
239
+ static char *build_shell_command(void) {
241
240
  int pid = getpid();
242
241
  char *buf = FRT_ALLOC_N(char, CMD_BUF_SIZE);
243
242
  char *command =
@@ -252,8 +251,7 @@ static char *build_shell_command()
252
251
  /**
253
252
  * Call out to gdb to get our stacktrace.
254
253
  */
255
- char *frt_get_stacktrace()
256
- {
254
+ char *frt_get_stacktrace(void) {
257
255
  #ifdef HAVE_GDB
258
256
  FILE *stream;
259
257
  char *gdb_filename = NULL, *buf = NULL, *stack = NULL;
@@ -288,20 +286,18 @@ char *frt_get_stacktrace()
288
286
  #endif
289
287
  }
290
288
 
291
- void frt_print_stacktrace()
292
- {
289
+ void frt_print_stacktrace(void) {
293
290
  char *stack = frt_get_stacktrace();
294
291
 
295
292
  if (stack) {
296
293
  fprintf(EXCEPTION_STREAM, "Stack trace:\n%s", stack);
297
294
  free(stack);
298
295
  } else {
299
- fprintf(EXCEPTION_STREAM, "Stack trace not avaialble\n");
296
+ fprintf(EXCEPTION_STREAM, "Stack trace not available\n");
300
297
  }
301
298
  }
302
299
 
303
- typedef struct FreeMe
304
- {
300
+ typedef struct FreeMe {
305
301
  void *p;
306
302
  frt_free_ft free_func;
307
303
  } FreeMe;
@@ -310,8 +306,7 @@ static FreeMe *free_mes = NULL;
310
306
  static int free_mes_size = 0;
311
307
  static int free_mes_capa = 0;
312
308
 
313
- void frt_register_for_cleanup(void *p, frt_free_ft free_func)
314
- {
309
+ void frt_register_for_cleanup(void *p, frt_free_ft free_func) {
315
310
  FreeMe *free_me;
316
311
  if (free_mes_capa == 0) {
317
312
  free_mes_capa = 16;
@@ -330,18 +325,15 @@ void frt_register_for_cleanup(void *p, frt_free_ft free_func)
330
325
  static char name[MAX_PROG_NAME]; /* program name for error msgs */
331
326
 
332
327
  /* frt_setprogname: set stored name of program */
333
- void frt_setprogname(const char *str)
334
- {
328
+ void frt_setprogname(const char *str) {
335
329
  strncpy(name, str, sizeof(name) - 1);
336
330
  }
337
331
 
338
- const char *frt_progname()
339
- {
332
+ const char *frt_progname(void) {
340
333
  return name;
341
334
  }
342
335
 
343
- static const char *signal_to_string(int signum)
344
- {
336
+ static const char *signal_to_string(int signum) {
345
337
  switch (signum)
346
338
  {
347
339
  case SIGILL: return "SIGILL";
@@ -356,8 +348,7 @@ static const char *signal_to_string(int signum)
356
348
  return "Unknown Signal";
357
349
  }
358
350
 
359
- static void sighandler_crash(int signum)
360
- {
351
+ static void sighandler_crash(int signum) {
361
352
  frt_print_stacktrace();
362
353
  FRT_XEXIT("Signal", "Exiting on signal %s (%d)", signal_to_string(signum), signum);
363
354
  }
@@ -366,8 +357,7 @@ static void sighandler_crash(int signum)
366
357
  signal(sig, handler); \
367
358
  } while(0)
368
359
 
369
- void frt_init(int argc, const char *const argv[])
370
- {
360
+ void frt_init(int argc, const char *const argv[]) {
371
361
  if (argc > 0) {
372
362
  frt_setprogname(argv[0]);
373
363
  }
@@ -381,6 +371,63 @@ void frt_init(int argc, const char *const argv[])
381
371
  SETSIG_IF_UNSET(SIGSEGV, sighandler_crash);
382
372
 
383
373
  atexit(&frt_hash_finalize);
374
+
375
+ utf8_encoding = rb_enc_find("UTF-8");
376
+ utf8_mbmaxlen = rb_enc_mbmaxlen(utf8_encoding);
377
+ char *p = "'";
378
+ cp_apostrophe = rb_enc_mbc_to_codepoint(p, p + 1, utf8_encoding);
379
+ p = ".";
380
+ cp_dot = rb_enc_mbc_to_codepoint(p, p + 1, utf8_encoding);
381
+ p = ",";
382
+ cp_comma = rb_enc_mbc_to_codepoint(p, p + 1, utf8_encoding);
383
+ p = "\\";
384
+ cp_backslash = rb_enc_mbc_to_codepoint(p, p + 1, utf8_encoding);
385
+ p = "/";
386
+ cp_slash = rb_enc_mbc_to_codepoint(p, p + 1, utf8_encoding);
387
+ p = "_";
388
+ cp_underscore = rb_enc_mbc_to_codepoint(p, p + 1, utf8_encoding);
389
+ p = "-";
390
+ cp_dash = rb_enc_mbc_to_codepoint(p, p + 1, utf8_encoding);
391
+ p = "\u2010";
392
+ cp_hyphen = rb_enc_mbc_to_codepoint(p, p + 1, utf8_encoding);
393
+ p = "@";
394
+ cp_at = rb_enc_mbc_to_codepoint(p, p + 1, utf8_encoding);
395
+ p = "&";
396
+ cp_ampersand = rb_enc_mbc_to_codepoint(p, p + 1, utf8_encoding);
397
+ p = ":";
398
+ cp_colon = rb_enc_mbc_to_codepoint(p, p + 1, utf8_encoding);
399
+
400
+ FRT_SORT_FIELD_SCORE = frt_sort_field_alloc();
401
+ FRT_SORT_FIELD_SCORE->field_index_class = NULL; /* field_index_class */
402
+ FRT_SORT_FIELD_SCORE->field = (ID)NULL; /* field */
403
+ FRT_SORT_FIELD_SCORE->type = FRT_SORT_TYPE_SCORE; /* type */
404
+ FRT_SORT_FIELD_SCORE->reverse = false; /* reverse */
405
+ FRT_SORT_FIELD_SCORE->compare = frt_sort_field_score_compare; /* compare */
406
+ FRT_SORT_FIELD_SCORE->get_val = frt_sort_field_score_get_val; /* get_val */
407
+
408
+ FRT_SORT_FIELD_SCORE_REV = frt_sort_field_alloc();
409
+ FRT_SORT_FIELD_SCORE_REV->field_index_class = NULL; /* field_index_class */
410
+ FRT_SORT_FIELD_SCORE_REV->field = (ID)NULL; /* field */
411
+ FRT_SORT_FIELD_SCORE_REV->type = FRT_SORT_TYPE_SCORE; /* type */
412
+ FRT_SORT_FIELD_SCORE_REV->reverse = true; /* reverse */
413
+ FRT_SORT_FIELD_SCORE_REV->compare = frt_sort_field_score_compare; /* compare */
414
+ FRT_SORT_FIELD_SCORE_REV->get_val = frt_sort_field_score_get_val; /* get_val */
415
+
416
+ FRT_SORT_FIELD_DOC = frt_sort_field_alloc();
417
+ FRT_SORT_FIELD_DOC->field_index_class = NULL; /* field_index_class */
418
+ FRT_SORT_FIELD_DOC->field = (ID)NULL; /* field */
419
+ FRT_SORT_FIELD_DOC->type = FRT_SORT_TYPE_DOC; /* type */
420
+ FRT_SORT_FIELD_DOC->reverse = false; /* reverse */
421
+ FRT_SORT_FIELD_DOC->compare = frt_sort_field_doc_compare; /* compare */
422
+ FRT_SORT_FIELD_DOC->get_val = frt_sort_field_doc_get_val; /* get_val */
423
+
424
+ FRT_SORT_FIELD_DOC_REV = frt_sort_field_alloc();
425
+ FRT_SORT_FIELD_DOC_REV->field_index_class = NULL; /* field_index_class */
426
+ FRT_SORT_FIELD_DOC_REV->field = (ID)NULL; /* field */
427
+ FRT_SORT_FIELD_DOC_REV->type = FRT_SORT_TYPE_DOC; /* type */
428
+ FRT_SORT_FIELD_DOC_REV->reverse = true; /* reverse */
429
+ FRT_SORT_FIELD_DOC_REV->compare = frt_sort_field_doc_compare; /* compare */
430
+ FRT_SORT_FIELD_DOC_REV->get_val = frt_sort_field_doc_get_val; /* get_val */
384
431
  }
385
432
 
386
433
  /**
@@ -392,8 +439,7 @@ void frt_init(int argc, const char *const argv[])
392
439
  static bool p_switch = false;
393
440
  static bool p_switch_tmp = false;
394
441
 
395
- void p(const char *format, ...)
396
- {
442
+ void p(const char *format, ...) {
397
443
  va_list args;
398
444
 
399
445
  if (!p_switch) return;
@@ -403,25 +449,21 @@ void p(const char *format, ...)
403
449
  va_end(args);
404
450
  }
405
451
 
406
- void p_on()
407
- {
452
+ void p_on(void) {
408
453
  fprintf(stderr, "> > > > > STARTING PRINT\n");
409
454
  p_switch = true;
410
455
  }
411
456
 
412
- void p_off()
413
- {
457
+ void p_off(void) {
414
458
  fprintf(stderr, "< < < < < STOPPING PRINT\n");
415
459
  p_switch = false;
416
460
  }
417
461
 
418
- void frt_p_pause()
419
- {
462
+ void frt_p_pause(void) {
420
463
  p_switch_tmp = p_switch;
421
464
  p_switch = false;
422
465
  }
423
466
 
424
- void frt_p_resume()
425
- {
467
+ void frt_p_resume(void) {
426
468
  p_switch = p_switch_tmp;
427
469
  }
@@ -11,12 +11,17 @@
11
11
  #include <string.h>
12
12
  #include <ruby.h>
13
13
 
14
- typedef ID FrtSymbol;
15
-
16
14
  #define FRT_MAX_WORD_SIZE 255
17
15
  #define FRT_MAX_FILE_PATH 1024
18
16
  #define FRT_BUFFER_SIZE 1024
19
17
 
18
+ typedef enum {
19
+ FRT_COMPRESSION_NONE = 0,
20
+ FRT_COMPRESSION_BROTLI = 1,
21
+ FRT_COMPRESSION_BZ2 = 2,
22
+ FRT_COMPRESSION_LZ4 = 3
23
+ } FrtCompressionType;
24
+
20
25
  #define FRT_DBL2S "%#.7g"
21
26
 
22
27
  #if __GNUC__ >= 3
@@ -37,7 +42,6 @@ typedef void (*frt_free_ft)(void *key);
37
42
 
38
43
  #define FRT_NELEMS(array) ((int)(sizeof(array)/sizeof(array[0])))
39
44
 
40
-
41
45
  #define FRT_ZEROSET(ptr, type) memset(ptr, 0, sizeof(type))
42
46
  #define FRT_ZEROSET_N(ptr, type, n) memset(ptr, 0, sizeof(type)*(n))
43
47
 
@@ -510,8 +510,7 @@ void frt_h_str_print_keys(FrtHash *self, FILE *out)
510
510
  free(keys);
511
511
  }
512
512
 
513
- void frt_hash_finalize()
514
- {
513
+ void frt_hash_finalize(void) {
515
514
  while (num_free_hts > 0) {
516
515
  free(free_hts[--num_free_hts]);
517
516
  }