jk-ferret 0.11.8.2

Sign up to get free protection for your applications and to get access to all the features.
Files changed (228) hide show
  1. data/CHANGELOG +24 -0
  2. data/MIT-LICENSE +20 -0
  3. data/README +90 -0
  4. data/RELEASE_CHANGES +137 -0
  5. data/RELEASE_NOTES +60 -0
  6. data/Rakefile +443 -0
  7. data/TODO +109 -0
  8. data/TUTORIAL +231 -0
  9. data/bin/ferret-browser +79 -0
  10. data/ext/BZLIB_blocksort.c +1094 -0
  11. data/ext/BZLIB_bzlib.c +1578 -0
  12. data/ext/BZLIB_compress.c +672 -0
  13. data/ext/BZLIB_crctable.c +104 -0
  14. data/ext/BZLIB_decompress.c +626 -0
  15. data/ext/BZLIB_huffman.c +205 -0
  16. data/ext/BZLIB_randtable.c +84 -0
  17. data/ext/STEMMER_api.c +66 -0
  18. data/ext/STEMMER_libstemmer.c +93 -0
  19. data/ext/STEMMER_stem_ISO_8859_1_danish.c +337 -0
  20. data/ext/STEMMER_stem_ISO_8859_1_dutch.c +624 -0
  21. data/ext/STEMMER_stem_ISO_8859_1_english.c +1117 -0
  22. data/ext/STEMMER_stem_ISO_8859_1_finnish.c +762 -0
  23. data/ext/STEMMER_stem_ISO_8859_1_french.c +1246 -0
  24. data/ext/STEMMER_stem_ISO_8859_1_german.c +503 -0
  25. data/ext/STEMMER_stem_ISO_8859_1_hungarian.c +1230 -0
  26. data/ext/STEMMER_stem_ISO_8859_1_italian.c +1065 -0
  27. data/ext/STEMMER_stem_ISO_8859_1_norwegian.c +297 -0
  28. data/ext/STEMMER_stem_ISO_8859_1_porter.c +749 -0
  29. data/ext/STEMMER_stem_ISO_8859_1_portuguese.c +1017 -0
  30. data/ext/STEMMER_stem_ISO_8859_1_spanish.c +1093 -0
  31. data/ext/STEMMER_stem_ISO_8859_1_swedish.c +307 -0
  32. data/ext/STEMMER_stem_ISO_8859_2_romanian.c +998 -0
  33. data/ext/STEMMER_stem_KOI8_R_russian.c +700 -0
  34. data/ext/STEMMER_stem_UTF_8_danish.c +339 -0
  35. data/ext/STEMMER_stem_UTF_8_dutch.c +634 -0
  36. data/ext/STEMMER_stem_UTF_8_english.c +1125 -0
  37. data/ext/STEMMER_stem_UTF_8_finnish.c +768 -0
  38. data/ext/STEMMER_stem_UTF_8_french.c +1256 -0
  39. data/ext/STEMMER_stem_UTF_8_german.c +509 -0
  40. data/ext/STEMMER_stem_UTF_8_hungarian.c +1234 -0
  41. data/ext/STEMMER_stem_UTF_8_italian.c +1073 -0
  42. data/ext/STEMMER_stem_UTF_8_norwegian.c +299 -0
  43. data/ext/STEMMER_stem_UTF_8_porter.c +755 -0
  44. data/ext/STEMMER_stem_UTF_8_portuguese.c +1023 -0
  45. data/ext/STEMMER_stem_UTF_8_romanian.c +1004 -0
  46. data/ext/STEMMER_stem_UTF_8_russian.c +694 -0
  47. data/ext/STEMMER_stem_UTF_8_spanish.c +1097 -0
  48. data/ext/STEMMER_stem_UTF_8_swedish.c +309 -0
  49. data/ext/STEMMER_stem_UTF_8_turkish.c +2205 -0
  50. data/ext/STEMMER_utilities.c +478 -0
  51. data/ext/analysis.c +1710 -0
  52. data/ext/analysis.h +266 -0
  53. data/ext/api.h +26 -0
  54. data/ext/array.c +125 -0
  55. data/ext/array.h +62 -0
  56. data/ext/bitvector.c +96 -0
  57. data/ext/bitvector.h +594 -0
  58. data/ext/bzlib.h +282 -0
  59. data/ext/bzlib_private.h +503 -0
  60. data/ext/compound_io.c +384 -0
  61. data/ext/config.h +52 -0
  62. data/ext/document.c +159 -0
  63. data/ext/document.h +63 -0
  64. data/ext/except.c +102 -0
  65. data/ext/except.h +176 -0
  66. data/ext/extconf.rb +15 -0
  67. data/ext/ferret.c +416 -0
  68. data/ext/ferret.h +94 -0
  69. data/ext/field_index.c +262 -0
  70. data/ext/field_index.h +52 -0
  71. data/ext/filter.c +157 -0
  72. data/ext/fs_store.c +493 -0
  73. data/ext/global.c +458 -0
  74. data/ext/global.h +302 -0
  75. data/ext/hash.c +524 -0
  76. data/ext/hash.h +515 -0
  77. data/ext/hashset.c +192 -0
  78. data/ext/hashset.h +215 -0
  79. data/ext/header.h +58 -0
  80. data/ext/helper.c +63 -0
  81. data/ext/helper.h +21 -0
  82. data/ext/index.c +6804 -0
  83. data/ext/index.h +935 -0
  84. data/ext/internal.h +1019 -0
  85. data/ext/lang.c +10 -0
  86. data/ext/lang.h +68 -0
  87. data/ext/libstemmer.h +79 -0
  88. data/ext/mempool.c +88 -0
  89. data/ext/mempool.h +43 -0
  90. data/ext/modules.h +190 -0
  91. data/ext/multimapper.c +351 -0
  92. data/ext/multimapper.h +60 -0
  93. data/ext/posh.c +1006 -0
  94. data/ext/posh.h +973 -0
  95. data/ext/priorityqueue.c +149 -0
  96. data/ext/priorityqueue.h +155 -0
  97. data/ext/q_boolean.c +1621 -0
  98. data/ext/q_const_score.c +162 -0
  99. data/ext/q_filtered_query.c +212 -0
  100. data/ext/q_fuzzy.c +280 -0
  101. data/ext/q_match_all.c +149 -0
  102. data/ext/q_multi_term.c +673 -0
  103. data/ext/q_parser.c +3103 -0
  104. data/ext/q_phrase.c +1206 -0
  105. data/ext/q_prefix.c +98 -0
  106. data/ext/q_range.c +682 -0
  107. data/ext/q_span.c +2390 -0
  108. data/ext/q_term.c +337 -0
  109. data/ext/q_wildcard.c +167 -0
  110. data/ext/r_analysis.c +2626 -0
  111. data/ext/r_index.c +3468 -0
  112. data/ext/r_qparser.c +635 -0
  113. data/ext/r_search.c +4490 -0
  114. data/ext/r_store.c +513 -0
  115. data/ext/r_utils.c +1131 -0
  116. data/ext/ram_store.c +476 -0
  117. data/ext/scanner.c +895 -0
  118. data/ext/scanner.h +36 -0
  119. data/ext/scanner_mb.c +6701 -0
  120. data/ext/scanner_utf8.c +4415 -0
  121. data/ext/search.c +1864 -0
  122. data/ext/search.h +953 -0
  123. data/ext/similarity.c +151 -0
  124. data/ext/similarity.h +89 -0
  125. data/ext/sort.c +786 -0
  126. data/ext/stem_ISO_8859_1_danish.h +16 -0
  127. data/ext/stem_ISO_8859_1_dutch.h +16 -0
  128. data/ext/stem_ISO_8859_1_english.h +16 -0
  129. data/ext/stem_ISO_8859_1_finnish.h +16 -0
  130. data/ext/stem_ISO_8859_1_french.h +16 -0
  131. data/ext/stem_ISO_8859_1_german.h +16 -0
  132. data/ext/stem_ISO_8859_1_hungarian.h +16 -0
  133. data/ext/stem_ISO_8859_1_italian.h +16 -0
  134. data/ext/stem_ISO_8859_1_norwegian.h +16 -0
  135. data/ext/stem_ISO_8859_1_porter.h +16 -0
  136. data/ext/stem_ISO_8859_1_portuguese.h +16 -0
  137. data/ext/stem_ISO_8859_1_spanish.h +16 -0
  138. data/ext/stem_ISO_8859_1_swedish.h +16 -0
  139. data/ext/stem_ISO_8859_2_romanian.h +16 -0
  140. data/ext/stem_KOI8_R_russian.h +16 -0
  141. data/ext/stem_UTF_8_danish.h +16 -0
  142. data/ext/stem_UTF_8_dutch.h +16 -0
  143. data/ext/stem_UTF_8_english.h +16 -0
  144. data/ext/stem_UTF_8_finnish.h +16 -0
  145. data/ext/stem_UTF_8_french.h +16 -0
  146. data/ext/stem_UTF_8_german.h +16 -0
  147. data/ext/stem_UTF_8_hungarian.h +16 -0
  148. data/ext/stem_UTF_8_italian.h +16 -0
  149. data/ext/stem_UTF_8_norwegian.h +16 -0
  150. data/ext/stem_UTF_8_porter.h +16 -0
  151. data/ext/stem_UTF_8_portuguese.h +16 -0
  152. data/ext/stem_UTF_8_romanian.h +16 -0
  153. data/ext/stem_UTF_8_russian.h +16 -0
  154. data/ext/stem_UTF_8_spanish.h +16 -0
  155. data/ext/stem_UTF_8_swedish.h +16 -0
  156. data/ext/stem_UTF_8_turkish.h +16 -0
  157. data/ext/stopwords.c +410 -0
  158. data/ext/store.c +698 -0
  159. data/ext/store.h +799 -0
  160. data/ext/symbol.c +10 -0
  161. data/ext/symbol.h +23 -0
  162. data/ext/term_vectors.c +73 -0
  163. data/ext/threading.h +31 -0
  164. data/ext/win32.h +62 -0
  165. data/lib/ferret.rb +30 -0
  166. data/lib/ferret/browser.rb +246 -0
  167. data/lib/ferret/browser/s/global.js +192 -0
  168. data/lib/ferret/browser/s/style.css +148 -0
  169. data/lib/ferret/browser/views/document/list.rhtml +49 -0
  170. data/lib/ferret/browser/views/document/show.rhtml +27 -0
  171. data/lib/ferret/browser/views/error/index.rhtml +7 -0
  172. data/lib/ferret/browser/views/help/index.rhtml +8 -0
  173. data/lib/ferret/browser/views/home/index.rhtml +29 -0
  174. data/lib/ferret/browser/views/layout.rhtml +22 -0
  175. data/lib/ferret/browser/views/term-vector/index.rhtml +4 -0
  176. data/lib/ferret/browser/views/term/index.rhtml +199 -0
  177. data/lib/ferret/browser/views/term/termdocs.rhtml +1 -0
  178. data/lib/ferret/browser/webrick.rb +14 -0
  179. data/lib/ferret/document.rb +130 -0
  180. data/lib/ferret/field_infos.rb +44 -0
  181. data/lib/ferret/field_symbol.rb +87 -0
  182. data/lib/ferret/index.rb +973 -0
  183. data/lib/ferret/number_tools.rb +157 -0
  184. data/lib/ferret/version.rb +3 -0
  185. data/setup.rb +1555 -0
  186. data/test/long_running/largefile/tc_largefile.rb +46 -0
  187. data/test/test_all.rb +5 -0
  188. data/test/test_helper.rb +29 -0
  189. data/test/test_installed.rb +1 -0
  190. data/test/threading/number_to_spoken.rb +132 -0
  191. data/test/threading/thread_safety_index_test.rb +88 -0
  192. data/test/threading/thread_safety_read_write_test.rb +73 -0
  193. data/test/threading/thread_safety_test.rb +133 -0
  194. data/test/unit/analysis/tc_analyzer.rb +550 -0
  195. data/test/unit/analysis/tc_token_stream.rb +653 -0
  196. data/test/unit/index/tc_index.rb +867 -0
  197. data/test/unit/index/tc_index_reader.rb +699 -0
  198. data/test/unit/index/tc_index_writer.rb +447 -0
  199. data/test/unit/index/th_doc.rb +332 -0
  200. data/test/unit/query_parser/tc_query_parser.rb +238 -0
  201. data/test/unit/search/tc_filter.rb +156 -0
  202. data/test/unit/search/tc_fuzzy_query.rb +147 -0
  203. data/test/unit/search/tc_index_searcher.rb +67 -0
  204. data/test/unit/search/tc_multi_searcher.rb +128 -0
  205. data/test/unit/search/tc_multiple_search_requests.rb +58 -0
  206. data/test/unit/search/tc_search_and_sort.rb +179 -0
  207. data/test/unit/search/tc_sort.rb +49 -0
  208. data/test/unit/search/tc_sort_field.rb +27 -0
  209. data/test/unit/search/tc_spans.rb +190 -0
  210. data/test/unit/search/tm_searcher.rb +436 -0
  211. data/test/unit/store/tc_fs_store.rb +115 -0
  212. data/test/unit/store/tc_ram_store.rb +35 -0
  213. data/test/unit/store/tm_store.rb +34 -0
  214. data/test/unit/store/tm_store_lock.rb +68 -0
  215. data/test/unit/tc_document.rb +81 -0
  216. data/test/unit/tc_field_symbol.rb +26 -0
  217. data/test/unit/ts_analysis.rb +2 -0
  218. data/test/unit/ts_index.rb +2 -0
  219. data/test/unit/ts_largefile.rb +4 -0
  220. data/test/unit/ts_query_parser.rb +2 -0
  221. data/test/unit/ts_search.rb +2 -0
  222. data/test/unit/ts_store.rb +2 -0
  223. data/test/unit/ts_utils.rb +2 -0
  224. data/test/unit/utils/tc_bit_vector.rb +295 -0
  225. data/test/unit/utils/tc_number_tools.rb +117 -0
  226. data/test/unit/utils/tc_priority_queue.rb +106 -0
  227. data/test/utils/content_generator.rb +226 -0
  228. metadata +319 -0
data/ext/document.h ADDED
@@ -0,0 +1,63 @@
1
+ #ifndef FRT_DOCUMENT_H
2
+ #define FRT_DOCUMENT_H
3
+
4
+ #ifdef __cplusplus
5
+ extern "C" {
6
+ #endif
7
+
8
+ #include "global.h"
9
+ #include "symbol.h"
10
+ #include "hash.h"
11
+
12
+ /****************************************************************************
13
+ *
14
+ * FrtDocField
15
+ *
16
+ ****************************************************************************/
17
+
18
+ #define FRT_DF_INIT_CAPA 1
19
+ typedef struct FrtDocField
20
+ {
21
+ FrtSymbol name;
22
+ int size;
23
+ int capa;
24
+ int *lengths;
25
+ char **data;
26
+ float boost;
27
+ bool destroy_data : 1;
28
+ bool is_compressed : 1;
29
+ } FrtDocField;
30
+
31
+ extern FrtDocField *frt_df_new(FrtSymbol name);
32
+ extern FrtDocField *frt_df_add_data(FrtDocField *df, char *data);
33
+ extern FrtDocField *frt_df_add_data_len(FrtDocField *df, char *data, int len);
34
+ extern void frt_df_destroy(FrtDocField *df);
35
+ extern char *frt_df_to_s(FrtDocField *df);
36
+
37
+ /****************************************************************************
38
+ *
39
+ * FrtDocument
40
+ *
41
+ ****************************************************************************/
42
+
43
+ #define FRT_DOC_INIT_CAPA 8
44
+ typedef struct FrtDocument
45
+ {
46
+ FrtHash *field_dict;
47
+ int size;
48
+ int capa;
49
+ FrtDocField **fields;
50
+ float boost;
51
+ } FrtDocument;
52
+
53
+ extern FrtDocument *frt_doc_new();
54
+ extern FrtDocField *frt_doc_add_field(FrtDocument *doc, FrtDocField *df);
55
+ extern FrtDocField *frt_doc_get_field(FrtDocument *doc, FrtSymbol name);
56
+ extern char *frt_doc_to_s(FrtDocument *doc);
57
+ extern void frt_doc_destroy(FrtDocument *doc);
58
+
59
+ #ifdef __cplusplus
60
+ } // extern "C"
61
+ #endif
62
+
63
+ #endif
data/ext/except.c ADDED
@@ -0,0 +1,102 @@
1
+ #include <stdarg.h>
2
+ #include "global.h"
3
+ #include "except.h"
4
+ #include "threading.h"
5
+ #include "internal.h"
6
+
7
+ static const char *const ERROR_TYPES[] = {
8
+ "Body",
9
+ "Finally",
10
+ "Exception",
11
+ "IO Error",
12
+ "File Not Found Error",
13
+ "Argument Error",
14
+ "End-of-File Error",
15
+ "Unsupported Function Error",
16
+ "State Error",
17
+ "Parse Error",
18
+ "Memory Error",
19
+ "Index Error",
20
+ "Lock Error"
21
+ };
22
+
23
+ char *const UNSUPPORTED_ERROR_MSG = "Unsupported operation";
24
+ char *const EOF_ERROR_MSG = "Read past end of file";
25
+ char xmsg_buffer[XMSG_BUFFER_SIZE];
26
+ char xmsg_buffer_final[XMSG_BUFFER_SIZE];
27
+
28
+ static thread_key_t exception_stack_key;
29
+ static thread_once_t exception_stack_key_once = THREAD_ONCE_INIT;
30
+
31
+ static void exception_stack_alloc(void)
32
+ {
33
+ thread_key_create(&exception_stack_key, NULL);
34
+ }
35
+
36
+ void xpush_context(xcontext_t *context)
37
+ {
38
+ xcontext_t *top_context;
39
+ thread_once(&exception_stack_key_once, *exception_stack_alloc);
40
+ top_context = (xcontext_t *)thread_getspecific(exception_stack_key);
41
+ context->next = top_context;
42
+ thread_setspecific(exception_stack_key, context);
43
+ context->handled = true;
44
+ context->in_finally = false;
45
+ }
46
+
47
+ static INLINE void xraise_context(xcontext_t *context,
48
+ volatile int excode,
49
+ const char *const msg)
50
+ {
51
+ context->msg = msg;
52
+ context->excode = excode;
53
+ context->handled = false;
54
+ longjmp(context->jbuf, excode);
55
+ }
56
+
57
+ #ifndef FRT_HAS_VARARGS
58
+ void RAISE(int excode, const char *fmt, ...)
59
+ {
60
+ va_list args;
61
+ va_start(args, fmt);
62
+ vsnprintf(xmsg_buffer, XMSG_BUFFER_SIZE, fmt, args);
63
+ xraise(excode, xmsg_buffer);
64
+ va_end(args);
65
+ }
66
+ #endif
67
+
68
+ void xraise(int excode, const char *const msg)
69
+ {
70
+ xcontext_t *top_context;
71
+ thread_once(&exception_stack_key_once, *exception_stack_alloc);
72
+ top_context = (xcontext_t *)thread_getspecific(exception_stack_key);
73
+
74
+ if (!top_context) {
75
+ XEXIT(ERROR_TYPES[excode], msg);
76
+ }
77
+ else if (!top_context->in_finally) {
78
+ xraise_context(top_context, excode, msg);
79
+ }
80
+ else if (top_context->handled) {
81
+ top_context->msg = msg;
82
+ top_context->excode = excode;
83
+ top_context->handled = false;
84
+ }
85
+ }
86
+
87
+ void xpop_context()
88
+ {
89
+ xcontext_t *top_cxt, *context;
90
+ thread_once(&exception_stack_key_once, *exception_stack_alloc);
91
+ top_cxt = (xcontext_t *)thread_getspecific(exception_stack_key);
92
+ context = top_cxt->next;
93
+ thread_setspecific(exception_stack_key, context);
94
+ if (!top_cxt->handled) {
95
+ if (context) {
96
+ xraise_context(context, top_cxt->excode, top_cxt->msg);
97
+ }
98
+ else {
99
+ XEXIT(ERROR_TYPES[top_cxt->excode], top_cxt->msg);
100
+ }
101
+ }
102
+ }
data/ext/except.h ADDED
@@ -0,0 +1,176 @@
1
+ /**
2
+ * Exception Handling Framework
3
+ *
4
+ * Exception Handling looks something like this;
5
+ *
6
+ * <pre>
7
+ * FRT_TRY
8
+ * FRT_RAISE(FRT_EXCEPTION, msg1);
9
+ * break;
10
+ * case FRT_EXCEPTION:
11
+ * // This should be called
12
+ * exception_handled = true;
13
+ * FRT_HANDLED();
14
+ * break;
15
+ * default:
16
+ * // shouldn't enter here
17
+ * break;
18
+ * FRT_XFINALLY
19
+ * // this code will always be run
20
+ * if (close_widget_one(arg) == 0) {
21
+ * FRT_RAISE(EXCEPTION_CODE, msg);
22
+ * }
23
+ * // this code will also always run, even if the above exception is
24
+ * // raised
25
+ * if (close_widget_two(arg) == 0) {
26
+ * FRT_RAISE(EXCEPTION_CODE, msg);
27
+ * }
28
+ * FRT_XENDTRY
29
+ * </pre>
30
+ *
31
+ * Basically exception handling uses the following macros;
32
+ *
33
+ * FRT_TRY
34
+ * Sets up the exception handler and needs to be placed before any expected
35
+ * exceptions would be raised.
36
+ *
37
+ * case <EXCEPTION_CODE>:
38
+ * Internally the exception handling uses a switch statement so use the case
39
+ * statement with the appropriate error code to catch Exceptions. Hence, if
40
+ * you want to catch all exceptions, use the default keyword.
41
+ *
42
+ * FRT_HANDLED
43
+ * If you catch and handle an exception you need to explicitly call
44
+ * FRT_HANDLED(); or the exception will be re-raised once the current exception
45
+ * handling context is left.
46
+ *
47
+ * case FRT_FINALLY:
48
+ * Code in this block is always called. Use this block to close any
49
+ * resources opened in the Exception handling body.
50
+ *
51
+ * FRT_ENDTRY
52
+ * Must be placed at the end of all exception handling code.
53
+ *
54
+ * FRT_XFINALLY
55
+ * Similar to case FRT_FINALLY: except that it uses a fall through (ie, you must
56
+ * not use a break before it) instead of a jump to get to it. This saves a
57
+ * jump. It must be used in combination with FRT_XENDTRY and must not have any
58
+ * other catches. This is an optimization so should probably not be used
59
+ * in most cases.
60
+ *
61
+ * FRT_XCATCHALL
62
+ * Like FRT_XFINALLY but the block is only called when an exception is raised.
63
+ * Must use in combination with FRT_XENDTRY and do not have any other FRT_FINALLY or
64
+ * catch block.
65
+ *
66
+ * FRT_XENDTRY
67
+ * Must use in combination with FRT_XFINALLY or FRT_XCATCHALL. Simply, it doesn't
68
+ * jump to FRT_FINALLY, making it more efficient.
69
+ */
70
+ #ifndef FRT_EXCEPT_H
71
+ #define FRT_EXCEPT_H
72
+
73
+ #ifdef __cplusplus
74
+ extern "C" {
75
+ #endif
76
+
77
+ #include <setjmp.h>
78
+ #include "config.h"
79
+
80
+ /* TODO make this an enum */
81
+ #define FRT_BODY 0
82
+ #define FRT_FINALLY 1
83
+ #define FRT_EXCEPTION 2
84
+ #define FRT_FERRET_ERROR 2
85
+ #define FRT_IO_ERROR 3
86
+ #define FRT_FILE_NOT_FOUND_ERROR 4
87
+ #define FRT_ARG_ERROR 5
88
+ #define FRT_EOF_ERROR 6
89
+ #define FRT_UNSUPPORTED_ERROR 7
90
+ #define FRT_STATE_ERROR 8
91
+ #define FRT_PARSE_ERROR 9
92
+ #define FRT_MEM_ERROR 10
93
+ #define FRT_INDEX_ERROR 11
94
+ #define FRT_LOCK_ERROR 12
95
+
96
+ extern char *const FRT_UNSUPPORTED_ERROR_MSG;
97
+ extern char *const FRT_EOF_ERROR_MSG;
98
+
99
+ typedef struct frt_xcontext_t
100
+ {
101
+ jmp_buf jbuf;
102
+ struct frt_xcontext_t *next;
103
+ const char *msg;
104
+ volatile int excode;
105
+ unsigned int handled : 1;
106
+ unsigned int in_finally : 1;
107
+ } frt_xcontext_t;
108
+
109
+ #define FRT_TRY\
110
+ do {\
111
+ frt_xcontext_t xcontext;\
112
+ frt_xpush_context(&xcontext);\
113
+ switch (setjmp(xcontext.jbuf)) {\
114
+ case FRT_BODY:
115
+
116
+
117
+ #define FRT_XENDTRY\
118
+ }\
119
+ frt_xpop_context();\
120
+ } while (0);
121
+
122
+ #define FRT_ENDTRY\
123
+ }\
124
+ if (!xcontext.in_finally) {\
125
+ frt_xpop_context();\
126
+ xcontext.in_finally = 1;\
127
+ longjmp(xcontext.jbuf, FRT_FINALLY);\
128
+ }\
129
+ } while (0);
130
+
131
+ #define FRT_RETURN_EARLY() frt_xpop_context()
132
+
133
+
134
+ #define FRT_XFINALLY default: xcontext.in_finally = 1;
135
+
136
+ #define FRT_XCATCHALL break; default: xcontext.in_finally = 1;
137
+
138
+ #define FRT_HANDLED() xcontext.handled = 1; /* true */
139
+
140
+ #define FRT_XMSG_BUFFER_SIZE 2048
141
+
142
+ #ifdef FRT_HAS_ISO_VARARGS
143
+ # define FRT_RAISE(excode, ...) do {\
144
+ snprintf(frt_xmsg_buffer, FRT_XMSG_BUFFER_SIZE, __VA_ARGS__);\
145
+ snprintf(frt_xmsg_buffer_final, FRT_XMSG_BUFFER_SIZE,\
146
+ "Error occured in %s:%d - %s\n\t%s\n",\
147
+ __FILE__, __LINE__, __func__, frt_xmsg_buffer);\
148
+ frt_xraise(excode, frt_xmsg_buffer_final);\
149
+ } while (0)
150
+ #elif defined(FRT_HAS_GNUC_VARARGS)
151
+ # define FRT_RAISE(excode, args...) do {\
152
+ snprintf(frt_xmsg_buffer, FRT_XMSG_BUFFER_SIZE, ##args);\
153
+ snprintf(frt_xmsg_buffer_final, FRT_XMSG_BUFFER_SIZE,\
154
+ "Error occured in %s:%d - %s\n\t%s\n",\
155
+ __FILE__, __LINE__, __func__, frt_xmsg_buffer);\
156
+ frt_xraise(excode, frt_xmsg_buffer_final);\
157
+ } while (0)
158
+
159
+ #else
160
+ extern void FRT_RAISE(int excode, const char *fmt, ...);
161
+ #endif
162
+ #define RAISE_HELL() FRT_RAISE(FRT_FERRET_ERROR, "Hell")
163
+
164
+
165
+ extern void frt_xraise(int excode, const char *const msg);
166
+ extern void frt_xpush_context(frt_xcontext_t *context);
167
+ extern void frt_xpop_context();
168
+
169
+ extern char frt_xmsg_buffer[FRT_XMSG_BUFFER_SIZE];
170
+ extern char frt_xmsg_buffer_final[FRT_XMSG_BUFFER_SIZE];
171
+
172
+ #ifdef __cplusplus
173
+ } // extern "C"
174
+ #endif
175
+
176
+ #endif
data/ext/extconf.rb ADDED
@@ -0,0 +1,15 @@
1
+ # extconf.rb for Ferret extensions
2
# mkmf is needed on every path, so load it once up front.
require 'mkmf'

if RUBY_PLATFORM =~ /mswin/ && ENV['make'].nil?
  # MSVC build: link against the C++ runtime import library.
  $LIBS += " msvcprt.lib"
elsif ENV['FERRET_DEV']
  # Development build: replace the default flags and echo them.
  $CFLAGS = " -g -Wall -fno-stack-protector -fno-common -D_FILE_OFFSET_BITS=64 -D_XOPEN_SOURCE=500"
  puts $CFLAGS
else
  # Regular build: extend the default flags.
  $CFLAGS += " -Wall -D_FILE_OFFSET_BITS=64 -D_XOPEN_SOURCE=500"
end

create_makefile("ferret_ext")
data/ext/ferret.c ADDED
@@ -0,0 +1,416 @@
1
+ #include <errno.h>
2
+ #include "ferret.h"
3
+ #include "except.h"
4
+ #include "hash.h"
5
+ #include "hashset.h"
6
+ #include "threading.h"
7
+ #include "symbol.h"
8
+ #include "internal.h"
9
+
10
+ /* Object Map */
11
+ static Hash *object_map;
12
+
13
+ /* IDs */
14
+ ID id_new;
15
+ ID id_call;
16
+ ID id_eql;
17
+ ID id_hash;
18
+ ID id_capacity;
19
+ ID id_less_than;
20
+ ID id_lt;
21
+ ID id_is_directory;
22
+ ID id_close;
23
+ ID id_cclass;
24
+ ID id_data;
25
+
26
+ static ID id_mkdir_p;
27
+
28
+ /* Symbols */
29
+ VALUE sym_yes;
30
+ VALUE sym_no;
31
+ VALUE sym_true;
32
+ VALUE sym_false;
33
+ VALUE sym_path;
34
+ VALUE sym_dir;
35
+
36
+ /* Modules */
37
+ VALUE mFerret;
38
+ VALUE mStore;
39
+ VALUE mStringHelper;
40
+ VALUE mSpans;
41
+
42
+ /* Classes */
43
+ VALUE cTerm;
44
+ /*
45
+ */
46
+
47
/*
 * Hash function for tables keyed by pointer identity: the hash is simply
 * the key's address converted to an unsigned long.
 */
unsigned long value_hash(const void *key)
{
    const unsigned long address_bits = (unsigned long)key;
    return address_bits;
}
51
+
52
/*
 * Equality function for tables keyed by pointer identity.
 * Returns 1 when both keys are the same address, 0 otherwise.
 */
int value_eq(const void *key1, const void *key2)
{
    return (key1 != key2) ? 0 : 1;
}
56
+
57
+ VALUE object_get(void *key)
58
+ {
59
+ VALUE val = (VALUE)h_get(object_map, key);
60
+ if (!val) val = Qnil;
61
+ return val;
62
+ }
63
+
64
+ //static int hash_cnt = 0;
65
+ void
66
+ //object_add(void *key, VALUE obj)
67
+ object_add2(void *key, VALUE obj, const char *file, int line)
68
+ {
69
+ if (h_get(object_map, key))
70
+ printf("failed adding %lx to %ld; already contains %lx. %s:%d\n",
71
+ (long)obj, (long)key, (long)h_get(object_map, key), file, line);
72
+ //printf("adding %ld. now contains %d %s:%d\n", (long)key, ++hash_cnt, file, line);
73
+ h_set(object_map, key, (void *)obj);
74
+ }
75
+
76
+ void
77
+ //object_set(void *key, VALUE obj)
78
+ object_set2(void *key, VALUE obj, const char *file, int line)
79
+ {
80
+ //if (!h_get(object_map, key))
81
+ //printf("adding %ld. now contains %d %s:%d\n", (long)key, ++hash_cnt, file, line);
82
+ h_set(object_map, key, (void *)obj);
83
+ }
84
+
85
+ void
86
+ //object_del(void *key)
87
+ object_del2(void *key, const char *file, int line)
88
+ {
89
+ if (object_get(key) == Qnil)
90
+ printf("failed deleting %ld. %s:%d\n", (long)key, file, line);
91
+ //printf("deleting %ld. now contains %ld, %s:%d\n", (long)key, --hash_cnt, file, line);
92
+ h_del(object_map, key);
93
+ }
94
+
95
+ void frb_gc_mark(void *key)
96
+ {
97
+ VALUE val = (VALUE)h_get(object_map, key);
98
+ if (val)
99
+ rb_gc_mark(val);
100
+ }
101
+
102
+ VALUE frb_data_alloc(VALUE klass)
103
+ {
104
+ return Frt_Make_Struct(klass);
105
+ }
106
+
107
/*
 * Free callback that only drops the object-map entry for +p+; the
 * pointed-to data itself is not released here.
 */
void frb_deref_free(void *p)
{
    object_del(p);
}
111
+
112
/*
 * Run +init_routine+ the first time this is called for +once_control+.
 *
 * A non-zero *once_control means "not yet run"; after the routine has
 * been called the flag is cleared so later calls return immediately.
 */
void frb_thread_once(int *once_control, void (*init_routine) (void))
{
    if (*once_control == 0) {
        return;
    }
    init_routine();
    *once_control = 0;
}
119
+
120
+ void frb_thread_key_create(thread_key_t *key, void (*destr_function)(void *))
121
+ {
122
+ *key = h_new(&value_hash, &value_eq, NULL, destr_function);
123
+ }
124
+
125
+ void frb_thread_key_delete(thread_key_t key)
126
+ {
127
+ h_destroy(key);
128
+ }
129
+
130
+ void frb_thread_setspecific(thread_key_t key, const void *pointer)
131
+ {
132
+ h_set(key, (void *)rb_thread_current(), (void *)pointer);
133
+ }
134
+
135
+ void *frb_thread_getspecific(thread_key_t key)
136
+ {
137
+ return h_get(key, (void *)rb_thread_current());
138
+ }
139
+
140
+ void frb_create_dir(VALUE rpath)
141
+ {
142
+ VALUE mFileUtils;
143
+ rb_require("fileutils");
144
+ mFileUtils = rb_define_module("FileUtils");
145
+ rb_funcall(mFileUtils, id_mkdir_p, 1, rpath);
146
+ }
147
+
148
+ VALUE frb_hs_to_rb_ary(HashSet *hs)
149
+ {
150
+ HashSetEntry *hse;
151
+ VALUE ary = rb_ary_new();
152
+
153
+ for (hse = hs->first; hse; hse = hse->next) {
154
+ rb_ary_push(ary, rb_str_new2(hse->elem));
155
+ }
156
+ return ary;
157
+ }
158
+
159
+ void *frb_rb_data_ptr(VALUE val)
160
+ {
161
+ Check_Type(val, T_DATA);
162
+ return DATA_PTR(val);
163
+ }
164
+
165
+ char *
166
+ rs2s(VALUE rstr)
167
+ {
168
+ return (char *)(RSTRING_PTR(rstr) ? RSTRING_PTR(rstr) : EMPTY_STRING);
169
+ }
170
+
171
+ char *
172
+ rstrdup(VALUE rstr)
173
+ {
174
+ char *old = rs2s(rstr);
175
+ int len = RSTRING_LEN(rstr);
176
+ char *new = ALLOC_N(char, len + 1);
177
+ memcpy(new, old, len + 1);
178
+ return new;
179
+ }
180
+
181
+ Symbol
182
+ rintern(VALUE rstr)
183
+ {
184
+ char *old = rs2s(rstr);
185
+ return frt_intern(old);
186
+ }
187
+
188
+ Symbol
189
+ frb_field(VALUE rfield)
190
+ {
191
+ switch (TYPE(rfield)) {
192
+ case T_SYMBOL:
193
+ return SYM2FSYM(rfield);
194
+ case T_STRING:
195
+ return I(rs2s(rfield));
196
+ default:
197
+ rb_raise(rb_eArgError, "field name must be a symbol");
198
+ return NULL;
199
+ }
200
+ }
201
+
202
+ /*
203
+ * Json Exportation - Loading each LazyDoc and formatting them into json
204
+ * This code is designed to get a VERY FAST json string, the goal was speed,
205
+ * not sexiness.
206
+ * Jeremie 'ahFeel' BORDIER
207
+ * ahFeel@rift.Fr
208
+ */
209
/*
 * Append +field+ to the output buffer at +s+ as a double-quoted JSON
 * string and return a pointer just past the last byte written.
 *
 * Embedded double quotes are escaped as \" so that they do not terminate
 * the JSON string early. (The previous code wrapped them in single quotes,
 * which produced invalid JSON.)
 *
 * No bounds checking is done and no terminating NUL is written: the caller
 * must supply a buffer with room for at least 2 * strlen(field) + 2 bytes
 * and terminate the result itself.
 *
 * NOTE(review): backslashes and control characters are still copied
 * verbatim; escaping them would need more output space than the old code
 * ever used, so confirm the callers' buffer sizing before adding that.
 */
char *
json_concat_string(char *s, char *field)
{
    *(s++) = '"';
    for (; *field; field++) {
        if (*field == '"') {
            *(s++) = '\\';
        }
        *(s++) = *field;
    }
    *(s++) = '"';
    return s;
}
226
+
227
+ static VALUE error_map;
228
+
229
+ VALUE frb_get_error(const char *err_type)
230
+ {
231
+ VALUE error_class;
232
+ if (Qnil != (error_class = rb_hash_aref(error_map, rb_intern(err_type)))) {
233
+ return error_class;
234
+ }
235
+ return rb_eStandardError;
236
+ }
237
+
238
+ #define FRT_BUF_SIZ 2046
239
+ #ifdef FRT_HAS_VARARGS
240
+ void vfrt_rb_raise(const char *file, int line_num, const char *func,
241
+ const char *err_type, const char *fmt, va_list args)
242
+ #else
243
+ void V_FRT_EXIT(const char *err_type, const char *fmt, va_list args)
244
+ #endif
245
+ {
246
+ char buf[FRT_BUF_SIZ];
247
+ size_t so_far = 0;
248
+ #ifdef FRT_HAS_VARARGS
249
+ snprintf(buf, FRT_BUF_SIZ, "%s occured at <%s>:%d in %s\n",
250
+ err_type, file, line_num, func);
251
+ #else
252
+ snprintf(buf, FRT_BUF_SIZ, "%s occured:\n", err_type);
253
+ #endif
254
+ so_far = strlen(buf);
255
+ vsnprintf(buf + so_far, FRT_BUF_SIZ - so_far, fmt, args);
256
+
257
+ so_far = strlen(buf);
258
+ if (fmt[0] != '\0' && fmt[strlen(fmt) - 1] == ':') {
259
+ snprintf(buf + so_far, FRT_BUF_SIZ - so_far, " %s", strerror(errno));
260
+ so_far = strlen(buf);
261
+ }
262
+
263
+ snprintf(buf + so_far, FRT_BUF_SIZ - so_far, "\n");
264
+ rb_raise(frb_get_error(err_type), buf);
265
+ }
266
+
267
/*
 * Variadic front end for raising a Ruby exception from C: collects the
 * printf-style arguments following +fmt+ and forwards them to the
 * va_list variant (vfrt_rb_raise, or V_FRT_EXIT without FRT_HAS_VARARGS).
 *
 * NOTE(review): the callee ends in rb_raise, so control presumably never
 * comes back to the va_end below.
 */
#ifdef FRT_HAS_VARARGS
void frb_rb_raise(const char *file, int line_num, const char *func,
                  const char *err_type, const char *fmt, ...)
{
    va_list args;

    va_start(args, fmt);
    vfrt_rb_raise(file, line_num, func, err_type, fmt, args);
    va_end(args);
}
#else
void FRT_EXIT(const char *err_type, const char *fmt, ...)
{
    va_list args;

    va_start(args, fmt);
    V_FRT_EXIT(err_type, fmt, args);
    va_end(args);
}
#endif
283
+
284
+ /****************************************************************************
285
+ *
286
+ * Term Methods
287
+ *
288
+ ****************************************************************************/
289
+ static ID id_field;
290
+ static ID id_text;
291
+
292
+ VALUE frb_get_term(Symbol field, const char *text)
293
+ {
294
+ return rb_struct_new(cTerm,
295
+ FSYM2SYM(field),
296
+ rb_str_new2(text),
297
+ NULL);
298
+ }
299
+
300
+ static VALUE frb_term_to_s(VALUE self)
301
+ {
302
+ VALUE rstr;
303
+ VALUE rfield = rb_funcall(self, id_field, 0);
304
+ VALUE rtext = rb_funcall(self, id_text, 0);
305
+ char *field = StringValuePtr(rfield);
306
+ char *text = StringValuePtr(rtext);
307
+ char *term_str = ALLOC_N(char,
308
+ 5 + RSTRING_LEN(rfield) + RSTRING_LEN(rtext));
309
+ sprintf(term_str, "%s:%s", field, text);
310
+ rstr = rb_str_new2(term_str);
311
+ free(term_str);
312
+ return rstr;
313
+ }
314
+ /*
315
+ * Document-class: Ferret::Term
316
+ *
317
+ * == Summary
318
+ *
319
+ * A Term holds a term from a document and its field name (as a Symbol).
320
+ */
321
+ void Init_Term(void)
322
+ {
323
+ const char *term_class = "Term";
324
+ cTerm = rb_struct_define(term_class, "field", "text", NULL);
325
+ rb_set_class_path(cTerm, mFerret, term_class);
326
+ rb_const_set(mFerret, rb_intern(term_class), cTerm);
327
+ rb_define_method(cTerm, "to_s", frb_term_to_s, 0);
328
+ id_field = rb_intern("field");
329
+ id_text = rb_intern("text");
330
+ }
331
+
332
+ /*
333
+ * Document-module: Ferret
334
+ *
335
+ * See the README
336
+ */
337
+ void Init_Ferret(void)
338
+ {
339
+ mFerret = rb_define_module("Ferret");
340
+ Init_Term();
341
+ }
342
+
343
+ void Init_ferret_ext(void)
344
+ {
345
+ VALUE cParseError;
346
+ VALUE cStateError;
347
+ VALUE cFileNotFoundError;
348
+
349
+ const char *const progname[] = {"ruby"};
350
+
351
+ frt_init(1, progname);
352
+
353
+ /* initialize object map */
354
+ object_map = h_new(&value_hash, &value_eq, NULL, NULL);
355
+
356
+ /* IDs */
357
+ id_new = rb_intern("new");
358
+ id_call = rb_intern("call");
359
+ id_eql = rb_intern("eql?");
360
+ id_hash = rb_intern("hash");
361
+
362
+ id_capacity = rb_intern("capacity");
363
+ id_less_than = rb_intern("less_than");
364
+ id_lt = rb_intern("<");
365
+
366
+ id_mkdir_p = rb_intern("mkdir_p");
367
+ id_is_directory = rb_intern("directory?");
368
+ id_close = rb_intern("close");
369
+
370
+ id_cclass = rb_intern("cclass");
371
+
372
+ id_data = rb_intern("@data");
373
+
374
+ /* Symbols */
375
+ sym_yes = ID2SYM(rb_intern("yes"));;
376
+ sym_no = ID2SYM(rb_intern("no"));;
377
+ sym_true = ID2SYM(rb_intern("true"));;
378
+ sym_false = ID2SYM(rb_intern("false"));;
379
+ sym_path = ID2SYM(rb_intern("path"));;
380
+ sym_dir = ID2SYM(rb_intern("dir"));;
381
+
382
+ /* Inits */
383
+ Init_Ferret();
384
+ Init_Utils();
385
+ Init_Analysis();
386
+ Init_Store();
387
+ Init_Index();
388
+ Init_Search();
389
+ Init_QueryParser();
390
+
391
+ /* Error Classes */
392
+ cParseError =
393
+ rb_define_class_under(mFerret, "ParseError", rb_eStandardError);
394
+ cStateError =
395
+ rb_define_class_under(mFerret, "StateError", rb_eStandardError);
396
+ cFileNotFoundError =
397
+ rb_define_class_under(mFerret, "FileNotFoundError", rb_eIOError);
398
+
399
+ error_map = rb_hash_new();
400
+ rb_hash_aset(error_map, rb_intern("Exception"), rb_eStandardError);
401
+ rb_hash_aset(error_map, rb_intern("IO Error"), rb_eIOError);
402
+ rb_hash_aset(error_map, rb_intern("File Not Found Error"),
403
+ cFileNotFoundError);
404
+ rb_hash_aset(error_map, rb_intern("Argument Error"), rb_eArgError);
405
+ rb_hash_aset(error_map, rb_intern("End-of-File Error"), rb_eEOFError);
406
+ rb_hash_aset(error_map, rb_intern("Unsupported Function Error"),
407
+ rb_eNotImpError);
408
+ rb_hash_aset(error_map, rb_intern("State Error"), cStateError);
409
+ rb_hash_aset(error_map, rb_intern("ParseError"), cParseError);
410
+ rb_hash_aset(error_map, rb_intern("Memory Error"), rb_eNoMemError);
411
+ rb_hash_aset(error_map, rb_intern("Index Error"), rb_eIndexError);
412
+ rb_hash_aset(error_map, rb_intern("Lock Error"), cLockError);
413
+
414
+ rb_define_const(mFerret, "EXCEPTION_MAP", error_map);
415
+ rb_define_const(mFerret, "FIX_INT_MAX", INT2FIX(INT_MAX >> 1));
416
+ }