isomorfeus-ferret 0.12.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (222) hide show
  1. checksums.yaml +7 -0
  2. data/LICENSE +612 -0
  3. data/README.md +44 -0
  4. data/ext/isomorfeus_ferret_ext/benchmark.c +223 -0
  5. data/ext/isomorfeus_ferret_ext/benchmark.h +45 -0
  6. data/ext/isomorfeus_ferret_ext/benchmarks_all.h +25 -0
  7. data/ext/isomorfeus_ferret_ext/bm_bitvector.c +123 -0
  8. data/ext/isomorfeus_ferret_ext/bm_hash.c +118 -0
  9. data/ext/isomorfeus_ferret_ext/bm_micro_string.c +40 -0
  10. data/ext/isomorfeus_ferret_ext/bm_store.c +93 -0
  11. data/ext/isomorfeus_ferret_ext/email.rl +21 -0
  12. data/ext/isomorfeus_ferret_ext/extconf.rb +5 -0
  13. data/ext/isomorfeus_ferret_ext/fio_tmpfile.h +53 -0
  14. data/ext/isomorfeus_ferret_ext/frb_analysis.c +2577 -0
  15. data/ext/isomorfeus_ferret_ext/frb_index.c +3457 -0
  16. data/ext/isomorfeus_ferret_ext/frb_lang.c +9 -0
  17. data/ext/isomorfeus_ferret_ext/frb_lang.h +17 -0
  18. data/ext/isomorfeus_ferret_ext/frb_qparser.c +629 -0
  19. data/ext/isomorfeus_ferret_ext/frb_search.c +4460 -0
  20. data/ext/isomorfeus_ferret_ext/frb_store.c +515 -0
  21. data/ext/isomorfeus_ferret_ext/frb_threading.h +30 -0
  22. data/ext/isomorfeus_ferret_ext/frb_utils.c +1127 -0
  23. data/ext/isomorfeus_ferret_ext/frt_analysis.c +1644 -0
  24. data/ext/isomorfeus_ferret_ext/frt_analysis.h +247 -0
  25. data/ext/isomorfeus_ferret_ext/frt_array.c +124 -0
  26. data/ext/isomorfeus_ferret_ext/frt_array.h +54 -0
  27. data/ext/isomorfeus_ferret_ext/frt_bitvector.c +95 -0
  28. data/ext/isomorfeus_ferret_ext/frt_bitvector.h +586 -0
  29. data/ext/isomorfeus_ferret_ext/frt_compound_io.c +374 -0
  30. data/ext/isomorfeus_ferret_ext/frt_config.h +44 -0
  31. data/ext/isomorfeus_ferret_ext/frt_document.c +134 -0
  32. data/ext/isomorfeus_ferret_ext/frt_document.h +52 -0
  33. data/ext/isomorfeus_ferret_ext/frt_except.c +95 -0
  34. data/ext/isomorfeus_ferret_ext/frt_except.h +188 -0
  35. data/ext/isomorfeus_ferret_ext/frt_field_index.c +233 -0
  36. data/ext/isomorfeus_ferret_ext/frt_field_index.h +42 -0
  37. data/ext/isomorfeus_ferret_ext/frt_filter.c +157 -0
  38. data/ext/isomorfeus_ferret_ext/frt_fs_store.c +502 -0
  39. data/ext/isomorfeus_ferret_ext/frt_global.c +427 -0
  40. data/ext/isomorfeus_ferret_ext/frt_global.h +290 -0
  41. data/ext/isomorfeus_ferret_ext/frt_hash.c +518 -0
  42. data/ext/isomorfeus_ferret_ext/frt_hash.h +466 -0
  43. data/ext/isomorfeus_ferret_ext/frt_hashset.c +191 -0
  44. data/ext/isomorfeus_ferret_ext/frt_hashset.h +206 -0
  45. data/ext/isomorfeus_ferret_ext/frt_helper.c +62 -0
  46. data/ext/isomorfeus_ferret_ext/frt_helper.h +13 -0
  47. data/ext/isomorfeus_ferret_ext/frt_ind.c +353 -0
  48. data/ext/isomorfeus_ferret_ext/frt_ind.h +54 -0
  49. data/ext/isomorfeus_ferret_ext/frt_index.c +6377 -0
  50. data/ext/isomorfeus_ferret_ext/frt_index.h +880 -0
  51. data/ext/isomorfeus_ferret_ext/frt_lang.c +104 -0
  52. data/ext/isomorfeus_ferret_ext/frt_lang.h +44 -0
  53. data/ext/isomorfeus_ferret_ext/frt_mempool.c +87 -0
  54. data/ext/isomorfeus_ferret_ext/frt_mempool.h +33 -0
  55. data/ext/isomorfeus_ferret_ext/frt_multimapper.c +349 -0
  56. data/ext/isomorfeus_ferret_ext/frt_multimapper.h +52 -0
  57. data/ext/isomorfeus_ferret_ext/frt_posh.c +1006 -0
  58. data/ext/isomorfeus_ferret_ext/frt_posh.h +973 -0
  59. data/ext/isomorfeus_ferret_ext/frt_priorityqueue.c +147 -0
  60. data/ext/isomorfeus_ferret_ext/frt_priorityqueue.h +147 -0
  61. data/ext/isomorfeus_ferret_ext/frt_q_boolean.c +1612 -0
  62. data/ext/isomorfeus_ferret_ext/frt_q_const_score.c +157 -0
  63. data/ext/isomorfeus_ferret_ext/frt_q_filtered_query.c +209 -0
  64. data/ext/isomorfeus_ferret_ext/frt_q_fuzzy.c +281 -0
  65. data/ext/isomorfeus_ferret_ext/frt_q_match_all.c +147 -0
  66. data/ext/isomorfeus_ferret_ext/frt_q_multi_term.c +672 -0
  67. data/ext/isomorfeus_ferret_ext/frt_q_parser.c +3084 -0
  68. data/ext/isomorfeus_ferret_ext/frt_q_phrase.c +1182 -0
  69. data/ext/isomorfeus_ferret_ext/frt_q_prefix.c +98 -0
  70. data/ext/isomorfeus_ferret_ext/frt_q_range.c +665 -0
  71. data/ext/isomorfeus_ferret_ext/frt_q_span.c +2386 -0
  72. data/ext/isomorfeus_ferret_ext/frt_q_term.c +311 -0
  73. data/ext/isomorfeus_ferret_ext/frt_q_wildcard.c +166 -0
  74. data/ext/isomorfeus_ferret_ext/frt_ram_store.c +460 -0
  75. data/ext/isomorfeus_ferret_ext/frt_scanner.c +899 -0
  76. data/ext/isomorfeus_ferret_ext/frt_scanner.h +28 -0
  77. data/ext/isomorfeus_ferret_ext/frt_scanner_mb.c +6705 -0
  78. data/ext/isomorfeus_ferret_ext/frt_scanner_utf8.c +4419 -0
  79. data/ext/isomorfeus_ferret_ext/frt_search.c +1824 -0
  80. data/ext/isomorfeus_ferret_ext/frt_search.h +924 -0
  81. data/ext/isomorfeus_ferret_ext/frt_similarity.c +150 -0
  82. data/ext/isomorfeus_ferret_ext/frt_similarity.h +79 -0
  83. data/ext/isomorfeus_ferret_ext/frt_sort.c +796 -0
  84. data/ext/isomorfeus_ferret_ext/frt_stopwords.c +395 -0
  85. data/ext/isomorfeus_ferret_ext/frt_store.c +680 -0
  86. data/ext/isomorfeus_ferret_ext/frt_store.h +789 -0
  87. data/ext/isomorfeus_ferret_ext/frt_term_vectors.c +72 -0
  88. data/ext/isomorfeus_ferret_ext/frt_threading.h +23 -0
  89. data/ext/isomorfeus_ferret_ext/frt_win32.h +54 -0
  90. data/ext/isomorfeus_ferret_ext/isomorfeus_ferret.c +409 -0
  91. data/ext/isomorfeus_ferret_ext/isomorfeus_ferret.h +95 -0
  92. data/ext/isomorfeus_ferret_ext/libstemmer.c +93 -0
  93. data/ext/isomorfeus_ferret_ext/libstemmer.h +73 -0
  94. data/ext/isomorfeus_ferret_ext/q_parser.y +1366 -0
  95. data/ext/isomorfeus_ferret_ext/scanner.h +28 -0
  96. data/ext/isomorfeus_ferret_ext/scanner.in +43 -0
  97. data/ext/isomorfeus_ferret_ext/scanner.rl +84 -0
  98. data/ext/isomorfeus_ferret_ext/scanner_mb.rl +200 -0
  99. data/ext/isomorfeus_ferret_ext/scanner_utf8.rl +85 -0
  100. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_danish.c +324 -0
  101. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_danish.h +7 -0
  102. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_dutch.c +610 -0
  103. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_dutch.h +6 -0
  104. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_english.c +1104 -0
  105. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_english.h +6 -0
  106. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_finnish.c +749 -0
  107. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_finnish.h +7 -0
  108. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_french.c +1233 -0
  109. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_french.h +6 -0
  110. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_german.c +490 -0
  111. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_german.h +6 -0
  112. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_hungarian.c +1217 -0
  113. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_hungarian.h +7 -0
  114. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_italian.c +1052 -0
  115. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_italian.h +6 -0
  116. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_norwegian.c +283 -0
  117. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_norwegian.h +6 -0
  118. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_porter.c +735 -0
  119. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_porter.h +6 -0
  120. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_portuguese.c +1003 -0
  121. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_portuguese.h +7 -0
  122. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_spanish.c +1079 -0
  123. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_spanish.h +6 -0
  124. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_swedish.c +293 -0
  125. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_swedish.h +6 -0
  126. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_2_romanian.c +984 -0
  127. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_2_romanian.h +6 -0
  128. data/ext/isomorfeus_ferret_ext/stem_KOI8_R_russian.c +686 -0
  129. data/ext/isomorfeus_ferret_ext/stem_KOI8_R_russian.h +6 -0
  130. data/ext/isomorfeus_ferret_ext/stem_UTF_8_danish.c +325 -0
  131. data/ext/isomorfeus_ferret_ext/stem_UTF_8_danish.h +6 -0
  132. data/ext/isomorfeus_ferret_ext/stem_UTF_8_dutch.c +620 -0
  133. data/ext/isomorfeus_ferret_ext/stem_UTF_8_dutch.h +6 -0
  134. data/ext/isomorfeus_ferret_ext/stem_UTF_8_english.c +1111 -0
  135. data/ext/isomorfeus_ferret_ext/stem_UTF_8_english.h +6 -0
  136. data/ext/isomorfeus_ferret_ext/stem_UTF_8_finnish.c +754 -0
  137. data/ext/isomorfeus_ferret_ext/stem_UTF_8_finnish.h +6 -0
  138. data/ext/isomorfeus_ferret_ext/stem_UTF_8_french.c +1242 -0
  139. data/ext/isomorfeus_ferret_ext/stem_UTF_8_french.h +6 -0
  140. data/ext/isomorfeus_ferret_ext/stem_UTF_8_german.c +495 -0
  141. data/ext/isomorfeus_ferret_ext/stem_UTF_8_german.h +6 -0
  142. data/ext/isomorfeus_ferret_ext/stem_UTF_8_hungarian.c +1220 -0
  143. data/ext/isomorfeus_ferret_ext/stem_UTF_8_hungarian.h +6 -0
  144. data/ext/isomorfeus_ferret_ext/stem_UTF_8_italian.c +1059 -0
  145. data/ext/isomorfeus_ferret_ext/stem_UTF_8_italian.h +6 -0
  146. data/ext/isomorfeus_ferret_ext/stem_UTF_8_norwegian.c +285 -0
  147. data/ext/isomorfeus_ferret_ext/stem_UTF_8_norwegian.h +6 -0
  148. data/ext/isomorfeus_ferret_ext/stem_UTF_8_porter.c +741 -0
  149. data/ext/isomorfeus_ferret_ext/stem_UTF_8_porter.h +6 -0
  150. data/ext/isomorfeus_ferret_ext/stem_UTF_8_portuguese.c +1009 -0
  151. data/ext/isomorfeus_ferret_ext/stem_UTF_8_portuguese.h +6 -0
  152. data/ext/isomorfeus_ferret_ext/stem_UTF_8_romanian.c +990 -0
  153. data/ext/isomorfeus_ferret_ext/stem_UTF_8_romanian.h +6 -0
  154. data/ext/isomorfeus_ferret_ext/stem_UTF_8_russian.c +680 -0
  155. data/ext/isomorfeus_ferret_ext/stem_UTF_8_russian.h +6 -0
  156. data/ext/isomorfeus_ferret_ext/stem_UTF_8_spanish.c +1083 -0
  157. data/ext/isomorfeus_ferret_ext/stem_UTF_8_spanish.h +6 -0
  158. data/ext/isomorfeus_ferret_ext/stem_UTF_8_swedish.c +294 -0
  159. data/ext/isomorfeus_ferret_ext/stem_UTF_8_swedish.h +6 -0
  160. data/ext/isomorfeus_ferret_ext/stem_UTF_8_turkish.c +2191 -0
  161. data/ext/isomorfeus_ferret_ext/stem_UTF_8_turkish.h +6 -0
  162. data/ext/isomorfeus_ferret_ext/stem_api.c +66 -0
  163. data/ext/isomorfeus_ferret_ext/stem_api.h +26 -0
  164. data/ext/isomorfeus_ferret_ext/stem_header.h +57 -0
  165. data/ext/isomorfeus_ferret_ext/stem_modules.h +190 -0
  166. data/ext/isomorfeus_ferret_ext/stem_modules.txt +50 -0
  167. data/ext/isomorfeus_ferret_ext/stem_utilities.c +478 -0
  168. data/ext/isomorfeus_ferret_ext/test.c +850 -0
  169. data/ext/isomorfeus_ferret_ext/test.h +416 -0
  170. data/ext/isomorfeus_ferret_ext/test_1710.c +63 -0
  171. data/ext/isomorfeus_ferret_ext/test_analysis.c +1221 -0
  172. data/ext/isomorfeus_ferret_ext/test_array.c +272 -0
  173. data/ext/isomorfeus_ferret_ext/test_bitvector.c +600 -0
  174. data/ext/isomorfeus_ferret_ext/test_compound_io.c +170 -0
  175. data/ext/isomorfeus_ferret_ext/test_document.c +156 -0
  176. data/ext/isomorfeus_ferret_ext/test_except.c +244 -0
  177. data/ext/isomorfeus_ferret_ext/test_fields.c +522 -0
  178. data/ext/isomorfeus_ferret_ext/test_file_deleter.c +185 -0
  179. data/ext/isomorfeus_ferret_ext/test_filter.c +331 -0
  180. data/ext/isomorfeus_ferret_ext/test_fs_store.c +25 -0
  181. data/ext/isomorfeus_ferret_ext/test_global.c +299 -0
  182. data/ext/isomorfeus_ferret_ext/test_hash.c +485 -0
  183. data/ext/isomorfeus_ferret_ext/test_hashset.c +288 -0
  184. data/ext/isomorfeus_ferret_ext/test_helper.c +47 -0
  185. data/ext/isomorfeus_ferret_ext/test_highlighter.c +548 -0
  186. data/ext/isomorfeus_ferret_ext/test_index.c +2323 -0
  187. data/ext/isomorfeus_ferret_ext/test_lang.c +74 -0
  188. data/ext/isomorfeus_ferret_ext/test_mempool.c +102 -0
  189. data/ext/isomorfeus_ferret_ext/test_multimapper.c +64 -0
  190. data/ext/isomorfeus_ferret_ext/test_priorityqueue.c +213 -0
  191. data/ext/isomorfeus_ferret_ext/test_q_const_score.c +84 -0
  192. data/ext/isomorfeus_ferret_ext/test_q_filtered.c +61 -0
  193. data/ext/isomorfeus_ferret_ext/test_q_fuzzy.c +241 -0
  194. data/ext/isomorfeus_ferret_ext/test_q_parser.c +464 -0
  195. data/ext/isomorfeus_ferret_ext/test_q_span.c +575 -0
  196. data/ext/isomorfeus_ferret_ext/test_ram_store.c +77 -0
  197. data/ext/isomorfeus_ferret_ext/test_search.c +1874 -0
  198. data/ext/isomorfeus_ferret_ext/test_segments.c +167 -0
  199. data/ext/isomorfeus_ferret_ext/test_similarity.c +25 -0
  200. data/ext/isomorfeus_ferret_ext/test_sort.c +333 -0
  201. data/ext/isomorfeus_ferret_ext/test_store.c +591 -0
  202. data/ext/isomorfeus_ferret_ext/test_store.h +3 -0
  203. data/ext/isomorfeus_ferret_ext/test_term.c +351 -0
  204. data/ext/isomorfeus_ferret_ext/test_term_vectors.c +373 -0
  205. data/ext/isomorfeus_ferret_ext/test_test.c +83 -0
  206. data/ext/isomorfeus_ferret_ext/test_threading.c +188 -0
  207. data/ext/isomorfeus_ferret_ext/testhelper.c +561 -0
  208. data/ext/isomorfeus_ferret_ext/testhelper.h +25 -0
  209. data/ext/isomorfeus_ferret_ext/tests_all.h +87 -0
  210. data/ext/isomorfeus_ferret_ext/uchar-ucs4.rl +1854 -0
  211. data/ext/isomorfeus_ferret_ext/uchar-utf8.rl +1999 -0
  212. data/ext/isomorfeus_ferret_ext/url.rl +27 -0
  213. data/ext/isomorfeus_ferret_ext/word_list.h +15156 -0
  214. data/lib/isomorfeus/ferret/document.rb +132 -0
  215. data/lib/isomorfeus/ferret/field_symbol.rb +85 -0
  216. data/lib/isomorfeus/ferret/index/field_infos.rb +48 -0
  217. data/lib/isomorfeus/ferret/index/index.rb +970 -0
  218. data/lib/isomorfeus/ferret/monitor.rb +323 -0
  219. data/lib/isomorfeus/ferret/stdlib_patches.rb +151 -0
  220. data/lib/isomorfeus/ferret/version.rb +5 -0
  221. data/lib/isomorfeus-ferret.rb +8 -0
  222. metadata +307 -0
@@ -0,0 +1,223 @@
1
+ #include <unistd.h>
2
+ #include <string.h>
3
+ #include "ruby.h"
4
+ #include "frt_global.h"
5
+ #include "benchmark.h"
6
+ #include "benchmarks_all.h"
7
+ #include "word_list.h"
8
+
9
+ extern VALUE mFerret;
10
+ static VALUE mBenchmark;
11
+
12
+ #if (defined POSH_OS_WIN32 || defined POSH_OS_WIN64)
13
+
14
+ #define RUSAGE_SELF 0
15
+
16
+ struct rusage
17
+ {
18
+ struct timeval ru_utime; /* user time used */
19
+ struct timeval ru_stime; /* system time used */
20
+ };
21
+
22
+ int getrusage(int who, struct rusage *rusage)
23
+ {
24
+ FILETIME starttime;
25
+ FILETIME exittime;
26
+ FILETIME kerneltime;
27
+ FILETIME usertime;
28
+ ULARGE_INTEGER li;
29
+
30
+ if (who != RUSAGE_SELF) {
31
+ errno = EINVAL;
32
+ return -1;
33
+ }
34
+
35
+ if (rusage == (struct rusage *) NULL) {
36
+ errno = EFAULT;
37
+ return -1;
38
+ }
39
+ memset(rusage, 0, sizeof(struct rusage));
40
+ if (GetProcessTimes(GetCurrentProcess(), &starttime, &exittime, &kerneltime, &usertime) == 0) {
41
+ fprintf(stderr, "getrusage: error in GetProcessTimes %lu\n", GetLastError());
42
+ return -1;
43
+ }
44
+
45
+ memcpy(&li, &kerneltime, sizeof(FILETIME));
46
+ li.QuadPart /= 10L;
47
+ rusage->ru_stime.tv_sec = li.QuadPart / 1000000L;
48
+ rusage->ru_stime.tv_usec = li.QuadPart % 1000000L;
49
+
50
+ memcpy(&li, &usertime, sizeof(FILETIME));
51
+ li.QuadPart /= 10L;
52
+ rusage->ru_utime.tv_sec = li.QuadPart / 1000000L;
53
+ rusage->ru_utime.tv_usec = li.QuadPart % 1000000L;
54
+ return 0;
55
+ }
56
+ #else
57
+ #include <sys/time.h>
58
+ #include <sys/resource.h>
59
+ #endif
60
+
61
+ static int bmtcmp(const void *p1, const void *p2)
62
+ {
63
+ BenchMarkTimes *bmt1 = *(BenchMarkTimes **)p1;
64
+ BenchMarkTimes *bmt2 = *(BenchMarkTimes **)p2;
65
+
66
+ if (bmt1->rtime > bmt2->rtime) return 1;
67
+ else if (bmt1->rtime < bmt2->rtime) return -1;
68
+ else return 0;
69
+ }
70
+
71
+ void bm_add(BenchMark *benchmark, bm_run_ft run, const char *name)
72
+ {
73
+ BenchMarkUnit *unit =
74
+ (BenchMarkUnit *)frt_emalloc(sizeof(BenchMarkUnit) +
75
+ benchmark->count * sizeof(BenchMarkTimes *));
76
+ int i;
77
+ unit->name = frt_estrdup(name);
78
+ unit->run = run;
79
+ unit->next = NULL;
80
+ if (benchmark->count > 1) {
81
+ for (i = 0; i < benchmark->count; i++) {
82
+ unit->times[i] = FRT_ALLOC(BenchMarkTimes);
83
+ }
84
+ }
85
+ if (benchmark->tail) {
86
+ benchmark->tail = benchmark->tail->next = unit;
87
+ }
88
+ else {
89
+ benchmark->tail = benchmark->head = unit;
90
+ }
91
+ }
92
+
93
+ static void bm_clear(BenchMark *benchmark)
94
+ {
95
+ BenchMarkUnit *unit, *next = benchmark->head;
96
+ while (NULL != (unit = next)) {
97
+ next = unit->next;
98
+ if (benchmark->count > 1) {
99
+ int i;
100
+ for (i = 0; i < benchmark->count; i++) {
101
+ free(unit->times[i]);
102
+ }
103
+ }
104
+ free(unit->name);
105
+ free(unit);
106
+ }
107
+ benchmark->head = benchmark->tail = NULL;
108
+ }
109
+
110
/*
 * Elapsed time in seconds between two struct timeval values
 * (`after` minus `before`), as a double.
 *
 * Both arguments and the whole expansion are parenthesized so the macro
 * can be used safely inside larger expressions.
 */
#define TVAL_TO_SEC(before, after) \
    (((double)(after).tv_sec + ((double)(after).tv_usec / 1000000)) - \
     ((double)(before).tv_sec + ((double)(before).tv_usec / 1000000)))
113
+
114
+ static void bm_single_run(BenchMarkUnit *unit, BenchMarkTimes *bm_times)
115
+ {
116
+ struct timeval tv_before, tv_after;
117
+ struct rusage ru_before, ru_after;
118
+
119
+ if (gettimeofday(&tv_before, NULL) == -1)
120
+ FRT_RAISE(FRT_UNSUPPORTED_ERROR, "gettimeofday failed\n");
121
+ getrusage(RUSAGE_SELF, &ru_before);
122
+
123
+ unit->run();
124
+
125
+ if (gettimeofday(&tv_after, NULL) == -1)
126
+ FRT_RAISE(FRT_UNSUPPORTED_ERROR, "gettimeofday failed\n");
127
+ getrusage(RUSAGE_SELF, &ru_after);
128
+
129
+ bm_times->rtime = TVAL_TO_SEC(tv_before, tv_after);
130
+ bm_times->utime = TVAL_TO_SEC(ru_before.ru_utime, ru_after.ru_utime);
131
+ bm_times->stime = TVAL_TO_SEC(ru_before.ru_stime, ru_after.ru_stime);
132
+ }
133
+
134
/*
 * Invoke the optional setup / teardown hook of a benchmark, if one is set.
 *
 * Wrapped in do { } while (0) so each macro behaves as a single statement
 * (safe in unbraced if/else); the caller supplies the terminating ';'.
 */
#define DO_SETUP(bm) \
    do { if ((bm)->setup) { (bm)->setup(); } } while (0)
#define DO_TEARDOWN(bm) \
    do { if ((bm)->teardown) { (bm)->teardown(); } } while (0)
136
+
137
+ static void bm_run(BenchMark *benchmark)
138
+ {
139
+ int i;
140
+ BenchMarkUnit *unit;
141
+ int max_name_len = 0;
142
+ char fmt[40];
143
+ int start = 0, end = benchmark->count;
144
+ if (benchmark->discard) {
145
+ start += benchmark->discard;
146
+ end -= benchmark->discard;
147
+ }
148
+ if (benchmark->count > 1) {
149
+ for (i = 0; i < benchmark->count; i++) {
150
+ DO_SETUP(benchmark);
151
+ for (unit = benchmark->head; unit; unit = unit->next) {
152
+ bm_single_run(unit, unit->times[i]);
153
+ }
154
+ DO_TEARDOWN(benchmark);
155
+ }
156
+ for (unit = benchmark->head; unit; unit = unit->next) {
157
+ double rtime = 0.0, utime = 0.0, stime = 0.0;
158
+ int result_count = end - start;
159
+ /* we only need to sort if we are discarding outliers */
160
+ if (benchmark->discard > 0) {
161
+ qsort(unit->times, benchmark->count,
162
+ sizeof(BenchMarkTimes*), &bmtcmp);
163
+ }
164
+
165
+ for (i = start; i < end; i++) {
166
+ rtime += unit->times[i]->rtime;
167
+ utime += unit->times[i]->utime;
168
+ stime += unit->times[i]->stime;
169
+ }
170
+ unit->final_times.utime = utime/result_count;
171
+ unit->final_times.stime = stime/result_count;
172
+ unit->final_times.rtime = rtime/result_count;
173
+ }
174
+ }
175
+ else {
176
+ DO_SETUP(benchmark);
177
+ for (unit = benchmark->head; unit; unit = unit->next) {
178
+ bm_single_run(unit, &(unit->final_times));
179
+ }
180
+ DO_TEARDOWN(benchmark);
181
+ }
182
+
183
+ /* get maximum unit name length for print out */
184
+ for (unit = benchmark->head; unit; unit = unit->next) {
185
+ int name_len = (int)strlen(unit->name);
186
+ if (name_len > max_name_len) {
187
+ max_name_len = name_len;
188
+ }
189
+ }
190
+
191
+ for (i = 0; i < max_name_len; i++) putchar(' ');
192
+ puts("\t user system real");
193
+ sprintf(fmt, "\t%%%ds %%10.6lf %%10.6lf %%10.6lf\n", max_name_len);
194
+ for (unit = benchmark->head; unit; unit = unit->next) {
195
+ printf(fmt,
196
+ unit->name,
197
+ unit->final_times.utime,
198
+ unit->final_times.stime,
199
+ unit->final_times.rtime);
200
+ }
201
+ }
202
+
203
+ static VALUE frb_bm_run_all(VALUE v) {
204
+ int i;
205
+ BenchMark benchmark;
206
+ benchmark.head = benchmark.tail = NULL;
207
+
208
+ for (i = 0; i < FRT_NELEMS(all_benchmarks); i++) {
209
+ printf("\nBenching [%s]...\n", all_benchmarks[i].name);
210
+ benchmark.count = 1;
211
+ benchmark.discard = 0;
212
+ benchmark.setup = benchmark.teardown = NULL;
213
+ all_benchmarks[i].initialize(&benchmark);
214
+ bm_run(&benchmark);
215
+ bm_clear(&benchmark);
216
+ }
217
+ return INT2FIX(0);
218
+ }
219
+
220
+ void Init_Benchmark(void) {
221
+ mBenchmark = rb_define_module_under(mFerret, "Benchmark");
222
+ rb_define_singleton_method(mBenchmark, "run_all", frb_bm_run_all, 0);
223
+ }
@@ -0,0 +1,45 @@
1
+ #ifndef BENCHMARK_H
2
+ #define BENCHMARK_H
3
+
4
+ #include "frt_global.h"
5
+ #include <time.h>
6
+
7
#define BM_RUN_COUNT 6

/* Opens the definition of a suite initializer named bm_<name>. */
#define BENCH(name) extern void bm_##name(BenchMark *bm)

extern const char *WORD_LIST[];

/* A function to be timed; called with no arguments. */
typedef void (*bm_run_ft)(void);

/* Timings of one run (or an average over several runs). */
typedef struct BenchMarkTimes {
    double utime;   /* user time */
    double stime;   /* system time */
    double rtime;   /* real (wall-clock) time */
} BenchMarkTimes;

/* One timed function inside a suite; units form a singly linked list. */
typedef struct BenchMarkUnit {
    char *name;                     /* label printed in the report */
    bm_run_ft run;                  /* function to time */
    struct BenchMarkUnit *next;     /* next unit, or NULL */
    BenchMarkTimes final_times;     /* figures that get reported */
    BenchMarkTimes *times[1];       /* per-run slots; bm_add allocates
                                       room for `count` of them */
} BenchMarkUnit;

/* A suite: run parameters, optional hooks and the list of units. */
typedef struct BenchMark {
    int count;      /* the number of bench runs to complete */
    int discard;    /* the number of outliers to discard */
    void (*setup)(void);        /* optional hook run before the units */
    void (*teardown)(void);     /* optional hook run after the units */
    BenchMarkUnit *head;
    BenchMarkUnit *tail;
} BenchMark;

void bm_add(BenchMark *benchmark, bm_run_ft call, const char *name);

/*
 * Convenience macros for use inside a BENCH(...) body, where `bm` is the
 * suite being initialized. The trailing semicolons are kept so existing
 * call sites continue to compile unchanged.
 */
#define BM_SETUP(func) bm->setup = &func;
#define BM_TEARDOWN(func) bm->teardown = &func;
#define BM_ADD(call) bm_add(bm, &call, #call)
#define BM_COUNT(num) bm->count = num;
#define BM_DISCARD(num) bm->discard = num;
44
+
45
+ #endif
@@ -0,0 +1,25 @@
1
+ #ifndef ALL_BENCHMARKS_H
2
+ #define ALL_BENCHMARKS_H
3
+
4
+ #include "benchmark.h"
5
+
6
+ void bm_vint_io(BenchMark *bm);
7
+ void bm_strcmp_when_length_is_known(BenchMark *bm);
8
+ void bm_snprintf_vs_strncat(BenchMark *bm);
9
+ void bm_hash_implementations(BenchMark *bm);
10
+ void bm_specialized_string_hash(BenchMark *bm);
11
+ void bm_bitvector_implementations(BenchMark *bm);
12
+
13
+ const struct BenchMarkList
14
+ {
15
+ void (*initialize)(BenchMark *benchmark);
16
+ const char *name;
17
+ } all_benchmarks[] = {
18
+ {bm_vint_io, "vint_io"},
19
+ {bm_strcmp_when_length_is_known, "strcmp_when_length_is_known"},
20
+ {bm_hash_implementations, "hash_implementations"},
21
+ {bm_specialized_string_hash, "specialized_string_hash"},
22
+ {bm_bitvector_implementations, "bitvector_implementations"}
23
+ };
24
+
25
+ #endif
@@ -0,0 +1,123 @@
1
+ #include <assert.h>
2
+ #include "frt_bitvector.h"
3
+ #include "benchmark.h"
4
+
5
/* number of repetitions of each scan loop */
#define N 10
/* number of bits set and scanned by the dense benchmarks */
#define DENSE_SCAN_SIZE 20000000
/* distance between consecutive set bits in the sparse benchmarks */
#define SCAN_INC 97
/* bit-vector capacity; parenthesized so it is safe inside any expression */
#define SCAN_SIZE (DENSE_SCAN_SIZE * SCAN_INC)
9
+
10
+ static FrtBitVector *bv;
11
+
12
+ static void setup()
13
+ {
14
+ bv = frt_bv_new_capa(SCAN_SIZE);
15
+ }
16
+
17
+ static void teardown()
18
+ {
19
+ frt_bv_destroy(bv);
20
+ }
21
+
22
+ static void ferret_bv_and_sparse()
23
+ {
24
+ FrtBitVector * _bv = frt_bv_and(bv, bv);
25
+ free(_bv);
26
+ }
27
+ static void ferret_bv_or_sparse()
28
+ {
29
+ FrtBitVector * _bv = frt_bv_or(bv, bv);
30
+ free(_bv);
31
+ }
32
+ static void ferret_bv_xor_sparse()
33
+ {
34
+ FrtBitVector * _bv = frt_bv_xor(bv, bv);
35
+ free(_bv);
36
+ }
37
+ static void ferret_bv_not_sparse()
38
+ {
39
+ FrtBitVector * _bv = frt_bv_not(bv);
40
+ free(_bv);
41
+ }
42
/*
 * Dense variants: the same operations as the sparse ones, registered under
 * separate names so they are timed again after ferret_bv_set_dense() has
 * refilled the shared vector.
 */

/* AND on the densely populated vector. */
static void ferret_bv_and_dense()
{
    ferret_bv_and_sparse();
}

/* OR on the densely populated vector. */
static void ferret_bv_or_dense()
{
    ferret_bv_or_sparse();
}

/* XOR on the densely populated vector. */
static void ferret_bv_xor_dense()
{
    ferret_bv_xor_sparse();
}

/* NOT on the densely populated vector. */
static void ferret_bv_not_dense()
{
    ferret_bv_not_sparse();
}
58
+
59
+ static void ferret_bv_set_sparse()
60
+ {
61
+ int i;
62
+
63
+ for (i = SCAN_INC; i < SCAN_SIZE; i += SCAN_INC) {
64
+ frt_bv_set_fast(bv, i);
65
+ assert(frt_bv_get(bv, i) == 1);
66
+ assert(frt_bv_get(bv, i+1) == 0);
67
+ }
68
+ }
69
+
70
+ static void ferret_bv_scan_sparse()
71
+ {
72
+ int i, j;
73
+
74
+ for (i = 0; i < N; i++) {
75
+ frt_bv_scan_reset(bv);
76
+ for (j = SCAN_INC; j < SCAN_SIZE; j += SCAN_INC) {
77
+ assert(j == frt_bv_scan_next(bv));
78
+ }
79
+ assert(-1 == frt_bv_scan_next(bv));
80
+ }
81
+ }
82
+
83
+ static void ferret_bv_set_dense()
84
+ {
85
+ int i;
86
+ frt_bv_clear(bv);
87
+ for (i = 0; i < DENSE_SCAN_SIZE; i++) {
88
+ frt_bv_set(bv, i);
89
+ }
90
+ }
91
+
92
+ static void ferret_bv_scan_dense()
93
+ {
94
+ int i, j;
95
+
96
+ for (i = 0; i < N; i++) {
97
+ frt_bv_scan_reset(bv);
98
+ for (j = 0; j < DENSE_SCAN_SIZE; j++) {
99
+ assert(j == frt_bv_scan_next(bv));
100
+ }
101
+ assert(-1 == frt_bv_scan_next(bv));
102
+ }
103
+ }
104
+
105
+ BENCH(bitvector_implementations)
106
+ {
107
+ BM_SETUP(setup);
108
+
109
+ BM_ADD(ferret_bv_set_sparse);
110
+ BM_ADD(ferret_bv_scan_sparse);
111
+ BM_ADD(ferret_bv_and_sparse);
112
+ BM_ADD(ferret_bv_or_sparse);
113
+ BM_ADD(ferret_bv_not_sparse);
114
+ BM_ADD(ferret_bv_xor_sparse);
115
+
116
+ BM_ADD(ferret_bv_set_dense);
117
+ BM_ADD(ferret_bv_scan_dense);
118
+ BM_ADD(ferret_bv_and_dense);
119
+ BM_ADD(ferret_bv_or_dense);
120
+ BM_ADD(ferret_bv_not_dense);
121
+ BM_ADD(ferret_bv_xor_dense);
122
+ BM_TEARDOWN(teardown);
123
+ }
@@ -0,0 +1,118 @@
1
+ #include <string.h>
2
+ #include "frt_hash.h"
3
+ #include "benchmark.h"
4
+
5
+ #define N 20
6
+
7
+ static void ferret_hash()
8
+ {
9
+ int i;
10
+ for (i = 0; i < N; i++) {
11
+ FrtHash *h = frt_h_new_str(NULL, NULL);
12
+ const char **word;
13
+ char buf[100];
14
+ long res;
15
+ for (word = WORD_LIST; *word; word++) {
16
+ frt_h_set(h, *word, (void *)1);
17
+ }
18
+ for (word = WORD_LIST; *word; word++) {
19
+ strcpy(buf, *word);
20
+ res = (long)frt_h_get(h, buf);
21
+ }
22
+ frt_h_destroy(h);
23
+ }
24
+ }
25
+
26
+ BENCH(hash_implementations)
27
+ {
28
+ BM_ADD(ferret_hash);
29
+ }
30
+
31
+ static void standard_hash()
32
+ {
33
+ int i;
34
+ for (i = 0; i < N; i++) {
35
+ FrtHash *h = frt_h_new_str(NULL, NULL);
36
+ const char **word;
37
+ char buf[100];
38
+ long res;
39
+ for (word = WORD_LIST; *word; word++) {
40
+ frt_h_set(h, *word, (void *)1);
41
+ strcpy(buf, *word);
42
+ res = (long)frt_h_get(h, buf);
43
+ }
44
+ frt_h_destroy(h);
45
+ }
46
+ }
47
+
48
+ #define PERTURB_SHIFT 5
49
+ static const char *dummy_key = "";
50
+ static FrtHashEntry *h_lookup_str(FrtHash *ht, register const void *key)
51
+ {
52
+ register const unsigned long hash = frt_str_hash((const char *)key);
53
+ register unsigned int perturb;
54
+ register int mask = ht->mask;
55
+ register FrtHashEntry *he0 = ht->table;
56
+ register int i = hash & mask;
57
+ register FrtHashEntry *he = &he0[i];
58
+ register FrtHashEntry *freeslot = NULL;
59
+
60
+ if (he->key == NULL || he->key == key) {
61
+ he->hash = hash;
62
+ return he;
63
+ }
64
+ if (he->key == dummy_key) {
65
+ freeslot = he;
66
+ }
67
+ else {
68
+ if ((he->hash == hash)
69
+ && 0 == strcmp((const char *)he->key, (const char *)key)) {
70
+ return he;
71
+ }
72
+ }
73
+
74
+ for (perturb = hash;; perturb >>= PERTURB_SHIFT) {
75
+ i = (i << 2) + i + perturb + 1;
76
+ he = &he0[i & mask];
77
+ if (he->key == NULL) {
78
+ if (freeslot != NULL) {
79
+ he = freeslot;
80
+ }
81
+ he->hash = hash;
82
+ return he;
83
+ }
84
+ if (he->key == key
85
+ || (he->hash == hash
86
+ && he->key != dummy_key
87
+ && 0 == strcmp((const char *)he->key, (const char *)key))) {
88
+ return he;
89
+ }
90
+ if (he->key == dummy_key && freeslot == NULL) {
91
+ freeslot = he;
92
+ }
93
+ }
94
+ }
95
+
96
+ static void string_hash()
97
+ {
98
+ int i;
99
+ for (i = 0; i < N; i++) {
100
+ FrtHash *h = frt_h_new_str(NULL, NULL);
101
+ const char **word;
102
+ char buf[100];
103
+ long res;
104
+ h->lookup_i = &h_lookup_str;
105
+ for (word = WORD_LIST; *word; word++) {
106
+ frt_h_set(h, *word, (void *)1);
107
+ strcpy(buf, *word);
108
+ res = (long)frt_h_get(h, buf);
109
+ }
110
+ frt_h_destroy(h);
111
+ }
112
+ }
113
+
114
+ BENCH(specialized_string_hash)
115
+ {
116
+ BM_ADD(standard_hash);
117
+ BM_ADD(string_hash);
118
+ }
@@ -0,0 +1,40 @@
1
+ #include <string.h>
2
+ #include "benchmark.h"
3
+
4
+ #define N 10
5
+
6
+ static void do_strcmp()
7
+ {
8
+ char **word;
9
+ char buf[100];
10
+ int res, i;
11
+
12
+ for (i = 0; i < N; i++)
13
+ for (word = WORD_LIST; *word; word++) {
14
+ size_t len = strlen(*word);
15
+ memcpy(buf, *word, len+1);
16
+ res = strcmp(buf, *word);
17
+ }
18
+ }
19
+
20
+ static void do_strncmp()
21
+ {
22
+ char **word;
23
+ char buf[100];
24
+ int res, i;
25
+
26
+ for (i = 0; i < N; i++)
27
+ for (word = WORD_LIST; *word; word++) {
28
+ size_t len = strlen(*word);
29
+ memcpy(buf, *word, len+1);
30
+ res = strncmp(buf, *word, len + 1);
31
+ }
32
+ }
33
+
34
+ BENCH(strcmp_when_length_is_known)
35
+ {
36
+ BM_COUNT(6);
37
+ BM_DISCARD(1);
38
+ BM_ADD(do_strcmp);
39
+ BM_ADD(do_strncmp);
40
+ }
@@ -0,0 +1,93 @@
1
+ #include <string.h>
2
+ #include "benchmark.h"
3
+ #include "frt_config.h"
4
+ #include "frt_store.h"
5
+
6
+ #define N 10
7
+ #define write_byte(os, b) os->buf.buf[os->buf.pos++] = (frt_uchar)b
8
+
9
+ void my_os_write_voff_t(FrtOutStream *os, register off_t num)
10
+ {
11
+ if (!(num&0x7f)) {
12
+ if (os->buf.pos >= FRT_BUFFER_SIZE) {
13
+ frt_os_write_byte(os, (frt_uchar)num);
14
+ }
15
+ else {
16
+ write_byte(os, (frt_uchar)num);
17
+ }
18
+ }
19
+ else if (!(num&0x3fff)) {
20
+ if (os->buf.pos >= FRT_BUFFER_SIZE - 1) {
21
+ frt_os_write_byte(os, (frt_uchar)(0x80 | (0x3f & num))); num >>= 6;
22
+ frt_os_write_byte(os, (frt_uchar)num);
23
+ }
24
+ else {
25
+ write_byte(os, (frt_uchar)(0x80 | (0x3f & num))); num >>= 6;
26
+ write_byte(os, (frt_uchar)num);
27
+ }
28
+ }
29
+ else if (!(num&0x1fffff)) {
30
+ if (os->buf.pos >= FRT_BUFFER_SIZE - 2) {
31
+ frt_os_write_byte(os, (frt_uchar)(0xc0 | (0x1f & num))); num >>= 5;
32
+ frt_os_write_byte(os, (frt_uchar)(0xff| num)); num >>= 8;
33
+ frt_os_write_byte(os, (frt_uchar)num);
34
+ }
35
+ else {
36
+ write_byte(os, (frt_uchar)(0xc0 | (0x1f & num))); num >>= 5;
37
+ write_byte(os, (frt_uchar)(0xff| num)); num >>= 8;
38
+ write_byte(os, (frt_uchar)num);
39
+ }
40
+ }
41
+ else if (!(num&0xfffff)) {
42
+ if (os->buf.pos >= FRT_BUFFER_SIZE - 3) {
43
+ frt_os_write_byte(os, (frt_uchar)(0xe0 | (0x0f & num))); num >>= 4;
44
+ frt_os_write_byte(os, (frt_uchar)(0xff | num)); num >>= 8;
45
+ frt_os_write_byte(os, (frt_uchar)(0xff | num)); num >>= 8;
46
+ frt_os_write_byte(os, (frt_uchar)num);
47
+ }
48
+ else {
49
+ write_byte(os, (frt_uchar)(0xe0 | (0x0f & num))); num >>= 4;
50
+ write_byte(os, (frt_uchar)(0xff | num)); num >>= 8;
51
+ write_byte(os, (frt_uchar)(0xff | num)); num >>= 8;
52
+ write_byte(os, (frt_uchar)num);
53
+ }
54
+ }
55
+ }
56
+
57
+ static void vint_out()
58
+ {
59
+ int n;
60
+ off_t i;
61
+ FrtOutStream *os;
62
+
63
+ for (n = 0; n < N; n++) {
64
+ os = frt_ram_new_buffer();
65
+ for (i = 0; i < 10000000; i++) {
66
+ frt_os_write_voff_t(os, i);
67
+ }
68
+ frt_ram_destroy_buffer(os);
69
+ }
70
+
71
+ }
72
+
73
+ static void unrolled_vint_out()
74
+ {
75
+ int n;
76
+ off_t i;
77
+ FrtOutStream *os;
78
+
79
+ for (n = 0; n < N; n++) {
80
+ os = frt_ram_new_buffer();
81
+ for (i = 0; i < 10000000; i++) {
82
+ frt_os_write_voff_t(os, i);
83
+ }
84
+ frt_ram_destroy_buffer(os);
85
+ }
86
+
87
+ }
88
+
89
+ BENCH(vint_io)
90
+ {
91
+ BM_ADD(vint_out);
92
+ BM_ADD(unrolled_vint_out);
93
+ }
@@ -0,0 +1,21 @@
1
+ #// email.rl -*-C-*-
+ #// Ragel machine describing the shape of an e-mail address; it only
+ #// defines named sub-expressions and contains no actions.
2
+ %%{
3
+ machine Email;
4
+
5
+ #// RFC 2822 - matching email addresses
6
+ NO_WS_CTL = ( 1..8 | 11 | 12 | 14..31 | 127 );
+ #// ^ control codes, leaving out 0, 9, 10 and 13
7
+ ASCII = 1..127;
8
+ atext = [a-zA-Z0-9!#$%&\'*+\-/=?^_`{|}~];
+ #// ^ characters allowed in an unquoted atom
9
+ qtext = ( NO_WS_CTL | 33 | 35..91 | 93..126 );
+ #// ^ quoted-string content: everything printable except 34 and 92
10
+ dtext = ( NO_WS_CTL | 33..90 | 94..126 );
+ #// ^ domain-literal content: everything printable except 91..93
11
+ dot_atom = atext+ ('.' atext+)*;
+ #// ^ one or more atoms separated by single dots
12
+ text = ( 1..9 | 11 | 12 | 14..127 );
13
+ quoted_pair = '\\' text;
+ #// ^ a backslash followed by one `text` character
14
+ quoted_string = '"' ( qtext | quoted_pair )* '"';
15
+ domain_literal = '[' (dtext | quoted_pair)* ']';
16
+
17
+ local_part = dot_atom | quoted_string;
18
+ domain = dot_atom | domain_literal;
19
+
+ #// full address: local part, '@', domain
20
+ email = local_part '@' domain;
21
+ }%%
@@ -0,0 +1,5 @@
1
# Build script for the isomorfeus_ferret_ext native extension.
require 'mkmf'

# Extra compiler flags appended to mkmf's defaults.
$CFLAGS.concat(' -O2 -Wall -Wno-sizeof-pointer-div')

# Generate the Makefile for the extension.
create_makefile('isomorfeus_ferret_ext')