jk-ferret 0.11.8.2

Sign up to get free protection for your applications and to get access to all the features.
Files changed (228) hide show
  1. data/CHANGELOG +24 -0
  2. data/MIT-LICENSE +20 -0
  3. data/README +90 -0
  4. data/RELEASE_CHANGES +137 -0
  5. data/RELEASE_NOTES +60 -0
  6. data/Rakefile +443 -0
  7. data/TODO +109 -0
  8. data/TUTORIAL +231 -0
  9. data/bin/ferret-browser +79 -0
  10. data/ext/BZLIB_blocksort.c +1094 -0
  11. data/ext/BZLIB_bzlib.c +1578 -0
  12. data/ext/BZLIB_compress.c +672 -0
  13. data/ext/BZLIB_crctable.c +104 -0
  14. data/ext/BZLIB_decompress.c +626 -0
  15. data/ext/BZLIB_huffman.c +205 -0
  16. data/ext/BZLIB_randtable.c +84 -0
  17. data/ext/STEMMER_api.c +66 -0
  18. data/ext/STEMMER_libstemmer.c +93 -0
  19. data/ext/STEMMER_stem_ISO_8859_1_danish.c +337 -0
  20. data/ext/STEMMER_stem_ISO_8859_1_dutch.c +624 -0
  21. data/ext/STEMMER_stem_ISO_8859_1_english.c +1117 -0
  22. data/ext/STEMMER_stem_ISO_8859_1_finnish.c +762 -0
  23. data/ext/STEMMER_stem_ISO_8859_1_french.c +1246 -0
  24. data/ext/STEMMER_stem_ISO_8859_1_german.c +503 -0
  25. data/ext/STEMMER_stem_ISO_8859_1_hungarian.c +1230 -0
  26. data/ext/STEMMER_stem_ISO_8859_1_italian.c +1065 -0
  27. data/ext/STEMMER_stem_ISO_8859_1_norwegian.c +297 -0
  28. data/ext/STEMMER_stem_ISO_8859_1_porter.c +749 -0
  29. data/ext/STEMMER_stem_ISO_8859_1_portuguese.c +1017 -0
  30. data/ext/STEMMER_stem_ISO_8859_1_spanish.c +1093 -0
  31. data/ext/STEMMER_stem_ISO_8859_1_swedish.c +307 -0
  32. data/ext/STEMMER_stem_ISO_8859_2_romanian.c +998 -0
  33. data/ext/STEMMER_stem_KOI8_R_russian.c +700 -0
  34. data/ext/STEMMER_stem_UTF_8_danish.c +339 -0
  35. data/ext/STEMMER_stem_UTF_8_dutch.c +634 -0
  36. data/ext/STEMMER_stem_UTF_8_english.c +1125 -0
  37. data/ext/STEMMER_stem_UTF_8_finnish.c +768 -0
  38. data/ext/STEMMER_stem_UTF_8_french.c +1256 -0
  39. data/ext/STEMMER_stem_UTF_8_german.c +509 -0
  40. data/ext/STEMMER_stem_UTF_8_hungarian.c +1234 -0
  41. data/ext/STEMMER_stem_UTF_8_italian.c +1073 -0
  42. data/ext/STEMMER_stem_UTF_8_norwegian.c +299 -0
  43. data/ext/STEMMER_stem_UTF_8_porter.c +755 -0
  44. data/ext/STEMMER_stem_UTF_8_portuguese.c +1023 -0
  45. data/ext/STEMMER_stem_UTF_8_romanian.c +1004 -0
  46. data/ext/STEMMER_stem_UTF_8_russian.c +694 -0
  47. data/ext/STEMMER_stem_UTF_8_spanish.c +1097 -0
  48. data/ext/STEMMER_stem_UTF_8_swedish.c +309 -0
  49. data/ext/STEMMER_stem_UTF_8_turkish.c +2205 -0
  50. data/ext/STEMMER_utilities.c +478 -0
  51. data/ext/analysis.c +1710 -0
  52. data/ext/analysis.h +266 -0
  53. data/ext/api.h +26 -0
  54. data/ext/array.c +125 -0
  55. data/ext/array.h +62 -0
  56. data/ext/bitvector.c +96 -0
  57. data/ext/bitvector.h +594 -0
  58. data/ext/bzlib.h +282 -0
  59. data/ext/bzlib_private.h +503 -0
  60. data/ext/compound_io.c +384 -0
  61. data/ext/config.h +52 -0
  62. data/ext/document.c +159 -0
  63. data/ext/document.h +63 -0
  64. data/ext/except.c +102 -0
  65. data/ext/except.h +176 -0
  66. data/ext/extconf.rb +15 -0
  67. data/ext/ferret.c +416 -0
  68. data/ext/ferret.h +94 -0
  69. data/ext/field_index.c +262 -0
  70. data/ext/field_index.h +52 -0
  71. data/ext/filter.c +157 -0
  72. data/ext/fs_store.c +493 -0
  73. data/ext/global.c +458 -0
  74. data/ext/global.h +302 -0
  75. data/ext/hash.c +524 -0
  76. data/ext/hash.h +515 -0
  77. data/ext/hashset.c +192 -0
  78. data/ext/hashset.h +215 -0
  79. data/ext/header.h +58 -0
  80. data/ext/helper.c +63 -0
  81. data/ext/helper.h +21 -0
  82. data/ext/index.c +6804 -0
  83. data/ext/index.h +935 -0
  84. data/ext/internal.h +1019 -0
  85. data/ext/lang.c +10 -0
  86. data/ext/lang.h +68 -0
  87. data/ext/libstemmer.h +79 -0
  88. data/ext/mempool.c +88 -0
  89. data/ext/mempool.h +43 -0
  90. data/ext/modules.h +190 -0
  91. data/ext/multimapper.c +351 -0
  92. data/ext/multimapper.h +60 -0
  93. data/ext/posh.c +1006 -0
  94. data/ext/posh.h +973 -0
  95. data/ext/priorityqueue.c +149 -0
  96. data/ext/priorityqueue.h +155 -0
  97. data/ext/q_boolean.c +1621 -0
  98. data/ext/q_const_score.c +162 -0
  99. data/ext/q_filtered_query.c +212 -0
  100. data/ext/q_fuzzy.c +280 -0
  101. data/ext/q_match_all.c +149 -0
  102. data/ext/q_multi_term.c +673 -0
  103. data/ext/q_parser.c +3103 -0
  104. data/ext/q_phrase.c +1206 -0
  105. data/ext/q_prefix.c +98 -0
  106. data/ext/q_range.c +682 -0
  107. data/ext/q_span.c +2390 -0
  108. data/ext/q_term.c +337 -0
  109. data/ext/q_wildcard.c +167 -0
  110. data/ext/r_analysis.c +2626 -0
  111. data/ext/r_index.c +3468 -0
  112. data/ext/r_qparser.c +635 -0
  113. data/ext/r_search.c +4490 -0
  114. data/ext/r_store.c +513 -0
  115. data/ext/r_utils.c +1131 -0
  116. data/ext/ram_store.c +476 -0
  117. data/ext/scanner.c +895 -0
  118. data/ext/scanner.h +36 -0
  119. data/ext/scanner_mb.c +6701 -0
  120. data/ext/scanner_utf8.c +4415 -0
  121. data/ext/search.c +1864 -0
  122. data/ext/search.h +953 -0
  123. data/ext/similarity.c +151 -0
  124. data/ext/similarity.h +89 -0
  125. data/ext/sort.c +786 -0
  126. data/ext/stem_ISO_8859_1_danish.h +16 -0
  127. data/ext/stem_ISO_8859_1_dutch.h +16 -0
  128. data/ext/stem_ISO_8859_1_english.h +16 -0
  129. data/ext/stem_ISO_8859_1_finnish.h +16 -0
  130. data/ext/stem_ISO_8859_1_french.h +16 -0
  131. data/ext/stem_ISO_8859_1_german.h +16 -0
  132. data/ext/stem_ISO_8859_1_hungarian.h +16 -0
  133. data/ext/stem_ISO_8859_1_italian.h +16 -0
  134. data/ext/stem_ISO_8859_1_norwegian.h +16 -0
  135. data/ext/stem_ISO_8859_1_porter.h +16 -0
  136. data/ext/stem_ISO_8859_1_portuguese.h +16 -0
  137. data/ext/stem_ISO_8859_1_spanish.h +16 -0
  138. data/ext/stem_ISO_8859_1_swedish.h +16 -0
  139. data/ext/stem_ISO_8859_2_romanian.h +16 -0
  140. data/ext/stem_KOI8_R_russian.h +16 -0
  141. data/ext/stem_UTF_8_danish.h +16 -0
  142. data/ext/stem_UTF_8_dutch.h +16 -0
  143. data/ext/stem_UTF_8_english.h +16 -0
  144. data/ext/stem_UTF_8_finnish.h +16 -0
  145. data/ext/stem_UTF_8_french.h +16 -0
  146. data/ext/stem_UTF_8_german.h +16 -0
  147. data/ext/stem_UTF_8_hungarian.h +16 -0
  148. data/ext/stem_UTF_8_italian.h +16 -0
  149. data/ext/stem_UTF_8_norwegian.h +16 -0
  150. data/ext/stem_UTF_8_porter.h +16 -0
  151. data/ext/stem_UTF_8_portuguese.h +16 -0
  152. data/ext/stem_UTF_8_romanian.h +16 -0
  153. data/ext/stem_UTF_8_russian.h +16 -0
  154. data/ext/stem_UTF_8_spanish.h +16 -0
  155. data/ext/stem_UTF_8_swedish.h +16 -0
  156. data/ext/stem_UTF_8_turkish.h +16 -0
  157. data/ext/stopwords.c +410 -0
  158. data/ext/store.c +698 -0
  159. data/ext/store.h +799 -0
  160. data/ext/symbol.c +10 -0
  161. data/ext/symbol.h +23 -0
  162. data/ext/term_vectors.c +73 -0
  163. data/ext/threading.h +31 -0
  164. data/ext/win32.h +62 -0
  165. data/lib/ferret.rb +30 -0
  166. data/lib/ferret/browser.rb +246 -0
  167. data/lib/ferret/browser/s/global.js +192 -0
  168. data/lib/ferret/browser/s/style.css +148 -0
  169. data/lib/ferret/browser/views/document/list.rhtml +49 -0
  170. data/lib/ferret/browser/views/document/show.rhtml +27 -0
  171. data/lib/ferret/browser/views/error/index.rhtml +7 -0
  172. data/lib/ferret/browser/views/help/index.rhtml +8 -0
  173. data/lib/ferret/browser/views/home/index.rhtml +29 -0
  174. data/lib/ferret/browser/views/layout.rhtml +22 -0
  175. data/lib/ferret/browser/views/term-vector/index.rhtml +4 -0
  176. data/lib/ferret/browser/views/term/index.rhtml +199 -0
  177. data/lib/ferret/browser/views/term/termdocs.rhtml +1 -0
  178. data/lib/ferret/browser/webrick.rb +14 -0
  179. data/lib/ferret/document.rb +130 -0
  180. data/lib/ferret/field_infos.rb +44 -0
  181. data/lib/ferret/field_symbol.rb +87 -0
  182. data/lib/ferret/index.rb +973 -0
  183. data/lib/ferret/number_tools.rb +157 -0
  184. data/lib/ferret/version.rb +3 -0
  185. data/setup.rb +1555 -0
  186. data/test/long_running/largefile/tc_largefile.rb +46 -0
  187. data/test/test_all.rb +5 -0
  188. data/test/test_helper.rb +29 -0
  189. data/test/test_installed.rb +1 -0
  190. data/test/threading/number_to_spoken.rb +132 -0
  191. data/test/threading/thread_safety_index_test.rb +88 -0
  192. data/test/threading/thread_safety_read_write_test.rb +73 -0
  193. data/test/threading/thread_safety_test.rb +133 -0
  194. data/test/unit/analysis/tc_analyzer.rb +550 -0
  195. data/test/unit/analysis/tc_token_stream.rb +653 -0
  196. data/test/unit/index/tc_index.rb +867 -0
  197. data/test/unit/index/tc_index_reader.rb +699 -0
  198. data/test/unit/index/tc_index_writer.rb +447 -0
  199. data/test/unit/index/th_doc.rb +332 -0
  200. data/test/unit/query_parser/tc_query_parser.rb +238 -0
  201. data/test/unit/search/tc_filter.rb +156 -0
  202. data/test/unit/search/tc_fuzzy_query.rb +147 -0
  203. data/test/unit/search/tc_index_searcher.rb +67 -0
  204. data/test/unit/search/tc_multi_searcher.rb +128 -0
  205. data/test/unit/search/tc_multiple_search_requests.rb +58 -0
  206. data/test/unit/search/tc_search_and_sort.rb +179 -0
  207. data/test/unit/search/tc_sort.rb +49 -0
  208. data/test/unit/search/tc_sort_field.rb +27 -0
  209. data/test/unit/search/tc_spans.rb +190 -0
  210. data/test/unit/search/tm_searcher.rb +436 -0
  211. data/test/unit/store/tc_fs_store.rb +115 -0
  212. data/test/unit/store/tc_ram_store.rb +35 -0
  213. data/test/unit/store/tm_store.rb +34 -0
  214. data/test/unit/store/tm_store_lock.rb +68 -0
  215. data/test/unit/tc_document.rb +81 -0
  216. data/test/unit/tc_field_symbol.rb +26 -0
  217. data/test/unit/ts_analysis.rb +2 -0
  218. data/test/unit/ts_index.rb +2 -0
  219. data/test/unit/ts_largefile.rb +4 -0
  220. data/test/unit/ts_query_parser.rb +2 -0
  221. data/test/unit/ts_search.rb +2 -0
  222. data/test/unit/ts_store.rb +2 -0
  223. data/test/unit/ts_utils.rb +2 -0
  224. data/test/unit/utils/tc_bit_vector.rb +295 -0
  225. data/test/unit/utils/tc_number_tools.rb +117 -0
  226. data/test/unit/utils/tc_priority_queue.rb +106 -0
  227. data/test/utils/content_generator.rb +226 -0
  228. metadata +319 -0
@@ -0,0 +1,22 @@
1
+ <?xml version="1.0" encoding="utf-8"?>
2
+ <!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd" >
3
+ <html xmlns="http://www.w3.org/1999/xhtml">
4
+ <head>
5
+ <title>Ferret-Browser:: <%= @path %></title>
6
+ <link rel="shortcut icon" type="image/x-icon" href="/s/i/favicon.ico" />
7
+ <link rel="stylesheet" type="text/css" href="/s/style.css" />
8
+ <script type="text/javascript" src="/s/global.js"></script>
9
+ </head>
10
+ <body id="<%= options[:controller] %>">
11
+ <ul id="top-menu">
12
+ <li class="home"><a href="/">Home</a></li>
13
+ <li class="document"><a href="/document">Documents</a></li>
14
+ <li class="term"><a href="/term">Terms</a></li>
15
+ <li class="term-vector"><a href="/term-vector">Term Vectors</a></li>
16
+ <li class="help"><a href="/help">Help</a></li>
17
+ </ul>
18
+ <div id="content">
19
+ <%= content %>
20
+ </div>
21
+ </body>
22
+ </html>
@@ -0,0 +1,4 @@
1
+ <h3>Term Vectors</h3>
2
+ <p>
3
+ Nothing to see here yet
4
+ </p>
@@ -0,0 +1,199 @@
1
+ <h3>Terms</h3>
2
+ <form action="" method="get">
3
+ <label for="field">Choose a field:
4
+ <select id="field" name="field" onchange="location.href='/term/show/' + this.value;">
5
+ <option value="">--</option>
6
+ <% @reader.field_infos.each do |fi| next unless fi.indexed? %>
7
+ <option value="<%=fi.name%>" <%= 'selected="selected"' if @field == fi.name %>><%=fi.name%></option>
8
+ <% end %>
9
+ </select>
10
+ </label>
11
+ </form>
12
+ <hr/>
13
+ <% if @terms %>
14
+ <div style="float:left; padding-right:30px;">
15
+ <h4>Field: <%= @field %></h4>
16
+ <form action="" onsubmit="return findTerm(document.getElementById('choose-term').value);">
17
+ <input id="choose-term" type="text"/></form>
18
+ <hr/>
19
+ <div id="terms">
20
+ </div>
21
+ <hr/>
22
+ <form action="">
23
+ <input type="submit" value="Previous" onclick="return prevTerm();"/>
24
+ <input type="submit" value="Next" onclick="return nextTerm();"/><br/>
25
+ </form>
26
+ </div>
27
+ <div style="float:left; padding-right:30px;">
28
+ <h4>Documents
29
+ <input name="show-documents" type="checkbox" id="show-documents" onchange="displayDoc();"/></h4>
30
+ <div id="doc-display">
31
+ <form action="" onsubmit="return findDoc(document.getElementById('choose-doc').value);">
32
+ <input id="choose-doc" type="text"/></form>
33
+ <hr/>
34
+ <div id="documents">
35
+ </div>
36
+ <hr/>
37
+ <form action="">
38
+ <input type="submit" value="Previous" onclick="return prevDoc();"/>
39
+ <input type="submit" value="Next" onclick="return nextDoc();"/><br/>
40
+ </form>
41
+ </div>
42
+ </div>
43
+ <script type="text/javascript"><!--
44
// Builds a table whose rows each have a header cell on the left.
// `content` is an array of [label, value] pairs. The label is placed in a
// <th> as text. A value that is an Object (for example a DOM node) is
// appended to the <td> as-is; any other value is wrapped in a text node.
// Returns the new <table> element (class "left-headed").
function createSideHeaderedTable(content) {
  // Turns one [label, value] pair into a <tr>.
  function buildRow(pair) {
    var row = document.createElement("tr");

    var header = document.createElement("th");
    header.appendChild(document.createTextNode(pair[0]));
    row.appendChild(header);

    var cell = document.createElement("td");
    var value = pair[1];
    if (value instanceof Object) {
      cell.appendChild(value);
    } else {
      cell.appendChild(document.createTextNode(value));
    }
    row.appendChild(cell);

    return row;
  }

  var table = document.createElement("table");
  table.setAttribute('cellpadding', "0");
  table.setAttribute('cellspacing', "0");
  table.className = "left-headed";

  for (var rowIndex = 0; rowIndex < content.length; rowIndex++) {
    table.appendChild(buildRow(content[rowIndex]));
  }
  return table;
}
62
+
63
+ var terms = <%= @terms %>;
64
+ var numTerms = terms.length;
65
+ var termI = 0;
66
+ var termDiv = document.getElementById("terms");
67
+ var termChooserTxt = document.getElementById('choose-term');
68
+
69
+ var docs = null;
70
+ var numDocs = 0;
71
+ var docI = 0;
72
+ var docDiv = document.getElementById("documents");
73
+ var showDocs = document.getElementById("show-documents");
74
+ var docChooserTxt = document.getElementById('choose-doc');
75
+ var docDisplayDiv = document.getElementById('doc-display');
76
+
77
// Asynchronously fetches the document list for the current term from
// /term/termdocs/<field>/<term> and, once it arrives, stores it in `docs`,
// resets the document cursor and redraws the document panel.
// Always returns false so it can be used directly from event handlers.
function getDocs() {
  // Remember which term this request is for, so a late response cannot
  // overwrite the documents of a term the user has since moved on to.
  var requestedTermI = termI;
  var req = new XMLHttpRequest();
  // The term is arbitrary index text; encode it so characters such as
  // '?', '#', '%' or spaces do not corrupt the request path.
  req.open('GET', '/term/termdocs/<%=@field%>/' + encodeURIComponent(terms[requestedTermI][0]), true);
  req.onreadystatechange = function() {
    if (req.readyState != 4) return;
    if (requestedTermI != termI) return; // stale response, ignore it
    if (req.status < 200 || req.status >= 300) {
      // Do not try to evaluate an error page as data.
      alert('Could not load documents for term ' + terms[requestedTermI][0]);
      return;
    }
    // NOTE(review): eval() executes whatever the server returns. The
    // response is expected to be an array literal produced by the termdocs
    // view; consider JSON.parse where the target browsers support it.
    docs = eval(req.responseText);
    numDocs = docs.length;
    docI = 0;
    displayDoc();
  };
  req.send(/*no params*/null);
  return false;
}
91
// Redraws the document panel for the current term.
// When the "show documents" checkbox is off the panel is emptied and hidden.
// When it is on and no document list has been loaded yet, a fetch is started
// via getDocs(); otherwise the document at index docI is rendered.
// Always returns false.
function displayDoc() {
  if (!showDocs.checked) {
    docDiv.innerHTML = '';
    docDisplayDiv.style.display = 'none';
    return false;
  }

  docDisplayDiv.style.display = 'block';
  if (docs == null) {
    getDocs();
    return false;
  }

  docChooserTxt.value = docs[docI][0];

  // Link to the document's own page, labelled with the document id.
  var docLink = document.createElement('a');
  docLink.setAttribute('href', "/document/show/" + docs[docI][0]);
  docLink.appendChild(document.createTextNode(docs[docI][0]));

  var rows = [
    ['index', '' + (docI + 1) + ' of ' + numDocs],
    ['document', docLink],
    ['number of occurrences', docs[docI][1]],
    ['positions', docs[docI][2].join(',')]
  ];
  var table = createSideHeaderedTable(rows);

  if (docDiv.firstChild) {
    docDiv.replaceChild(table, docDiv.firstChild);
  } else {
    docDiv.appendChild(table);
  }
  return false;
}
116
// Moves the document cursor forward by one and redraws, or tells the user
// there is nothing further. Always returns false.
function nextDoc() {
  var atEnd = !(docI < numDocs - 1);
  if (atEnd) {
    alert('No more docs. Already at the end.');
  } else {
    docI++;
    displayDoc();
  }
  return false;
}
123
// Moves the document cursor back by one and redraws, or tells the user
// the first document is already shown. Always returns false.
function prevDoc() {
  var atStart = !(docI > 0);
  if (atStart) {
    alert('No more docs. Already at the start.');
  } else {
    docI -= 1;
    displayDoc();
  }
  return false;
}
130
// Positions the document cursor using bsearch() over the loaded document
// list, comparing each entry's first element against `doc`, then redraws.
// If the search result is past the end of the list the cursor is moved back
// by one. Does nothing when no documents are loaded. Always returns false.
function findDoc(doc) {
  var haveDocs = docs && docs.length > 0;
  if (!haveDocs) return false;

  var entryBeforeTarget = function(a, b) {return a[0] < b};
  docI = bsearch(docs, doc, entryBeforeTarget);
  if (docI >= docs.length) docI -= 1;
  displayDoc();
  return false;
}
138
// Renders the term at index termI into the term panel and refreshes the
// document panel for it. Any previously loaded document list is discarded,
// because it belonged to the previously displayed term.
function displayTerm() {
  docs = null;
  // Nothing to render when the field has no terms; indexing into an empty
  // list would otherwise throw.
  if (numTerms == 0) return;

  var current = terms[termI];
  termChooserTxt.value = current[0];
  // No trailing comma after the last row: some older browsers count it as
  // an extra undefined element, which breaks the table builder.
  var table = createSideHeaderedTable([
    ['index', '' + (termI + 1) + ' of ' + numTerms],
    ['term', current[0]],
    ['number of documents', current[1]]
  ]);
  // Same handling as displayDoc(): replace the existing child if there is
  // one, otherwise append.
  if (termDiv.firstChild) termDiv.replaceChild(table, termDiv.firstChild);
  else termDiv.appendChild(table);
  displayDoc();
}
149
// Moves the term cursor forward by one and redraws, or tells the user the
// last term is already shown. Always returns false.
function nextTerm() {
  var atEnd = !(termI < numTerms - 1);
  if (atEnd) {
    alert('No more terms. Already at the end.');
  } else {
    termI++;
    displayTerm();
  }
  return false;
}
156
// Moves the term cursor back by one and redraws, or tells the user the
// first term is already shown. Always returns false.
function prevTerm() {
  var atStart = !(termI > 0);
  if (atStart) {
    alert('No more terms. Already at the start.');
  } else {
    termI -= 1;
    displayTerm();
  }
  return false;
}
163
// Looks `term` up in the term list with bsearch() and, on an exact match,
// makes it the current term and redraws. When the term is not in the list
// the user is told so and the current term is left untouched.
// Always returns false so it can be used as a form onsubmit handler.
function findTerm(term) {
  if (terms && terms.length > 0) {
    // Search into a local first: the result may be one past the end of the
    // list, and the shared cursor must only move on a successful match.
    var foundI = bsearch(terms, term, function(a, b) {return a[0] < b});
    var match = (terms[foundI]||[])[0];
    if (term != match) {
      alert('Term <%=@field%>:' + term + ' not found in index');
    } else {
      termI = foundI;
      displayTerm();
    }
  }
  return false;
}
175
+
176
// Suggestion source for the term text box; it is handed to an
// AutoSuggestControl, which calls requestSuggestions().
function TermSuggestionProvider() {
}

// Collects up to 10 terms that start with the text currently in the
// control's text box and passes them to oAutoSuggestControl.autosuggest()
// together with bTypeAhead. Nothing is done while the text box is empty.
TermSuggestionProvider.prototype.requestSuggestions = function(oAutoSuggestControl, bTypeAhead) {
  var aSuggestions = [];
  var sTextboxValue = oAutoSuggestControl.textbox.value;

  if (sTextboxValue.length > 0) {
    // Declared with `var` so the index does not leak into the global scope.
    var start = bsearch(terms, sTextboxValue, function(a, b) {return a[0] < b});
    // Walk forward from the search position while the terms still begin
    // with the typed text.
    for (var i = start; i < terms.length
                        && terms[i][0].indexOf(sTextboxValue) == 0
                        && aSuggestions.length < 10; i++) {
      aSuggestions.push(terms[i][0]);
    }
    oAutoSuggestControl.autosuggest(aSuggestions, bTypeAhead);
  }
};
192
// Once the page has loaded, attach auto-suggest to the term text box.
window.onload = function() {
  new AutoSuggestControl(termChooserTxt, new TermSuggestionProvider());
};

// Show the first term straight away.
displayTerm();
196
+ //-->
197
+ </script>
198
+ <% end %>
199
+ <hr/>
@@ -0,0 +1 @@
1
+ <%= @reader.term_positions_for(@field, @term).to_json(:fast) %>
@@ -0,0 +1,14 @@
1
module WEBrick
  # WEBrick servlet that forwards requests to a Ferret::Browser::Delegator
  # and copies the delegator's answer onto the response.
  class FerretBrowserHandler < WEBrick::HTTPServlet::AbstractServlet
    # Creates the handler for +server+. +reader+ and +path+ are passed
    # unchanged to Ferret::Browser::Delegator.new.
    def initialize(server, reader, path)
      super(server)
      @delegator = Ferret::Browser::Delegator.new(reader, path)
    end

    # Handles a GET request: runs the delegator with the request's meta
    # variables and assigns the result, in order, to the response's status,
    # content type and body. Only do_GET is defined in this class.
    def do_GET(req, res)
      answer = @delegator.run(req.meta_vars)
      res.status, res.content_type, res.body = answer
    end
  end
end
@@ -0,0 +1,130 @@
1
module Ferret
  # Instead of using documents to add data to an index you can use Hashes and
  # Arrays. The only real benefits of using a Document over a Hash are pretty
  # printing and the boost attribute. You can add the boost attribute to
  # Hashes and Arrays using the BoostMixin. For example:
  #
  #    class Hash
  #      include BoostMixin
  #    end
  #
  #    class Array
  #      include BoostMixin
  #    end
  #
  #    class String
  #      include BoostMixin
  #    end
  module BoostMixin
    attr_accessor :boost
  end

  # Documents are the unit of indexing and search.
  #
  # A Document is a set of fields. Each field has a name and an array of
  # textual values. If you are coming from a Lucene background you should note
  # that Fields don't have any properties except for the boost property. You
  # should use the Ferret::Index::FieldInfos class to set field properties
  # across the whole index instead.
  #
  # === Boost
  #
  # The boost attribute makes a Document more important in the index. That is,
  # you can increase the score of a match for queries that match a particular
  # document, making it more likely to appear at the top of search results.
  # You may, for example, want to boost products that have a higher user
  # rating so that they are more likely to appear in search results.
  #
  # Note that fields which are _not_ stored (see Ferret::Index::FieldInfos)
  # are _not_ available in documents retrieved from the index, e.g.
  # Ferret::Search::Searcher#doc or Ferret::Index::IndexReader#doc.
  #
  # Note that modifying a Document retrieved from the index will not modify
  # the document contained within the index. You need to delete the old
  # version of the document and add the new version of the document.
  class Document < Hash
    include BoostMixin

    # Create a new Document object with a boost. The boost defaults to 1.0.
    def initialize(boost = 1.0)
      @boost = boost
    end

    # Return true if +o+ is a Document with the same boost and the same
    # fields, i.e. the same set of keys, each mapping to an equal (==) value.
    # The order in which the fields were added does not matter.
    def eql?(o)
      o.is_a?(Document) && o.boost == @boost && size == o.size &&
        all? { |name, value| o.key?(name) && o[name] == value }
    end
    alias :== :eql?

    # Create a string representation of the document: one line per field,
    # sorted by field name, followed by "^boost" when the boost is not 1.0.
    def to_s
      field_lines = keys.sort_by { |key| key.to_s }.map do |key|
        val = self[key]
        # A plain Array is rendered inline; a Field (an Array subclass) is
        # deliberately left to its own #to_s so its boost is shown.
        val_str = if val.instance_of?(Array) then %{["#{val.join('", "')}"]}
                  elsif val.is_a?(Field) then val.to_s
                  else %{"#{val}"}
                  end
        " :#{key} => #{val_str}"
      end
      closing = @boost == 1.0 ? "}" : "}^#{@boost}"
      (["Document {"] + field_lines + [closing]).join("\n")
    end
  end

  # A Field is a section of a Document. A Field is basically an array with a
  # boost attribute. It also provides pretty printing of the field with the
  # #to_s method.
  #
  # === Boost
  #
  # The boost attribute makes a field more important in the index. That is,
  # you can increase the score of a match for queries that match terms in a
  # boosted field. You may, for example, want to boost a title field so that
  # matches that match in the :title field score more highly than matches that
  # match in the :contents field.
  #
  # Note: If you'd like to use boosted fields without having to use
  # the Field class you can just include the BoostMixin in the Array class.
  # See BoostMixin.
  class Field < Array
    include BoostMixin

    # Create a new Field object. You can pass data to the field as either a
    # string;
    #
    #    f = Field.new("This is the fields data")
    #
    # or as an array of strings;
    #
    #    f = Field.new(["this", "is", "an", "array", "of", "field", "data"])
    #
    # Of course Fields can also be boosted;
    #
    #    f = Field.new("field data", 1000.0)
    #
    # Anything that is not an Array is converted with #to_s and stored as
    # the single element of the field.
    def initialize(data = [], boost = 1.0)
      @boost = boost
      if data.is_a?(Array)
        concat(data)
      else
        self << data.to_s
      end
    end

    # Return true if +o+ is a Field with the same boost and the same
    # elements (element comparison is delegated to Array).
    def eql?(o)
      o.is_a?(Field) && o.boost == @boost && super(o)
    end
    alias :== :eql?

    # Concatenate +o+ onto this field, returning a new Field that keeps this
    # field's boost.
    def +(o)
      Field.new(super(o), boost)
    end

    # Render the field as a quoted list, followed by "^boost" when the boost
    # is not 1.0.
    def to_s
      text = %{["#{join('", "')}"]}
      @boost == 1.0 ? text : "#{text}^#{@boost}"
    end
  end
end
@@ -0,0 +1,44 @@
1
+ require 'yaml'
2
+
3
module Ferret
  module Index
    class FieldInfos
      # Load FieldInfos from a YAML string. The YAML should look something
      # like this:
      #
      #   default:
      #     store: :yes
      #     index: :yes
      #     term_vector: :no
      #
      #   fields:
      #     id:
      #       index: :untokenized
      #       term_vector: :no
      #
      #     title:
      #       boost: 20.0
      #       term_vector: :no
      #
      #     content:
      #       term_vector: :with_positions_offsets
      #
      # The +default+ mapping is passed to FieldInfos.new and every entry
      # under +fields+ is added with #add_field. String keys are converted
      # to Symbols at every nesting level first. An empty document yields a
      # FieldInfos built with no defaults and no fields.
      #
      # Raises ArgumentError if the YAML document is not a mapping.
      def self.load(yaml_str)
        # Symbol values such as ":yes" are part of the documented format, so
        # Symbol must be permitted explicitly; newer YAML loaders refuse it
        # by default. Aliases stay enabled, as they were with YAML.load.
        info = YAML.safe_load(yaml_str,
                              :permitted_classes => [Symbol],
                              :aliases => true) || {}
        unless info.is_a?(Hash)
          raise ArgumentError,
                "FieldInfos YAML must be a mapping, got #{info.class}"
        end

        convert_strings_to_symbols(info)
        fis = FieldInfos.new(info[:default])
        fields = info[:fields]
        fields.each { |name, options| fis.add_field(name, options) } if fields
        fis
      end

      # Replace every String key in +hash+ (and in any nested Hash values)
      # with the corresponding Symbol, modifying +hash+ in place.
      def self.convert_strings_to_symbols(hash)
        # Iterate over a snapshot of the keys because the hash is modified
        # inside the loop.
        hash.keys.each do |key|
          value = hash[key]
          convert_strings_to_symbols(value) if value.is_a?(Hash)
          next unless key.is_a?(String)
          hash[key.to_sym] = hash.delete(key)
        end
        hash
      end
      # A bare `private` does not apply to methods defined with `def self.`,
      # so the helper is hidden explicitly.
      private_class_method :convert_strings_to_symbols
    end
  end
end
@@ -0,0 +1,87 @@
1
module Ferret
  FIELD_TYPES = [:integer, :float, :string, :byte]

  # BlankSlate is a class with no public instance methods except those whose
  # names start with a double underscore (such as __send__ and __id__). It is
  # useful for creating proxy classes. It is currently used by the
  # FieldSymbol class, which is a proxy to the Symbol class.
  class BlankSlate
    instance_methods.each do |name|
      next if name =~ /^__/
      undef_method(name)
    end
  end

  # The FieldSymbolMethods module contains the methods that are added to both
  # the Symbol class and the FieldSymbol class. These methods allow you to
  # easily set the type of a field by calling a method on a symbol.
  #
  # Right now this is only useful for sorting and grouping, but some day
  # Ferret may have typed fields, in which case these methods will come in
  # handy.
  #
  # The available types are specified in Ferret::FIELD_TYPES.
  #
  # == Examples
  #
  #   index.search(query, :sort => :title.string.desc)
  #
  #   index.search(query, :sort => [:price.float, :count.integer.desc])
  #
  #   index.search(query, :group_by => :catalogue.string)
  #
  # == Note
  #
  # If you set the field type multiple times, the last type specified will be
  # the type used. For example;
  #
  #   puts :title.integer.float.byte.string.type.inspect # => :string
  #
  # Calling #desc twice will set desc? to false
  #
  #   puts :title.desc?           # => false
  #   puts :title.desc.desc?      # => true
  #   puts :title.desc.desc.desc? # => false
  module FieldSymbolMethods
    # One method per field type. Each wraps the receiver in a FieldSymbol of
    # that type, carrying over the receiver's descending flag.
    FIELD_TYPES.each do |field_type|
      define_method(field_type) do
        descending = respond_to?(:desc?) ? desc? : false
        typed = FieldSymbol.new(self, descending)
        typed.type = field_type
        typed
      end
    end

    # Set a field to be a descending field. This only makes sense in sort
    # specifications. The receiver's descending flag is flipped and its type
    # is carried over to the returned FieldSymbol.
    def desc
      flipped = respond_to?(:desc?) ? !desc? : true
      reversed = FieldSymbol.new(self, flipped)
      reversed.type = type if respond_to?(:type)
      reversed
    end

    # Return whether or not this field should be a descending field
    def desc?
      @desc == true
    end

    # Return the type of this field, or nil if none has been set
    def type
      @type || nil
    end
  end

  # See FieldSymbolMethods
  class FieldSymbol < BlankSlate
    include FieldSymbolMethods

    attr_writer :type, :desc

    # Wrap +symbol+, optionally marking it as descending.
    def initialize(symbol, desc = false)
      @symbol, @desc = symbol, desc
    end

    # Forward every call this class does not define to the wrapped symbol.
    def method_missing(name, *arguments)
      @symbol.__send__(name, *arguments)
    end
  end
end
83
+
84
+ # See FieldSymbolMethods
85
# Mix the field-type and sort-direction helpers into every Symbol.
Symbol.send(:include, Ferret::FieldSymbolMethods)