jk-ferret 0.11.8.2
Sign up to get free protection for your applications and to get access to all the features.
- data/CHANGELOG +24 -0
- data/MIT-LICENSE +20 -0
- data/README +90 -0
- data/RELEASE_CHANGES +137 -0
- data/RELEASE_NOTES +60 -0
- data/Rakefile +443 -0
- data/TODO +109 -0
- data/TUTORIAL +231 -0
- data/bin/ferret-browser +79 -0
- data/ext/BZLIB_blocksort.c +1094 -0
- data/ext/BZLIB_bzlib.c +1578 -0
- data/ext/BZLIB_compress.c +672 -0
- data/ext/BZLIB_crctable.c +104 -0
- data/ext/BZLIB_decompress.c +626 -0
- data/ext/BZLIB_huffman.c +205 -0
- data/ext/BZLIB_randtable.c +84 -0
- data/ext/STEMMER_api.c +66 -0
- data/ext/STEMMER_libstemmer.c +93 -0
- data/ext/STEMMER_stem_ISO_8859_1_danish.c +337 -0
- data/ext/STEMMER_stem_ISO_8859_1_dutch.c +624 -0
- data/ext/STEMMER_stem_ISO_8859_1_english.c +1117 -0
- data/ext/STEMMER_stem_ISO_8859_1_finnish.c +762 -0
- data/ext/STEMMER_stem_ISO_8859_1_french.c +1246 -0
- data/ext/STEMMER_stem_ISO_8859_1_german.c +503 -0
- data/ext/STEMMER_stem_ISO_8859_1_hungarian.c +1230 -0
- data/ext/STEMMER_stem_ISO_8859_1_italian.c +1065 -0
- data/ext/STEMMER_stem_ISO_8859_1_norwegian.c +297 -0
- data/ext/STEMMER_stem_ISO_8859_1_porter.c +749 -0
- data/ext/STEMMER_stem_ISO_8859_1_portuguese.c +1017 -0
- data/ext/STEMMER_stem_ISO_8859_1_spanish.c +1093 -0
- data/ext/STEMMER_stem_ISO_8859_1_swedish.c +307 -0
- data/ext/STEMMER_stem_ISO_8859_2_romanian.c +998 -0
- data/ext/STEMMER_stem_KOI8_R_russian.c +700 -0
- data/ext/STEMMER_stem_UTF_8_danish.c +339 -0
- data/ext/STEMMER_stem_UTF_8_dutch.c +634 -0
- data/ext/STEMMER_stem_UTF_8_english.c +1125 -0
- data/ext/STEMMER_stem_UTF_8_finnish.c +768 -0
- data/ext/STEMMER_stem_UTF_8_french.c +1256 -0
- data/ext/STEMMER_stem_UTF_8_german.c +509 -0
- data/ext/STEMMER_stem_UTF_8_hungarian.c +1234 -0
- data/ext/STEMMER_stem_UTF_8_italian.c +1073 -0
- data/ext/STEMMER_stem_UTF_8_norwegian.c +299 -0
- data/ext/STEMMER_stem_UTF_8_porter.c +755 -0
- data/ext/STEMMER_stem_UTF_8_portuguese.c +1023 -0
- data/ext/STEMMER_stem_UTF_8_romanian.c +1004 -0
- data/ext/STEMMER_stem_UTF_8_russian.c +694 -0
- data/ext/STEMMER_stem_UTF_8_spanish.c +1097 -0
- data/ext/STEMMER_stem_UTF_8_swedish.c +309 -0
- data/ext/STEMMER_stem_UTF_8_turkish.c +2205 -0
- data/ext/STEMMER_utilities.c +478 -0
- data/ext/analysis.c +1710 -0
- data/ext/analysis.h +266 -0
- data/ext/api.h +26 -0
- data/ext/array.c +125 -0
- data/ext/array.h +62 -0
- data/ext/bitvector.c +96 -0
- data/ext/bitvector.h +594 -0
- data/ext/bzlib.h +282 -0
- data/ext/bzlib_private.h +503 -0
- data/ext/compound_io.c +384 -0
- data/ext/config.h +52 -0
- data/ext/document.c +159 -0
- data/ext/document.h +63 -0
- data/ext/except.c +102 -0
- data/ext/except.h +176 -0
- data/ext/extconf.rb +15 -0
- data/ext/ferret.c +416 -0
- data/ext/ferret.h +94 -0
- data/ext/field_index.c +262 -0
- data/ext/field_index.h +52 -0
- data/ext/filter.c +157 -0
- data/ext/fs_store.c +493 -0
- data/ext/global.c +458 -0
- data/ext/global.h +302 -0
- data/ext/hash.c +524 -0
- data/ext/hash.h +515 -0
- data/ext/hashset.c +192 -0
- data/ext/hashset.h +215 -0
- data/ext/header.h +58 -0
- data/ext/helper.c +63 -0
- data/ext/helper.h +21 -0
- data/ext/index.c +6804 -0
- data/ext/index.h +935 -0
- data/ext/internal.h +1019 -0
- data/ext/lang.c +10 -0
- data/ext/lang.h +68 -0
- data/ext/libstemmer.h +79 -0
- data/ext/mempool.c +88 -0
- data/ext/mempool.h +43 -0
- data/ext/modules.h +190 -0
- data/ext/multimapper.c +351 -0
- data/ext/multimapper.h +60 -0
- data/ext/posh.c +1006 -0
- data/ext/posh.h +973 -0
- data/ext/priorityqueue.c +149 -0
- data/ext/priorityqueue.h +155 -0
- data/ext/q_boolean.c +1621 -0
- data/ext/q_const_score.c +162 -0
- data/ext/q_filtered_query.c +212 -0
- data/ext/q_fuzzy.c +280 -0
- data/ext/q_match_all.c +149 -0
- data/ext/q_multi_term.c +673 -0
- data/ext/q_parser.c +3103 -0
- data/ext/q_phrase.c +1206 -0
- data/ext/q_prefix.c +98 -0
- data/ext/q_range.c +682 -0
- data/ext/q_span.c +2390 -0
- data/ext/q_term.c +337 -0
- data/ext/q_wildcard.c +167 -0
- data/ext/r_analysis.c +2626 -0
- data/ext/r_index.c +3468 -0
- data/ext/r_qparser.c +635 -0
- data/ext/r_search.c +4490 -0
- data/ext/r_store.c +513 -0
- data/ext/r_utils.c +1131 -0
- data/ext/ram_store.c +476 -0
- data/ext/scanner.c +895 -0
- data/ext/scanner.h +36 -0
- data/ext/scanner_mb.c +6701 -0
- data/ext/scanner_utf8.c +4415 -0
- data/ext/search.c +1864 -0
- data/ext/search.h +953 -0
- data/ext/similarity.c +151 -0
- data/ext/similarity.h +89 -0
- data/ext/sort.c +786 -0
- data/ext/stem_ISO_8859_1_danish.h +16 -0
- data/ext/stem_ISO_8859_1_dutch.h +16 -0
- data/ext/stem_ISO_8859_1_english.h +16 -0
- data/ext/stem_ISO_8859_1_finnish.h +16 -0
- data/ext/stem_ISO_8859_1_french.h +16 -0
- data/ext/stem_ISO_8859_1_german.h +16 -0
- data/ext/stem_ISO_8859_1_hungarian.h +16 -0
- data/ext/stem_ISO_8859_1_italian.h +16 -0
- data/ext/stem_ISO_8859_1_norwegian.h +16 -0
- data/ext/stem_ISO_8859_1_porter.h +16 -0
- data/ext/stem_ISO_8859_1_portuguese.h +16 -0
- data/ext/stem_ISO_8859_1_spanish.h +16 -0
- data/ext/stem_ISO_8859_1_swedish.h +16 -0
- data/ext/stem_ISO_8859_2_romanian.h +16 -0
- data/ext/stem_KOI8_R_russian.h +16 -0
- data/ext/stem_UTF_8_danish.h +16 -0
- data/ext/stem_UTF_8_dutch.h +16 -0
- data/ext/stem_UTF_8_english.h +16 -0
- data/ext/stem_UTF_8_finnish.h +16 -0
- data/ext/stem_UTF_8_french.h +16 -0
- data/ext/stem_UTF_8_german.h +16 -0
- data/ext/stem_UTF_8_hungarian.h +16 -0
- data/ext/stem_UTF_8_italian.h +16 -0
- data/ext/stem_UTF_8_norwegian.h +16 -0
- data/ext/stem_UTF_8_porter.h +16 -0
- data/ext/stem_UTF_8_portuguese.h +16 -0
- data/ext/stem_UTF_8_romanian.h +16 -0
- data/ext/stem_UTF_8_russian.h +16 -0
- data/ext/stem_UTF_8_spanish.h +16 -0
- data/ext/stem_UTF_8_swedish.h +16 -0
- data/ext/stem_UTF_8_turkish.h +16 -0
- data/ext/stopwords.c +410 -0
- data/ext/store.c +698 -0
- data/ext/store.h +799 -0
- data/ext/symbol.c +10 -0
- data/ext/symbol.h +23 -0
- data/ext/term_vectors.c +73 -0
- data/ext/threading.h +31 -0
- data/ext/win32.h +62 -0
- data/lib/ferret.rb +30 -0
- data/lib/ferret/browser.rb +246 -0
- data/lib/ferret/browser/s/global.js +192 -0
- data/lib/ferret/browser/s/style.css +148 -0
- data/lib/ferret/browser/views/document/list.rhtml +49 -0
- data/lib/ferret/browser/views/document/show.rhtml +27 -0
- data/lib/ferret/browser/views/error/index.rhtml +7 -0
- data/lib/ferret/browser/views/help/index.rhtml +8 -0
- data/lib/ferret/browser/views/home/index.rhtml +29 -0
- data/lib/ferret/browser/views/layout.rhtml +22 -0
- data/lib/ferret/browser/views/term-vector/index.rhtml +4 -0
- data/lib/ferret/browser/views/term/index.rhtml +199 -0
- data/lib/ferret/browser/views/term/termdocs.rhtml +1 -0
- data/lib/ferret/browser/webrick.rb +14 -0
- data/lib/ferret/document.rb +130 -0
- data/lib/ferret/field_infos.rb +44 -0
- data/lib/ferret/field_symbol.rb +87 -0
- data/lib/ferret/index.rb +973 -0
- data/lib/ferret/number_tools.rb +157 -0
- data/lib/ferret/version.rb +3 -0
- data/setup.rb +1555 -0
- data/test/long_running/largefile/tc_largefile.rb +46 -0
- data/test/test_all.rb +5 -0
- data/test/test_helper.rb +29 -0
- data/test/test_installed.rb +1 -0
- data/test/threading/number_to_spoken.rb +132 -0
- data/test/threading/thread_safety_index_test.rb +88 -0
- data/test/threading/thread_safety_read_write_test.rb +73 -0
- data/test/threading/thread_safety_test.rb +133 -0
- data/test/unit/analysis/tc_analyzer.rb +550 -0
- data/test/unit/analysis/tc_token_stream.rb +653 -0
- data/test/unit/index/tc_index.rb +867 -0
- data/test/unit/index/tc_index_reader.rb +699 -0
- data/test/unit/index/tc_index_writer.rb +447 -0
- data/test/unit/index/th_doc.rb +332 -0
- data/test/unit/query_parser/tc_query_parser.rb +238 -0
- data/test/unit/search/tc_filter.rb +156 -0
- data/test/unit/search/tc_fuzzy_query.rb +147 -0
- data/test/unit/search/tc_index_searcher.rb +67 -0
- data/test/unit/search/tc_multi_searcher.rb +128 -0
- data/test/unit/search/tc_multiple_search_requests.rb +58 -0
- data/test/unit/search/tc_search_and_sort.rb +179 -0
- data/test/unit/search/tc_sort.rb +49 -0
- data/test/unit/search/tc_sort_field.rb +27 -0
- data/test/unit/search/tc_spans.rb +190 -0
- data/test/unit/search/tm_searcher.rb +436 -0
- data/test/unit/store/tc_fs_store.rb +115 -0
- data/test/unit/store/tc_ram_store.rb +35 -0
- data/test/unit/store/tm_store.rb +34 -0
- data/test/unit/store/tm_store_lock.rb +68 -0
- data/test/unit/tc_document.rb +81 -0
- data/test/unit/tc_field_symbol.rb +26 -0
- data/test/unit/ts_analysis.rb +2 -0
- data/test/unit/ts_index.rb +2 -0
- data/test/unit/ts_largefile.rb +4 -0
- data/test/unit/ts_query_parser.rb +2 -0
- data/test/unit/ts_search.rb +2 -0
- data/test/unit/ts_store.rb +2 -0
- data/test/unit/ts_utils.rb +2 -0
- data/test/unit/utils/tc_bit_vector.rb +295 -0
- data/test/unit/utils/tc_number_tools.rb +117 -0
- data/test/unit/utils/tc_priority_queue.rb +106 -0
- data/test/utils/content_generator.rb +226 -0
- metadata +319 -0
@@ -0,0 +1,22 @@
|
|
1
|
+
<?xml version="1.0" encoding="utf-8"?>
|
2
|
+
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd" >
|
3
|
+
<html xmlns="http://www.w3.org/1999/xhtml">
|
4
|
+
<head>
|
5
|
+
<title>Ferret-Browser:: <%= @path %></title>
|
6
|
+
<link rel="shortcut icon" type="image/x-icon" href="/s/i/favicon.ico" />
|
7
|
+
<link rel="stylesheet" type="text/css" href="/s/style.css" />
|
8
|
+
<script type="text/javascript" src="/s/global.js"></script>
|
9
|
+
</head>
|
10
|
+
<body id="<%= options[:controller] %>">
|
11
|
+
<ul id="top-menu">
|
12
|
+
<li class="home"><a href="/">Home</a></li>
|
13
|
+
<li class="document"><a href="/document">Documents</a></li>
|
14
|
+
<li class="term"><a href="/term">Terms</a></li>
|
15
|
+
<li class="term-vector"><a href="/term-vector">Term Vectors</a></li>
|
16
|
+
<li class="help"><a href="/help">Help</a></li>
|
17
|
+
</ul>
|
18
|
+
<div id="content">
|
19
|
+
<%= content %>
|
20
|
+
</div>
|
21
|
+
</body>
|
22
|
+
</html>
|
@@ -0,0 +1,199 @@
|
|
1
|
+
<h3>Terms</h3>
|
2
|
+
<form action="" method="get">
|
3
|
+
<label for="field">Choose a field:
|
4
|
+
<select id="field" name="field" onchange="location.href='/term/show/' + this.value;">
|
5
|
+
<option value="">--</option>
|
6
|
+
<% @reader.field_infos.each do |fi| next unless fi.indexed? %>
|
7
|
+
<option value="<%=fi.name%>" <%= 'selected="selected"' if @field == fi.name %>><%=fi.name%></option>
|
8
|
+
<% end %>
|
9
|
+
</select>
|
10
|
+
</label>
|
11
|
+
</form>
|
12
|
+
<hr/>
|
13
|
+
<% if @terms %>
|
14
|
+
<div style="float:left; padding-right:30px;">
|
15
|
+
<h4>Field: <%= @field %></h4>
|
16
|
+
<form action="" onsubmit="return findTerm(document.getElementById('choose-term').value);">
|
17
|
+
<input id="choose-term" type="text"/></form>
|
18
|
+
<hr/>
|
19
|
+
<div id="terms">
|
20
|
+
</div>
|
21
|
+
<hr/>
|
22
|
+
<form action="">
|
23
|
+
<input type="submit" value="Previous" onclick="return prevTerm();"/>
|
24
|
+
<input type="submit" value="Next" onclick="return nextTerm();"/><br/>
|
25
|
+
</form>
|
26
|
+
</div>
|
27
|
+
<div style="float:left; padding-right:30px;">
|
28
|
+
<h4>Documents
|
29
|
+
<input name="show-documents" type="checkbox" id="show-documents" onchange="displayDoc();"/></h4>
|
30
|
+
<div id="doc-display">
|
31
|
+
<form action="" onsubmit="return findDoc(document.getElementById('choose-doc').value);">
|
32
|
+
<input id="choose-doc" type="text"/></form>
|
33
|
+
<hr/>
|
34
|
+
<div id="documents">
|
35
|
+
</div>
|
36
|
+
<hr/>
|
37
|
+
<form action="">
|
38
|
+
<input type="submit" value="Previous" onclick="return prevDoc();"/>
|
39
|
+
<input type="submit" value="Next" onclick="return nextDoc();"/><br/>
|
40
|
+
</form>
|
41
|
+
</div>
|
42
|
+
</div>
|
43
|
+
<script type="text/javascript"><!--
|
44
|
+
// Build a two-column <table class="left-headed"> from `content`, an array of
// [label, value] pairs. Each pair becomes one row: the label goes into a <th>,
// the value into a <td>. A value that is an object is appended as-is (callers
// in this template pass DOM nodes); anything else is wrapped in a text node.
function createSideHeaderedTable(content) {
  var tbl = document.createElement("table");
  tbl.className = "left-headed";
  tbl.setAttribute('cellpadding', "0");
  tbl.setAttribute('cellspacing', "0");

  for (var row = 0; row < content.length; row++) {
    var label = content[row][0];
    var value = content[row][1];

    var header = document.createElement("th");
    header.appendChild(document.createTextNode(label));

    var cell = document.createElement("td");
    cell.appendChild(value instanceof Object ? value : document.createTextNode(value));

    var line = document.createElement("tr");
    line.appendChild(header);
    line.appendChild(cell);
    tbl.appendChild(line);
  }
  return tbl;
}
|
62
|
+
|
63
|
+
var terms = <%= @terms %>;
|
64
|
+
var numTerms = terms.length;
|
65
|
+
var termI = 0;
|
66
|
+
var termDiv = document.getElementById("terms");
|
67
|
+
var termChooserTxt = document.getElementById('choose-term');
|
68
|
+
|
69
|
+
var docs = null;
|
70
|
+
var numDocs = 0;
|
71
|
+
var docI = 0;
|
72
|
+
var docDiv = document.getElementById("documents");
|
73
|
+
var showDocs = document.getElementById("show-documents");
|
74
|
+
var docChooserTxt = document.getElementById('choose-doc');
|
75
|
+
var docDisplayDiv = document.getElementById('doc-display');
|
76
|
+
|
77
|
+
// Asynchronously fetch the documents containing the currently selected term
// and, once they arrive, show the first one via displayDoc().
// Always returns false so it can be used directly from an event handler.
function getDocs() {
  var req = new XMLHttpRequest();
  // Remember which term this request is for: the user may move to another
  // term before the response arrives.
  var requestedTermI = termI;
  // Encode the term so characters such as '?', '#', '%' or spaces cannot
  // truncate or corrupt the request path.
  // NOTE(review): assumes the server URL-decodes the path segment — confirm.
  req.open('GET', '/term/termdocs/<%=@field%>/' + encodeURIComponent(terms[termI][0]), true);
  req.onreadystatechange = function() {
    if (req.readyState != 4) return;
    // Drop a response that belongs to a term that is no longer selected;
    // otherwise it would overwrite `docs` with data for the wrong term.
    if (requestedTermI != termI) return;
    // An error page is not a document list; do not try to evaluate it.
    if (req.status != 200) return;
    // NOTE(review): eval() executes whatever the server returns. The payload
    // is rendered by the termdocs view; consider JSON.parse if every
    // supported browser provides it.
    docs = eval(req.responseText);
    numDocs = docs.length;
    docI = 0;
    displayDoc();
  };
  req.send(/*no params*/null);
  return false;
}
|
91
|
+
// Show, refresh or hide the document panel depending on the "show documents"
// checkbox. When the panel is visible and the documents for the current term
// have not been loaded yet (docs == null), loading is started via getDocs(),
// which calls back into this function. Always returns false.
function displayDoc() {
  if (!showDocs.checked) {
    docDiv.innerHTML = '';
    docDisplayDiv.style.display = 'none';
    return false;
  }

  docDisplayDiv.style.display = 'block';
  if (docs == null) {
    getDocs();
    return false;
  }

  // Nothing to show for an empty result; indexing docs[docI] would throw.
  if (docs.length == 0) {
    docChooserTxt.value = '';
    docDiv.innerHTML = '';
    return false;
  }

  // Each entry is read as [document id, occurrence count, positions array].
  var doc = docs[docI];
  docChooserTxt.value = doc[0];

  var docLink = document.createElement('a');
  docLink.setAttribute('href', "/document/show/" + doc[0]);
  docLink.appendChild(document.createTextNode(doc[0]));

  var table = createSideHeaderedTable([
    ['index', '' + (docI + 1) + ' of ' + numDocs],
    ['document', docLink],
    ['number of occurrences', doc[1]],
    ['positions', doc[2].join(',')]
  ]);

  // Replace the previous table if there is one, otherwise insert the first.
  if (docDiv.firstChild) docDiv.replaceChild(table, docDiv.firstChild);
  else docDiv.appendChild(table);
  return false;
}
|
116
|
+
// Advance to the next document of the current term, or tell the user that
// the last one is already shown. Always returns false (suppresses the submit).
function nextDoc() {
  var canAdvance = docI < numDocs - 1;
  if (!canAdvance) {
    alert('No more docs. Already at the end.');
    return false;
  }
  docI += 1;
  displayDoc();
  return false;
}
|
123
|
+
// Step back to the previous document of the current term, or tell the user
// that the first one is already shown. Always returns false.
function prevDoc() {
  var canGoBack = docI > 0;
  if (!canGoBack) {
    alert('No more docs. Already at the start.');
    return false;
  }
  docI = docI - 1;
  displayDoc();
  return false;
}
|
130
|
+
// Jump to the document whose id is `doc` (as typed into the chooser box).
// Uses bsearch, which is defined outside this template; judging by the bounds
// check below it can return docs.length — TODO confirm its exact contract.
// Does nothing when no documents are loaded. Always returns false.
function findDoc(doc) {
  var haveDocs = docs && docs.length > 0;
  if (!haveDocs) return false;

  var idLessThan = function(a, b) {return a[0] < b};
  docI = bsearch(docs, doc, idLessThan);
  // Clamp a past-the-end result back onto the last document.
  if (docI >= docs.length) docI = docI - 1;
  displayDoc();
  return false;
}
|
138
|
+
// Render the currently selected term (terms[termI]) into the term panel and
// refresh the document panel. Selecting a term invalidates the cached
// document list, so `docs` is reset to null first.
function displayTerm() {
  docs = null;

  // With no terms at all there is nothing to render; terms[termI] would be
  // undefined and indexing it would throw.
  if (numTerms == 0) {
    termChooserTxt.value = '';
    termDiv.innerHTML = '';
    return;
  }

  termChooserTxt.value = terms[termI][0];
  // No trailing comma after the last row: legacy IE turns it into an extra
  // undefined element, which createSideHeaderedTable cannot handle.
  var table = createSideHeaderedTable([
    ['index', '' + (termI + 1) + ' of ' + numTerms],
    ['term', terms[termI][0]],
    ['number of documents', terms[termI][1]]
  ]);

  // Same replace-or-append logic as displayDoc(): replaceChild needs an
  // existing child to replace.
  if (termDiv.firstChild) termDiv.replaceChild(table, termDiv.firstChild);
  else termDiv.appendChild(table);
  displayDoc();
}
|
149
|
+
// Advance to the next term of the field, or tell the user that the last one
// is already shown. Always returns false (suppresses the form submit).
function nextTerm() {
  var canAdvance = termI < numTerms - 1;
  if (!canAdvance) {
    alert('No more terms. Already at the end.');
    return false;
  }
  termI += 1;
  displayTerm();
  return false;
}
|
156
|
+
// Step back to the previous term of the field, or tell the user that the
// first one is already shown. Always returns false.
function prevTerm() {
  var canGoBack = termI > 0;
  if (!canGoBack) {
    alert('No more terms. Already at the start.');
    return false;
  }
  termI = termI - 1;
  displayTerm();
  return false;
}
|
163
|
+
// Look up `term` (as typed into the chooser box) and display it when it is in
// the term list; otherwise alert the user. Always returns false.
//
// The search result is kept in a local variable and only committed to termI
// on an exact match. Assigning it unconditionally would leave termI pointing
// at an unrelated (possibly past-the-end) entry after a failed search, while
// the page still shows the previous term.
function findTerm(term) {
  if (terms && terms.length > 0) {
    var found = bsearch(terms, term, function(a, b) {return a[0] < b});
    // `found` may be past the end of the list; fall back to an empty entry.
    var match = (terms[found]||[])[0];
    if (term != match) {
      alert('Term <%=@field%>:' + term + ' not found in index');
    } else {
      termI = found;
      displayTerm();
    }
  }
  return false;
}
|
175
|
+
|
176
|
+
// Suggestion source for the term chooser's auto-suggest control. It holds no
// state of its own; suggestions are read from the page-level `terms` list.
function TermSuggestionProvider() {
}

// Offer up to 10 terms that start with the text currently in the control's
// textbox and hand them to oAutoSuggestControl.autosuggest(). Nothing is
// suggested while the textbox is empty.
TermSuggestionProvider.prototype.requestSuggestions = function(oAutoSuggestControl, bTypeAhead) {
  var aSuggestions = [];
  var sTextboxValue = oAutoSuggestControl.textbox.value;

  if (sTextboxValue.length > 0) {
    // Declared with `var` so the index does not leak into the global scope.
    var start = bsearch(terms, sTextboxValue, function(a, b) {return a[0] < b});
    // Walk forward from the search position while terms still share the prefix.
    for (var i = start; i < terms.length
                        && terms[i][0].indexOf(sTextboxValue) == 0
                        && aSuggestions.length < 10; i++) {
      aSuggestions.push(terms[i][0]);
    }
    oAutoSuggestControl.autosuggest(aSuggestions, bTypeAhead);
  }
};
|
192
|
+
// Once the page has loaded, attach auto-suggest behaviour to the term chooser.
// The control is only constructed; this template keeps no reference to it.
window.onload = function() {
  new AutoSuggestControl(termChooserTxt, new TermSuggestionProvider());
};

// Render the first term straight away.
displayTerm();
|
196
|
+
//-->
|
197
|
+
</script>
|
198
|
+
<% end %>
|
199
|
+
<hr/>
|
@@ -0,0 +1 @@
|
|
1
|
+
<%= @reader.term_positions_for(@field, @term).to_json(:fast) %>
|
@@ -0,0 +1,14 @@
|
|
1
|
+
module WEBrick
  # WEBrick servlet that hands every GET request to a
  # Ferret::Browser::Delegator and copies the delegator's answer into the
  # WEBrick response.
  class FerretBrowserHandler < WEBrick::HTTPServlet::AbstractServlet
    # Creates a FerretBrowserHandler. +reader+ and +path+ are passed through
    # unchanged to Ferret::Browser::Delegator.new.
    def initialize(server, reader, path)
      super(server)
      @delegator = Ferret::Browser::Delegator.new(reader, path)
    end

    # Handler for WEBrick GET requests. The delegator is run with the
    # request's meta variables and its result is unpacked, in order, into the
    # response's status, content type and body.
    def do_GET(req, res)
      response = @delegator.run(req.meta_vars)
      res.status, res.content_type, res.body = response
    end
  end
end
|
@@ -0,0 +1,130 @@
|
|
1
|
+
module Ferret
  # Instead of using documents to add data to an index you can use Hashes and
  # Arrays. The only real benefits of using a Document over a Hash are pretty
  # printing and the boost attribute. You can add the boost attribute to
  # Hashes and arrays using the BoostMixin. For example;
  #
  #   class Hash
  #     include BoostMixin
  #   end
  #
  #   class Array
  #     include BoostMixin
  #   end
  #
  #   class String
  #     include BoostMixin
  #   end
  module BoostMixin
    attr_accessor :boost
  end

  # Documents are the unit of indexing and search.
  #
  # A Document is a set of fields. Each field has a name and an array of
  # textual values. If you are coming from a Lucene background you should note
  # that Fields don't have any properties except for the boost property. You
  # should use the Ferret::Index::FieldInfos class to set field properties
  # across the whole index instead.
  #
  # === Boost
  #
  # The boost attribute makes a Document more important in the index. That is,
  # you can increase the score of a match for queries that match a particular
  # document, making it more likely to appear at the top of search results.
  # You may, for example, want to boost products that have a higher user
  # rating so that they are more likely to appear in search results.
  #
  # Note: that fields which are _not_ stored (see Ferret::Index::FieldInfos)
  # are _not_ available in documents retrieved from the index, e.g.
  # Ferret::Search::Searcher#doc or Ferret::Index::IndexReader#doc.
  #
  # Note: that modifying a Document retrieved from the index will not modify
  # the document contained within the index. You need to delete the old
  # version of the document and add the new version of the document.
  class Document < Hash
    include BoostMixin

    # Create a new Document object with a boost. The boost defaults to 1.0.
    def initialize(boost = 1.0)
      super()
      @boost = boost
    end

    # Return true if the documents are equal, ie they are both Documents,
    # have the same boost and hold the same fields with equal (==) values.
    # The order in which the fields were added does not matter.
    def eql?(o)
      return false unless o.is_a?(Document) && o.boost == @boost
      return false unless size == o.size
      all? { |key, value| o.key?(key) && o[key] == value }
    end
    alias :== :eql?

    # Create a string representation of the document. Fields are listed one
    # per line, sorted by field name; a boost other than 1.0 is appended
    # after the closing brace as "^boost".
    def to_s
      buf = ["Document {"]
      keys.sort_by { |key| key.to_s }.each do |key|
        val = self[key]
        # Plain Arrays are rendered as a quoted list, Fields render
        # themselves and everything else is rendered as one quoted string.
        val_str =
          if val.instance_of?(Array) then %{["#{val.join('", "')}"]}
          elsif val.is_a?(Field) then val.to_s
          else %{"#{val}"}
          end
        buf << " :#{key} => #{val_str}"
      end
      buf << "}#{@boost == 1.0 ? "" : "^" + @boost.to_s}"
      buf.join("\n")
    end
  end

  # A Field is a section of a Document. A Field is basically an array with a
  # boost attribute. It also provides pretty printing of the field with the
  # #to_s method.
  #
  # === Boost
  #
  # The boost attribute makes a field more important in the index. That is,
  # you can increase the score of a match for queries that match terms in a
  # boosted field. You may, for example, want to boost a title field so that
  # matches that match in the :title field score more highly than matches that
  # match in the :contents field.
  #
  # Note: If you'd like to use boosted fields without having to use
  # the Field class you can just include the BoostMixin in the Array class.
  # See BoostMixin.
  class Field < Array
    include BoostMixin

    # Create a new Field object. You can pass data to the field as either a
    # string;
    #
    #   f = Field.new("This is the fields data")
    #
    # or as an array of strings;
    #
    #   f = Field.new(["this", "is", "an", "array", "of", "field", "data"])
    #
    # Of course Fields can also be boosted;
    #
    #   f = Field.new("field data", 1000.0)
    def initialize(data = [], boost = 1.0)
      super()
      @boost = boost
      if data.is_a?(Array)
        concat(data)
      else
        # Anything that is not an Array becomes a single string value.
        self << data.to_s
      end
    end

    # Return true if +o+ is a Field with the same boost and the same values.
    def eql?(o)
      o.is_a?(Field) && o.boost == @boost && super(o)
    end
    alias :== :eql?

    # Concatenate +o+ onto this field's values, returning a new Field that
    # keeps this field's boost.
    def +(o)
      Field.new(super(o), boost)
    end

    # Create a string representation of the field: the quoted values in
    # square brackets, followed by "^boost" when the boost is not 1.0.
    def to_s
      buf = %{["#{join('", "')}"]}
      buf << "^#@boost" if @boost != 1.0
      buf
    end
  end
end
|
@@ -0,0 +1,44 @@
|
|
1
|
+
require 'yaml'
|
2
|
+
|
3
|
+
module Ferret::Index
  class FieldInfos
    # Load FieldInfos from a YAML string. The YAML should look something like
    # this:
    #
    #   default:
    #     store: :yes
    #     index: :yes
    #     term_vector: :no
    #
    #   fields:
    #     id:
    #       index: :untokenized
    #       term_vector: :no
    #
    #     title:
    #       boost: 20.0
    #       term_vector: :no
    #
    #     content:
    #       term_vector: :with_positions_offsets
    #
    # The +default+ mapping is passed to FieldInfos.new and every entry under
    # +fields+ is added with #add_field. String keys at every nesting level
    # are converted to symbols first. An empty document yields a FieldInfos
    # built from FieldInfos.new(nil).
    #
    # Raises ArgumentError if the YAML is not a mapping.
    def self.load(yaml_str)
      # NOTE(review): depending on the YAML library version, YAML.load may
      # instantiate arbitrary objects. Only pass trusted input, or switch to a
      # safe loader that still permits Symbol values.
      info = YAML.load(yaml_str)
      # An empty document parses to nil or false rather than a Hash.
      info = {} unless info
      unless info.is_a?(Hash)
        raise ArgumentError, "FieldInfos YAML must be a mapping, got #{info.class}"
      end

      convert_strings_to_symbols(info)
      fis = FieldInfos.new(info[:default])
      fields = info[:fields]
      fields.each { |name, options| fis.add_field(name, options) } if fields
      fis
    end

    # Recursively replace String keys with Symbol keys, in place. Iterates
    # over a snapshot of the keys so the hash can be modified while walking.
    def self.convert_strings_to_symbols(hash)
      hash.keys.each do |key|
        convert_strings_to_symbols(hash[key]) if hash[key].is_a?(Hash)
        if key.is_a?(String)
          hash[key.intern] = hash[key]
          hash.delete(key)
        end
      end
    end
    # A bare `private` does not apply to `def self.` methods, so the helper is
    # hidden explicitly.
    private_class_method :convert_strings_to_symbols
  end
end
|
@@ -0,0 +1,87 @@
|
|
1
|
+
module Ferret
  FIELD_TYPES = %w(integer float string byte).map{|t| t.to_sym}

  # BlankSlate is a class with no instance methods except for those whose
  # names start with a double underscore (such as __send__ and __id__). It is
  # useful for creating proxy classes. It is currently used by the
  # FieldSymbol class which is a proxy to the Symbol class
  class BlankSlate
    instance_methods.each { |m| undef_method m unless m =~ /^__/ }
  end

  # The FieldSymbolMethods module contains the methods that are added to both
  # the Symbol class and the FieldSymbol class. These methods allow you to
  # easily set the type of a field by calling a method on a symbol.
  #
  # Right now this is only useful for Sorting and grouping, but some day Ferret
  # may have typed fields, in which case these methods will come in handy.
  #
  # The available types are specified in Ferret::FIELD_TYPES.
  #
  # == Examples
  #
  #   index.search(query, :sort => :title.string.desc)
  #
  #   index.search(query, :sort => [:price.float, :count.integer.desc])
  #
  #   index.search(query, :group_by => :catalogue.string)
  #
  # == Note
  #
  # If you set the field type multiple times, the last type specified will be
  # the type used. For example;
  #
  #   puts :title.integer.float.byte.string.type.inspect # => :string
  #
  # Calling #desc twice will set desc? to false
  #
  #   puts :title.desc?           # => false
  #   puts :title.desc.desc?      # => true
  #   puts :title.desc.desc.desc? # => false
  module FieldSymbolMethods
    # One method per field type (integer, float, string, byte). Each wraps
    # the receiver in a FieldSymbol of that type, keeping the receiver's
    # current desc? setting.
    FIELD_TYPES.each do |method|
      define_method(method) do
        fsym = FieldSymbol.new(self, respond_to?(:desc?) ? desc? : false)
        fsym.type = method
        fsym
      end
    end

    # Set a field to be a descending field. This only makes sense in sort
    # specifications. Calling it on a field that is already descending
    # toggles it back; the receiver's type, if any, is kept.
    def desc
      fsym = FieldSymbol.new(self, respond_to?(:desc?) ? !desc? : true)
      fsym.type = type if respond_to? :type
      fsym
    end

    # Return whether or not this field should be a descending field
    def desc?
      @desc == true
    end

    # Return the type of this field, or nil if no type has been set
    def type
      @type || nil
    end
  end

  # See FieldSymbolMethods
  class FieldSymbol < BlankSlate
    include FieldSymbolMethods

    # Wrap +symbol+ (a Symbol or another FieldSymbol), optionally marking it
    # as descending.
    def initialize(symbol, desc = false)
      @symbol = symbol
      @desc = desc
    end

    # Forward everything this proxy does not define itself to the wrapped
    # symbol. The block is forwarded too, so block-taking methods behave the
    # same on the proxy as on the symbol. Because BlankSlate removes
    # respond_to?, that query is answered by the wrapped symbol as well.
    def method_missing(method, *args, &block)
      @symbol.__send__(method, *args, &block)
    end

    attr_writer :type, :desc
  end
end
|
83
|
+
|
84
|
+
# See FieldSymbolMethods
|
85
|
+
# Mix the field type/sort helpers (integer, float, string, byte, desc, desc?,
# type) into every Symbol.
Symbol.class_eval do
  include Ferret::FieldSymbolMethods
end
|