jk-ferret 0.11.8.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/CHANGELOG +24 -0
- data/MIT-LICENSE +20 -0
- data/README +90 -0
- data/RELEASE_CHANGES +137 -0
- data/RELEASE_NOTES +60 -0
- data/Rakefile +443 -0
- data/TODO +109 -0
- data/TUTORIAL +231 -0
- data/bin/ferret-browser +79 -0
- data/ext/BZLIB_blocksort.c +1094 -0
- data/ext/BZLIB_bzlib.c +1578 -0
- data/ext/BZLIB_compress.c +672 -0
- data/ext/BZLIB_crctable.c +104 -0
- data/ext/BZLIB_decompress.c +626 -0
- data/ext/BZLIB_huffman.c +205 -0
- data/ext/BZLIB_randtable.c +84 -0
- data/ext/STEMMER_api.c +66 -0
- data/ext/STEMMER_libstemmer.c +93 -0
- data/ext/STEMMER_stem_ISO_8859_1_danish.c +337 -0
- data/ext/STEMMER_stem_ISO_8859_1_dutch.c +624 -0
- data/ext/STEMMER_stem_ISO_8859_1_english.c +1117 -0
- data/ext/STEMMER_stem_ISO_8859_1_finnish.c +762 -0
- data/ext/STEMMER_stem_ISO_8859_1_french.c +1246 -0
- data/ext/STEMMER_stem_ISO_8859_1_german.c +503 -0
- data/ext/STEMMER_stem_ISO_8859_1_hungarian.c +1230 -0
- data/ext/STEMMER_stem_ISO_8859_1_italian.c +1065 -0
- data/ext/STEMMER_stem_ISO_8859_1_norwegian.c +297 -0
- data/ext/STEMMER_stem_ISO_8859_1_porter.c +749 -0
- data/ext/STEMMER_stem_ISO_8859_1_portuguese.c +1017 -0
- data/ext/STEMMER_stem_ISO_8859_1_spanish.c +1093 -0
- data/ext/STEMMER_stem_ISO_8859_1_swedish.c +307 -0
- data/ext/STEMMER_stem_ISO_8859_2_romanian.c +998 -0
- data/ext/STEMMER_stem_KOI8_R_russian.c +700 -0
- data/ext/STEMMER_stem_UTF_8_danish.c +339 -0
- data/ext/STEMMER_stem_UTF_8_dutch.c +634 -0
- data/ext/STEMMER_stem_UTF_8_english.c +1125 -0
- data/ext/STEMMER_stem_UTF_8_finnish.c +768 -0
- data/ext/STEMMER_stem_UTF_8_french.c +1256 -0
- data/ext/STEMMER_stem_UTF_8_german.c +509 -0
- data/ext/STEMMER_stem_UTF_8_hungarian.c +1234 -0
- data/ext/STEMMER_stem_UTF_8_italian.c +1073 -0
- data/ext/STEMMER_stem_UTF_8_norwegian.c +299 -0
- data/ext/STEMMER_stem_UTF_8_porter.c +755 -0
- data/ext/STEMMER_stem_UTF_8_portuguese.c +1023 -0
- data/ext/STEMMER_stem_UTF_8_romanian.c +1004 -0
- data/ext/STEMMER_stem_UTF_8_russian.c +694 -0
- data/ext/STEMMER_stem_UTF_8_spanish.c +1097 -0
- data/ext/STEMMER_stem_UTF_8_swedish.c +309 -0
- data/ext/STEMMER_stem_UTF_8_turkish.c +2205 -0
- data/ext/STEMMER_utilities.c +478 -0
- data/ext/analysis.c +1710 -0
- data/ext/analysis.h +266 -0
- data/ext/api.h +26 -0
- data/ext/array.c +125 -0
- data/ext/array.h +62 -0
- data/ext/bitvector.c +96 -0
- data/ext/bitvector.h +594 -0
- data/ext/bzlib.h +282 -0
- data/ext/bzlib_private.h +503 -0
- data/ext/compound_io.c +384 -0
- data/ext/config.h +52 -0
- data/ext/document.c +159 -0
- data/ext/document.h +63 -0
- data/ext/except.c +102 -0
- data/ext/except.h +176 -0
- data/ext/extconf.rb +15 -0
- data/ext/ferret.c +416 -0
- data/ext/ferret.h +94 -0
- data/ext/field_index.c +262 -0
- data/ext/field_index.h +52 -0
- data/ext/filter.c +157 -0
- data/ext/fs_store.c +493 -0
- data/ext/global.c +458 -0
- data/ext/global.h +302 -0
- data/ext/hash.c +524 -0
- data/ext/hash.h +515 -0
- data/ext/hashset.c +192 -0
- data/ext/hashset.h +215 -0
- data/ext/header.h +58 -0
- data/ext/helper.c +63 -0
- data/ext/helper.h +21 -0
- data/ext/index.c +6804 -0
- data/ext/index.h +935 -0
- data/ext/internal.h +1019 -0
- data/ext/lang.c +10 -0
- data/ext/lang.h +68 -0
- data/ext/libstemmer.h +79 -0
- data/ext/mempool.c +88 -0
- data/ext/mempool.h +43 -0
- data/ext/modules.h +190 -0
- data/ext/multimapper.c +351 -0
- data/ext/multimapper.h +60 -0
- data/ext/posh.c +1006 -0
- data/ext/posh.h +973 -0
- data/ext/priorityqueue.c +149 -0
- data/ext/priorityqueue.h +155 -0
- data/ext/q_boolean.c +1621 -0
- data/ext/q_const_score.c +162 -0
- data/ext/q_filtered_query.c +212 -0
- data/ext/q_fuzzy.c +280 -0
- data/ext/q_match_all.c +149 -0
- data/ext/q_multi_term.c +673 -0
- data/ext/q_parser.c +3103 -0
- data/ext/q_phrase.c +1206 -0
- data/ext/q_prefix.c +98 -0
- data/ext/q_range.c +682 -0
- data/ext/q_span.c +2390 -0
- data/ext/q_term.c +337 -0
- data/ext/q_wildcard.c +167 -0
- data/ext/r_analysis.c +2626 -0
- data/ext/r_index.c +3468 -0
- data/ext/r_qparser.c +635 -0
- data/ext/r_search.c +4490 -0
- data/ext/r_store.c +513 -0
- data/ext/r_utils.c +1131 -0
- data/ext/ram_store.c +476 -0
- data/ext/scanner.c +895 -0
- data/ext/scanner.h +36 -0
- data/ext/scanner_mb.c +6701 -0
- data/ext/scanner_utf8.c +4415 -0
- data/ext/search.c +1864 -0
- data/ext/search.h +953 -0
- data/ext/similarity.c +151 -0
- data/ext/similarity.h +89 -0
- data/ext/sort.c +786 -0
- data/ext/stem_ISO_8859_1_danish.h +16 -0
- data/ext/stem_ISO_8859_1_dutch.h +16 -0
- data/ext/stem_ISO_8859_1_english.h +16 -0
- data/ext/stem_ISO_8859_1_finnish.h +16 -0
- data/ext/stem_ISO_8859_1_french.h +16 -0
- data/ext/stem_ISO_8859_1_german.h +16 -0
- data/ext/stem_ISO_8859_1_hungarian.h +16 -0
- data/ext/stem_ISO_8859_1_italian.h +16 -0
- data/ext/stem_ISO_8859_1_norwegian.h +16 -0
- data/ext/stem_ISO_8859_1_porter.h +16 -0
- data/ext/stem_ISO_8859_1_portuguese.h +16 -0
- data/ext/stem_ISO_8859_1_spanish.h +16 -0
- data/ext/stem_ISO_8859_1_swedish.h +16 -0
- data/ext/stem_ISO_8859_2_romanian.h +16 -0
- data/ext/stem_KOI8_R_russian.h +16 -0
- data/ext/stem_UTF_8_danish.h +16 -0
- data/ext/stem_UTF_8_dutch.h +16 -0
- data/ext/stem_UTF_8_english.h +16 -0
- data/ext/stem_UTF_8_finnish.h +16 -0
- data/ext/stem_UTF_8_french.h +16 -0
- data/ext/stem_UTF_8_german.h +16 -0
- data/ext/stem_UTF_8_hungarian.h +16 -0
- data/ext/stem_UTF_8_italian.h +16 -0
- data/ext/stem_UTF_8_norwegian.h +16 -0
- data/ext/stem_UTF_8_porter.h +16 -0
- data/ext/stem_UTF_8_portuguese.h +16 -0
- data/ext/stem_UTF_8_romanian.h +16 -0
- data/ext/stem_UTF_8_russian.h +16 -0
- data/ext/stem_UTF_8_spanish.h +16 -0
- data/ext/stem_UTF_8_swedish.h +16 -0
- data/ext/stem_UTF_8_turkish.h +16 -0
- data/ext/stopwords.c +410 -0
- data/ext/store.c +698 -0
- data/ext/store.h +799 -0
- data/ext/symbol.c +10 -0
- data/ext/symbol.h +23 -0
- data/ext/term_vectors.c +73 -0
- data/ext/threading.h +31 -0
- data/ext/win32.h +62 -0
- data/lib/ferret.rb +30 -0
- data/lib/ferret/browser.rb +246 -0
- data/lib/ferret/browser/s/global.js +192 -0
- data/lib/ferret/browser/s/style.css +148 -0
- data/lib/ferret/browser/views/document/list.rhtml +49 -0
- data/lib/ferret/browser/views/document/show.rhtml +27 -0
- data/lib/ferret/browser/views/error/index.rhtml +7 -0
- data/lib/ferret/browser/views/help/index.rhtml +8 -0
- data/lib/ferret/browser/views/home/index.rhtml +29 -0
- data/lib/ferret/browser/views/layout.rhtml +22 -0
- data/lib/ferret/browser/views/term-vector/index.rhtml +4 -0
- data/lib/ferret/browser/views/term/index.rhtml +199 -0
- data/lib/ferret/browser/views/term/termdocs.rhtml +1 -0
- data/lib/ferret/browser/webrick.rb +14 -0
- data/lib/ferret/document.rb +130 -0
- data/lib/ferret/field_infos.rb +44 -0
- data/lib/ferret/field_symbol.rb +87 -0
- data/lib/ferret/index.rb +973 -0
- data/lib/ferret/number_tools.rb +157 -0
- data/lib/ferret/version.rb +3 -0
- data/setup.rb +1555 -0
- data/test/long_running/largefile/tc_largefile.rb +46 -0
- data/test/test_all.rb +5 -0
- data/test/test_helper.rb +29 -0
- data/test/test_installed.rb +1 -0
- data/test/threading/number_to_spoken.rb +132 -0
- data/test/threading/thread_safety_index_test.rb +88 -0
- data/test/threading/thread_safety_read_write_test.rb +73 -0
- data/test/threading/thread_safety_test.rb +133 -0
- data/test/unit/analysis/tc_analyzer.rb +550 -0
- data/test/unit/analysis/tc_token_stream.rb +653 -0
- data/test/unit/index/tc_index.rb +867 -0
- data/test/unit/index/tc_index_reader.rb +699 -0
- data/test/unit/index/tc_index_writer.rb +447 -0
- data/test/unit/index/th_doc.rb +332 -0
- data/test/unit/query_parser/tc_query_parser.rb +238 -0
- data/test/unit/search/tc_filter.rb +156 -0
- data/test/unit/search/tc_fuzzy_query.rb +147 -0
- data/test/unit/search/tc_index_searcher.rb +67 -0
- data/test/unit/search/tc_multi_searcher.rb +128 -0
- data/test/unit/search/tc_multiple_search_requests.rb +58 -0
- data/test/unit/search/tc_search_and_sort.rb +179 -0
- data/test/unit/search/tc_sort.rb +49 -0
- data/test/unit/search/tc_sort_field.rb +27 -0
- data/test/unit/search/tc_spans.rb +190 -0
- data/test/unit/search/tm_searcher.rb +436 -0
- data/test/unit/store/tc_fs_store.rb +115 -0
- data/test/unit/store/tc_ram_store.rb +35 -0
- data/test/unit/store/tm_store.rb +34 -0
- data/test/unit/store/tm_store_lock.rb +68 -0
- data/test/unit/tc_document.rb +81 -0
- data/test/unit/tc_field_symbol.rb +26 -0
- data/test/unit/ts_analysis.rb +2 -0
- data/test/unit/ts_index.rb +2 -0
- data/test/unit/ts_largefile.rb +4 -0
- data/test/unit/ts_query_parser.rb +2 -0
- data/test/unit/ts_search.rb +2 -0
- data/test/unit/ts_store.rb +2 -0
- data/test/unit/ts_utils.rb +2 -0
- data/test/unit/utils/tc_bit_vector.rb +295 -0
- data/test/unit/utils/tc_number_tools.rb +117 -0
- data/test/unit/utils/tc_priority_queue.rb +106 -0
- data/test/utils/content_generator.rb +226 -0
- metadata +319 -0
@@ -0,0 +1,22 @@
|
|
1
|
+
<?xml version="1.0" encoding="utf-8"?>
|
2
|
+
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd" >
|
3
|
+
<html xmlns="http://www.w3.org/1999/xhtml">
|
4
|
+
<head>
|
5
|
+
<title>Ferret-Browser:: <%= @path %></title>
|
6
|
+
<link rel="shortcut icon" type="image/x-icon" href="/s/i/favicon.ico" />
|
7
|
+
<link rel="stylesheet" type="text/css" href="/s/style.css" />
|
8
|
+
<script type="text/javascript" src="/s/global.js"></script>
|
9
|
+
</head>
|
10
|
+
<body id="<%= options[:controller] %>">
|
11
|
+
<ul id="top-menu">
|
12
|
+
<li class="home"><a href="/">Home</a></li>
|
13
|
+
<li class="document"><a href="/document">Documents</a></li>
|
14
|
+
<li class="term"><a href="/term">Terms</a></li>
|
15
|
+
<li class="term-vector"><a href="/term-vector">Term Vectors</a></li>
|
16
|
+
<li class="help"><a href="/help">Help</a></li>
|
17
|
+
</ul>
|
18
|
+
<div id="content">
|
19
|
+
<%= content %>
|
20
|
+
</div>
|
21
|
+
</body>
|
22
|
+
</html>
|
@@ -0,0 +1,199 @@
|
|
1
|
+
<h3>Terms</h3>
|
2
|
+
<form action="" method="get">
|
3
|
+
<label for="field">Choose a field:
|
4
|
+
<select id="field" name="field" onchange="location.href='/term/show/' + this.value;">
|
5
|
+
<option value="">--</option>
|
6
|
+
<% @reader.field_infos.each do |fi| next unless fi.indexed? %>
|
7
|
+
<option value="<%=fi.name%>" <%= 'selected="selected"' if @field == fi.name %>><%=fi.name%></option>
|
8
|
+
<% end %>
|
9
|
+
</select>
|
10
|
+
</label>
|
11
|
+
</form>
|
12
|
+
<hr/>
|
13
|
+
<% if @terms %>
|
14
|
+
<div style="float:left; padding-right:30px;">
|
15
|
+
<h4>Field: <%= @field %></h4>
|
16
|
+
<form action="" onsubmit="return findTerm(document.getElementById('choose-term').value);">
|
17
|
+
<input id="choose-term" type="text"/></form>
|
18
|
+
<hr/>
|
19
|
+
<div id="terms">
|
20
|
+
</div>
|
21
|
+
<hr/>
|
22
|
+
<form action="">
|
23
|
+
<input type="submit" value="Previous" onclick="return prevTerm();"/>
|
24
|
+
<input type="submit" value="Next" onclick="return nextTerm();"/><br/>
|
25
|
+
</form>
|
26
|
+
</div>
|
27
|
+
<div style="float:left; padding-right:30px;">
|
28
|
+
<h4>Documents
|
29
|
+
<input name="show-documents" type="checkbox" id="show-documents" onchange="displayDoc();"/></h4>
|
30
|
+
<div id="doc-display">
|
31
|
+
<form action="" onsubmit="return findDoc(document.getElementById('choose-doc').value);">
|
32
|
+
<input id="choose-doc" type="text"/></form>
|
33
|
+
<hr/>
|
34
|
+
<div id="documents">
|
35
|
+
</div>
|
36
|
+
<hr/>
|
37
|
+
<form action="">
|
38
|
+
<input type="submit" value="Previous" onclick="return prevDoc();"/>
|
39
|
+
<input type="submit" value="Next" onclick="return nextDoc();"/><br/>
|
40
|
+
</form>
|
41
|
+
</div>
|
42
|
+
</div>
|
43
|
+
<script type="text/javascript"><!--
|
44
|
+
function createSideHeaderedTable(content) {
|
45
|
+
var table = document.createElement("table");
|
46
|
+
table.setAttribute('cellpadding', "0");
|
47
|
+
table.setAttribute('cellspacing', "0");
|
48
|
+
table.className = "left-headed";
|
49
|
+
for (var i = 0; i < content.length; i++) {
|
50
|
+
var tr = document.createElement("tr");
|
51
|
+
var th = document.createElement("th");
|
52
|
+
th.appendChild(document.createTextNode(content[i][0]));
|
53
|
+
tr.appendChild(th);
|
54
|
+
var td = document.createElement("td");
|
55
|
+
var c = content[i][1];
|
56
|
+
td.appendChild(c instanceof Object ? c : document.createTextNode(c));
|
57
|
+
tr.appendChild(td);
|
58
|
+
table.appendChild(tr);
|
59
|
+
}
|
60
|
+
return table;
|
61
|
+
}
|
62
|
+
|
63
|
+
var terms = <%= @terms %>;
|
64
|
+
var numTerms = terms.length;
|
65
|
+
var termI = 0;
|
66
|
+
var termDiv = document.getElementById("terms");
|
67
|
+
var termChooserTxt = document.getElementById('choose-term');
|
68
|
+
|
69
|
+
var docs = null;
|
70
|
+
var numDocs = 0;
|
71
|
+
var docI = 0;
|
72
|
+
var docDiv = document.getElementById("documents");
|
73
|
+
var showDocs = document.getElementById("show-documents");
|
74
|
+
var docChooserTxt = document.getElementById('choose-doc');
|
75
|
+
var docDisplayDiv = document.getElementById('doc-display');
|
76
|
+
|
77
|
+
function getDocs() {
|
78
|
+
var req = new XMLHttpRequest();
|
79
|
+
req.open('GET', '/term/termdocs/<%=@field%>/' + encodeURIComponent(terms[termI][0]), true); // escape the term so '?', '#', '%' or spaces cannot corrupt the request path
|
80
|
+
req.onreadystatechange = function() {
|
81
|
+
if (req.readyState == 4) {
|
82
|
+
docs = eval(req.responseText);
|
83
|
+
numDocs = docs.length;
|
84
|
+
docI = 0;
|
85
|
+
displayDoc();
|
86
|
+
}
|
87
|
+
};
|
88
|
+
req.send(/*no params*/null);
|
89
|
+
return false;
|
90
|
+
}
|
91
|
+
function displayDoc() {
|
92
|
+
if (showDocs.checked) {
|
93
|
+
docDisplayDiv.style.display = 'block';
|
94
|
+
if (docs == null) {
|
95
|
+
getDocs();
|
96
|
+
} else {
|
97
|
+
docChooserTxt.value = docs[docI][0];
|
98
|
+
var docLink = document.createElement('a');
|
99
|
+
docLink.setAttribute('href', "/document/show/" + docs[docI][0]);
|
100
|
+
docLink.appendChild(document.createTextNode(docs[docI][0]));
|
101
|
+
var table = createSideHeaderedTable([
|
102
|
+
['index', '' + (docI + 1) + ' of ' + numDocs],
|
103
|
+
['document', docLink],
|
104
|
+
['number of occurrences', docs[docI][1]],
|
105
|
+
['positions', docs[docI][2].join(',')]
|
106
|
+
]);
|
107
|
+
if (docDiv.firstChild) docDiv.replaceChild(table, docDiv.firstChild);
|
108
|
+
else docDiv.appendChild(table);
|
109
|
+
}
|
110
|
+
} else {
|
111
|
+
docDiv.innerHTML = '';
|
112
|
+
docDisplayDiv.style.display = 'none';
|
113
|
+
}
|
114
|
+
return false;
|
115
|
+
}
|
116
|
+
function nextDoc() {
|
117
|
+
if (docI < numDocs - 1) {
|
118
|
+
docI++;
|
119
|
+
displayDoc();
|
120
|
+
} else alert('No more docs. Already at the end.');
|
121
|
+
return false;
|
122
|
+
}
|
123
|
+
function prevDoc() {
|
124
|
+
if (docI > 0) {
|
125
|
+
docI -= 1;
|
126
|
+
displayDoc();
|
127
|
+
} else alert('No more docs. Already at the start.');
|
128
|
+
return false;
|
129
|
+
}
|
130
|
+
function findDoc(doc) {
|
131
|
+
if (docs && docs.length > 0) {
|
132
|
+
docI = bsearch(docs, doc, function(a, b) {return a[0] < b});
|
133
|
+
if (docI >= docs.length) docI -= 1;
|
134
|
+
displayDoc();
|
135
|
+
}
|
136
|
+
return false;
|
137
|
+
}
|
138
|
+
function displayTerm() {
|
139
|
+
docs = null;
|
140
|
+
termChooserTxt.value = terms[termI][0];
|
141
|
+
var table = createSideHeaderedTable([
|
142
|
+
['index', '' + (termI + 1) + ' of ' + numTerms],
|
143
|
+
['term', terms[termI][0]],
|
144
|
+
['number of documents', terms[termI][1]]
|
145
|
+
]);
|
146
|
+
termDiv.replaceChild(table, termDiv.firstChild);
|
147
|
+
displayDoc();
|
148
|
+
}
|
149
|
+
function nextTerm() {
|
150
|
+
if (termI < numTerms - 1) {
|
151
|
+
termI++;
|
152
|
+
displayTerm();
|
153
|
+
} else alert('No more terms. Already at the end.');
|
154
|
+
return false;
|
155
|
+
}
|
156
|
+
function prevTerm() {
|
157
|
+
if (termI > 0) {
|
158
|
+
termI -= 1;
|
159
|
+
displayTerm();
|
160
|
+
} else alert('No more terms. Already at the start.');
|
161
|
+
return false;
|
162
|
+
}
|
163
|
+
function findTerm(term) {
|
164
|
+
if (terms && terms.length > 0) {
|
165
|
+
termI = bsearch(terms, term, function(a, b) {return a[0] < b});
|
166
|
+
var match = (terms[termI]||[])[0];
|
167
|
+
if (term != match) {
|
168
|
+
alert('Term <%=@field%>:' + term + ' not found in index');
|
169
|
+
} else {
|
170
|
+
displayTerm();
|
171
|
+
}
|
172
|
+
}
|
173
|
+
return false;
|
174
|
+
}
|
175
|
+
|
176
|
+
function TermSuggestionProvider() {
|
177
|
+
}
|
178
|
+
TermSuggestionProvider.prototype.requestSuggestions = function(oAutoSuggestControl, bTypeAhead) {
|
179
|
+
var aSuggestions = new Array();
|
180
|
+
var sTextboxValue = oAutoSuggestControl.textbox.value;
|
181
|
+
|
182
|
+
if (sTextboxValue.length > 0) {
|
183
|
+
var start = bsearch(terms, sTextboxValue, function(a, b) {return a[0] < b}); // declared locally so it does not leak into the global scope
|
184
|
+
for (var i = start; i < terms.length
|
185
|
+
&& terms[i][0].indexOf(sTextboxValue) == 0
|
186
|
+
&& aSuggestions.length < 10; i++) {
|
187
|
+
aSuggestions.push(terms[i][0]);
|
188
|
+
}
|
189
|
+
oAutoSuggestControl.autosuggest(aSuggestions, bTypeAhead);
|
190
|
+
}
|
191
|
+
};
|
192
|
+
window.onload = function() {
|
193
|
+
var oTermTextbox = new AutoSuggestControl(termChooserTxt, new TermSuggestionProvider());
|
194
|
+
}
|
195
|
+
displayTerm();
|
196
|
+
//-->
|
197
|
+
</script>
|
198
|
+
<% end %>
|
199
|
+
<hr/>
|
@@ -0,0 +1 @@
|
|
1
|
+
<%= @reader.term_positions_for(@field, @term).to_json(:fast) %>
|
@@ -0,0 +1,14 @@
|
|
1
|
+
module WEBrick
|
2
|
+
class FerretBrowserHandler < WEBrick::HTTPServlet::AbstractServlet
|
3
|
+
# Creates a FerretBrowserHandler, which answers for the application
|
4
|
+
# by delegating to a Ferret::Browser::Delegator built from +reader+ and +path+.
|
5
|
+
def initialize(server, reader, path)
|
6
|
+
super(server)
|
7
|
+
@delegator = Ferret::Browser::Delegator.new(reader, path)
|
8
|
+
end
|
9
|
+
# Handler for WEBrick GET requests; sets the response status, content type and body from the delegator's result.
|
10
|
+
def do_GET(req, res)
|
11
|
+
res.status, res.content_type, res.body = @delegator.run(req.meta_vars)
|
12
|
+
end
|
13
|
+
end
|
14
|
+
end
|
@@ -0,0 +1,130 @@
|
|
1
|
+
module Ferret
|
2
|
+
# Instead of using documents to add data to an index you can use Hashes and
|
3
|
+
# Arrays. The only real benefits of using a Document over a Hash are pretty
|
4
|
+
# printing and the boost attribute. You can add the boost attribute to
|
5
|
+
# Hashes and arrays using the BoostMixin. For example;
|
6
|
+
#
|
7
|
+
# class Hash
|
8
|
+
# include BoostMixin
|
9
|
+
# end
|
10
|
+
#
|
11
|
+
# class Array
|
12
|
+
# include BoostMixin
|
13
|
+
# end
|
14
|
+
#
|
15
|
+
# class String
|
16
|
+
# include BoostMixin
|
17
|
+
# end
|
18
|
+
module BoostMixin
|
19
|
+
attr_accessor :boost
|
20
|
+
end
|
21
|
+
|
22
|
+
# Documents are the unit of indexing and search.
|
23
|
+
#
|
24
|
+
# A Document is a set of fields. Each field has a name and an array of
|
25
|
+
# textual values. If you are coming from a Lucene background you should note
|
26
|
+
# that Fields don't have any properties except for the boost property. You
|
27
|
+
# should use the Ferret::Index::FieldInfos class to set field properties
|
28
|
+
# across the whole index instead.
|
29
|
+
#
|
30
|
+
# === Boost
|
31
|
+
#
|
32
|
+
# The boost attribute makes a Document more important in the index. That is,
|
33
|
+
# you can increase the score of a match for queries that match a particular
|
34
|
+
# document, making it more likely to appear at the top of search results.
|
35
|
+
# You may, for example, want to boost products that have a higher user
|
36
|
+
# rating so that they are more likely to appear in search results.
|
37
|
+
#
|
38
|
+
# Note that fields which are _not_ stored (see Ferret::Index::FieldInfos)
|
39
|
+
# are _not_ available in documents retrieved from the index, e.g.
|
40
|
+
# Ferret::Search::Searcher#doc or Ferret::Index::IndexReader#doc.
|
41
|
+
#
|
42
|
+
# Note that modifying a Document retrieved from the index will not modify
|
43
|
+
# the document contained within the index. You need to delete the old
|
44
|
+
# version of the document and add the new version of the document.
|
45
|
+
class Document < Hash
|
46
|
+
include BoostMixin
|
47
|
+
|
48
|
+
# Create a new Document object with a boost. The boost defaults to 1.0.
|
49
|
+
def initialize(boost = 1.0)
|
50
|
+
@boost = boost
|
51
|
+
end
|
52
|
+
|
53
|
+
# Return true if +o+ is a Document with the same boost, keys and values
|
54
|
+
def eql?(o)
|
55
|
+
return (o.is_a? Document and (o.boost == @boost) and
|
56
|
+
(self.keys == o.keys) and (self.values == o.values))
|
57
|
+
end
|
58
|
+
alias :== :eql?
|
59
|
+
|
60
|
+
# Create a string representation of the document
|
61
|
+
def to_s
|
62
|
+
buf = ["Document {"]
|
63
|
+
self.keys.sort_by {|key| key.to_s}.each do |key|
|
64
|
+
val = self[key]
|
65
|
+
val_str = if val.instance_of? Array then %{["#{val.join('", "')}"]}
|
66
|
+
elsif val.is_a? Field then val.to_s
|
67
|
+
else %{"#{val.to_s}"}
|
68
|
+
end
|
69
|
+
buf << " :#{key} => #{val_str}"
|
70
|
+
end
|
71
|
+
buf << "}#{@boost == 1.0 ? "" : "^" + @boost.to_s}" # closing brace, followed by ^boost only when the boost is not the default 1.0
|
72
|
+
return buf.join("\n")
|
73
|
+
end
|
74
|
+
end
|
75
|
+
|
76
|
+
# A Field is a section of a Document. A Field is basically an array with a
|
77
|
+
# boost attribute. It also provides pretty printing of the field with the
|
78
|
+
# #to_s method.
|
79
|
+
#
|
80
|
+
# === Boost
|
81
|
+
#
|
82
|
+
# The boost attribute makes a field more important in the index. That is,
|
83
|
+
# you can increase the score of a match for queries that match terms in a
|
84
|
+
# boosted field. You may, for example, want to boost a title field so that
|
85
|
+
# matches that match in the :title field score more highly than matches that
|
86
|
+
# match in the :contents field.
|
87
|
+
#
|
88
|
+
# Note: If you'd like to use boosted fields without having to use
|
89
|
+
# the Field class you can just include the BoostMixin in the Array class.
|
90
|
+
# See BoostMixin.
|
91
|
+
class Field < Array
|
92
|
+
include BoostMixin
|
93
|
+
|
94
|
+
# Create a new Field object. You can pass data to the field as either a
|
95
|
+
# string;
|
96
|
+
#
|
97
|
+
# f = Field.new("This is the fields data")
|
98
|
+
#
|
99
|
+
# or as an array of strings;
|
100
|
+
#
|
101
|
+
# f = Field.new(["this", "is", "an", "array", "of", "field", "data"])
|
102
|
+
#
|
103
|
+
# Of course Fields can also be boosted;
|
104
|
+
#
|
105
|
+
# f = Field.new("field data", 1000.0)
|
106
|
+
def initialize(data = [], boost = 1.0)
|
107
|
+
@boost = boost
|
108
|
+
if data.is_a? Array
|
109
|
+
data.each {|v| self << v}
|
110
|
+
else
|
111
|
+
self << data.to_s
|
112
|
+
end
|
113
|
+
end
|
114
|
+
|
115
|
+
def eql?(o)
|
116
|
+
return (o.is_a? Field and (o.boost == @boost) and super(o))
|
117
|
+
end
|
118
|
+
alias :== :eql?
|
119
|
+
|
120
|
+
def +(o)
|
121
|
+
return Field.new(super(o), self.boost)
|
122
|
+
end
|
123
|
+
|
124
|
+
def to_s
|
125
|
+
buf = %{["#{self.join('", "')}"]}
|
126
|
+
buf << "^#@boost" if @boost != 1.0
|
127
|
+
return buf
|
128
|
+
end
|
129
|
+
end
|
130
|
+
end
|
@@ -0,0 +1,44 @@
|
|
1
|
+
require 'yaml'
|
2
|
+
|
3
|
+
module Ferret::Index
|
4
|
+
class FieldInfos
|
5
|
+
# Load FieldInfos from a YAML file. The YAML file should look something like
|
6
|
+
# this:
|
7
|
+
# default:
|
8
|
+
# store: :yes
|
9
|
+
# index: :yes
|
10
|
+
# term_vector: :no
|
11
|
+
#
|
12
|
+
# fields:
|
13
|
+
# id:
|
14
|
+
# index: :untokenized
|
15
|
+
# term_vector: :no
|
16
|
+
#
|
17
|
+
# title:
|
18
|
+
# boost: 20.0
|
19
|
+
# term_vector: :no
|
20
|
+
#
|
21
|
+
# content:
|
22
|
+
# term_vector: :with_positions_offsets
|
23
|
+
#
|
24
|
+
def self.load(yaml_str)
|
25
|
+
info = YAML.load(yaml_str)
|
26
|
+
convert_strings_to_symbols(info)
|
27
|
+
fis = FieldInfos.new(info[:default])
|
28
|
+
fields = info[:fields]
|
29
|
+
fields.keys.each {|key| fis.add_field(key, fields[key])} if fields
|
30
|
+
fis
|
31
|
+
end
|
32
|
+
|
33
|
+
private
|
34
|
+
def self.convert_strings_to_symbols(hash)
|
35
|
+
hash.keys.each do |key|
|
36
|
+
convert_strings_to_symbols(hash[key]) if hash[key].is_a?(Hash)
|
37
|
+
if key.is_a?(String)
|
38
|
+
hash[key.intern] = hash[key]
|
39
|
+
hash.delete(key)
|
40
|
+
end
|
41
|
+
end
|
42
|
+
end
|
43
|
+
end
|
44
|
+
end
|
@@ -0,0 +1,87 @@
|
|
1
|
+
module Ferret
|
2
|
+
FIELD_TYPES = %w(integer float string byte).map{|t| t.to_sym}
|
3
|
+
|
4
|
+
# BlankSlate is a class with no instance methods except for __send__ and
|
5
|
+
# __id__. It is useful for creating proxy classes. It is currently used by
|
6
|
+
# the FieldSymbol class which is a proxy to the Symbol class
|
7
|
+
class BlankSlate
|
8
|
+
instance_methods.each { |m| undef_method m unless m =~ /^__/ }
|
9
|
+
end
|
10
|
+
|
11
|
+
# The FieldSymbolMethods module contains the methods that are added to both
|
12
|
+
# the Symbol class and the FieldSymbol class. These methods allow you to set
|
13
|
+
# the type of a field easily by calling a method on a symbol.
|
14
|
+
#
|
15
|
+
# Right now this is only useful for Sorting and grouping, but some day Ferret
|
16
|
+
# may have typed fields, in which case these methods will come in handy.
|
17
|
+
#
|
18
|
+
# The available types are specified in Ferret::FIELD_TYPES.
|
19
|
+
#
|
20
|
+
# == Examples
|
21
|
+
#
|
22
|
+
# index.search(query, :sort => :title.string.desc)
|
23
|
+
#
|
24
|
+
# index.search(query, :sort => [:price.float, :count.integer.desc])
|
25
|
+
#
|
26
|
+
# index.search(query, :group_by => :catalogue.string)
|
27
|
+
#
|
28
|
+
# == Note
|
29
|
+
#
|
30
|
+
# If you set the field type multiple times, the last type specified will be
|
31
|
+
# the type used. For example;
|
32
|
+
#
|
33
|
+
# puts :title.integer.float.byte.string.type.inspect # => :string
|
34
|
+
#
|
35
|
+
# Calling #desc twice will set desc? to false
|
36
|
+
#
|
37
|
+
# puts :title.desc? # => false
|
38
|
+
# puts :title.desc.desc? # => true
|
39
|
+
# puts :title.desc.desc.desc? # => false
|
40
|
+
module FieldSymbolMethods
|
41
|
+
FIELD_TYPES.each do |method|
|
42
|
+
define_method(method) do
|
43
|
+
fsym = FieldSymbol.new(self, respond_to?(:desc?) ? desc? : false)
|
44
|
+
fsym.type = method
|
45
|
+
fsym
|
46
|
+
end
|
47
|
+
end
|
48
|
+
|
49
|
+
# Set a field to be a descending field. This only makes sense in sort
|
50
|
+
# specifications.
|
51
|
+
def desc
|
52
|
+
fsym = FieldSymbol.new(self, respond_to?(:desc?) ? !desc? : true)
|
53
|
+
fsym.type = type if respond_to? :type
|
54
|
+
fsym
|
55
|
+
end
|
56
|
+
|
57
|
+
# Return whether or not this field should be a descending field
|
58
|
+
def desc?
|
59
|
+
@desc == true
|
60
|
+
end
|
61
|
+
|
62
|
+
# Return the type of this field
|
63
|
+
def type
|
64
|
+
@type || nil
|
65
|
+
end
|
66
|
+
end
|
67
|
+
|
68
|
+
# See FieldSymbolMethods
|
69
|
+
class FieldSymbol < BlankSlate
|
70
|
+
include FieldSymbolMethods
|
71
|
+
def initialize(symbol, desc = false)
|
72
|
+
@symbol = symbol
|
73
|
+
@desc = desc
|
74
|
+
end
|
75
|
+
|
76
|
+
def method_missing(method, *args)
|
77
|
+
@symbol.__send__(method, *args)
|
78
|
+
end
|
79
|
+
|
80
|
+
attr_writer :type, :desc
|
81
|
+
end
|
82
|
+
end
|
83
|
+
|
84
|
+
# See FieldSymbolMethods
|
85
|
+
class Symbol
|
86
|
+
include Ferret::FieldSymbolMethods
|
87
|
+
end
|