mhs-xapian 1.0.18a
Sign up to get free protection for your applications and to get access to all the features.
- data/.gitignore +3 -0
- data/AUTHORS +1 -0
- data/COPYING +340 -0
- data/ChangeLog +5876 -0
- data/HACKING +101 -0
- data/INSTALL +293 -0
- data/Makefile +722 -0
- data/Makefile.am +26 -0
- data/Makefile.in +722 -0
- data/NEWS +2110 -0
- data/README +59 -0
- data/Rakefile +51 -0
- data/TODO +47 -0
- data/aclocal.m4 +7675 -0
- data/config.guess +1501 -0
- data/config.h +56 -0
- data/config.h.in +55 -0
- data/config.status +1298 -0
- data/config.sub +1705 -0
- data/configure +18536 -0
- data/configure.ac +944 -0
- data/csharp/.deps/xapian_wrap.Plo +1 -0
- data/csharp/AssemblyInfo.cs +40 -0
- data/csharp/AssemblyInfo.cs.in +40 -0
- data/csharp/Auto.cs +46 -0
- data/csharp/BM25Weight.cs +107 -0
- data/csharp/BoolWeight.cs +103 -0
- data/csharp/Database.cs +275 -0
- data/csharp/DateValueRangeProcessor.cs +61 -0
- data/csharp/Document.cs +177 -0
- data/csharp/ESet.cs +94 -0
- data/csharp/ESetIterator.cs +117 -0
- data/csharp/Enquire.cs +274 -0
- data/csharp/ExpandDecider.cs +76 -0
- data/csharp/Flint.cs +58 -0
- data/csharp/InMemory.cs +46 -0
- data/csharp/MSet.cs +193 -0
- data/csharp/MSetIterator.cs +141 -0
- data/csharp/Makefile +868 -0
- data/csharp/Makefile.am +106 -0
- data/csharp/Makefile.in +868 -0
- data/csharp/MatchDecider.cs +76 -0
- data/csharp/MultiValueSorter.cs +63 -0
- data/csharp/NumberValueRangeProcessor.cs +61 -0
- data/csharp/PositionIterator.cs +101 -0
- data/csharp/PostingIterator.cs +125 -0
- data/csharp/Quartz.cs +58 -0
- data/csharp/Query.cs +150 -0
- data/csharp/QueryParser.cs +174 -0
- data/csharp/RSet.cs +102 -0
- data/csharp/Remote.cs +100 -0
- data/csharp/SWIGTYPE_p_std__string.cs +30 -0
- data/csharp/SWIGTYPE_p_std__vectorTXapian__Query_t.cs +30 -0
- data/csharp/SWIGTYPE_p_std__vectorTstd__string_t.cs +30 -0
- data/csharp/SimpleStopper.cs +64 -0
- data/csharp/SmokeTest.cs +178 -0
- data/csharp/Sorter.cs +76 -0
- data/csharp/Stem.cs +66 -0
- data/csharp/Stopper.cs +91 -0
- data/csharp/StringValueRangeProcessor.cs +53 -0
- data/csharp/TermGenerator.cs +152 -0
- data/csharp/TermIterator.cs +125 -0
- data/csharp/TradWeight.cs +107 -0
- data/csharp/ValueIterator.cs +102 -0
- data/csharp/ValueRangeProcessor.cs +76 -0
- data/csharp/Version.cs +60 -0
- data/csharp/Weight.cs +93 -0
- data/csharp/WritableDatabase.cs +153 -0
- data/csharp/Xapian.cs +65 -0
- data/csharp/XapianPINVOKE.cs +1527 -0
- data/csharp/docs/Makefile +450 -0
- data/csharp/docs/Makefile.am +16 -0
- data/csharp/docs/Makefile.in +450 -0
- data/csharp/docs/examples/SimpleExpand.cs +109 -0
- data/csharp/docs/examples/SimpleIndex.cs +71 -0
- data/csharp/docs/examples/SimpleSearch.cs +78 -0
- data/csharp/docs/index.html +211 -0
- data/csharp/util.i +233 -0
- data/csharp/xapian_wrap.cc +10338 -0
- data/csharp/xapian_wrap.h +93 -0
- data/depcomp +632 -0
- data/extconf.rb +20 -0
- data/generic/except.i +80 -0
- data/generic/generic.mk +48 -0
- data/install-sh +520 -0
- data/java-swig/.deps/xapian_wrap.Plo +1 -0
- data/java-swig/Auto.java +35 -0
- data/java-swig/BM25Weight.java +81 -0
- data/java-swig/BoolWeight.java +77 -0
- data/java-swig/Database.java +195 -0
- data/java-swig/DateValueRangeProcessor.java +51 -0
- data/java-swig/Document.java +135 -0
- data/java-swig/ESet.java +71 -0
- data/java-swig/ESetIterator.java +71 -0
- data/java-swig/Enquire.java +246 -0
- data/java-swig/ExpandDecider.java +59 -0
- data/java-swig/Flint.java +43 -0
- data/java-swig/InMemory.java +35 -0
- data/java-swig/MSet.java +143 -0
- data/java-swig/MSetIterator.java +87 -0
- data/java-swig/Makefile +781 -0
- data/java-swig/Makefile.am +132 -0
- data/java-swig/Makefile.in +781 -0
- data/java-swig/MatchDecider.java +59 -0
- data/java-swig/MultiValueSorter.java +51 -0
- data/java-swig/NumberValueRangeProcessor.java +51 -0
- data/java-swig/PositionIterator.java +63 -0
- data/java-swig/PostingIterator.java +83 -0
- data/java-swig/Quartz.java +43 -0
- data/java-swig/Query.java +189 -0
- data/java-swig/QueryParser.java +214 -0
- data/java-swig/RSet.java +79 -0
- data/java-swig/Remote.java +71 -0
- data/java-swig/SWIGTYPE_p_std__string.java +25 -0
- data/java-swig/SimpleStopper.java +51 -0
- data/java-swig/SmokeTest.java +161 -0
- data/java-swig/Sorter.java +59 -0
- data/java-swig/Stem.java +51 -0
- data/java-swig/Stopper.java +63 -0
- data/java-swig/StringValueRangeProcessor.java +43 -0
- data/java-swig/TermGenerator.java +158 -0
- data/java-swig/TermIterator.java +83 -0
- data/java-swig/TradWeight.java +81 -0
- data/java-swig/ValueIterator.java +67 -0
- data/java-swig/ValueRangeProcessor.java +59 -0
- data/java-swig/Version.java +47 -0
- data/java-swig/Weight.java +68 -0
- data/java-swig/WritableDatabase.java +123 -0
- data/java-swig/Xapian.java +39 -0
- data/java-swig/XapianConstants.java +15 -0
- data/java-swig/XapianJNI.java +508 -0
- data/java-swig/run-java-test +6 -0
- data/java-swig/xapian_wrap.cc +12594 -0
- data/java-swig/xapian_wrap.h +91 -0
- data/java/Makefile +660 -0
- data/java/Makefile.am +35 -0
- data/java/Makefile.in +660 -0
- data/java/README +76 -0
- data/java/SmokeTest.java +148 -0
- data/java/native/.deps/Database.Plo +1 -0
- data/java/native/.deps/Document.Plo +1 -0
- data/java/native/.deps/ESet.Plo +1 -0
- data/java/native/.deps/ESetIterator.Plo +1 -0
- data/java/native/.deps/Enquire.Plo +1 -0
- data/java/native/.deps/MSet.Plo +1 -0
- data/java/native/.deps/MSetIterator.Plo +1 -0
- data/java/native/.deps/PositionIterator.Plo +1 -0
- data/java/native/.deps/Query.Plo +1 -0
- data/java/native/.deps/RSet.Plo +1 -0
- data/java/native/.deps/Stem.Plo +1 -0
- data/java/native/.deps/TermIterator.Plo +1 -0
- data/java/native/.deps/WritableDatabase.Plo +1 -0
- data/java/native/.deps/org_xapian_XapianJNI.Plo +1 -0
- data/java/native/.deps/utils.Plo +1 -0
- data/java/native/Database.cc +222 -0
- data/java/native/Document.cc +173 -0
- data/java/native/ESet.cc +79 -0
- data/java/native/ESetIterator.cc +82 -0
- data/java/native/Enquire.cc +271 -0
- data/java/native/MSet.cc +169 -0
- data/java/native/MSetIterator.cc +107 -0
- data/java/native/Makefile +594 -0
- data/java/native/Makefile.am +51 -0
- data/java/native/Makefile.in +594 -0
- data/java/native/PositionIterator.cc +64 -0
- data/java/native/Query.cc +180 -0
- data/java/native/RSet.cc +98 -0
- data/java/native/Stem.cc +75 -0
- data/java/native/TermIterator.cc +107 -0
- data/java/native/WritableDatabase.cc +118 -0
- data/java/native/XapianObjectHolder.h +115 -0
- data/java/native/org_xapian_XapianJNI.cc +78 -0
- data/java/native/org_xapian_XapianJNI.h +1369 -0
- data/java/native/utils.cc +51 -0
- data/java/native/xapian_jni.h +116 -0
- data/java/org/xapian/Database.java +148 -0
- data/java/org/xapian/Document.java +135 -0
- data/java/org/xapian/ESet.java +66 -0
- data/java/org/xapian/ESetIterator.java +97 -0
- data/java/org/xapian/Enquire.java +136 -0
- data/java/org/xapian/ExpandDecider.java +30 -0
- data/java/org/xapian/MSet.java +104 -0
- data/java/org/xapian/MSetIterator.java +132 -0
- data/java/org/xapian/Makefile +580 -0
- data/java/org/xapian/Makefile.am +38 -0
- data/java/org/xapian/Makefile.in +580 -0
- data/java/org/xapian/MatchDecider.java +30 -0
- data/java/org/xapian/PositionIterator.java +89 -0
- data/java/org/xapian/Query.java +190 -0
- data/java/org/xapian/RSet.java +89 -0
- data/java/org/xapian/Stem.java +80 -0
- data/java/org/xapian/TermIterator.java +142 -0
- data/java/org/xapian/WritableDatabase.java +92 -0
- data/java/org/xapian/Xapian.java +114 -0
- data/java/org/xapian/XapianJNI.java +444 -0
- data/java/org/xapian/errors/AssertionError.java +40 -0
- data/java/org/xapian/errors/DatabaseCorruptError.java +40 -0
- data/java/org/xapian/errors/DatabaseError.java +40 -0
- data/java/org/xapian/errors/DatabaseLockError.java +40 -0
- data/java/org/xapian/errors/DatabaseModifiedError.java +40 -0
- data/java/org/xapian/errors/DatabaseOpeningError.java +40 -0
- data/java/org/xapian/errors/DocNotFoundError.java +40 -0
- data/java/org/xapian/errors/FeatureUnavailableError.java +40 -0
- data/java/org/xapian/errors/InternalError.java +40 -0
- data/java/org/xapian/errors/InvalidArgumentError.java +40 -0
- data/java/org/xapian/errors/InvalidOperationError.java +40 -0
- data/java/org/xapian/errors/LogicError.java +40 -0
- data/java/org/xapian/errors/Makefile +416 -0
- data/java/org/xapian/errors/Makefile.am +32 -0
- data/java/org/xapian/errors/Makefile.in +416 -0
- data/java/org/xapian/errors/NetworkError.java +40 -0
- data/java/org/xapian/errors/NetworkTimeoutError.java +40 -0
- data/java/org/xapian/errors/RangeError.java +40 -0
- data/java/org/xapian/errors/RuntimeError.java +40 -0
- data/java/org/xapian/errors/UnimplementedError.java +40 -0
- data/java/org/xapian/errors/XapianError.java +40 -0
- data/java/org/xapian/errors/XapianRuntimeError.java +49 -0
- data/java/org/xapian/examples/Makefile +391 -0
- data/java/org/xapian/examples/Makefile.am +8 -0
- data/java/org/xapian/examples/Makefile.in +391 -0
- data/java/org/xapian/examples/SimpleIndex.java +68 -0
- data/java/org/xapian/examples/SimpleSearch.java +71 -0
- data/java/run-java-test +6 -0
- data/libtool +7618 -0
- data/ltmain.sh +6956 -0
- data/mhs-xapian.gemspec +368 -0
- data/missing +378 -0
- data/php/.deps/xapian_wrap.Plo +1 -0
- data/php/Makefile +871 -0
- data/php/Makefile.am +82 -0
- data/php/Makefile.in +871 -0
- data/php/docs/Makefile +453 -0
- data/php/docs/Makefile.am +19 -0
- data/php/docs/Makefile.in +453 -0
- data/php/docs/examples/simpleexpand.php4 +108 -0
- data/php/docs/examples/simpleexpand.php5 +104 -0
- data/php/docs/examples/simpleindex.php4 +76 -0
- data/php/docs/examples/simpleindex.php5 +73 -0
- data/php/docs/examples/simplesearch.php4 +75 -0
- data/php/docs/examples/simplesearch.php5 +72 -0
- data/php/docs/index.html +313 -0
- data/php/except.i +98 -0
- data/php/php4/php_xapian.h +323 -0
- data/php/php4/xapian.php +32 -0
- data/php/php4/xapian_wrap.cc +27656 -0
- data/php/php5/php_xapian.h +319 -0
- data/php/php5/xapian.php +1566 -0
- data/php/php5/xapian_wrap.cc +24330 -0
- data/php/smoketest.php +246 -0
- data/php/smoketest4.php +84 -0
- data/php/smoketest5.php +79 -0
- data/php/util.i +187 -0
- data/python/.deps/xapian_wrap.Plo +1 -0
- data/python/Makefile +891 -0
- data/python/Makefile.am +105 -0
- data/python/Makefile.in +891 -0
- data/python/doccomments.i +5134 -0
- data/python/docs/Makefile +448 -0
- data/python/docs/Makefile.am +14 -0
- data/python/docs/Makefile.in +448 -0
- data/python/docs/examples/simpleexpand.py +98 -0
- data/python/docs/examples/simpleindex.py +65 -0
- data/python/docs/examples/simplematchdecider.py +78 -0
- data/python/docs/examples/simplesearch.py +65 -0
- data/python/docs/index.html +420 -0
- data/python/except.i +290 -0
- data/python/extra.i +1048 -0
- data/python/extracomments.i +28 -0
- data/python/generate-python-exceptions +189 -0
- data/python/generate-python-exceptions.in +189 -0
- data/python/modern/xapian.py +5662 -0
- data/python/modern/xapian_wrap.cc +35170 -0
- data/python/modern/xapian_wrap.h +244 -0
- data/python/pythontest.py +1110 -0
- data/python/smoketest.py +328 -0
- data/python/testsuite.py +382 -0
- data/python/util.i +517 -0
- data/ruby/.deps/xapian_wrap.Plo +494 -0
- data/ruby/.libs/_xapian.bundle +0 -0
- data/ruby/.libs/_xapian.bundle.dSYM/Contents/Info.plist +25 -0
- data/ruby/.libs/_xapian.bundle.dSYM/Contents/Resources/DWARF/_xapian.bundle +0 -0
- data/ruby/.libs/_xapian.la +35 -0
- data/ruby/.libs/_xapian.lai +35 -0
- data/ruby/Makefile +854 -0
- data/ruby/Makefile.am +62 -0
- data/ruby/Makefile.in +854 -0
- data/ruby/_xapian.la +35 -0
- data/ruby/docs/Makefile +487 -0
- data/ruby/docs/Makefile.am +50 -0
- data/ruby/docs/Makefile.in +487 -0
- data/ruby/docs/examples/simpleexpand.rb +98 -0
- data/ruby/docs/examples/simpleindex.rb +60 -0
- data/ruby/docs/examples/simplematchdecider.rb +74 -0
- data/ruby/docs/examples/simplesearch.rb +63 -0
- data/ruby/docs/index.html +197 -0
- data/ruby/smoketest.rb +211 -0
- data/ruby/util.i +232 -0
- data/ruby/xapian.rb +280 -0
- data/ruby/xapian_wrap.cc +25837 -0
- data/ruby/xapian_wrap.h +65 -0
- data/ruby/xapian_wrap.lo +12 -0
- data/skiptest +2 -0
- data/stamp-h1 +1 -0
- data/tcl8/.deps/xapian_wrap.Plo +1 -0
- data/tcl8/Makefile +835 -0
- data/tcl8/Makefile.am +49 -0
- data/tcl8/Makefile.in +835 -0
- data/tcl8/docs/Makefile +448 -0
- data/tcl8/docs/Makefile.am +14 -0
- data/tcl8/docs/Makefile.in +448 -0
- data/tcl8/docs/examples/simpleexpand.tcl +104 -0
- data/tcl8/docs/examples/simpleindex.tcl +68 -0
- data/tcl8/docs/examples/simplesearch.tcl +66 -0
- data/tcl8/docs/index.html +208 -0
- data/tcl8/except.i +48 -0
- data/tcl8/pkgIndex.tcl +1 -0
- data/tcl8/pkgIndex.tcl.in +1 -0
- data/tcl8/run-tcl-test +15 -0
- data/tcl8/runtest.tcl +29 -0
- data/tcl8/smoketest.tcl +155 -0
- data/tcl8/util.i +76 -0
- data/tcl8/xapian_wrap.cc +20900 -0
- data/xapian-bindings.spec +206 -0
- data/xapian-bindings.spec.in +206 -0
- data/xapian-version.h +1 -0
- data/xapian-version.h.in +1 -0
- data/xapian.i +939 -0
- metadata +395 -0
@@ -0,0 +1,98 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
#
|
3
|
+
# Simple example script demonstrating query expansion.
|
4
|
+
#
|
5
|
+
# Originally by Paul Legato (plegato@nks.net), 4/22/06.
|
6
|
+
#
|
7
|
+
# Copyright (C) 2006 Networked Knowledge Systems, Inc.
|
8
|
+
# Copyright (C) 2006,2007 Olly Betts
|
9
|
+
#
|
10
|
+
# This program is free software; you can redistribute it and/or
|
11
|
+
# modify it under the terms of the GNU General Public License as
|
12
|
+
# published by the Free Software Foundation; either version 2 of the
|
13
|
+
# License, or (at your option) any later version.
|
14
|
+
#
|
15
|
+
# This program is distributed in the hope that it will be useful,
|
16
|
+
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
17
|
+
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
18
|
+
# GNU General Public License for more details.
|
19
|
+
#
|
20
|
+
# You should have received a copy of the GNU General Public License
|
21
|
+
# along with this program; if not, write to the Free Software
|
22
|
+
# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301
|
23
|
+
# USA
|
24
|
+
|
25
|
+
require 'xapian'
|
26
|
+
|
27
|
+
# Usage: PATH_TO_DATABASE QUERY [-- [DOCID...]]
#
# Runs QUERY against the database, prints the top matches, and then prints
# the terms suggested for expanding the query.  Document ids given after
# '--' are added to the relevance set; if none are given, the top matches
# of the query (at most 5) are used instead.
if ARGV.size < 2
  $stderr.puts "Usage: #{$0} PATH_TO_DATABASE QUERY [-- [DOCID...]]"
  exit 99
end

# Open the database for searching.
database = Xapian::Database.new(ARGV[0])

# Start an enquire session.
enquire = Xapian::Enquire.new(database)

queryString = ''
relevantDocs = Xapian::RSet.new()
onDocIdsYet = false

# Combine the rest of the command line arguments with spaces between
# them, so that simple queries don't have to be quoted at the shell
# level.
ARGV.each_with_index { |arg,index|
  next if index == 0 # skip path to db

  if arg == '--'
    # Everything after the separator is a document id, not a query word.
    onDocIdsYet = true
    next
  end

  if onDocIdsYet
    relevantDocs.add_document(arg.to_i)
  else
    queryString += ' ' unless queryString.empty?
    queryString += arg
  end
}

# Parse the query string to produce a Xapian::Query object.
qp = Xapian::QueryParser.new()
stemmer = Xapian::Stem.new("english")
qp.stemmer = stemmer
qp.database = database
qp.stemming_strategy = Xapian::QueryParser::STEM_SOME
query = qp.parse_query(queryString)

# Remains nil when the query is empty (e.g. only "-- DOCID..." was given),
# because no search is run in that case.
matchset = nil

unless query.empty?
  puts "Parsed query is: #{query.description()}"

  # Find the top 10 results for the query.
  enquire.query = query
  matchset = enquire.mset(0, 10, relevantDocs)

  # Display the results.
  puts "#{matchset.matches_estimated()} results found."
  puts "Matches 1-#{matchset.size}:\n"

  matchset.matches.each {|m|
    puts "#{m.rank + 1}: #{m.percent}% docid=#{m.docid} [#{m.document.data}]\n"
  }
end

# Put the top 5 (at most) docs into the rset if rset is empty.  This is
# only possible when a search was actually run; without the matchset check
# an empty query with no DOCIDs would call a method on nil and crash.
if relevantDocs.empty? && matchset
  matchset.matches[0..4].each {|match|
    relevantDocs.add_document(match.docid())
  }
end

# Get the suggested expand terms
expandTerms = enquire.eset(10, relevantDocs)
puts "#{expandTerms.size()} suggested additional terms:"
expandTerms.terms.each {|term|
  puts " * Term \"#{term.name}\", weight #{term.weight}"
}
|
@@ -0,0 +1,60 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
#
|
3
|
+
# Index each paragraph of a text file as a Xapian document.
|
4
|
+
#
|
5
|
+
# Originally by Paul Legato (plegato@nks.net), 4/22/06
|
6
|
+
# Based on Python's simplesearch.py
|
7
|
+
# Copyright (C) 2006 Networked Knowledge Systems, Inc.
|
8
|
+
# Copyright (C) 2007 Olly Betts
|
9
|
+
#
|
10
|
+
# This program is free software; you can redistribute it and/or
|
11
|
+
# modify it under the terms of the GNU General Public License as
|
12
|
+
# published by the Free Software Foundation; either version 2 of the
|
13
|
+
# License, or (at your option) any later version.
|
14
|
+
#
|
15
|
+
# This program is distributed in the hope that it will be useful,
|
16
|
+
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
17
|
+
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
18
|
+
# GNU General Public License for more details.
|
19
|
+
#
|
20
|
+
# You should have received a copy of the GNU General Public License
|
21
|
+
# along with this program; if not, write to the Free Software
|
22
|
+
# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301
|
23
|
+
# USA
|
24
|
+
|
25
|
+
require 'xapian'
|
26
|
+
|
27
|
+
# Usage: PATH_TO_DATABASE
#
# Reads text from standard input and adds each paragraph (a run of
# non-blank lines, joined with single spaces) to the database as one
# document.  Paragraphs are separated by blank lines; the end of input
# also terminates the current paragraph.
if ARGV.size != 1
  $stderr.puts "Usage: #{$0} PATH_TO_DATABASE"
  exit 99
end

# Open the database for update, creating a new database if necessary.
database = Xapian::WritableDatabase.new(ARGV[0], Xapian::DB_CREATE_OR_OPEN)

indexer = Xapian::TermGenerator.new()
stemmer = Xapian::Stem.new("english")
indexer.stemmer = stemmer

# Stores +text+ as the data of a new document, indexes it with the term
# generator, and adds the document to the database.
add_paragraph = lambda { |text|
  doc = Xapian::Document.new()
  doc.data = text

  indexer.document = doc
  indexer.index_text(text)

  # Add the document to the database
  database.add_document(doc)
}

para = ''
while line = $stdin.gets()
  line.strip!()
  if line.empty?
    if not para.empty?
      # We've reached the end of a paragraph, so index it.
      add_paragraph.call(para)
      para = ''
    end # if not para.empty?
  else # line not empty
    para += ' ' if para != ''
    para += line
  end # if line empty
end

# Input that does not end with a blank line leaves a paragraph pending;
# index it too rather than silently dropping it.
add_paragraph.call(para) unless para.empty?
|
60
|
+
|
@@ -0,0 +1,74 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
#
|
3
|
+
# Simple command-line match decider example.
|
4
|
+
#
|
5
|
+
#
|
6
|
+
# Copyright (C) 2006 Networked Knowledge Systems, Inc.
|
7
|
+
# Copyright (C) 2006,2007 Olly Betts
|
8
|
+
#
|
9
|
+
# This program is free software; you can redistribute it and/or
|
10
|
+
# modify it under the terms of the GNU General Public License as
|
11
|
+
# published by the Free Software Foundation; either version 2 of the
|
12
|
+
# License, or (at your option) any later version.
|
13
|
+
#
|
14
|
+
# This program is distributed in the hope that it will be useful,
|
15
|
+
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
16
|
+
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
17
|
+
# GNU General Public License for more details.
|
18
|
+
#
|
19
|
+
# You should have received a copy of the GNU General Public License
|
20
|
+
# along with this program; if not, write to the Free Software
|
21
|
+
# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301
|
22
|
+
# USA
|
23
|
+
|
24
|
+
require 'xapian'
|
25
|
+
|
26
|
+
# A database path, a value to avoid and at least one query word are
# required; otherwise print the usage line and stop.
unless ARGV.size >= 3
  $stderr.puts "Usage: #{$0} PATH_TO_DATABASE AVOID_VALUE QUERY"
  exit 99
end

# Match decider whose result is true unless value number 0 of the
# document equals the value supplied at construction time.
class MyMatchDecider < Xapian::MatchDecider
  def initialize(avoidvalue)
    @avoidvalue = avoidvalue
    super()
  end

  def __call__(doc)
    doc.value(0) != @avoidvalue
  end
end

db_path, avoid_value, *query_words = ARGV

# Open the database for searching and start an enquire session on it.
database = Xapian::Database.new(db_path)
enquire = Xapian::Enquire.new(database)

# Join the remaining command line arguments with spaces, so that simple
# queries don't have to be quoted at the shell level, then parse the
# result into a Xapian::Query object.
parser = Xapian::QueryParser.new()
parser.stemmer = Xapian::Stem.new("english")
parser.database = database
parser.stemming_strategy = Xapian::QueryParser::STEM_SOME
query = parser.parse_query(query_words.join(' '))

puts "Parsed query is: #{query.description()}"

# Ask for the top 10 results, passing the match decider to the search.
enquire.query = query
mdecider = MyMatchDecider.new(avoid_value)
matchset = enquire.mset(0, 10, nil, mdecider)

# Report the results.
puts "#{matchset.matches_estimated()} results found."
puts "Matches 1-#{matchset.size}:\n"

matchset.matches.each do |hit|
  puts "#{hit.rank + 1}: #{hit.percent}% docid=#{hit.docid} [#{hit.document.data}]\n"
end
|
@@ -0,0 +1,63 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
#
|
3
|
+
# Simple command-line search script.
|
4
|
+
#
|
5
|
+
# Originally by Paul Legato (plegato@nks.net), 4/22/06.
|
6
|
+
#
|
7
|
+
# Copyright (C) 2006 Networked Knowledge Systems, Inc.
|
8
|
+
# Copyright (C) 2006,2007 Olly Betts
|
9
|
+
#
|
10
|
+
# This program is free software; you can redistribute it and/or
|
11
|
+
# modify it under the terms of the GNU General Public License as
|
12
|
+
# published by the Free Software Foundation; either version 2 of the
|
13
|
+
# License, or (at your option) any later version.
|
14
|
+
#
|
15
|
+
# This program is distributed in the hope that it will be useful,
|
16
|
+
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
17
|
+
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
18
|
+
# GNU General Public License for more details.
|
19
|
+
#
|
20
|
+
# You should have received a copy of the GNU General Public License
|
21
|
+
# along with this program; if not, write to the Free Software
|
22
|
+
# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301
|
23
|
+
# USA
|
24
|
+
|
25
|
+
require 'xapian'
|
26
|
+
|
27
|
+
# A database path and at least one query word are required; otherwise
# print the usage line and stop.
unless ARGV.size >= 2
  $stderr.puts "Usage: #{$0} PATH_TO_DATABASE QUERY"
  exit 99
end

db_path, *query_words = ARGV

# Open the database for searching and start an enquire session on it.
database = Xapian::Database.new(db_path)
enquire = Xapian::Enquire.new(database)

# Join the remaining command line arguments with spaces, so that simple
# queries don't have to be quoted at the shell level, then parse the
# result into a Xapian::Query object.
parser = Xapian::QueryParser.new()
parser.stemmer = Xapian::Stem.new("english")
parser.database = database
parser.stemming_strategy = Xapian::QueryParser::STEM_SOME
query = parser.parse_query(query_words.join(' '))

puts "Parsed query is: #{query.description()}"

# Ask for the top 10 results for the query.
enquire.query = query
matchset = enquire.mset(0, 10)

# Report the results.
puts "#{matchset.matches_estimated()} results found."
puts "Matches 1-#{matchset.size}:\n"

matchset.matches.each do |hit|
  puts "#{hit.rank + 1}: #{hit.percent}% docid=#{hit.docid} [#{hit.document.data}]\n"
end
|
@@ -0,0 +1,197 @@
|
|
1
|
+
<html><head><title>Ruby bindings for Xapian</title></head>
|
2
|
+
<body>
|
3
|
+
<h1>Ruby bindings for Xapian</h1>
|
4
|
+
|
5
|
+
<p>
|
6
|
+
The Ruby bindings for Xapian are packaged in the <code>xapian</code> module.
|
7
|
+
Ruby strings and arrays are converted automatically in the bindings, so
|
8
|
+
generally they should just work naturally.
|
9
|
+
</p>
|
10
|
+
|
11
|
+
<p>
|
12
|
+
The <code>examples</code> subdirectory contains examples showing how to use the
|
13
|
+
Ruby bindings based on the simple examples from <code>xapian-examples</code>:
|
14
|
+
<a href="examples/simpleindex.rb">simpleindex.rb</a>,
|
15
|
+
<a href="examples/simplesearch.rb">simplesearch.rb</a>,
|
16
|
+
<a href="examples/simpleexpand.rb">simpleexpand.rb</a>.
|
17
|
+
There's also
|
18
|
+
<a href="examples/simplematchdecider.rb">simplematchdecider.rb</a>
|
19
|
+
which shows how to define a MatchDecider in Ruby.
|
20
|
+
</p>
|
21
|
+
|
22
|
+
<h2>Usage</h2>
|
23
|
+
|
24
|
+
<p>To use the bindings, you need to use <code>require 'xapian'</code>
|
25
|
+
in your ruby program.</p>
|
26
|
+
|
27
|
+
<p>
|
28
|
+
Most standard Xapian methods are available directly
|
29
|
+
to your Ruby program. Names have been altered to conform to the
|
30
|
+
standard Ruby naming conventions (e.g. get_foo() in C++ becomes foo()
|
31
|
+
in Ruby; set_foo() becomes foo=().) C++ 'operator()' methods are
|
32
|
+
renamed to 'call' methods in Ruby.
|
33
|
+
</p>
|
34
|
+
|
35
|
+
<p>
|
36
|
+
The C++ methods are not yet documented in the <a href="rdocs/">RDocs</a>.
|
37
|
+
In the meantime, refer to the
|
38
|
+
<a href="http://xapian.org/docs/apidoc/html/annotated.html">C++ API
|
39
|
+
documentation</a> for information on how to use the various methods. Most are
|
40
|
+
available directly in the Ruby version. The RDocs currently provide information
|
41
|
+
only on methods that are unique to the Ruby version.
|
42
|
+
</p>
|
43
|
+
|
44
|
+
<p>
|
45
|
+
The dangerous/non-Rubyish methods from the C++ API have been renamed to
|
46
|
+
start with underscores ('_') in the Ruby bindings. You can see them in
|
47
|
+
use in xapian.rb. It is strongly recommended that you do not call any
|
48
|
+
method that starts with _ directly in your code, but instead use the
|
49
|
+
wrappers defined in xapian.rb. Improper use of an _ method can cause
|
50
|
+
the Ruby process to segfault.
|
51
|
+
</p>
|
52
|
+
|
53
|
+
<h2>Unicode Support</h2>
|
54
|
+
|
55
|
+
<p>
|
56
|
+
In Xapian 1.0.0 and later, the Xapian::Stem, Xapian::QueryParser, and
|
57
|
+
Xapian::TermGenerator classes all assume text is in UTF-8. If you want
|
58
|
+
to index strings in a different encoding, use the Ruby
|
59
|
+
<a href="http://www.ruby-doc.org/stdlib/libdoc/iconv/rdoc/index.html"
|
60
|
+
><code>Iconv</code> class</a>
|
61
|
+
to convert them to UTF-8 before passing them to Xapian, and
|
62
|
+
when reading values back from Xapian.
|
63
|
+
</p>
|
64
|
+
|
65
|
+
<!--
|
66
|
+
<h2>Exceptions</h2>
|
67
|
+
|
68
|
+
<p>
|
69
|
+
Exceptions are thrown as SWIG exceptions instead of Xapian
|
70
|
+
exceptions. This isn't done well at the moment; in future we will
|
71
|
+
throw wrapped Xapian exceptions. For now, it's probably easier to
|
72
|
+
catch all exceptions and try to take appropriate action based on
|
73
|
+
their associated string.
|
74
|
+
</p>
|
75
|
+
-->
|
76
|
+
|
77
|
+
<h2>Iterators</h2>
|
78
|
+
|
79
|
+
<p>
|
80
|
+
One important difference from the C++ API is that *Iterator
|
81
|
+
classes should not be used from Ruby, as they fit awkwardly into
|
82
|
+
standard Ruby iteration paradigms, and as many of them cause segfaults
|
83
|
+
if used improperly. They have all been wrapped with appropriate
|
84
|
+
methods that simply return the *Iterator objects in an Array, so that
|
85
|
+
you can use 'each' to iterate through them.
|
86
|
+
</p>
|
87
|
+
|
88
|
+
<pre>
|
89
|
+
mset.matches.each {|match|
|
90
|
+
# do something
|
91
|
+
}
|
92
|
+
</pre>
|
93
|
+
|
94
|
+
<!--
|
95
|
+
<h2>Iterator dereferencing</h2>
|
96
|
+
|
97
|
+
<p>
|
98
|
+
C++ iterators are often dereferenced to get information, eg
|
99
|
+
<code>(*it)</code>. With Python these are all mapped to named methods, as
|
100
|
+
follows:
|
101
|
+
</p>
|
102
|
+
|
103
|
+
<table title="Iterator dereferencing methods">
|
104
|
+
<thead><td>Iterator</td><td>Dereferencing method</td></thead>
|
105
|
+
<tr><td>PositionIterator</td> <td><code>get_termpos()</code></td></tr>
|
106
|
+
<tr><td>PostingIterator</td> <td><code>get_docid()</code></td></tr>
|
107
|
+
<tr><td>TermIterator</td> <td><code>get_term()</code></td></tr>
|
108
|
+
<tr><td>ValueIterator</td> <td><code>get_value()</code></td></tr>
|
109
|
+
<tr><td>MSetIterator</td> <td><code>get_docid()</code></td></tr>
|
110
|
+
<tr><td>ESetIterator</td> <td><code>get_term()</code></td></tr>
|
111
|
+
</table>
|
112
|
+
|
113
|
+
<p>
|
114
|
+
Other methods, such as <code>MSetIterator.get_document()</code>, are
|
115
|
+
available unchanged.
|
116
|
+
</p>
|
117
|
+
|
118
|
+
<h2>MSet</h2>
|
119
|
+
|
120
|
+
<p>
|
121
|
+
MSet objects have some additional methods to simplify access (these
|
122
|
+
work using the C++ array dereferencing):
|
123
|
+
</p>
|
124
|
+
|
125
|
+
<table title="MSet additional methods">
|
126
|
+
<thead><td>Method name</td><td>Explanation</td></thead>
|
127
|
+
<tr><td><code>get_hit(index)</code></td><td>returns MSetIterator at index</td></tr>
|
128
|
+
<tr><td><code>get_document_percentage(index)</code></td><td><code>convert_to_percent(get_hit(index))</code></td></tr>
|
129
|
+
<tr><td><code>get_document(index)</code></td><td><code>get_hit(index).get_document()</code></td></tr>
|
130
|
+
<tr><td><code>get_docid(index)</code></td><td><code>get_hit(index).get_docid()</code></td></tr>
|
131
|
+
</table>
|
132
|
+
|
133
|
+
-->
|
134
|
+
|
135
|
+
<h2>Non-Class Functions</h2>
|
136
|
+
|
137
|
+
<p>The C++ API contains a few non-class functions (the Database factory
|
138
|
+
functions, and some functions reporting version information), which are
|
139
|
+
wrapped like so for Ruby:
|
140
|
+
<ul>
|
141
|
+
<ul>
|
142
|
+
<li> <code>Xapian::version_string()</code> is wrapped as <code>Xapian::version_string()</code>
|
143
|
+
<li> <code>Xapian::major_version()</code> is wrapped as <code>Xapian::major_version()</code>
|
144
|
+
<li> <code>Xapian::minor_version()</code> is wrapped as <code>Xapian::minor_version()</code>
|
145
|
+
<li> <code>Xapian::revision()</code> is wrapped as <code>Xapian::revision()</code>
|
146
|
+
</ul>
|
147
|
+
|
148
|
+
<ul>
|
149
|
+
<li> <code>Xapian::Auto::open_stub()</code> is wrapped as <code>Xapian::open_stub()</code>
|
150
|
+
<li> <code>Xapian::Flint::open()</code> is wrapped as <code>Xapian::flint_open()</code>
|
151
|
+
<li> <code>Xapian::InMemory::open()</code> is wrapped as <code>Xapian::inmemory_open()</code>
|
152
|
+
<li> <code>Xapian::Quartz::open()</code> is wrapped as <code>Xapian::quartz_open()</code>
|
153
|
+
<li> <code>Xapian::Remote::open()</code> is wrapped as <code>Xapian::remote_open()</code> (both
|
154
|
+
the TCP and "program" versions are wrapped - the SWIG wrapper checks the parameter list to
|
155
|
+
decide which to call).
|
156
|
+
<li> <code>Xapian::Remote::open_writable()</code> is wrapped as <code>Xapian::remote_open_writable()</code> (both
|
157
|
+
the TCP and "program" versions are wrapped - the SWIG wrapper checks the parameter list to
|
158
|
+
decide which to call).
|
159
|
+
</ul>
|
160
|
+
</ul>
|
161
|
+
|
162
|
+
<h2>Query</h2>
|
163
|
+
|
164
|
+
<p>
|
165
|
+
In C++ there's a Xapian::Query constructor which takes a query operator and
|
166
|
+
start/end iterators specifying a number of terms or queries, plus an optional
|
167
|
+
parameter. In Ruby, this is wrapped to accept a Ruby array containing
|
168
|
+
terms, or queries, or even a mixture of terms and queries. For example:
|
169
|
+
</p>
|
170
|
+
|
171
|
+
<pre>
|
172
|
+
subq = Xapian::Query.new(Xapian::Query::OP_AND, "hello", "world")
|
173
|
+
q = Xapian::Query.new(Xapian::Query::OP_AND, [subq, "foo", Xapian::Query.new("bar", 2)])
|
174
|
+
</pre>
|
175
|
+
|
176
|
+
<h2>MatchDecider</h2>
|
177
|
+
|
178
|
+
<p>
|
179
|
+
Custom MatchDeciders can be created in Ruby; simply subclass
|
180
|
+
Xapian::MatchDecider, ensure you call the superclass constructor, and define a
|
181
|
+
__call__ method that will do the work. The simplest example (which does nothing
|
182
|
+
useful) would be as follows:
|
183
|
+
</p>
|
184
|
+
|
185
|
+
<pre>
|
186
|
+
class MyMatchDecider &lt; Xapian::MatchDecider
|
187
|
+
def __call__(doc)
|
188
|
+
return true
|
189
|
+
end
|
190
|
+
end
|
191
|
+
</pre>
|
192
|
+
|
193
|
+
<address>
|
194
|
+
Last updated $Date: 2006-04-01 16:35:10 +0100 (Sat, 01 Apr 2006) $
|
195
|
+
</address>
|
196
|
+
</body>
|
197
|
+
</html>
|