mhs-xapian 1.0.18a

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (328) hide show
  1. data/.gitignore +3 -0
  2. data/AUTHORS +1 -0
  3. data/COPYING +340 -0
  4. data/ChangeLog +5876 -0
  5. data/HACKING +101 -0
  6. data/INSTALL +293 -0
  7. data/Makefile +722 -0
  8. data/Makefile.am +26 -0
  9. data/Makefile.in +722 -0
  10. data/NEWS +2110 -0
  11. data/README +59 -0
  12. data/Rakefile +51 -0
  13. data/TODO +47 -0
  14. data/aclocal.m4 +7675 -0
  15. data/config.guess +1501 -0
  16. data/config.h +56 -0
  17. data/config.h.in +55 -0
  18. data/config.status +1298 -0
  19. data/config.sub +1705 -0
  20. data/configure +18536 -0
  21. data/configure.ac +944 -0
  22. data/csharp/.deps/xapian_wrap.Plo +1 -0
  23. data/csharp/AssemblyInfo.cs +40 -0
  24. data/csharp/AssemblyInfo.cs.in +40 -0
  25. data/csharp/Auto.cs +46 -0
  26. data/csharp/BM25Weight.cs +107 -0
  27. data/csharp/BoolWeight.cs +103 -0
  28. data/csharp/Database.cs +275 -0
  29. data/csharp/DateValueRangeProcessor.cs +61 -0
  30. data/csharp/Document.cs +177 -0
  31. data/csharp/ESet.cs +94 -0
  32. data/csharp/ESetIterator.cs +117 -0
  33. data/csharp/Enquire.cs +274 -0
  34. data/csharp/ExpandDecider.cs +76 -0
  35. data/csharp/Flint.cs +58 -0
  36. data/csharp/InMemory.cs +46 -0
  37. data/csharp/MSet.cs +193 -0
  38. data/csharp/MSetIterator.cs +141 -0
  39. data/csharp/Makefile +868 -0
  40. data/csharp/Makefile.am +106 -0
  41. data/csharp/Makefile.in +868 -0
  42. data/csharp/MatchDecider.cs +76 -0
  43. data/csharp/MultiValueSorter.cs +63 -0
  44. data/csharp/NumberValueRangeProcessor.cs +61 -0
  45. data/csharp/PositionIterator.cs +101 -0
  46. data/csharp/PostingIterator.cs +125 -0
  47. data/csharp/Quartz.cs +58 -0
  48. data/csharp/Query.cs +150 -0
  49. data/csharp/QueryParser.cs +174 -0
  50. data/csharp/RSet.cs +102 -0
  51. data/csharp/Remote.cs +100 -0
  52. data/csharp/SWIGTYPE_p_std__string.cs +30 -0
  53. data/csharp/SWIGTYPE_p_std__vectorTXapian__Query_t.cs +30 -0
  54. data/csharp/SWIGTYPE_p_std__vectorTstd__string_t.cs +30 -0
  55. data/csharp/SimpleStopper.cs +64 -0
  56. data/csharp/SmokeTest.cs +178 -0
  57. data/csharp/Sorter.cs +76 -0
  58. data/csharp/Stem.cs +66 -0
  59. data/csharp/Stopper.cs +91 -0
  60. data/csharp/StringValueRangeProcessor.cs +53 -0
  61. data/csharp/TermGenerator.cs +152 -0
  62. data/csharp/TermIterator.cs +125 -0
  63. data/csharp/TradWeight.cs +107 -0
  64. data/csharp/ValueIterator.cs +102 -0
  65. data/csharp/ValueRangeProcessor.cs +76 -0
  66. data/csharp/Version.cs +60 -0
  67. data/csharp/Weight.cs +93 -0
  68. data/csharp/WritableDatabase.cs +153 -0
  69. data/csharp/Xapian.cs +65 -0
  70. data/csharp/XapianPINVOKE.cs +1527 -0
  71. data/csharp/docs/Makefile +450 -0
  72. data/csharp/docs/Makefile.am +16 -0
  73. data/csharp/docs/Makefile.in +450 -0
  74. data/csharp/docs/examples/SimpleExpand.cs +109 -0
  75. data/csharp/docs/examples/SimpleIndex.cs +71 -0
  76. data/csharp/docs/examples/SimpleSearch.cs +78 -0
  77. data/csharp/docs/index.html +211 -0
  78. data/csharp/util.i +233 -0
  79. data/csharp/xapian_wrap.cc +10338 -0
  80. data/csharp/xapian_wrap.h +93 -0
  81. data/depcomp +632 -0
  82. data/extconf.rb +20 -0
  83. data/generic/except.i +80 -0
  84. data/generic/generic.mk +48 -0
  85. data/install-sh +520 -0
  86. data/java-swig/.deps/xapian_wrap.Plo +1 -0
  87. data/java-swig/Auto.java +35 -0
  88. data/java-swig/BM25Weight.java +81 -0
  89. data/java-swig/BoolWeight.java +77 -0
  90. data/java-swig/Database.java +195 -0
  91. data/java-swig/DateValueRangeProcessor.java +51 -0
  92. data/java-swig/Document.java +135 -0
  93. data/java-swig/ESet.java +71 -0
  94. data/java-swig/ESetIterator.java +71 -0
  95. data/java-swig/Enquire.java +246 -0
  96. data/java-swig/ExpandDecider.java +59 -0
  97. data/java-swig/Flint.java +43 -0
  98. data/java-swig/InMemory.java +35 -0
  99. data/java-swig/MSet.java +143 -0
  100. data/java-swig/MSetIterator.java +87 -0
  101. data/java-swig/Makefile +781 -0
  102. data/java-swig/Makefile.am +132 -0
  103. data/java-swig/Makefile.in +781 -0
  104. data/java-swig/MatchDecider.java +59 -0
  105. data/java-swig/MultiValueSorter.java +51 -0
  106. data/java-swig/NumberValueRangeProcessor.java +51 -0
  107. data/java-swig/PositionIterator.java +63 -0
  108. data/java-swig/PostingIterator.java +83 -0
  109. data/java-swig/Quartz.java +43 -0
  110. data/java-swig/Query.java +189 -0
  111. data/java-swig/QueryParser.java +214 -0
  112. data/java-swig/RSet.java +79 -0
  113. data/java-swig/Remote.java +71 -0
  114. data/java-swig/SWIGTYPE_p_std__string.java +25 -0
  115. data/java-swig/SimpleStopper.java +51 -0
  116. data/java-swig/SmokeTest.java +161 -0
  117. data/java-swig/Sorter.java +59 -0
  118. data/java-swig/Stem.java +51 -0
  119. data/java-swig/Stopper.java +63 -0
  120. data/java-swig/StringValueRangeProcessor.java +43 -0
  121. data/java-swig/TermGenerator.java +158 -0
  122. data/java-swig/TermIterator.java +83 -0
  123. data/java-swig/TradWeight.java +81 -0
  124. data/java-swig/ValueIterator.java +67 -0
  125. data/java-swig/ValueRangeProcessor.java +59 -0
  126. data/java-swig/Version.java +47 -0
  127. data/java-swig/Weight.java +68 -0
  128. data/java-swig/WritableDatabase.java +123 -0
  129. data/java-swig/Xapian.java +39 -0
  130. data/java-swig/XapianConstants.java +15 -0
  131. data/java-swig/XapianJNI.java +508 -0
  132. data/java-swig/run-java-test +6 -0
  133. data/java-swig/xapian_wrap.cc +12594 -0
  134. data/java-swig/xapian_wrap.h +91 -0
  135. data/java/Makefile +660 -0
  136. data/java/Makefile.am +35 -0
  137. data/java/Makefile.in +660 -0
  138. data/java/README +76 -0
  139. data/java/SmokeTest.java +148 -0
  140. data/java/native/.deps/Database.Plo +1 -0
  141. data/java/native/.deps/Document.Plo +1 -0
  142. data/java/native/.deps/ESet.Plo +1 -0
  143. data/java/native/.deps/ESetIterator.Plo +1 -0
  144. data/java/native/.deps/Enquire.Plo +1 -0
  145. data/java/native/.deps/MSet.Plo +1 -0
  146. data/java/native/.deps/MSetIterator.Plo +1 -0
  147. data/java/native/.deps/PositionIterator.Plo +1 -0
  148. data/java/native/.deps/Query.Plo +1 -0
  149. data/java/native/.deps/RSet.Plo +1 -0
  150. data/java/native/.deps/Stem.Plo +1 -0
  151. data/java/native/.deps/TermIterator.Plo +1 -0
  152. data/java/native/.deps/WritableDatabase.Plo +1 -0
  153. data/java/native/.deps/org_xapian_XapianJNI.Plo +1 -0
  154. data/java/native/.deps/utils.Plo +1 -0
  155. data/java/native/Database.cc +222 -0
  156. data/java/native/Document.cc +173 -0
  157. data/java/native/ESet.cc +79 -0
  158. data/java/native/ESetIterator.cc +82 -0
  159. data/java/native/Enquire.cc +271 -0
  160. data/java/native/MSet.cc +169 -0
  161. data/java/native/MSetIterator.cc +107 -0
  162. data/java/native/Makefile +594 -0
  163. data/java/native/Makefile.am +51 -0
  164. data/java/native/Makefile.in +594 -0
  165. data/java/native/PositionIterator.cc +64 -0
  166. data/java/native/Query.cc +180 -0
  167. data/java/native/RSet.cc +98 -0
  168. data/java/native/Stem.cc +75 -0
  169. data/java/native/TermIterator.cc +107 -0
  170. data/java/native/WritableDatabase.cc +118 -0
  171. data/java/native/XapianObjectHolder.h +115 -0
  172. data/java/native/org_xapian_XapianJNI.cc +78 -0
  173. data/java/native/org_xapian_XapianJNI.h +1369 -0
  174. data/java/native/utils.cc +51 -0
  175. data/java/native/xapian_jni.h +116 -0
  176. data/java/org/xapian/Database.java +148 -0
  177. data/java/org/xapian/Document.java +135 -0
  178. data/java/org/xapian/ESet.java +66 -0
  179. data/java/org/xapian/ESetIterator.java +97 -0
  180. data/java/org/xapian/Enquire.java +136 -0
  181. data/java/org/xapian/ExpandDecider.java +30 -0
  182. data/java/org/xapian/MSet.java +104 -0
  183. data/java/org/xapian/MSetIterator.java +132 -0
  184. data/java/org/xapian/Makefile +580 -0
  185. data/java/org/xapian/Makefile.am +38 -0
  186. data/java/org/xapian/Makefile.in +580 -0
  187. data/java/org/xapian/MatchDecider.java +30 -0
  188. data/java/org/xapian/PositionIterator.java +89 -0
  189. data/java/org/xapian/Query.java +190 -0
  190. data/java/org/xapian/RSet.java +89 -0
  191. data/java/org/xapian/Stem.java +80 -0
  192. data/java/org/xapian/TermIterator.java +142 -0
  193. data/java/org/xapian/WritableDatabase.java +92 -0
  194. data/java/org/xapian/Xapian.java +114 -0
  195. data/java/org/xapian/XapianJNI.java +444 -0
  196. data/java/org/xapian/errors/AssertionError.java +40 -0
  197. data/java/org/xapian/errors/DatabaseCorruptError.java +40 -0
  198. data/java/org/xapian/errors/DatabaseError.java +40 -0
  199. data/java/org/xapian/errors/DatabaseLockError.java +40 -0
  200. data/java/org/xapian/errors/DatabaseModifiedError.java +40 -0
  201. data/java/org/xapian/errors/DatabaseOpeningError.java +40 -0
  202. data/java/org/xapian/errors/DocNotFoundError.java +40 -0
  203. data/java/org/xapian/errors/FeatureUnavailableError.java +40 -0
  204. data/java/org/xapian/errors/InternalError.java +40 -0
  205. data/java/org/xapian/errors/InvalidArgumentError.java +40 -0
  206. data/java/org/xapian/errors/InvalidOperationError.java +40 -0
  207. data/java/org/xapian/errors/LogicError.java +40 -0
  208. data/java/org/xapian/errors/Makefile +416 -0
  209. data/java/org/xapian/errors/Makefile.am +32 -0
  210. data/java/org/xapian/errors/Makefile.in +416 -0
  211. data/java/org/xapian/errors/NetworkError.java +40 -0
  212. data/java/org/xapian/errors/NetworkTimeoutError.java +40 -0
  213. data/java/org/xapian/errors/RangeError.java +40 -0
  214. data/java/org/xapian/errors/RuntimeError.java +40 -0
  215. data/java/org/xapian/errors/UnimplementedError.java +40 -0
  216. data/java/org/xapian/errors/XapianError.java +40 -0
  217. data/java/org/xapian/errors/XapianRuntimeError.java +49 -0
  218. data/java/org/xapian/examples/Makefile +391 -0
  219. data/java/org/xapian/examples/Makefile.am +8 -0
  220. data/java/org/xapian/examples/Makefile.in +391 -0
  221. data/java/org/xapian/examples/SimpleIndex.java +68 -0
  222. data/java/org/xapian/examples/SimpleSearch.java +71 -0
  223. data/java/run-java-test +6 -0
  224. data/libtool +7618 -0
  225. data/ltmain.sh +6956 -0
  226. data/mhs-xapian.gemspec +368 -0
  227. data/missing +378 -0
  228. data/php/.deps/xapian_wrap.Plo +1 -0
  229. data/php/Makefile +871 -0
  230. data/php/Makefile.am +82 -0
  231. data/php/Makefile.in +871 -0
  232. data/php/docs/Makefile +453 -0
  233. data/php/docs/Makefile.am +19 -0
  234. data/php/docs/Makefile.in +453 -0
  235. data/php/docs/examples/simpleexpand.php4 +108 -0
  236. data/php/docs/examples/simpleexpand.php5 +104 -0
  237. data/php/docs/examples/simpleindex.php4 +76 -0
  238. data/php/docs/examples/simpleindex.php5 +73 -0
  239. data/php/docs/examples/simplesearch.php4 +75 -0
  240. data/php/docs/examples/simplesearch.php5 +72 -0
  241. data/php/docs/index.html +313 -0
  242. data/php/except.i +98 -0
  243. data/php/php4/php_xapian.h +323 -0
  244. data/php/php4/xapian.php +32 -0
  245. data/php/php4/xapian_wrap.cc +27656 -0
  246. data/php/php5/php_xapian.h +319 -0
  247. data/php/php5/xapian.php +1566 -0
  248. data/php/php5/xapian_wrap.cc +24330 -0
  249. data/php/smoketest.php +246 -0
  250. data/php/smoketest4.php +84 -0
  251. data/php/smoketest5.php +79 -0
  252. data/php/util.i +187 -0
  253. data/python/.deps/xapian_wrap.Plo +1 -0
  254. data/python/Makefile +891 -0
  255. data/python/Makefile.am +105 -0
  256. data/python/Makefile.in +891 -0
  257. data/python/doccomments.i +5134 -0
  258. data/python/docs/Makefile +448 -0
  259. data/python/docs/Makefile.am +14 -0
  260. data/python/docs/Makefile.in +448 -0
  261. data/python/docs/examples/simpleexpand.py +98 -0
  262. data/python/docs/examples/simpleindex.py +65 -0
  263. data/python/docs/examples/simplematchdecider.py +78 -0
  264. data/python/docs/examples/simplesearch.py +65 -0
  265. data/python/docs/index.html +420 -0
  266. data/python/except.i +290 -0
  267. data/python/extra.i +1048 -0
  268. data/python/extracomments.i +28 -0
  269. data/python/generate-python-exceptions +189 -0
  270. data/python/generate-python-exceptions.in +189 -0
  271. data/python/modern/xapian.py +5662 -0
  272. data/python/modern/xapian_wrap.cc +35170 -0
  273. data/python/modern/xapian_wrap.h +244 -0
  274. data/python/pythontest.py +1110 -0
  275. data/python/smoketest.py +328 -0
  276. data/python/testsuite.py +382 -0
  277. data/python/util.i +517 -0
  278. data/ruby/.deps/xapian_wrap.Plo +494 -0
  279. data/ruby/.libs/_xapian.bundle +0 -0
  280. data/ruby/.libs/_xapian.bundle.dSYM/Contents/Info.plist +25 -0
  281. data/ruby/.libs/_xapian.bundle.dSYM/Contents/Resources/DWARF/_xapian.bundle +0 -0
  282. data/ruby/.libs/_xapian.la +35 -0
  283. data/ruby/.libs/_xapian.lai +35 -0
  284. data/ruby/Makefile +854 -0
  285. data/ruby/Makefile.am +62 -0
  286. data/ruby/Makefile.in +854 -0
  287. data/ruby/_xapian.la +35 -0
  288. data/ruby/docs/Makefile +487 -0
  289. data/ruby/docs/Makefile.am +50 -0
  290. data/ruby/docs/Makefile.in +487 -0
  291. data/ruby/docs/examples/simpleexpand.rb +98 -0
  292. data/ruby/docs/examples/simpleindex.rb +60 -0
  293. data/ruby/docs/examples/simplematchdecider.rb +74 -0
  294. data/ruby/docs/examples/simplesearch.rb +63 -0
  295. data/ruby/docs/index.html +197 -0
  296. data/ruby/smoketest.rb +211 -0
  297. data/ruby/util.i +232 -0
  298. data/ruby/xapian.rb +280 -0
  299. data/ruby/xapian_wrap.cc +25837 -0
  300. data/ruby/xapian_wrap.h +65 -0
  301. data/ruby/xapian_wrap.lo +12 -0
  302. data/skiptest +2 -0
  303. data/stamp-h1 +1 -0
  304. data/tcl8/.deps/xapian_wrap.Plo +1 -0
  305. data/tcl8/Makefile +835 -0
  306. data/tcl8/Makefile.am +49 -0
  307. data/tcl8/Makefile.in +835 -0
  308. data/tcl8/docs/Makefile +448 -0
  309. data/tcl8/docs/Makefile.am +14 -0
  310. data/tcl8/docs/Makefile.in +448 -0
  311. data/tcl8/docs/examples/simpleexpand.tcl +104 -0
  312. data/tcl8/docs/examples/simpleindex.tcl +68 -0
  313. data/tcl8/docs/examples/simplesearch.tcl +66 -0
  314. data/tcl8/docs/index.html +208 -0
  315. data/tcl8/except.i +48 -0
  316. data/tcl8/pkgIndex.tcl +1 -0
  317. data/tcl8/pkgIndex.tcl.in +1 -0
  318. data/tcl8/run-tcl-test +15 -0
  319. data/tcl8/runtest.tcl +29 -0
  320. data/tcl8/smoketest.tcl +155 -0
  321. data/tcl8/util.i +76 -0
  322. data/tcl8/xapian_wrap.cc +20900 -0
  323. data/xapian-bindings.spec +206 -0
  324. data/xapian-bindings.spec.in +206 -0
  325. data/xapian-version.h +1 -0
  326. data/xapian-version.h.in +1 -0
  327. data/xapian.i +939 -0
  328. metadata +395 -0
@@ -0,0 +1,98 @@
1
+ #!/usr/bin/env ruby
2
+ #
3
+ # Simple example script demonstrating query expansion.
4
+ #
5
+ # Originally by Paul Legato (plegato@nks.net), 4/22/06.
6
+ #
7
+ # Copyright (C) 2006 Networked Knowledge Systems, Inc.
8
+ # Copyright (C) 2006,2007 Olly Betts
9
+ #
10
+ # This program is free software; you can redistribute it and/or
11
+ # modify it under the terms of the GNU General Public License as
12
+ # published by the Free Software Foundation; either version 2 of the
13
+ # License, or (at your option) any later version.
14
+ #
15
+ # This program is distributed in the hope that it will be useful,
16
+ # but WITHOUT ANY WARRANTY; without even the implied warranty of
17
+ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18
+ # GNU General Public License for more details.
19
+ #
20
+ # You should have received a copy of the GNU General Public License
21
+ # along with this program; if not, write to the Free Software
22
+ # Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301
23
+ # USA
24
+
25
+ require 'xapian'
26
+
27
+ if ARGV.size < 2
28
+ $stderr.puts "Usage: #{$0} PATH_TO_DATABASE QUERY [-- [DOCID...]]"
29
+ exit 99
30
+ end
31
+
32
+ # Open the database for searching.
33
+ database = Xapian::Database.new(ARGV[0])
34
+
35
+ # Start an enquire session.
36
+ enquire = Xapian::Enquire.new(database)
37
+
38
+ queryString = ''
39
+ relevantDocs = Xapian::RSet.new()
40
+ onDocIdsYet = false
41
+
42
+ # Combine the rest of the command line arguments with spaces between
43
+ # them, so that simple queries don't have to be quoted at the shell
44
+ # level.
45
+ ARGV.each_with_index { |arg,index|
46
+ next if index == 0 # skip path to db
47
+
48
+ if arg == '--'
49
+ onDocIdsYet = true
50
+ next
51
+ end
52
+
53
+ if onDocIdsYet
54
+ relevantDocs.add_document(arg.to_i)
55
+ else
56
+ queryString += ' ' unless queryString.empty?
57
+ queryString += arg
58
+ end
59
+ }
60
+
61
+
62
+ # Parse the query string to produce a Xapian::Query object.
63
+ qp = Xapian::QueryParser.new()
64
+ stemmer = Xapian::Stem.new("english")
65
+ qp.stemmer = stemmer
66
+ qp.database = database
67
+ qp.stemming_strategy = Xapian::QueryParser::STEM_SOME
68
+ query = qp.parse_query(queryString)
69
+
70
+ unless query.empty?
71
+ puts "Parsed query is: #{query.description()}"
72
+
73
+ # Find the top 10 results for the query.
74
+ enquire.query = query
75
+ matchset = enquire.mset(0, 10, relevantDocs)
76
+
77
+ # Display the results.
78
+ puts "#{matchset.matches_estimated()} results found."
79
+ puts "Matches 1-#{matchset.size}:\n"
80
+
81
+ matchset.matches.each {|m|
82
+ puts "#{m.rank + 1}: #{m.percent}% docid=#{m.docid} [#{m.document.data}]\n"
83
+ }
84
+ end
85
+
86
+ # Put the top 5 (at most) docs into the rset if rset is empty
87
+ if relevantDocs.empty?
88
+ matchset.matches[0..4].each {|match|
89
+ relevantDocs.add_document(match.docid())
90
+ }
91
+ end
92
+
93
+ # Get the suggested expand terms
94
+ expandTerms = enquire.eset(10, relevantDocs)
95
+ puts "#{expandTerms.size()} suggested additional terms:"
96
+ expandTerms.terms.each {|term|
97
+ puts " * Term \"#{term.name}\", weight #{term.weight}"
98
+ }
@@ -0,0 +1,60 @@
1
+ #!/usr/bin/env ruby
2
+ #
3
+ # Index each paragraph of a text file as a Xapian document.
4
+ #
5
+ # Originally by Paul Legato (plegato@nks.net), 4/22/06
6
+ # Based on Python's simplesearch.py
7
+ # Copyright (C) 2006 Networked Knowledge Systems, Inc.
8
+ # Copyright (C) 2007 Olly Betts
9
+ #
10
+ # This program is free software; you can redistribute it and/or
11
+ # modify it under the terms of the GNU General Public License as
12
+ # published by the Free Software Foundation; either version 2 of the
13
+ # License, or (at your option) any later version.
14
+ #
15
+ # This program is distributed in the hope that it will be useful,
16
+ # but WITHOUT ANY WARRANTY; without even the implied warranty of
17
+ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18
+ # GNU General Public License for more details.
19
+ #
20
+ # You should have received a copy of the GNU General Public License
21
+ # along with this program; if not, write to the Free Software
22
+ # Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301
23
+ # USA
24
+
25
+ require 'xapian'
26
+
27
+ if ARGV.size != 1
28
+ $stderr.puts "Usage: #{$0} PATH_TO_DATABASE"
29
+ exit 99
30
+ end
31
+
32
+ # Open the database for update, creating a new database if necessary.
33
+ database = Xapian::WritableDatabase.new(ARGV[0], Xapian::DB_CREATE_OR_OPEN)
34
+
35
+ indexer = Xapian::TermGenerator.new()
36
+ stemmer = Xapian::Stem.new("english")
37
+ indexer.stemmer = stemmer
38
+
39
+ para = ''
40
+ while line = $stdin.gets()
41
+ line.strip!()
42
+ if line.empty?
43
+ if not para.empty?
44
+ # We've reached the end of a paragraph, so index it.
45
+ doc = Xapian::Document.new()
46
+ doc.data = para
47
+
48
+ indexer.document = doc
49
+ indexer.index_text(para)
50
+
51
+ # Add the document to the database
52
+ database.add_document(doc)
53
+ para = ''
54
+ end # if not para.empty?
55
+ else # line not empty
56
+ para += ' ' if para != ''
57
+ para += line
58
+ end # if line empty
59
+ end
60
+
@@ -0,0 +1,74 @@
1
+ #!/usr/bin/env ruby
2
+ #
3
+ # Simple command-line match decider example.
4
+ #
5
+ #
6
+ # Copyright (C) 2006 Networked Knowledge Systems, Inc.
7
+ # Copyright (C) 2006,2007 Olly Betts
8
+ #
9
+ # This program is free software; you can redistribute it and/or
10
+ # modify it under the terms of the GNU General Public License as
11
+ # published by the Free Software Foundation; either version 2 of the
12
+ # License, or (at your option) any later version.
13
+ #
14
+ # This program is distributed in the hope that it will be useful,
15
+ # but WITHOUT ANY WARRANTY; without even the implied warranty of
16
+ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17
+ # GNU General Public License for more details.
18
+ #
19
+ # You should have received a copy of the GNU General Public License
20
+ # along with this program; if not, write to the Free Software
21
+ # Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301
22
+ # USA
23
+
24
+ require 'xapian'
25
+
26
+ if ARGV.size < 3
27
+ $stderr.puts "Usage: #{$0} PATH_TO_DATABASE AVOID_VALUE QUERY"
28
+ exit 99
29
+ end
30
+
31
+ class MyMatchDecider < Xapian::MatchDecider
32
+ def initialize(avoidvalue)
33
+ @avoidvalue = avoidvalue
34
+ super()
35
+ end
36
+
37
+ def __call__(doc)
38
+ return doc.value(0) != @avoidvalue
39
+ end
40
+ end
41
+
42
+ # Open the database for searching.
43
+ database = Xapian::Database.new(ARGV[0])
44
+
45
+ # Start an enquire session.
46
+ enquire = Xapian::Enquire.new(database)
47
+
48
+ # Combine the rest of the command line arguments with spaces between
49
+ # them, so that simple queries don't have to be quoted at the shell
50
+ # level.
51
+ queryString = ARGV[2..-1].join(' ')
52
+
53
+ # Parse the query string to produce a Xapian::Query object.
54
+ qp = Xapian::QueryParser.new()
55
+ stemmer = Xapian::Stem.new("english")
56
+ qp.stemmer = stemmer
57
+ qp.database = database
58
+ qp.stemming_strategy = Xapian::QueryParser::STEM_SOME
59
+ query = qp.parse_query(queryString)
60
+
61
+ puts "Parsed query is: #{query.description()}"
62
+
63
+ # Find the top 10 results for the query.
64
+ enquire.query = query
65
+ mdecider = MyMatchDecider.new(ARGV[1])
66
+ matchset = enquire.mset(0, 10, nil, mdecider)
67
+
68
+ # Display the results.
69
+ puts "#{matchset.matches_estimated()} results found."
70
+ puts "Matches 1-#{matchset.size}:\n"
71
+
72
+ matchset.matches.each {|m|
73
+ puts "#{m.rank + 1}: #{m.percent}% docid=#{m.docid} [#{m.document.data}]\n"
74
+ }
@@ -0,0 +1,63 @@
1
+ #!/usr/bin/env ruby
2
+ #
3
+ # Simple command-line search script.
4
+ #
5
+ # Originally by Paul Legato (plegato@nks.net), 4/22/06.
6
+ #
7
+ # Copyright (C) 2006 Networked Knowledge Systems, Inc.
8
+ # Copyright (C) 2006,2007 Olly Betts
9
+ #
10
+ # This program is free software; you can redistribute it and/or
11
+ # modify it under the terms of the GNU General Public License as
12
+ # published by the Free Software Foundation; either version 2 of the
13
+ # License, or (at your option) any later version.
14
+ #
15
+ # This program is distributed in the hope that it will be useful,
16
+ # but WITHOUT ANY WARRANTY; without even the implied warranty of
17
+ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18
+ # GNU General Public License for more details.
19
+ #
20
+ # You should have received a copy of the GNU General Public License
21
+ # along with this program; if not, write to the Free Software
22
+ # Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301
23
+ # USA
24
+
25
+ require 'xapian'
26
+
27
+ if ARGV.size < 2
28
+ $stderr.puts "Usage: #{$0} PATH_TO_DATABASE QUERY"
29
+ exit 99
30
+ end
31
+
32
+ # Open the database for searching.
33
+ database = Xapian::Database.new(ARGV[0])
34
+
35
+ # Start an enquire session.
36
+ enquire = Xapian::Enquire.new(database)
37
+
38
+ # Combine the rest of the command line arguments with spaces between
39
+ # them, so that simple queries don't have to be quoted at the shell
40
+ # level.
41
+ queryString = ARGV[1..-1].join(' ')
42
+
43
+ # Parse the query string to produce a Xapian::Query object.
44
+ qp = Xapian::QueryParser.new()
45
+ stemmer = Xapian::Stem.new("english")
46
+ qp.stemmer = stemmer
47
+ qp.database = database
48
+ qp.stemming_strategy = Xapian::QueryParser::STEM_SOME
49
+ query = qp.parse_query(queryString)
50
+
51
+ puts "Parsed query is: #{query.description()}"
52
+
53
+ # Find the top 10 results for the query.
54
+ enquire.query = query
55
+ matchset = enquire.mset(0, 10)
56
+
57
+ # Display the results.
58
+ puts "#{matchset.matches_estimated()} results found."
59
+ puts "Matches 1-#{matchset.size}:\n"
60
+
61
+ matchset.matches.each {|m|
62
+ puts "#{m.rank + 1}: #{m.percent}% docid=#{m.docid} [#{m.document.data}]\n"
63
+ }
@@ -0,0 +1,197 @@
1
+ <html><head><title>Ruby bindings for Xapian</title></head>
2
+ <body>
3
+ <h1>Ruby bindings for Xapian</h1>
4
+
5
+ <p>
6
+ The Ruby bindings for Xapian are packaged in the <code>xapian</code> module.
7
+ Ruby strings and arrays are converted automatically in the bindings, so
8
+ generally they should just work naturally.
9
+ </p>
10
+
11
+ <p>
12
+ The <code>examples</code> subdirectory contains examples showing how to use the
13
+ Ruby bindings based on the simple examples from <code>xapian-examples</code>:
14
+ <a href="examples/simpleindex.rb">simpleindex.rb</a>,
15
+ <a href="examples/simplesearch.rb">simplesearch.rb</a>,
16
+ <a href="examples/simpleexpand.rb">simpleexpand.rb</a>.
17
+ There's also
18
+ <a href="examples/simplematchdecider.rb">simplematchdecider.rb</a>
19
+ which shows how to define a MatchDecider in Ruby.
20
+ </p>
21
+
22
+ <h2>Usage</h2>
23
+
24
+ <p>To use the bindings, you need to use <code>require 'xapian'</code>
25
+ in your Ruby program.</p>
26
+
27
+ <p>
28
+ Most standard Xapian methods are available directly
29
+ to your Ruby program. Names have been altered to conform to the
30
+ standard Ruby naming conventions (i.e. get_foo() in C++ becomes foo()
31
+ in Ruby; set_foo() becomes foo=().) C++ 'operator()' methods are
32
+ renamed to 'call' methods in Ruby.
33
+ </p>
34
+
35
+ <p>
36
+ The C++ methods are not yet documented in the <a href="rdocs/">RDocs</a>.
37
+ In the meantime, refer to the
38
+ <a href="http://xapian.org/docs/apidoc/html/annotated.html">C++ API
39
+ documentation</a> for information on how to use the various methods. Most are
40
+ available directly in the Ruby version. The RDocs currently provide information
41
+ only on methods that are unique to the Ruby version.
42
+ </p>
43
+
44
+ <p>
45
+ The dangerous/non-Rubish methods from the C++ API have been renamed to
46
+ start with underscores ('_') in the Ruby bindings. You can see them in
47
+ use in xapian.rb. It is strongly recommended that you do not call any
48
+ method that starts with _ directly in your code, but instead use the
49
+ wrappers defined in xapian.rb. Improper use of an _ method can cause
50
+ the Ruby process to segfault.
51
+ </p>
52
+
53
+ <h2>Unicode Support</h2>
54
+
55
+ <p>
56
+ In Xapian 1.0.0 and later, the Xapian::Stem, Xapian::QueryParser, and
57
+ Xapian::TermGenerator classes all assume text is in UTF-8. If you want
58
+ to index strings in a different encoding, use the Ruby
59
+ <a href="http://www.ruby-doc.org/stdlib/libdoc/iconv/rdoc/index.html"
60
+ ><code>Iconv</code> class</a>
61
+ to convert them to UTF-8 before passing them to Xapian, and
62
+ when reading values back from Xapian.
63
+ </p>
64
+
65
+ <!--
66
+ <h2>Exceptions</h2>
67
+
68
+ <p>
69
+ Exceptions are thrown as SWIG exceptions instead of Xapian
70
+ exceptions. This isn't done well at the moment; in future we will
71
+ throw wrapped Xapian exceptions. For now, it's probably easier to
72
+ catch all exceptions and try to take appropriate action based on
73
+ their associated string.
74
+ </p>
75
+ -->
76
+
77
+ <h2>Iterators</h2>
78
+
79
+ <p>
80
+ One important difference from the C++ API is that *Iterator
81
+ classes should not be used from Ruby, as they fit awkwardly into
82
+ standard Ruby iteration paradigms, and as many of them cause segfaults
83
+ if used improperly. They have all been wrapped with appropriate
84
+ methods that simply return the *Iterator objects in an Array, so that
85
+ you can use 'each' to iterate through them.
86
+ </p>
87
+
88
+ <pre>
89
+ mset.matches.each {|match|
90
+ # do something
91
+ }
92
+ </pre>
93
+
94
+ <!--
95
+ <h2>Iterator dereferencing</h2>
96
+
97
+ <p>
98
+ C++ iterators are often dereferenced to get information, eg
99
+ <code>(*it)</code>. With Python these are all mapped to named methods, as
100
+ follows:
101
+ </p>
102
+
103
+ <table title="Iterator dereferencing methods">
104
+ <thead><td>Iterator</td><td>Dereferencing method</td></thead>
105
+ <tr><td>PositionIterator</td> <td><code>get_termpos()</code></td></tr>
106
+ <tr><td>PostingIterator</td> <td><code>get_docid()</code></td></tr>
107
+ <tr><td>TermIterator</td> <td><code>get_term()</code></td></tr>
108
+ <tr><td>ValueIterator</td> <td><code>get_value()</code></td></tr>
109
+ <tr><td>MSetIterator</td> <td><code>get_docid()</code></td></tr>
110
+ <tr><td>ESetIterator</td> <td><code>get_term()</code></td></tr>
111
+ </table>
112
+
113
+ <p>
114
+ Other methods, such as <code>MSetIterator.get_document()</code>, are
115
+ available unchanged.
116
+ </p>
117
+
118
+ <h2>MSet</h2>
119
+
120
+ <p>
121
+ MSet objects have some additional methods to simplify access (these
122
+ work using the C++ array dereferencing):
123
+ </p>
124
+
125
+ <table title="MSet additional methods">
126
+ <thead><td>Method name</td><td>Explanation</td></thead>
127
+ <tr><td><code>get_hit(index)</code></td><td>returns MSetIterator at index</td></tr>
128
+ <tr><td><code>get_document_percentage(index)</code></td><td><code>convert_to_percent(get_hit(index))</code></td></tr>
129
+ <tr><td><code>get_document(index)</code></td><td><code>get_hit(index).get_document()</code></td></tr>
130
+ <tr><td><code>get_docid(index)</code></td><td><code>get_hit(index).get_docid()</code></td></tr>
131
+ </table>
132
+
133
+ -->
134
+
135
+ <h2>Non-Class Functions</h2>
136
+
137
+ <p>The C++ API contains a few non-class functions (the Database factory
138
+ functions, and some functions reporting version information), which are
139
+ wrapped like so for Ruby:
140
+ <ul>
141
+ <ul>
142
+ <li> <code>Xapian::version_string()</code> is wrapped as <code>Xapian::version_string()</code>
143
+ <li> <code>Xapian::major_version()</code> is wrapped as <code>Xapian::major_version()</code>
144
+ <li> <code>Xapian::minor_version()</code> is wrapped as <code>Xapian::minor_version()</code>
145
+ <li> <code>Xapian::revision()</code> is wrapped as <code>Xapian::revision()</code>
146
+ </ul>
147
+
148
+ <ul>
149
+ <li> <code>Xapian::Auto::open_stub()</code> is wrapped as <code>Xapian::open_stub()</code>
150
+ <li> <code>Xapian::Flint::open()</code> is wrapped as <code>Xapian::flint_open()</code>
151
+ <li> <code>Xapian::InMemory::open()</code> is wrapped as <code>Xapian::inmemory_open()</code>
152
+ <li> <code>Xapian::Quartz::open()</code> is wrapped as <code>Xapian::quartz_open()</code>
153
+ <li> <code>Xapian::Remote::open()</code> is wrapped as <code>Xapian::remote_open()</code> (both
154
+ the TCP and "program" versions are wrapped - the SWIG wrapper checks the parameter list to
155
+ decide which to call).
156
+ <li> <code>Xapian::Remote::open_writable()</code> is wrapped as <code>Xapian::remote_open_writable()</code> (both
157
+ the TCP and "program" versions are wrapped - the SWIG wrapper checks the parameter list to
158
+ decide which to call).
159
+ </ul>
160
+ </ul>
161
+
162
+ <h2>Query</h2>
163
+
164
+ <p>
165
+ In C++ there's a Xapian::Query constructor which takes a query operator and
166
+ start/end iterators specifying a number of terms or queries, plus an optional
167
+ parameter. In Ruby, this is wrapped to accept a Ruby array containing
168
+ terms, or queries, or even a mixture of terms and queries. For example:
169
+ </p>
170
+
171
+ <pre>
172
+ subq = Xapian::Query.new(Xapian::Query::OP_AND, "hello", "world")
173
+ q = Xapian::Query.new(Xapian::Query::OP_AND, [subq, "foo", Xapian::Query.new("bar", 2)])
174
+ </pre>
175
+
176
+ <h2>MatchDecider</h2>
177
+
178
+ <p>
179
+ Custom MatchDeciders can be created in Ruby; simply subclass
180
+ Xapian::MatchDecider, ensure you call the superclass constructor, and define a
181
+ __call__ method that will do the work. The simplest example (which does nothing
182
+ useful) would be as follows:
183
+ </p>
184
+
185
+ <pre>
186
+ class MyMatchDecider &lt; Xapian::MatchDecider
187
+ def __call__(doc)
188
+ return true
189
+ end
190
+ end
191
+ </pre>
192
+
193
+ <address>
194
+ Last updated $Date: 2006-04-01 16:35:10 +0100 (Sat, 01 Apr 2006) $
195
+ </address>
196
+ </body>
197
+ </html>