mhs-xapian 1.0.18a

Sign up to get free protection for your applications and to get access to all the features.
Files changed (328) hide show
  1. data/.gitignore +3 -0
  2. data/AUTHORS +1 -0
  3. data/COPYING +340 -0
  4. data/ChangeLog +5876 -0
  5. data/HACKING +101 -0
  6. data/INSTALL +293 -0
  7. data/Makefile +722 -0
  8. data/Makefile.am +26 -0
  9. data/Makefile.in +722 -0
  10. data/NEWS +2110 -0
  11. data/README +59 -0
  12. data/Rakefile +51 -0
  13. data/TODO +47 -0
  14. data/aclocal.m4 +7675 -0
  15. data/config.guess +1501 -0
  16. data/config.h +56 -0
  17. data/config.h.in +55 -0
  18. data/config.status +1298 -0
  19. data/config.sub +1705 -0
  20. data/configure +18536 -0
  21. data/configure.ac +944 -0
  22. data/csharp/.deps/xapian_wrap.Plo +1 -0
  23. data/csharp/AssemblyInfo.cs +40 -0
  24. data/csharp/AssemblyInfo.cs.in +40 -0
  25. data/csharp/Auto.cs +46 -0
  26. data/csharp/BM25Weight.cs +107 -0
  27. data/csharp/BoolWeight.cs +103 -0
  28. data/csharp/Database.cs +275 -0
  29. data/csharp/DateValueRangeProcessor.cs +61 -0
  30. data/csharp/Document.cs +177 -0
  31. data/csharp/ESet.cs +94 -0
  32. data/csharp/ESetIterator.cs +117 -0
  33. data/csharp/Enquire.cs +274 -0
  34. data/csharp/ExpandDecider.cs +76 -0
  35. data/csharp/Flint.cs +58 -0
  36. data/csharp/InMemory.cs +46 -0
  37. data/csharp/MSet.cs +193 -0
  38. data/csharp/MSetIterator.cs +141 -0
  39. data/csharp/Makefile +868 -0
  40. data/csharp/Makefile.am +106 -0
  41. data/csharp/Makefile.in +868 -0
  42. data/csharp/MatchDecider.cs +76 -0
  43. data/csharp/MultiValueSorter.cs +63 -0
  44. data/csharp/NumberValueRangeProcessor.cs +61 -0
  45. data/csharp/PositionIterator.cs +101 -0
  46. data/csharp/PostingIterator.cs +125 -0
  47. data/csharp/Quartz.cs +58 -0
  48. data/csharp/Query.cs +150 -0
  49. data/csharp/QueryParser.cs +174 -0
  50. data/csharp/RSet.cs +102 -0
  51. data/csharp/Remote.cs +100 -0
  52. data/csharp/SWIGTYPE_p_std__string.cs +30 -0
  53. data/csharp/SWIGTYPE_p_std__vectorTXapian__Query_t.cs +30 -0
  54. data/csharp/SWIGTYPE_p_std__vectorTstd__string_t.cs +30 -0
  55. data/csharp/SimpleStopper.cs +64 -0
  56. data/csharp/SmokeTest.cs +178 -0
  57. data/csharp/Sorter.cs +76 -0
  58. data/csharp/Stem.cs +66 -0
  59. data/csharp/Stopper.cs +91 -0
  60. data/csharp/StringValueRangeProcessor.cs +53 -0
  61. data/csharp/TermGenerator.cs +152 -0
  62. data/csharp/TermIterator.cs +125 -0
  63. data/csharp/TradWeight.cs +107 -0
  64. data/csharp/ValueIterator.cs +102 -0
  65. data/csharp/ValueRangeProcessor.cs +76 -0
  66. data/csharp/Version.cs +60 -0
  67. data/csharp/Weight.cs +93 -0
  68. data/csharp/WritableDatabase.cs +153 -0
  69. data/csharp/Xapian.cs +65 -0
  70. data/csharp/XapianPINVOKE.cs +1527 -0
  71. data/csharp/docs/Makefile +450 -0
  72. data/csharp/docs/Makefile.am +16 -0
  73. data/csharp/docs/Makefile.in +450 -0
  74. data/csharp/docs/examples/SimpleExpand.cs +109 -0
  75. data/csharp/docs/examples/SimpleIndex.cs +71 -0
  76. data/csharp/docs/examples/SimpleSearch.cs +78 -0
  77. data/csharp/docs/index.html +211 -0
  78. data/csharp/util.i +233 -0
  79. data/csharp/xapian_wrap.cc +10338 -0
  80. data/csharp/xapian_wrap.h +93 -0
  81. data/depcomp +632 -0
  82. data/extconf.rb +20 -0
  83. data/generic/except.i +80 -0
  84. data/generic/generic.mk +48 -0
  85. data/install-sh +520 -0
  86. data/java-swig/.deps/xapian_wrap.Plo +1 -0
  87. data/java-swig/Auto.java +35 -0
  88. data/java-swig/BM25Weight.java +81 -0
  89. data/java-swig/BoolWeight.java +77 -0
  90. data/java-swig/Database.java +195 -0
  91. data/java-swig/DateValueRangeProcessor.java +51 -0
  92. data/java-swig/Document.java +135 -0
  93. data/java-swig/ESet.java +71 -0
  94. data/java-swig/ESetIterator.java +71 -0
  95. data/java-swig/Enquire.java +246 -0
  96. data/java-swig/ExpandDecider.java +59 -0
  97. data/java-swig/Flint.java +43 -0
  98. data/java-swig/InMemory.java +35 -0
  99. data/java-swig/MSet.java +143 -0
  100. data/java-swig/MSetIterator.java +87 -0
  101. data/java-swig/Makefile +781 -0
  102. data/java-swig/Makefile.am +132 -0
  103. data/java-swig/Makefile.in +781 -0
  104. data/java-swig/MatchDecider.java +59 -0
  105. data/java-swig/MultiValueSorter.java +51 -0
  106. data/java-swig/NumberValueRangeProcessor.java +51 -0
  107. data/java-swig/PositionIterator.java +63 -0
  108. data/java-swig/PostingIterator.java +83 -0
  109. data/java-swig/Quartz.java +43 -0
  110. data/java-swig/Query.java +189 -0
  111. data/java-swig/QueryParser.java +214 -0
  112. data/java-swig/RSet.java +79 -0
  113. data/java-swig/Remote.java +71 -0
  114. data/java-swig/SWIGTYPE_p_std__string.java +25 -0
  115. data/java-swig/SimpleStopper.java +51 -0
  116. data/java-swig/SmokeTest.java +161 -0
  117. data/java-swig/Sorter.java +59 -0
  118. data/java-swig/Stem.java +51 -0
  119. data/java-swig/Stopper.java +63 -0
  120. data/java-swig/StringValueRangeProcessor.java +43 -0
  121. data/java-swig/TermGenerator.java +158 -0
  122. data/java-swig/TermIterator.java +83 -0
  123. data/java-swig/TradWeight.java +81 -0
  124. data/java-swig/ValueIterator.java +67 -0
  125. data/java-swig/ValueRangeProcessor.java +59 -0
  126. data/java-swig/Version.java +47 -0
  127. data/java-swig/Weight.java +68 -0
  128. data/java-swig/WritableDatabase.java +123 -0
  129. data/java-swig/Xapian.java +39 -0
  130. data/java-swig/XapianConstants.java +15 -0
  131. data/java-swig/XapianJNI.java +508 -0
  132. data/java-swig/run-java-test +6 -0
  133. data/java-swig/xapian_wrap.cc +12594 -0
  134. data/java-swig/xapian_wrap.h +91 -0
  135. data/java/Makefile +660 -0
  136. data/java/Makefile.am +35 -0
  137. data/java/Makefile.in +660 -0
  138. data/java/README +76 -0
  139. data/java/SmokeTest.java +148 -0
  140. data/java/native/.deps/Database.Plo +1 -0
  141. data/java/native/.deps/Document.Plo +1 -0
  142. data/java/native/.deps/ESet.Plo +1 -0
  143. data/java/native/.deps/ESetIterator.Plo +1 -0
  144. data/java/native/.deps/Enquire.Plo +1 -0
  145. data/java/native/.deps/MSet.Plo +1 -0
  146. data/java/native/.deps/MSetIterator.Plo +1 -0
  147. data/java/native/.deps/PositionIterator.Plo +1 -0
  148. data/java/native/.deps/Query.Plo +1 -0
  149. data/java/native/.deps/RSet.Plo +1 -0
  150. data/java/native/.deps/Stem.Plo +1 -0
  151. data/java/native/.deps/TermIterator.Plo +1 -0
  152. data/java/native/.deps/WritableDatabase.Plo +1 -0
  153. data/java/native/.deps/org_xapian_XapianJNI.Plo +1 -0
  154. data/java/native/.deps/utils.Plo +1 -0
  155. data/java/native/Database.cc +222 -0
  156. data/java/native/Document.cc +173 -0
  157. data/java/native/ESet.cc +79 -0
  158. data/java/native/ESetIterator.cc +82 -0
  159. data/java/native/Enquire.cc +271 -0
  160. data/java/native/MSet.cc +169 -0
  161. data/java/native/MSetIterator.cc +107 -0
  162. data/java/native/Makefile +594 -0
  163. data/java/native/Makefile.am +51 -0
  164. data/java/native/Makefile.in +594 -0
  165. data/java/native/PositionIterator.cc +64 -0
  166. data/java/native/Query.cc +180 -0
  167. data/java/native/RSet.cc +98 -0
  168. data/java/native/Stem.cc +75 -0
  169. data/java/native/TermIterator.cc +107 -0
  170. data/java/native/WritableDatabase.cc +118 -0
  171. data/java/native/XapianObjectHolder.h +115 -0
  172. data/java/native/org_xapian_XapianJNI.cc +78 -0
  173. data/java/native/org_xapian_XapianJNI.h +1369 -0
  174. data/java/native/utils.cc +51 -0
  175. data/java/native/xapian_jni.h +116 -0
  176. data/java/org/xapian/Database.java +148 -0
  177. data/java/org/xapian/Document.java +135 -0
  178. data/java/org/xapian/ESet.java +66 -0
  179. data/java/org/xapian/ESetIterator.java +97 -0
  180. data/java/org/xapian/Enquire.java +136 -0
  181. data/java/org/xapian/ExpandDecider.java +30 -0
  182. data/java/org/xapian/MSet.java +104 -0
  183. data/java/org/xapian/MSetIterator.java +132 -0
  184. data/java/org/xapian/Makefile +580 -0
  185. data/java/org/xapian/Makefile.am +38 -0
  186. data/java/org/xapian/Makefile.in +580 -0
  187. data/java/org/xapian/MatchDecider.java +30 -0
  188. data/java/org/xapian/PositionIterator.java +89 -0
  189. data/java/org/xapian/Query.java +190 -0
  190. data/java/org/xapian/RSet.java +89 -0
  191. data/java/org/xapian/Stem.java +80 -0
  192. data/java/org/xapian/TermIterator.java +142 -0
  193. data/java/org/xapian/WritableDatabase.java +92 -0
  194. data/java/org/xapian/Xapian.java +114 -0
  195. data/java/org/xapian/XapianJNI.java +444 -0
  196. data/java/org/xapian/errors/AssertionError.java +40 -0
  197. data/java/org/xapian/errors/DatabaseCorruptError.java +40 -0
  198. data/java/org/xapian/errors/DatabaseError.java +40 -0
  199. data/java/org/xapian/errors/DatabaseLockError.java +40 -0
  200. data/java/org/xapian/errors/DatabaseModifiedError.java +40 -0
  201. data/java/org/xapian/errors/DatabaseOpeningError.java +40 -0
  202. data/java/org/xapian/errors/DocNotFoundError.java +40 -0
  203. data/java/org/xapian/errors/FeatureUnavailableError.java +40 -0
  204. data/java/org/xapian/errors/InternalError.java +40 -0
  205. data/java/org/xapian/errors/InvalidArgumentError.java +40 -0
  206. data/java/org/xapian/errors/InvalidOperationError.java +40 -0
  207. data/java/org/xapian/errors/LogicError.java +40 -0
  208. data/java/org/xapian/errors/Makefile +416 -0
  209. data/java/org/xapian/errors/Makefile.am +32 -0
  210. data/java/org/xapian/errors/Makefile.in +416 -0
  211. data/java/org/xapian/errors/NetworkError.java +40 -0
  212. data/java/org/xapian/errors/NetworkTimeoutError.java +40 -0
  213. data/java/org/xapian/errors/RangeError.java +40 -0
  214. data/java/org/xapian/errors/RuntimeError.java +40 -0
  215. data/java/org/xapian/errors/UnimplementedError.java +40 -0
  216. data/java/org/xapian/errors/XapianError.java +40 -0
  217. data/java/org/xapian/errors/XapianRuntimeError.java +49 -0
  218. data/java/org/xapian/examples/Makefile +391 -0
  219. data/java/org/xapian/examples/Makefile.am +8 -0
  220. data/java/org/xapian/examples/Makefile.in +391 -0
  221. data/java/org/xapian/examples/SimpleIndex.java +68 -0
  222. data/java/org/xapian/examples/SimpleSearch.java +71 -0
  223. data/java/run-java-test +6 -0
  224. data/libtool +7618 -0
  225. data/ltmain.sh +6956 -0
  226. data/mhs-xapian.gemspec +368 -0
  227. data/missing +378 -0
  228. data/php/.deps/xapian_wrap.Plo +1 -0
  229. data/php/Makefile +871 -0
  230. data/php/Makefile.am +82 -0
  231. data/php/Makefile.in +871 -0
  232. data/php/docs/Makefile +453 -0
  233. data/php/docs/Makefile.am +19 -0
  234. data/php/docs/Makefile.in +453 -0
  235. data/php/docs/examples/simpleexpand.php4 +108 -0
  236. data/php/docs/examples/simpleexpand.php5 +104 -0
  237. data/php/docs/examples/simpleindex.php4 +76 -0
  238. data/php/docs/examples/simpleindex.php5 +73 -0
  239. data/php/docs/examples/simplesearch.php4 +75 -0
  240. data/php/docs/examples/simplesearch.php5 +72 -0
  241. data/php/docs/index.html +313 -0
  242. data/php/except.i +98 -0
  243. data/php/php4/php_xapian.h +323 -0
  244. data/php/php4/xapian.php +32 -0
  245. data/php/php4/xapian_wrap.cc +27656 -0
  246. data/php/php5/php_xapian.h +319 -0
  247. data/php/php5/xapian.php +1566 -0
  248. data/php/php5/xapian_wrap.cc +24330 -0
  249. data/php/smoketest.php +246 -0
  250. data/php/smoketest4.php +84 -0
  251. data/php/smoketest5.php +79 -0
  252. data/php/util.i +187 -0
  253. data/python/.deps/xapian_wrap.Plo +1 -0
  254. data/python/Makefile +891 -0
  255. data/python/Makefile.am +105 -0
  256. data/python/Makefile.in +891 -0
  257. data/python/doccomments.i +5134 -0
  258. data/python/docs/Makefile +448 -0
  259. data/python/docs/Makefile.am +14 -0
  260. data/python/docs/Makefile.in +448 -0
  261. data/python/docs/examples/simpleexpand.py +98 -0
  262. data/python/docs/examples/simpleindex.py +65 -0
  263. data/python/docs/examples/simplematchdecider.py +78 -0
  264. data/python/docs/examples/simplesearch.py +65 -0
  265. data/python/docs/index.html +420 -0
  266. data/python/except.i +290 -0
  267. data/python/extra.i +1048 -0
  268. data/python/extracomments.i +28 -0
  269. data/python/generate-python-exceptions +189 -0
  270. data/python/generate-python-exceptions.in +189 -0
  271. data/python/modern/xapian.py +5662 -0
  272. data/python/modern/xapian_wrap.cc +35170 -0
  273. data/python/modern/xapian_wrap.h +244 -0
  274. data/python/pythontest.py +1110 -0
  275. data/python/smoketest.py +328 -0
  276. data/python/testsuite.py +382 -0
  277. data/python/util.i +517 -0
  278. data/ruby/.deps/xapian_wrap.Plo +494 -0
  279. data/ruby/.libs/_xapian.bundle +0 -0
  280. data/ruby/.libs/_xapian.bundle.dSYM/Contents/Info.plist +25 -0
  281. data/ruby/.libs/_xapian.bundle.dSYM/Contents/Resources/DWARF/_xapian.bundle +0 -0
  282. data/ruby/.libs/_xapian.la +35 -0
  283. data/ruby/.libs/_xapian.lai +35 -0
  284. data/ruby/Makefile +854 -0
  285. data/ruby/Makefile.am +62 -0
  286. data/ruby/Makefile.in +854 -0
  287. data/ruby/_xapian.la +35 -0
  288. data/ruby/docs/Makefile +487 -0
  289. data/ruby/docs/Makefile.am +50 -0
  290. data/ruby/docs/Makefile.in +487 -0
  291. data/ruby/docs/examples/simpleexpand.rb +98 -0
  292. data/ruby/docs/examples/simpleindex.rb +60 -0
  293. data/ruby/docs/examples/simplematchdecider.rb +74 -0
  294. data/ruby/docs/examples/simplesearch.rb +63 -0
  295. data/ruby/docs/index.html +197 -0
  296. data/ruby/smoketest.rb +211 -0
  297. data/ruby/util.i +232 -0
  298. data/ruby/xapian.rb +280 -0
  299. data/ruby/xapian_wrap.cc +25837 -0
  300. data/ruby/xapian_wrap.h +65 -0
  301. data/ruby/xapian_wrap.lo +12 -0
  302. data/skiptest +2 -0
  303. data/stamp-h1 +1 -0
  304. data/tcl8/.deps/xapian_wrap.Plo +1 -0
  305. data/tcl8/Makefile +835 -0
  306. data/tcl8/Makefile.am +49 -0
  307. data/tcl8/Makefile.in +835 -0
  308. data/tcl8/docs/Makefile +448 -0
  309. data/tcl8/docs/Makefile.am +14 -0
  310. data/tcl8/docs/Makefile.in +448 -0
  311. data/tcl8/docs/examples/simpleexpand.tcl +104 -0
  312. data/tcl8/docs/examples/simpleindex.tcl +68 -0
  313. data/tcl8/docs/examples/simplesearch.tcl +66 -0
  314. data/tcl8/docs/index.html +208 -0
  315. data/tcl8/except.i +48 -0
  316. data/tcl8/pkgIndex.tcl +1 -0
  317. data/tcl8/pkgIndex.tcl.in +1 -0
  318. data/tcl8/run-tcl-test +15 -0
  319. data/tcl8/runtest.tcl +29 -0
  320. data/tcl8/smoketest.tcl +155 -0
  321. data/tcl8/util.i +76 -0
  322. data/tcl8/xapian_wrap.cc +20900 -0
  323. data/xapian-bindings.spec +206 -0
  324. data/xapian-bindings.spec.in +206 -0
  325. data/xapian-version.h +1 -0
  326. data/xapian-version.h.in +1 -0
  327. data/xapian.i +939 -0
  328. metadata +395 -0
@@ -0,0 +1,98 @@
1
+ #!/usr/bin/env ruby
2
+ #
3
+ # Simple example script demonstrating query expansion.
4
+ #
5
+ # Originally by Paul Legato (plegato@nks.net), 4/22/06.
6
+ #
7
+ # Copyright (C) 2006 Networked Knowledge Systems, Inc.
8
+ # Copyright (C) 2006,2007 Olly Betts
9
+ #
10
+ # This program is free software; you can redistribute it and/or
11
+ # modify it under the terms of the GNU General Public License as
12
+ # published by the Free Software Foundation; either version 2 of the
13
+ # License, or (at your option) any later version.
14
+ #
15
+ # This program is distributed in the hope that it will be useful,
16
+ # but WITHOUT ANY WARRANTY; without even the implied warranty of
17
+ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18
+ # GNU General Public License for more details.
19
+ #
20
+ # You should have received a copy of the GNU General Public License
21
+ # along with this program; if not, write to the Free Software
22
+ # Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301
23
+ # USA
24
+
25
+ require 'xapian'
26
+
27
+ if ARGV.size < 2
28
+ $stderr.puts "Usage: #{$0} PATH_TO_DATABASE QUERY [-- [DOCID...]]"
29
+ exit 99
30
+ end
31
+
32
+ # Open the database for searching.
33
+ database = Xapian::Database.new(ARGV[0])
34
+
35
+ # Start an enquire session.
36
+ enquire = Xapian::Enquire.new(database)
37
+
38
+ queryString = ''
39
+ relevantDocs = Xapian::RSet.new()
40
+ onDocIdsYet = false
41
+
42
+ # Combine the rest of the command line arguments with spaces between
43
+ # them, so that simple queries don't have to be quoted at the shell
44
+ # level.
45
+ ARGV.each_with_index { |arg,index|
46
+ next if index == 0 # skip path to db
47
+
48
+ if arg == '--'
49
+ onDocIdsYet = true
50
+ next
51
+ end
52
+
53
+ if onDocIdsYet
54
+ relevantDocs.add_document(arg.to_i)
55
+ else
56
+ queryString += ' ' unless queryString.empty?
57
+ queryString += arg
58
+ end
59
+ }
60
+
61
+
62
+ # Parse the query string to produce a Xapian::Query object.
63
+ qp = Xapian::QueryParser.new()
64
+ stemmer = Xapian::Stem.new("english")
65
+ qp.stemmer = stemmer
66
+ qp.database = database
67
+ qp.stemming_strategy = Xapian::QueryParser::STEM_SOME
68
+ query = qp.parse_query(queryString)
69
+
70
+ unless query.empty?
71
+ puts "Parsed query is: #{query.description()}"
72
+
73
+ # Find the top 10 results for the query.
74
+ enquire.query = query
75
+ matchset = enquire.mset(0, 10, relevantDocs)
76
+
77
+ # Display the results.
78
+ puts "#{matchset.matches_estimated()} results found."
79
+ puts "Matches 1-#{matchset.size}:\n"
80
+
81
+ matchset.matches.each {|m|
82
+ puts "#{m.rank + 1}: #{m.percent}% docid=#{m.docid} [#{m.document.data}]\n"
83
+ }
84
+ end
85
+
86
+ # Put the top 5 (at most) docs into the rset if rset is empty
87
+ if relevantDocs.empty?
88
+ matchset.matches[0..4].each {|match|
89
+ relevantDocs.add_document(match.docid())
90
+ }
91
+ end
92
+
93
+ # Get the suggested expand terms
94
+ expandTerms = enquire.eset(10, relevantDocs)
95
+ puts "#{expandTerms.size()} suggested additional terms:"
96
+ expandTerms.terms.each {|term|
97
+ puts " * Term \"#{term.name}\", weight #{term.weight}"
98
+ }
@@ -0,0 +1,60 @@
1
+ #!/usr/bin/env ruby
2
+ #
3
+ # Index each paragraph of a text file as a Xapian document.
4
+ #
5
+ # Originally by Paul Legato (plegato@nks.net), 4/22/06
6
+ # Based on Python's simplesearch.py
7
+ # Copyright (C) 2006 Networked Knowledge Systems, Inc.
8
+ # Copyright (C) 2007 Olly Betts
9
+ #
10
+ # This program is free software; you can redistribute it and/or
11
+ # modify it under the terms of the GNU General Public License as
12
+ # published by the Free Software Foundation; either version 2 of the
13
+ # License, or (at your option) any later version.
14
+ #
15
+ # This program is distributed in the hope that it will be useful,
16
+ # but WITHOUT ANY WARRANTY; without even the implied warranty of
17
+ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18
+ # GNU General Public License for more details.
19
+ #
20
+ # You should have received a copy of the GNU General Public License
21
+ # along with this program; if not, write to the Free Software
22
+ # Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301
23
+ # USA
24
+
25
+ require 'xapian'
26
+
27
+ if ARGV.size != 1
28
+ $stderr.puts "Usage: #{$0} PATH_TO_DATABASE"
29
+ exit 99
30
+ end
31
+
32
+ # Open the database for update, creating a new database if necessary.
33
+ database = Xapian::WritableDatabase.new(ARGV[0], Xapian::DB_CREATE_OR_OPEN)
34
+
35
+ indexer = Xapian::TermGenerator.new()
36
+ stemmer = Xapian::Stem.new("english")
37
+ indexer.stemmer = stemmer
38
+
39
+ para = ''
40
+ while line = $stdin.gets()
41
+ line.strip!()
42
+ if line.empty?
43
+ if not para.empty?
44
+ # We've reached the end of a paragraph, so index it.
45
+ doc = Xapian::Document.new()
46
+ doc.data = para
47
+
48
+ indexer.document = doc
49
+ indexer.index_text(para)
50
+
51
+ # Add the document to the database
52
+ database.add_document(doc)
53
+ para = ''
54
+ end # if not para.empty?
55
+ else # line not empty
56
+ para += ' ' if para != ''
57
+ para += line
58
+ end # if line empty
59
+ end
60
+
@@ -0,0 +1,74 @@
1
+ #!/usr/bin/env ruby
2
+ #
3
+ # Simple command-line match decider example.
4
+ #
5
+ #
6
+ # Copyright (C) 2006 Networked Knowledge Systems, Inc.
7
+ # Copyright (C) 2006,2007 Olly Betts
8
+ #
9
+ # This program is free software; you can redistribute it and/or
10
+ # modify it under the terms of the GNU General Public License as
11
+ # published by the Free Software Foundation; either version 2 of the
12
+ # License, or (at your option) any later version.
13
+ #
14
+ # This program is distributed in the hope that it will be useful,
15
+ # but WITHOUT ANY WARRANTY; without even the implied warranty of
16
+ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17
+ # GNU General Public License for more details.
18
+ #
19
+ # You should have received a copy of the GNU General Public License
20
+ # along with this program; if not, write to the Free Software
21
+ # Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301
22
+ # USA
23
+
24
+ require 'xapian'
25
+
26
+ if ARGV.size < 3
27
+ $stderr.puts "Usage: #{$0} PATH_TO_DATABASE AVOID_VALUE QUERY"
28
+ exit 99
29
+ end
30
+
31
+ class MyMatchDecider < Xapian::MatchDecider
32
+ def initialize(avoidvalue)
33
+ @avoidvalue = avoidvalue
34
+ super()
35
+ end
36
+
37
+ def __call__(doc)
38
+ return doc.value(0) != @avoidvalue
39
+ end
40
+ end
41
+
42
+ # Open the database for searching.
43
+ database = Xapian::Database.new(ARGV[0])
44
+
45
+ # Start an enquire session.
46
+ enquire = Xapian::Enquire.new(database)
47
+
48
+ # Combine the rest of the command line arguments with spaces between
49
+ # them, so that simple queries don't have to be quoted at the shell
50
+ # level.
51
+ queryString = ARGV[2..-1].join(' ')
52
+
53
+ # Parse the query string to produce a Xapian::Query object.
54
+ qp = Xapian::QueryParser.new()
55
+ stemmer = Xapian::Stem.new("english")
56
+ qp.stemmer = stemmer
57
+ qp.database = database
58
+ qp.stemming_strategy = Xapian::QueryParser::STEM_SOME
59
+ query = qp.parse_query(queryString)
60
+
61
+ puts "Parsed query is: #{query.description()}"
62
+
63
+ # Find the top 10 results for the query.
64
+ enquire.query = query
65
+ mdecider = MyMatchDecider.new(ARGV[1])
66
+ matchset = enquire.mset(0, 10, nil, mdecider)
67
+
68
+ # Display the results.
69
+ puts "#{matchset.matches_estimated()} results found."
70
+ puts "Matches 1-#{matchset.size}:\n"
71
+
72
+ matchset.matches.each {|m|
73
+ puts "#{m.rank + 1}: #{m.percent}% docid=#{m.docid} [#{m.document.data}]\n"
74
+ }
@@ -0,0 +1,63 @@
1
+ #!/usr/bin/env ruby
2
+ #
3
+ # Simple command-line search script.
4
+ #
5
+ # Originally by Paul Legato (plegato@nks.net), 4/22/06.
6
+ #
7
+ # Copyright (C) 2006 Networked Knowledge Systems, Inc.
8
+ # Copyright (C) 2006,2007 Olly Betts
9
+ #
10
+ # This program is free software; you can redistribute it and/or
11
+ # modify it under the terms of the GNU General Public License as
12
+ # published by the Free Software Foundation; either version 2 of the
13
+ # License, or (at your option) any later version.
14
+ #
15
+ # This program is distributed in the hope that it will be useful,
16
+ # but WITHOUT ANY WARRANTY; without even the implied warranty of
17
+ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18
+ # GNU General Public License for more details.
19
+ #
20
+ # You should have received a copy of the GNU General Public License
21
+ # along with this program; if not, write to the Free Software
22
+ # Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301
23
+ # USA
24
+
25
+ require 'xapian'
26
+
27
+ if ARGV.size < 2
28
+ $stderr.puts "Usage: #{$0} PATH_TO_DATABASE QUERY"
29
+ exit 99
30
+ end
31
+
32
+ # Open the database for searching.
33
+ database = Xapian::Database.new(ARGV[0])
34
+
35
+ # Start an enquire session.
36
+ enquire = Xapian::Enquire.new(database)
37
+
38
+ # Combine the rest of the command line arguments with spaces between
39
+ # them, so that simple queries don't have to be quoted at the shell
40
+ # level.
41
+ queryString = ARGV[1..-1].join(' ')
42
+
43
+ # Parse the query string to produce a Xapian::Query object.
44
+ qp = Xapian::QueryParser.new()
45
+ stemmer = Xapian::Stem.new("english")
46
+ qp.stemmer = stemmer
47
+ qp.database = database
48
+ qp.stemming_strategy = Xapian::QueryParser::STEM_SOME
49
+ query = qp.parse_query(queryString)
50
+
51
+ puts "Parsed query is: #{query.description()}"
52
+
53
+ # Find the top 10 results for the query.
54
+ enquire.query = query
55
+ matchset = enquire.mset(0, 10)
56
+
57
+ # Display the results.
58
+ puts "#{matchset.matches_estimated()} results found."
59
+ puts "Matches 1-#{matchset.size}:\n"
60
+
61
+ matchset.matches.each {|m|
62
+ puts "#{m.rank + 1}: #{m.percent}% docid=#{m.docid} [#{m.document.data}]\n"
63
+ }
@@ -0,0 +1,197 @@
1
+ <html><head><title>Ruby bindings for Xapian</title></head>
2
+ <body>
3
+ <h1>Ruby bindings for Xapian</h1>
4
+
5
+ <p>
6
+ The Ruby bindings for Xapian are packaged in the <code>xapian</code> module.
7
+ Ruby strings and arrays are converted automatically in the bindings, so
8
+ generally they should just work naturally.
9
+ </p>
10
+
11
+ <p>
12
+ The <code>examples</code> subdirectory contains examples showing how to use the
13
+ Ruby bindings based on the simple examples from <code>xapian-examples</code>:
14
+ <a href="examples/simpleindex.rb">simpleindex.rb</a>,
15
+ <a href="examples/simplesearch.rb">simplesearch.rb</a>,
16
+ <a href="examples/simpleexpand.rb">simpleexpand.rb</a>.
17
+ There's also
18
+ <a href="examples/simplematchdecider.rb">simplematchdecider.rb</a>
19
+ which shows how to define a MatchDecider in Ruby.
20
+ </p>
21
+
22
+ <h2>Usage</h2>
23
+
24
+ <p>To use the bindings, you need to use <code>require 'xapian'</code>
25
+ in your Ruby program.</p>
26
+
27
+ <p>
28
+ Most standard Xapian methods are available directly
29
+ to your Ruby program. Names have been altered to conform to the
30
+ standard Ruby naming conventions (i.e. get_foo() in C++ becomes foo()
31
+ in Ruby; set_foo() becomes foo=().) C++ 'operator()' methods are
32
+ renamed to '__call__' methods in Ruby.
33
+ </p>
34
+
35
+ <p>
36
+ The C++ methods are not yet documented in the <a href="rdocs/">RDocs</a>.
37
+ In the meantime, refer to the
38
+ <a href="http://xapian.org/docs/apidoc/html/annotated.html">C++ API
39
+ documentation</a> for information on how to use the various methods. Most are
40
+ available directly in the Ruby version. The RDocs currently provide information
41
+ only on methods that are unique to the Ruby version.
42
+ </p>
43
+
44
+ <p>
45
+ The dangerous/non-Rubyish methods from the C++ API have been renamed to
46
+ start with underscores ('_') in the Ruby bindings. You can see them in
47
+ use in xapian.rb. It is strongly recommended that you do not call any
48
+ method that starts with _ directly in your code, but instead use the
49
+ wrappers defined in xapian.rb. Improper use of an _ method can cause
50
+ the Ruby process to segfault.
51
+ </p>
52
+
53
+ <h2>Unicode Support</h2>
54
+
55
+ <p>
56
+ In Xapian 1.0.0 and later, the Xapian::Stem, Xapian::QueryParser, and
57
+ Xapian::TermGenerator classes all assume text is in UTF-8. If you want
58
+ to index strings in a different encoding, use the Ruby
59
+ <a href="http://www.ruby-doc.org/stdlib/libdoc/iconv/rdoc/index.html"
60
+ ><code>Iconv</code> class</a>
61
+ to convert them to UTF-8 before passing them to Xapian, and to convert
62
+ values back to your encoding when reading them from Xapian.
63
+ </p>
64
+
65
+ <!--
66
+ <h2>Exceptions</h2>
67
+
68
+ <p>
69
+ Exceptions are thrown as SWIG exceptions instead of Xapian
70
+ exceptions. This isn't done well at the moment; in future we will
71
+ throw wrapped Xapian exceptions. For now, it's probably easier to
72
+ catch all exceptions and try to take appropriate action based on
73
+ their associated string.
74
+ </p>
75
+ -->
76
+
77
+ <h2>Iterators</h2>
78
+
79
+ <p>
80
+ One important difference from the C++ API is that *Iterator
81
+ classes should not be used from Ruby, as they fit awkwardly into
82
+ standard Ruby iteration paradigms, and as many of them cause segfaults
83
+ if used improperly. They have all been wrapped with appropriate
84
+ methods that simply return the *Iterator objects in an Array, so that
85
+ you can use 'each' to iterate through them.
86
+ </p>
87
+
88
+ <pre>
89
+ mset.matches.each {|match|
90
+ # do something
91
+ }
92
+ </pre>
93
+
94
+ <!--
95
+ <h2>Iterator dereferencing</h2>
96
+
97
+ <p>
98
+ C++ iterators are often dereferenced to get information, eg
99
+ <code>(*it)</code>. With Python these are all mapped to named methods, as
100
+ follows:
101
+ </p>
102
+
103
+ <table title="Iterator dereferencing methods">
104
+ <thead><td>Iterator</td><td>Dereferencing method</td></thead>
105
+ <tr><td>PositionIterator</td> <td><code>get_termpos()</code></td></tr>
106
+ <tr><td>PostingIterator</td> <td><code>get_docid()</code></td></tr>
107
+ <tr><td>TermIterator</td> <td><code>get_term()</code></td></tr>
108
+ <tr><td>ValueIterator</td> <td><code>get_value()</code></td></tr>
109
+ <tr><td>MSetIterator</td> <td><code>get_docid()</code></td></tr>
110
+ <tr><td>ESetIterator</td> <td><code>get_term()</code></td></tr>
111
+ </table>
112
+
113
+ <p>
114
+ Other methods, such as <code>MSetIterator.get_document()</code>, are
115
+ available unchanged.
116
+ </p>
117
+
118
+ <h2>MSet</h2>
119
+
120
+ <p>
121
+ MSet objects have some additional methods to simplify access (these
122
+ work using the C++ array dereferencing):
123
+ </p>
124
+
125
+ <table title="MSet additional methods">
126
+ <thead><td>Method name</td><td>Explanation</td></thead>
127
+ <tr><td><code>get_hit(index)</code></td><td>returns MSetIterator at index</td></tr>
128
+ <tr><td><code>get_document_percentage(index)</code></td><td><code>convert_to_percent(get_hit(index))</code></td></tr>
129
+ <tr><td><code>get_document(index)</code></td><td><code>get_hit(index).get_document()</code></td></tr>
130
+ <tr><td><code>get_docid(index)</code></td><td><code>get_hit(index).get_docid()</code></td></tr>
131
+ </table>
132
+
133
+ -->
134
+
135
+ <h2>Non-Class Functions</h2>
136
+
137
+ <p>The C++ API contains a few non-class functions (the Database factory
138
+ functions, and some functions reporting version information), which are
139
+ wrapped like so for Ruby:
140
+ <ul>
141
+ <ul>
142
+ <li> <code>Xapian::version_string()</code> is wrapped as <code>Xapian::version_string()</code>
143
+ <li> <code>Xapian::major_version()</code> is wrapped as <code>Xapian::major_version()</code>
144
+ <li> <code>Xapian::minor_version()</code> is wrapped as <code>Xapian::minor_version()</code>
145
+ <li> <code>Xapian::revision()</code> is wrapped as <code>Xapian::revision()</code>
146
+ </ul>
147
+
148
+ <ul>
149
+ <li> <code>Xapian::Auto::open_stub()</code> is wrapped as <code>Xapian::open_stub()</code>
150
+ <li> <code>Xapian::Flint::open()</code> is wrapped as <code>Xapian::flint_open()</code>
151
+ <li> <code>Xapian::InMemory::open()</code> is wrapped as <code>Xapian::inmemory_open()</code>
152
+ <li> <code>Xapian::Quartz::open()</code> is wrapped as <code>Xapian::quartz_open()</code>
153
+ <li> <code>Xapian::Remote::open()</code> is wrapped as <code>Xapian::remote_open()</code> (both
154
+ the TCP and "program" versions are wrapped - the SWIG wrapper checks the parameter list to
155
+ decide which to call).
156
+ <li> <code>Xapian::Remote::open_writable()</code> is wrapped as <code>Xapian::remote_open_writable()</code> (both
157
+ the TCP and "program" versions are wrapped - the SWIG wrapper checks the parameter list to
158
+ decide which to call).
159
+ </ul>
160
+ </ul>
161
+
162
+ <h2>Query</h2>
163
+
164
+ <p>
165
+ In C++ there's a Xapian::Query constructor which takes a query operator and
166
+ start/end iterators specifying a number of terms or queries, plus an optional
167
+ parameter. In Ruby, this is wrapped to accept a Ruby array containing
168
+ terms, or queries, or even a mixture of terms and queries. For example:
169
+ </p>
170
+
171
+ <pre>
172
+ subq = Xapian::Query.new(Xapian::Query::OP_AND, "hello", "world")
173
+ q = Xapian::Query.new(Xapian::Query::OP_AND, [subq, "foo", Xapian::Query.new("bar", 2)])
174
+ </pre>
175
+
176
+ <h2>MatchDecider</h2>
177
+
178
+ <p>
179
+ Custom MatchDeciders can be created in Ruby; simply subclass
180
+ Xapian::MatchDecider, ensure you call the superclass constructor, and define a
181
+ __call__ method that will do the work. The simplest example (which does nothing
182
+ useful) would be as follows:
183
+ </p>
184
+
185
+ <pre>
186
+ class MyMatchDecider &lt; Xapian::MatchDecider
187
+ def __call__(doc)
188
+ return true
189
+ end
190
+ end
191
+ </pre>
192
+
193
+ <address>
194
+ Last updated $Date: 2006-04-01 16:35:10 +0100 (Sat, 01 Apr 2006) $
195
+ </address>
196
+ </body>
197
+ </html>