xapian_db 0.5.15 → 1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/CHANGELOG.md +7 -1
- data/README.rdoc +13 -1
- data/lib/xapian_db/adapters/base_adapter.rb +4 -0
- data/lib/xapian_db/database.rb +31 -0
- metadata +13 -15
- data/xapian_source/xapian-bindings-1.2.6.tar.gz +0 -0
- data/xapian_source/xapian-core-1.2.6.tar.gz +0 -0
data/CHANGELOG.md
CHANGED
@@ -1,8 +1,14 @@
|
|
1
|
+
##1.0 (August 17th, 2011)
|
2
|
+
|
3
|
+
Features:
|
4
|
+
|
5
|
+
- find similar documents based on one or more reference documents
|
6
|
+
|
1
7
|
##0.5.15 (July 8th, 2011)
|
2
8
|
|
3
9
|
Features:
|
4
10
|
|
5
|
-
- faster
|
11
|
+
- faster install if the new, dependent gem containing xapian (xapian-ruby) is already installed
|
6
12
|
|
7
13
|
##0.5.14 (July 7th, 2011)
|
8
14
|
|
data/README.rdoc
CHANGED
@@ -232,7 +232,19 @@ A global facet search always groups the results by the class of the indexed obje
|
|
232
232
|
puts "#{name}: #{count} hits"
|
233
233
|
end
|
234
234
|
|
235
|
-
At the class level, any attribute can be used for a facet query.
|
235
|
+
At the class level, any attribute can be used for a facet query. Use facet queries on attributes that store atomic values like strings, numbers or dates.
|
236
|
+
If you use it on attributes that contain collections (like an array of strings), you might get unexpected results.
|
237
|
+
|
238
|
+
=== Find similar documents
|
239
|
+
|
240
|
+
If you have a search result, you can search for similar documents by selecting one or more documents from your result and passing them to the find_similar_to method:
|
241
|
+
|
242
|
+
results = XapianDb.search("moose")
|
243
|
+
similar = XapianDb.find_similar_to results.first
|
244
|
+
|
245
|
+
It works like this: The xapian engine extracts the most selective terms from the passed documents. Then, a
|
246
|
+
new query is executed with the retrieved terms combined with OR operators.
|
247
|
+
This method works best if your models contain large amounts of text.
|
236
248
|
|
237
249
|
=== Transactions
|
238
250
|
|
@@ -47,6 +47,10 @@ module XapianDb
|
|
47
47
|
result
|
48
48
|
end
|
49
49
|
|
50
|
+
define_singleton_method(:find_similar_to) do |reference|
|
51
|
+
return XapianDb.database.find_similar_to reference, :class => klass
|
52
|
+
end
|
53
|
+
|
50
54
|
# Add a method to search attribute facets of this class
|
51
55
|
define_singleton_method(:facets) do |attr_name, expression|
|
52
56
|
|
data/lib/xapian_db/database.rb
CHANGED
@@ -92,6 +92,37 @@ module XapianDb
|
|
92
92
|
result
|
93
93
|
end
|
94
94
|
|
95
|
+
# Find documents that are similar to one or more reference documents. It is basically
|
96
|
+
# the implementation of this suggestion: http://trac.xapian.org/wiki/FAQ/FindSimilar
|
97
|
+
# @param [Array<Xapian::Document> or Xapian::Document] docs One or more reference docs
|
98
|
+
# @param [Hash] options query options
|
99
|
+
# @option options [Class] :class an indexed class; if a class is passed, the result will
|
100
|
+
# contain objects of this class only
|
101
|
+
# @return [XapianDb::Resultset] The resultset
|
102
|
+
def find_similar_to(docs, options={})
|
103
|
+
docs = [docs].flatten
|
104
|
+
reference = Xapian::RSet.new
|
105
|
+
docs.each { |doc| reference.add_document doc.docid }
|
106
|
+
pk_terms = docs.map { |doc| "Q#{doc.data}" }
|
107
|
+
class_terms = docs.map { |doc| "C#{doc.indexed_class}" }
|
108
|
+
|
109
|
+
relevant_terms = Xapian::Enquire.new(reader).eset(40, reference).terms.map {|e| e.name } - pk_terms - class_terms
|
110
|
+
relevant_terms.reject! { |term| term =~ /INDEXED_CLASS/ }
|
111
|
+
|
112
|
+
reference_query = Xapian::Query.new Xapian::Query::OP_OR, pk_terms
|
113
|
+
terms_query = Xapian::Query.new Xapian::Query::OP_OR, relevant_terms
|
114
|
+
final_query = Xapian::Query.new Xapian::Query::OP_AND_NOT, terms_query, reference_query
|
115
|
+
if options[:class]
|
116
|
+
class_scope = "indexed_class:#{options[:class].name.downcase}"
|
117
|
+
@query_parser ||= QueryParser.new(self)
|
118
|
+
class_query = @query_parser.parse(class_scope)
|
119
|
+
final_query = Xapian::Query.new Xapian::Query::OP_AND, class_query, final_query
|
120
|
+
end
|
121
|
+
enquiry = Xapian::Enquire.new(reader)
|
122
|
+
enquiry.query = final_query
|
123
|
+
Resultset.new(enquiry, :db_size => self.size)
|
124
|
+
end
|
125
|
+
|
95
126
|
# A very simple implementation of facets limited to the class facets.
|
96
127
|
# @param [String] expression A valid search expression (see {#search} for examples).
|
97
128
|
# @return [Hash<Class, Integer>] A hash containing the classes and the hits per class
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: xapian_db
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0
|
4
|
+
version: '1.0'
|
5
5
|
prerelease:
|
6
6
|
platform: ruby
|
7
7
|
authors:
|
@@ -9,12 +9,12 @@ authors:
|
|
9
9
|
autorequire:
|
10
10
|
bindir: bin
|
11
11
|
cert_chain: []
|
12
|
-
date: 2011-
|
12
|
+
date: 2011-08-17 00:00:00.000000000 +02:00
|
13
13
|
default_executable:
|
14
14
|
dependencies:
|
15
15
|
- !ruby/object:Gem::Dependency
|
16
16
|
name: daemons
|
17
|
-
requirement: &
|
17
|
+
requirement: &70169658679640 !ruby/object:Gem::Requirement
|
18
18
|
none: false
|
19
19
|
requirements:
|
20
20
|
- - ! '>='
|
@@ -22,10 +22,10 @@ dependencies:
|
|
22
22
|
version: 1.0.10
|
23
23
|
type: :runtime
|
24
24
|
prerelease: false
|
25
|
-
version_requirements: *
|
25
|
+
version_requirements: *70169658679640
|
26
26
|
- !ruby/object:Gem::Dependency
|
27
27
|
name: xapian-ruby
|
28
|
-
requirement: &
|
28
|
+
requirement: &70169658679180 !ruby/object:Gem::Requirement
|
29
29
|
none: false
|
30
30
|
requirements:
|
31
31
|
- - ! '>='
|
@@ -33,10 +33,10 @@ dependencies:
|
|
33
33
|
version: 1.2.6
|
34
34
|
type: :runtime
|
35
35
|
prerelease: false
|
36
|
-
version_requirements: *
|
36
|
+
version_requirements: *70169658679180
|
37
37
|
- !ruby/object:Gem::Dependency
|
38
38
|
name: rspec
|
39
|
-
requirement: &
|
39
|
+
requirement: &70169658678720 !ruby/object:Gem::Requirement
|
40
40
|
none: false
|
41
41
|
requirements:
|
42
42
|
- - ! '>='
|
@@ -44,10 +44,10 @@ dependencies:
|
|
44
44
|
version: 2.3.1
|
45
45
|
type: :development
|
46
46
|
prerelease: false
|
47
|
-
version_requirements: *
|
47
|
+
version_requirements: *70169658678720
|
48
48
|
- !ruby/object:Gem::Dependency
|
49
49
|
name: simplecov
|
50
|
-
requirement: &
|
50
|
+
requirement: &70169658678120 !ruby/object:Gem::Requirement
|
51
51
|
none: false
|
52
52
|
requirements:
|
53
53
|
- - ! '>='
|
@@ -55,10 +55,10 @@ dependencies:
|
|
55
55
|
version: 0.3.7
|
56
56
|
type: :development
|
57
57
|
prerelease: false
|
58
|
-
version_requirements: *
|
58
|
+
version_requirements: *70169658678120
|
59
59
|
- !ruby/object:Gem::Dependency
|
60
60
|
name: beanstalk-client
|
61
|
-
requirement: &
|
61
|
+
requirement: &70169658677520 !ruby/object:Gem::Requirement
|
62
62
|
none: false
|
63
63
|
requirements:
|
64
64
|
- - ! '>='
|
@@ -66,7 +66,7 @@ dependencies:
|
|
66
66
|
version: 1.1.0
|
67
67
|
type: :development
|
68
68
|
prerelease: false
|
69
|
-
version_requirements: *
|
69
|
+
version_requirements: *70169658677520
|
70
70
|
description: XapianDb is a ruby gem that combines features of nosql databases and
|
71
71
|
fulltext indexing. It is based on Xapian, an efficient and powerful indexing library
|
72
72
|
email: gernot.kogler (at) garaio (dot) com
|
@@ -112,8 +112,6 @@ files:
|
|
112
112
|
- lib/xapian_db/utilities.rb
|
113
113
|
- lib/xapian_db.rb
|
114
114
|
- tasks/beanstalk_worker.rake
|
115
|
-
- xapian_source/xapian-bindings-1.2.6.tar.gz
|
116
|
-
- xapian_source/xapian-core-1.2.6.tar.gz
|
117
115
|
- LICENSE
|
118
116
|
- README.rdoc
|
119
117
|
- CHANGELOG.md
|
@@ -145,7 +143,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
145
143
|
version: 1.3.6
|
146
144
|
requirements: []
|
147
145
|
rubyforge_project:
|
148
|
-
rubygems_version: 1.
|
146
|
+
rubygems_version: 1.3.9.2
|
149
147
|
signing_key:
|
150
148
|
specification_version: 3
|
151
149
|
summary: Ruby library to use a Xapian db as a key/value store with high performance
|
Binary file
|
Binary file
|