xapian_db 0.5.15 → 1.0
Sign up to get free protection for your applications and to get access to all the features.
- data/CHANGELOG.md +7 -1
- data/README.rdoc +13 -1
- data/lib/xapian_db/adapters/base_adapter.rb +4 -0
- data/lib/xapian_db/database.rb +31 -0
- metadata +13 -15
- data/xapian_source/xapian-bindings-1.2.6.tar.gz +0 -0
- data/xapian_source/xapian-core-1.2.6.tar.gz +0 -0
data/CHANGELOG.md
CHANGED
@@ -1,8 +1,14 @@
|
|
1
|
+
##1.0 (August 17th, 2011)
|
2
|
+
|
3
|
+
Features:
|
4
|
+
|
5
|
+
- find similar documents based on one or more reference documents
|
6
|
+
|
1
7
|
##0.5.15 (July 8th, 2011)
|
2
8
|
|
3
9
|
Features:
|
4
10
|
|
5
|
-
- faster
|
11
|
+
- faster install if the new, dependent gem containing xapian (xapian-ruby) is already installed
|
6
12
|
|
7
13
|
##0.5.14 (July 7th, 2011)
|
8
14
|
|
data/README.rdoc
CHANGED
@@ -232,7 +232,19 @@ A global facet search always groups the results by the class of the indexed obje
|
|
232
232
|
puts "#{name}: #{count} hits"
|
233
233
|
end
|
234
234
|
|
235
|
-
At the class level, any attribute can be used for a facet query.
|
235
|
+
At the class level, any attribute can be used for a facet query. Use facet queries on attributes that store atomic values like strings, numbers or dates.
|
236
|
+
If you use it on attributes that contain collections (like an array of strings), you might get unexpected results.
|
237
|
+
|
238
|
+
=== Find similar documents
|
239
|
+
|
240
|
+
If you have a search result, you can search for similar documents by selecting one or more documents from your result and passing them to the find_similar_to method:
|
241
|
+
|
242
|
+
results = XapianDb.search("moose")
|
243
|
+
similar = XapianDb.find_similar_to results.first
|
244
|
+
|
245
|
+
It works like this: The xapian engine extracts the most selective terms from the passed documents. Then, a
|
246
|
+
new query is executed with the retrieved terms combined with OR operators.
|
247
|
+
This method works best if your models contain large amounts of text.
|
236
248
|
|
237
249
|
=== Transactions
|
238
250
|
|
@@ -47,6 +47,10 @@ module XapianDb
|
|
47
47
|
result
|
48
48
|
end
|
49
49
|
|
50
|
+
define_singleton_method(:find_similar_to) do |reference|
|
51
|
+
return XapianDb.database.find_similar_to reference, :class => klass
|
52
|
+
end
|
53
|
+
|
50
54
|
# Add a method to search attribute facets of this class
|
51
55
|
define_singleton_method(:facets) do |attr_name, expression|
|
52
56
|
|
data/lib/xapian_db/database.rb
CHANGED
@@ -92,6 +92,37 @@ module XapianDb
|
|
92
92
|
result
|
93
93
|
end
|
94
94
|
|
95
|
+
# Find documents that are similar to one or more reference documents. It is basically
|
96
|
+
# the implementation of this suggestion: http://trac.xapian.org/wiki/FAQ/FindSimilar
|
97
|
+
# @param [Array<Xapian::Document> or Xapian::Document] docs One or more reference docs
|
98
|
+
# @param [Hash] options query options
|
99
|
+
# @option options [Class] :class an indexed class; if a class is passed, the result will
|
100
|
+
# contain objects of this class only
|
101
|
+
# @return [XapianDb::Resultset] The resultset
|
102
|
+
def find_similar_to(docs, options={})
|
103
|
+
docs = [docs].flatten
|
104
|
+
reference = Xapian::RSet.new
|
105
|
+
docs.each { |doc| reference.add_document doc.docid }
|
106
|
+
pk_terms = docs.map { |doc| "Q#{doc.data}" }
|
107
|
+
class_terms = docs.map { |doc| "C#{doc.indexed_class}" }
|
108
|
+
|
109
|
+
relevant_terms = Xapian::Enquire.new(reader).eset(40, reference).terms.map {|e| e.name } - pk_terms - class_terms
|
110
|
+
relevant_terms.reject! { |term| term =~ /INDEXED_CLASS/ }
|
111
|
+
|
112
|
+
reference_query = Xapian::Query.new Xapian::Query::OP_OR, pk_terms
|
113
|
+
terms_query = Xapian::Query.new Xapian::Query::OP_OR, relevant_terms
|
114
|
+
final_query = Xapian::Query.new Xapian::Query::OP_AND_NOT, terms_query, reference_query
|
115
|
+
if options[:class]
|
116
|
+
class_scope = "indexed_class:#{options[:class].name.downcase}"
|
117
|
+
@query_parser ||= QueryParser.new(self)
|
118
|
+
class_query = @query_parser.parse(class_scope)
|
119
|
+
final_query = Xapian::Query.new Xapian::Query::OP_AND, class_query, final_query
|
120
|
+
end
|
121
|
+
enquiry = Xapian::Enquire.new(reader)
|
122
|
+
enquiry.query = final_query
|
123
|
+
Resultset.new(enquiry, :db_size => self.size)
|
124
|
+
end
|
125
|
+
|
95
126
|
# A very simple implementation of facets limited to the class facets.
|
96
127
|
# @param [String] expression A valid search expression (see {#search} for examples).
|
97
128
|
# @return [Hash<Class, Integer>] A hash containing the classes and the hits per class
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: xapian_db
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0
|
4
|
+
version: '1.0'
|
5
5
|
prerelease:
|
6
6
|
platform: ruby
|
7
7
|
authors:
|
@@ -9,12 +9,12 @@ authors:
|
|
9
9
|
autorequire:
|
10
10
|
bindir: bin
|
11
11
|
cert_chain: []
|
12
|
-
date: 2011-
|
12
|
+
date: 2011-08-17 00:00:00.000000000 +02:00
|
13
13
|
default_executable:
|
14
14
|
dependencies:
|
15
15
|
- !ruby/object:Gem::Dependency
|
16
16
|
name: daemons
|
17
|
-
requirement: &
|
17
|
+
requirement: &70169658679640 !ruby/object:Gem::Requirement
|
18
18
|
none: false
|
19
19
|
requirements:
|
20
20
|
- - ! '>='
|
@@ -22,10 +22,10 @@ dependencies:
|
|
22
22
|
version: 1.0.10
|
23
23
|
type: :runtime
|
24
24
|
prerelease: false
|
25
|
-
version_requirements: *
|
25
|
+
version_requirements: *70169658679640
|
26
26
|
- !ruby/object:Gem::Dependency
|
27
27
|
name: xapian-ruby
|
28
|
-
requirement: &
|
28
|
+
requirement: &70169658679180 !ruby/object:Gem::Requirement
|
29
29
|
none: false
|
30
30
|
requirements:
|
31
31
|
- - ! '>='
|
@@ -33,10 +33,10 @@ dependencies:
|
|
33
33
|
version: 1.2.6
|
34
34
|
type: :runtime
|
35
35
|
prerelease: false
|
36
|
-
version_requirements: *
|
36
|
+
version_requirements: *70169658679180
|
37
37
|
- !ruby/object:Gem::Dependency
|
38
38
|
name: rspec
|
39
|
-
requirement: &
|
39
|
+
requirement: &70169658678720 !ruby/object:Gem::Requirement
|
40
40
|
none: false
|
41
41
|
requirements:
|
42
42
|
- - ! '>='
|
@@ -44,10 +44,10 @@ dependencies:
|
|
44
44
|
version: 2.3.1
|
45
45
|
type: :development
|
46
46
|
prerelease: false
|
47
|
-
version_requirements: *
|
47
|
+
version_requirements: *70169658678720
|
48
48
|
- !ruby/object:Gem::Dependency
|
49
49
|
name: simplecov
|
50
|
-
requirement: &
|
50
|
+
requirement: &70169658678120 !ruby/object:Gem::Requirement
|
51
51
|
none: false
|
52
52
|
requirements:
|
53
53
|
- - ! '>='
|
@@ -55,10 +55,10 @@ dependencies:
|
|
55
55
|
version: 0.3.7
|
56
56
|
type: :development
|
57
57
|
prerelease: false
|
58
|
-
version_requirements: *
|
58
|
+
version_requirements: *70169658678120
|
59
59
|
- !ruby/object:Gem::Dependency
|
60
60
|
name: beanstalk-client
|
61
|
-
requirement: &
|
61
|
+
requirement: &70169658677520 !ruby/object:Gem::Requirement
|
62
62
|
none: false
|
63
63
|
requirements:
|
64
64
|
- - ! '>='
|
@@ -66,7 +66,7 @@ dependencies:
|
|
66
66
|
version: 1.1.0
|
67
67
|
type: :development
|
68
68
|
prerelease: false
|
69
|
-
version_requirements: *
|
69
|
+
version_requirements: *70169658677520
|
70
70
|
description: XapianDb is a ruby gem that combines features of nosql databases and
|
71
71
|
fulltext indexing. It is based on Xapian, an efficient and powerful indexing library
|
72
72
|
email: gernot.kogler (at) garaio (dot) com
|
@@ -112,8 +112,6 @@ files:
|
|
112
112
|
- lib/xapian_db/utilities.rb
|
113
113
|
- lib/xapian_db.rb
|
114
114
|
- tasks/beanstalk_worker.rake
|
115
|
-
- xapian_source/xapian-bindings-1.2.6.tar.gz
|
116
|
-
- xapian_source/xapian-core-1.2.6.tar.gz
|
117
115
|
- LICENSE
|
118
116
|
- README.rdoc
|
119
117
|
- CHANGELOG.md
|
@@ -145,7 +143,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
145
143
|
version: 1.3.6
|
146
144
|
requirements: []
|
147
145
|
rubyforge_project:
|
148
|
-
rubygems_version: 1.
|
146
|
+
rubygems_version: 1.3.9.2
|
149
147
|
signing_key:
|
150
148
|
specification_version: 3
|
151
149
|
summary: Ruby library to use a Xapian db as a key/value store with high performance
|
Binary file
|
Binary file
|