mongoid-sphinx 0.0.3 → 0.0.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/lib/mongoid_sphinx.rb
CHANGED
@@ -5,18 +5,72 @@ module Mongoid
|
|
5
5
|
module Sphinx
|
6
6
|
extend ActiveSupport::Concern
|
7
7
|
included do
|
8
|
+
SPHINX_TYPE_MAPPING = {
|
9
|
+
'Date' => 'timestamp',
|
10
|
+
'DateTime' => 'timestamp',
|
11
|
+
'Time' => 'timestamp',
|
12
|
+
'Float' => 'float',
|
13
|
+
'Integer' => 'int',
|
14
|
+
'BigDecimal' => 'float',
|
15
|
+
'Boolean' => 'bool'
|
16
|
+
}
|
17
|
+
|
8
18
|
cattr_accessor :search_fields
|
19
|
+
cattr_accessor :search_attributes
|
9
20
|
end
|
10
21
|
|
11
22
|
module ClassMethods
|
12
|
-
def search_index(
|
13
|
-
self.search_fields = fields
|
23
|
+
def search_index(options={})
|
24
|
+
self.search_fields = options[:fields]
|
25
|
+
self.search_attributes = {}
|
26
|
+
options[:attributes].each do |attrib|
|
27
|
+
self.search_attributes[attrib] = SPHINX_TYPE_MAPPING[self.fields[attrib.to_s].type.to_s] || 'str2ordinal'
|
28
|
+
end
|
29
|
+
end
|
30
|
+
|
31
|
+
def sphinx_stream
|
32
|
+
STDOUT.sync = true # Make sure we really stream..
|
33
|
+
|
34
|
+
puts '<?xml version="1.0" encoding="utf-8"?>'
|
35
|
+
puts '<sphinx:docset>'
|
36
|
+
|
37
|
+
# Schema
|
38
|
+
puts '<sphinx:schema>'
|
39
|
+
puts '<sphinx:field name="classname"/>'
|
40
|
+
self.search_fields.each do |key, value|
|
41
|
+
puts "<sphinx:field name=\"#{key}\"/>"
|
42
|
+
end
|
43
|
+
self.search_attributes.each do |key, value|
|
44
|
+
puts "<sphinx:attr name=\"#{key}\" type=\"#{value}\"/>"
|
45
|
+
end
|
46
|
+
puts '</sphinx:schema>'
|
47
|
+
|
48
|
+
self.all.each do |document_hash|
|
49
|
+
sphinx_compatible_id = document_hash['_id'].to_s.to_i - 100000000000000000000000
|
50
|
+
|
51
|
+
puts "<sphinx:document id=\"#{sphinx_compatible_id}\">"
|
52
|
+
|
53
|
+
puts "<classname>#{self.to_s}</classname>"
|
54
|
+
self.search_fields.each do |key|
|
55
|
+
puts "<#{key}><![CDATA[[#{document_hash[key.to_s]}]]></#{key}>"
|
56
|
+
end
|
57
|
+
self.search_attributes.each do |key, value|
|
58
|
+
value = case value
|
59
|
+
when 'bool' : document_hash[key.to_s] ? 1 : 0
|
60
|
+
when 'timestamp' : document_hash[key.to_s].to_i
|
61
|
+
else document_hash[key.to_s].to_s
|
62
|
+
end
|
63
|
+
puts "<#{key}>#{value}</#{key}>"
|
64
|
+
end
|
65
|
+
|
66
|
+
puts '</sphinx:document>'
|
67
|
+
end
|
68
|
+
|
69
|
+
puts '</sphinx:docset>'
|
14
70
|
end
|
15
71
|
|
16
72
|
def search(query, options = {})
|
17
73
|
client = MongoidSphinx::Configuration.instance.client
|
18
|
-
|
19
|
-
query = query + " @classname #{self.to_s}"
|
20
74
|
|
21
75
|
client.match_mode = options[:match_mode] || :extended
|
22
76
|
client.limit = options[:limit] if options.key?(:limit)
|
@@ -27,17 +81,27 @@ module Mongoid
|
|
27
81
|
client.sort_by = options[:sort_by]
|
28
82
|
end
|
29
83
|
|
30
|
-
|
84
|
+
if options.key?(:with)
|
85
|
+
options[:with].each do |key, value|
|
86
|
+
client.filters << Riddle::Client::Filter.new(key.to_s, value.is_a?(Range) ? value : value.to_a, false)
|
87
|
+
end
|
88
|
+
end
|
89
|
+
|
90
|
+
if options.key?(:without)
|
91
|
+
options[:without].each do |key, value|
|
92
|
+
client.filters << Riddle::Client::Filter.new(key.to_s, value.is_a?(Range) ? value : value.to_a, true)
|
93
|
+
end
|
94
|
+
end
|
95
|
+
|
96
|
+
result = client.query("#{query} @classname #{self.to_s}")
|
31
97
|
|
32
98
|
if result and result[:status] == 0 and (matches = result[:matches])
|
33
|
-
classname = nil
|
34
99
|
ids = matches.collect do |row|
|
35
|
-
classname = MongoidSphinx::MultiAttribute.decode(row[:attributes]['csphinx-class'])
|
36
100
|
(100000000000000000000000 + row[:doc]).to_s rescue nil
|
37
101
|
end.compact
|
38
102
|
|
39
103
|
return ids if options[:raw] or ids.empty?
|
40
|
-
return
|
104
|
+
return self.find(ids)
|
41
105
|
else
|
42
106
|
return []
|
43
107
|
end
|
metadata
CHANGED
@@ -1,13 +1,13 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: mongoid-sphinx
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
hash:
|
4
|
+
hash: 23
|
5
5
|
prerelease: false
|
6
6
|
segments:
|
7
7
|
- 0
|
8
8
|
- 0
|
9
|
-
-
|
10
|
-
version: 0.0.
|
9
|
+
- 4
|
10
|
+
version: 0.0.4
|
11
11
|
platform: ruby
|
12
12
|
authors:
|
13
13
|
- Matt Hodgson
|
@@ -63,10 +63,8 @@ extra_rdoc_files: []
|
|
63
63
|
|
64
64
|
files:
|
65
65
|
- lib/mongoid_sphinx/configuration.rb
|
66
|
-
- lib/mongoid_sphinx/indexer.rb
|
67
66
|
- lib/mongoid_sphinx/mongoid/identity.rb
|
68
67
|
- lib/mongoid_sphinx/mongoid/sphinx.rb
|
69
|
-
- lib/mongoid_sphinx/multi_attribute.rb
|
70
68
|
- lib/mongoid_sphinx/version.rb
|
71
69
|
- lib/mongoid_sphinx.rb
|
72
70
|
- README.markdown
|
@@ -1,123 +0,0 @@
|
|
1
|
-
# MongoidSphinx, a full text indexing extension for MongoDB using
|
2
|
-
# Sphinx.
|
3
|
-
#
|
4
|
-
# This file contains the MongoidSphinx::Indexer::XMLDocset and
|
5
|
-
# MongoidSphinx::Indexer::XMLDoc classes.
|
6
|
-
|
7
|
-
module MongoidSphinx #:nodoc:
|
8
|
-
|
9
|
-
# Module Indexer contains classes for creating XML input documents for the
|
10
|
-
# indexer. Each Sphinx index consists of a single "sphinx:docset" with any
|
11
|
-
# number of "sphinx:document" tags.
|
12
|
-
#
|
13
|
-
# The XML source can be generated from an array of CouchRest objects or from
|
14
|
-
# an array of Hashes containing at least fields "classname" and "_id"
|
15
|
-
# as returned by MongoDB view "MongoSphinxIndex/couchrests_by_timestamp".
|
16
|
-
#
|
17
|
-
# Sample:
|
18
|
-
#
|
19
|
-
# rows = [{ 'name' => 'John', 'phone' => '199 43828',
|
20
|
-
# 'classname' => 'Address', '_id' => 'Address-234164'
|
21
|
-
# },
|
22
|
-
# { 'name' => 'Sue', 'mobile' => '828 19439',
|
23
|
-
# 'classname' => 'Address', '_id' => 'Address-422433'
|
24
|
-
# }
|
25
|
-
# ]
|
26
|
-
# puts MongoSphinx::Indexer::XMLDocset.new(rows).to_s
|
27
|
-
#
|
28
|
-
# <?xml version="1.0" encoding="utf-8"?>
|
29
|
-
# <sphinx:docset>
|
30
|
-
# <sphinx:schema>
|
31
|
-
# <sphinx:attr name="csphinx-class" type="multi"/>
|
32
|
-
# <sphinx:field name="classname"/>
|
33
|
-
# <sphinx:field name="name"/>
|
34
|
-
# <sphinx:field name="phone"/>
|
35
|
-
# <sphinx:field name="mobile"/>
|
36
|
-
# <sphinx:field name="created_at"/>
|
37
|
-
# </sphinx:schema>
|
38
|
-
# <sphinx:document id="234164">
|
39
|
-
# <csphinx-class>336,623,883,1140</csphinx-class>
|
40
|
-
# <classname>Address</classname>
|
41
|
-
# <name><![CDATA[[John]]></name>
|
42
|
-
# <phone><![CDATA[[199 422433]]></phone>
|
43
|
-
# <mobile><![CDATA[[]]></mobile>
|
44
|
-
# <created_at><![CDATA[[]]></created_at>
|
45
|
-
# </sphinx:document>
|
46
|
-
# <sphinx:document id="423423">
|
47
|
-
# <csphinx-class>336,623,883,1140</csphinx-class>
|
48
|
-
# <classname>Address</classname>
|
49
|
-
# <name><![CDATA[[Sue]]></name>
|
50
|
-
# <phone><![CDATA[[]]></phone>
|
51
|
-
# <mobile><![CDATA[[828 19439]]></mobile>
|
52
|
-
# <created_at><![CDATA[[]]></created_at>
|
53
|
-
# </sphinx:document>
|
54
|
-
# </sphinx:docset>"
|
55
|
-
|
56
|
-
module Indexer
|
57
|
-
|
58
|
-
# Class XMLDocset wraps the XML representation of a document to index. It
|
59
|
-
# contains a complete "sphinx:docset" including its schema definition.
|
60
|
-
|
61
|
-
class XMLDocset
|
62
|
-
|
63
|
-
# Streams xml of all objects in a klass to the stdout. This makes sure you can process large collections.
|
64
|
-
#
|
65
|
-
# Options:
|
66
|
-
# attributes (required) - The attributes that are put in the sphinx xml.
|
67
|
-
#
|
68
|
-
# Example:
|
69
|
-
# MongoSphinx::Indexer::XMLDocset.stream(Document, :attributes => %w(title content))
|
70
|
-
# This will create an XML stream to stdout.
|
71
|
-
#
|
72
|
-
# Configure in your sphinx.conf like
|
73
|
-
# xmlpipe_command = ./script/runner "MongoSphinx::Indexer::XMLDocset.stream(Document, :attributes => %w(title content))"
|
74
|
-
#
|
75
|
-
def self.stream(klass)
|
76
|
-
STDOUT.sync = true # Make sure we really stream..
|
77
|
-
|
78
|
-
puts '<?xml version="1.0" encoding="utf-8"?>'
|
79
|
-
puts '<sphinx:docset>'
|
80
|
-
|
81
|
-
# Schema
|
82
|
-
puts '<sphinx:schema>'
|
83
|
-
klass.search_fields.each do |key, value|
|
84
|
-
puts "<sphinx:field name=\"#{key}\"/>"
|
85
|
-
end
|
86
|
-
# FIXME: What is this attribute?
|
87
|
-
puts '<sphinx:field name="classname"/>'
|
88
|
-
puts '<sphinx:attr name="csphinx-class" type="multi"/>'
|
89
|
-
puts '</sphinx:schema>'
|
90
|
-
|
91
|
-
collection = Mongoid.database.collection(klass.collection.name)
|
92
|
-
collection.find.each do |document_hash|
|
93
|
-
XMLDoc.stream_for_hash(document_hash, klass)
|
94
|
-
end
|
95
|
-
|
96
|
-
puts '</sphinx:docset>'
|
97
|
-
end
|
98
|
-
|
99
|
-
end
|
100
|
-
|
101
|
-
class XMLDoc
|
102
|
-
|
103
|
-
def self.stream_for_hash(hash, klass)
|
104
|
-
sphinx_compatible_id = hash['_id'].to_s.to_i - 100000000000000000000000
|
105
|
-
|
106
|
-
puts "<sphinx:document id=\"#{sphinx_compatible_id}\">"
|
107
|
-
# FIXME: Should we include this?
|
108
|
-
puts '<csphinx-class>'
|
109
|
-
puts MongoidSphinx::MultiAttribute.encode(klass.to_s)
|
110
|
-
puts '</csphinx-class>'
|
111
|
-
puts "<classname>#{klass.to_s}</classname>"
|
112
|
-
|
113
|
-
klass.search_fields.each do |key|
|
114
|
-
value = hash[key.to_s]
|
115
|
-
puts "<#{key}><![CDATA[[#{value}]]></#{key}>"
|
116
|
-
end
|
117
|
-
|
118
|
-
puts '</sphinx:document>'
|
119
|
-
end
|
120
|
-
|
121
|
-
end
|
122
|
-
end
|
123
|
-
end
|
@@ -1,57 +0,0 @@
|
|
1
|
-
# MongoSphinx, a full text indexing extension for MongoDB using
|
2
|
-
# Sphinx.
|
3
|
-
#
|
4
|
-
# This file contains the MongoSphinx::MultiAttribute class.
|
5
|
-
|
6
|
-
# Namespace module for the MongoSphinx gem.
|
7
|
-
|
8
|
-
module MongoidSphinx #:nodoc:
|
9
|
-
|
10
|
-
# Module MultiAttribute implements helpers to translate back and
|
11
|
-
# forth between Ruby Strings and an array of integers suitable for Sphinx
|
12
|
-
# attributes of type "multi".
|
13
|
-
#
|
14
|
-
# Background: Getting an ID as result for a query is OK, but for example to
|
15
|
-
# allow cast safety, we need an aditional attribute. Sphinx supports
|
16
|
-
# attributes which are returned together with the ID, but they behave a
|
17
|
-
# little different than expected: Instead we can use arrays of integers with
|
18
|
-
# ASCII character codes. These values are returned in ascending (!) order of
|
19
|
-
# value (yes, sounds funny but is reasonable from an internal view to
|
20
|
-
# Sphinx). So we mask each byte with 0x0100++ to keep the order...
|
21
|
-
#
|
22
|
-
# Sample:
|
23
|
-
#
|
24
|
-
# MongoSphinx::MultiAttribute.encode('Hello')
|
25
|
-
# => "328,613,876,1132,1391"
|
26
|
-
# MongoSphinx::MultiAttribute.decode('328,613,876,1132,1391')
|
27
|
-
# => "Hello"
|
28
|
-
|
29
|
-
module MultiAttribute
|
30
|
-
|
31
|
-
# Returns an numeric representation of a Ruby String suitable for "multi"
|
32
|
-
# attributes of Sphinx.
|
33
|
-
#
|
34
|
-
# Parameters:
|
35
|
-
#
|
36
|
-
# [str] String to translate
|
37
|
-
|
38
|
-
def self.encode(str)
|
39
|
-
offset = 0
|
40
|
-
return str.bytes.collect { |c| (offset+= 0x0100) + c }.join(',')
|
41
|
-
end
|
42
|
-
|
43
|
-
# Returns the original MongoDB ID created from a Sphinx ID. Only works if
|
44
|
-
# the ID was created from a MongoDB ID before!
|
45
|
-
#
|
46
|
-
# Parameters:
|
47
|
-
#
|
48
|
-
# [multi] Sphinx "multi" attribute to translate back
|
49
|
-
|
50
|
-
def self.decode(multi)
|
51
|
-
offset = 0
|
52
|
-
multi = multi.split(',') if not multi.kind_of? Array
|
53
|
-
|
54
|
-
return multi.collect {|x| (x.to_i-(offset+=0x0100)).chr}.to_s
|
55
|
-
end
|
56
|
-
end
|
57
|
-
end
|