mongoid-sphinx 0.0.3 → 0.0.4
Sign up to get free protection for your applications and to get access to all the features.
data/lib/mongoid_sphinx.rb
CHANGED
@@ -5,18 +5,72 @@ module Mongoid
|
|
5
5
|
module Sphinx
|
6
6
|
extend ActiveSupport::Concern
|
7
7
|
included do
|
8
|
+
SPHINX_TYPE_MAPPING = {
|
9
|
+
'Date' => 'timestamp',
|
10
|
+
'DateTime' => 'timestamp',
|
11
|
+
'Time' => 'timestamp',
|
12
|
+
'Float' => 'float',
|
13
|
+
'Integer' => 'int',
|
14
|
+
'BigDecimal' => 'float',
|
15
|
+
'Boolean' => 'bool'
|
16
|
+
}
|
17
|
+
|
8
18
|
cattr_accessor :search_fields
|
19
|
+
cattr_accessor :search_attributes
|
9
20
|
end
|
10
21
|
|
11
22
|
module ClassMethods
|
12
|
-
def search_index(
|
13
|
-
self.search_fields = fields
|
23
|
+
def search_index(options={})
|
24
|
+
self.search_fields = options[:fields]
|
25
|
+
self.search_attributes = {}
|
26
|
+
options[:attributes].each do |attrib|
|
27
|
+
self.search_attributes[attrib] = SPHINX_TYPE_MAPPING[self.fields[attrib.to_s].type.to_s] || 'str2ordinal'
|
28
|
+
end
|
29
|
+
end
|
30
|
+
|
31
|
+
def sphinx_stream
|
32
|
+
STDOUT.sync = true # Make sure we really stream..
|
33
|
+
|
34
|
+
puts '<?xml version="1.0" encoding="utf-8"?>'
|
35
|
+
puts '<sphinx:docset>'
|
36
|
+
|
37
|
+
# Schema
|
38
|
+
puts '<sphinx:schema>'
|
39
|
+
puts '<sphinx:field name="classname"/>'
|
40
|
+
self.search_fields.each do |key, value|
|
41
|
+
puts "<sphinx:field name=\"#{key}\"/>"
|
42
|
+
end
|
43
|
+
self.search_attributes.each do |key, value|
|
44
|
+
puts "<sphinx:attr name=\"#{key}\" type=\"#{value}\"/>"
|
45
|
+
end
|
46
|
+
puts '</sphinx:schema>'
|
47
|
+
|
48
|
+
self.all.each do |document_hash|
|
49
|
+
sphinx_compatible_id = document_hash['_id'].to_s.to_i - 100000000000000000000000
|
50
|
+
|
51
|
+
puts "<sphinx:document id=\"#{sphinx_compatible_id}\">"
|
52
|
+
|
53
|
+
puts "<classname>#{self.to_s}</classname>"
|
54
|
+
self.search_fields.each do |key|
|
55
|
+
puts "<#{key}><![CDATA[[#{document_hash[key.to_s]}]]></#{key}>"
|
56
|
+
end
|
57
|
+
self.search_attributes.each do |key, value|
|
58
|
+
value = case value
|
59
|
+
when 'bool' : document_hash[key.to_s] ? 1 : 0
|
60
|
+
when 'timestamp' : document_hash[key.to_s].to_i
|
61
|
+
else document_hash[key.to_s].to_s
|
62
|
+
end
|
63
|
+
puts "<#{key}>#{value}</#{key}>"
|
64
|
+
end
|
65
|
+
|
66
|
+
puts '</sphinx:document>'
|
67
|
+
end
|
68
|
+
|
69
|
+
puts '</sphinx:docset>'
|
14
70
|
end
|
15
71
|
|
16
72
|
def search(query, options = {})
|
17
73
|
client = MongoidSphinx::Configuration.instance.client
|
18
|
-
|
19
|
-
query = query + " @classname #{self.to_s}"
|
20
74
|
|
21
75
|
client.match_mode = options[:match_mode] || :extended
|
22
76
|
client.limit = options[:limit] if options.key?(:limit)
|
@@ -27,17 +81,27 @@ module Mongoid
|
|
27
81
|
client.sort_by = options[:sort_by]
|
28
82
|
end
|
29
83
|
|
30
|
-
|
84
|
+
if options.key?(:with)
|
85
|
+
options[:with].each do |key, value|
|
86
|
+
client.filters << Riddle::Client::Filter.new(key.to_s, value.is_a?(Range) ? value : value.to_a, false)
|
87
|
+
end
|
88
|
+
end
|
89
|
+
|
90
|
+
if options.key?(:without)
|
91
|
+
options[:without].each do |key, value|
|
92
|
+
client.filters << Riddle::Client::Filter.new(key.to_s, value.is_a?(Range) ? value : value.to_a, true)
|
93
|
+
end
|
94
|
+
end
|
95
|
+
|
96
|
+
result = client.query("#{query} @classname #{self.to_s}")
|
31
97
|
|
32
98
|
if result and result[:status] == 0 and (matches = result[:matches])
|
33
|
-
classname = nil
|
34
99
|
ids = matches.collect do |row|
|
35
|
-
classname = MongoidSphinx::MultiAttribute.decode(row[:attributes]['csphinx-class'])
|
36
100
|
(100000000000000000000000 + row[:doc]).to_s rescue nil
|
37
101
|
end.compact
|
38
102
|
|
39
103
|
return ids if options[:raw] or ids.empty?
|
40
|
-
return
|
104
|
+
return self.find(ids)
|
41
105
|
else
|
42
106
|
return []
|
43
107
|
end
|
metadata
CHANGED
@@ -1,13 +1,13 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: mongoid-sphinx
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
hash:
|
4
|
+
hash: 23
|
5
5
|
prerelease: false
|
6
6
|
segments:
|
7
7
|
- 0
|
8
8
|
- 0
|
9
|
-
-
|
10
|
-
version: 0.0.
|
9
|
+
- 4
|
10
|
+
version: 0.0.4
|
11
11
|
platform: ruby
|
12
12
|
authors:
|
13
13
|
- Matt Hodgson
|
@@ -63,10 +63,8 @@ extra_rdoc_files: []
|
|
63
63
|
|
64
64
|
files:
|
65
65
|
- lib/mongoid_sphinx/configuration.rb
|
66
|
-
- lib/mongoid_sphinx/indexer.rb
|
67
66
|
- lib/mongoid_sphinx/mongoid/identity.rb
|
68
67
|
- lib/mongoid_sphinx/mongoid/sphinx.rb
|
69
|
-
- lib/mongoid_sphinx/multi_attribute.rb
|
70
68
|
- lib/mongoid_sphinx/version.rb
|
71
69
|
- lib/mongoid_sphinx.rb
|
72
70
|
- README.markdown
|
@@ -1,123 +0,0 @@
|
|
1
|
-
# MongoidSphinx, a full text indexing extension for MongoDB using
|
2
|
-
# Sphinx.
|
3
|
-
#
|
4
|
-
# This file contains the MongoidSphinx::Indexer::XMLDocset and
|
5
|
-
# MongoidSphinx::Indexer::XMLDoc classes.
|
6
|
-
|
7
|
-
module MongoidSphinx #:nodoc:
|
8
|
-
|
9
|
-
# Module Indexer contains classes for creating XML input documents for the
|
10
|
-
# indexer. Each Sphinx index consists of a single "sphinx:docset" with any
|
11
|
-
# number of "sphinx:document" tags.
|
12
|
-
#
|
13
|
-
# The XML source can be generated from an array of CouchRest objects or from
|
14
|
-
# an array of Hashes containing at least fields "classname" and "_id"
|
15
|
-
# as returned by MongoDB view "MongoSphinxIndex/couchrests_by_timestamp".
|
16
|
-
#
|
17
|
-
# Sample:
|
18
|
-
#
|
19
|
-
# rows = [{ 'name' => 'John', 'phone' => '199 43828',
|
20
|
-
# 'classname' => 'Address', '_id' => 'Address-234164'
|
21
|
-
# },
|
22
|
-
# { 'name' => 'Sue', 'mobile' => '828 19439',
|
23
|
-
# 'classname' => 'Address', '_id' => 'Address-422433'
|
24
|
-
# }
|
25
|
-
# ]
|
26
|
-
# puts MongoSphinx::Indexer::XMLDocset.new(rows).to_s
|
27
|
-
#
|
28
|
-
# <?xml version="1.0" encoding="utf-8"?>
|
29
|
-
# <sphinx:docset>
|
30
|
-
# <sphinx:schema>
|
31
|
-
# <sphinx:attr name="csphinx-class" type="multi"/>
|
32
|
-
# <sphinx:field name="classname"/>
|
33
|
-
# <sphinx:field name="name"/>
|
34
|
-
# <sphinx:field name="phone"/>
|
35
|
-
# <sphinx:field name="mobile"/>
|
36
|
-
# <sphinx:field name="created_at"/>
|
37
|
-
# </sphinx:schema>
|
38
|
-
# <sphinx:document id="234164">
|
39
|
-
# <csphinx-class>336,623,883,1140</csphinx-class>
|
40
|
-
# <classname>Address</classname>
|
41
|
-
# <name><![CDATA[[John]]></name>
|
42
|
-
# <phone><![CDATA[[199 422433]]></phone>
|
43
|
-
# <mobile><![CDATA[[]]></mobile>
|
44
|
-
# <created_at><![CDATA[[]]></created_at>
|
45
|
-
# </sphinx:document>
|
46
|
-
# <sphinx:document id="423423">
|
47
|
-
# <csphinx-class>336,623,883,1140</csphinx-class>
|
48
|
-
# <classname>Address</classname>
|
49
|
-
# <name><![CDATA[[Sue]]></name>
|
50
|
-
# <phone><![CDATA[[]]></phone>
|
51
|
-
# <mobile><![CDATA[[828 19439]]></mobile>
|
52
|
-
# <created_at><![CDATA[[]]></created_at>
|
53
|
-
# </sphinx:document>
|
54
|
-
# </sphinx:docset>"
|
55
|
-
|
56
|
-
module Indexer
|
57
|
-
|
58
|
-
# Class XMLDocset wraps the XML representation of a document to index. It
|
59
|
-
# contains a complete "sphinx:docset" including its schema definition.
|
60
|
-
|
61
|
-
class XMLDocset
|
62
|
-
|
63
|
-
# Streams xml of all objects in a klass to the stdout. This makes sure you can process large collections.
|
64
|
-
#
|
65
|
-
# Options:
|
66
|
-
# attributes (required) - The attributes that are put in the sphinx xml.
|
67
|
-
#
|
68
|
-
# Example:
|
69
|
-
# MongoSphinx::Indexer::XMLDocset.stream(Document, :attributes => %w(title content))
|
70
|
-
# This will create an XML stream to stdout.
|
71
|
-
#
|
72
|
-
# Configure in your sphinx.conf like
|
73
|
-
# xmlpipe_command = ./script/runner "MongoSphinx::Indexer::XMLDocset.stream(Document, :attributes => %w(title content))"
|
74
|
-
#
|
75
|
-
def self.stream(klass)
|
76
|
-
STDOUT.sync = true # Make sure we really stream..
|
77
|
-
|
78
|
-
puts '<?xml version="1.0" encoding="utf-8"?>'
|
79
|
-
puts '<sphinx:docset>'
|
80
|
-
|
81
|
-
# Schema
|
82
|
-
puts '<sphinx:schema>'
|
83
|
-
klass.search_fields.each do |key, value|
|
84
|
-
puts "<sphinx:field name=\"#{key}\"/>"
|
85
|
-
end
|
86
|
-
# FIXME: What is this attribute?
|
87
|
-
puts '<sphinx:field name="classname"/>'
|
88
|
-
puts '<sphinx:attr name="csphinx-class" type="multi"/>'
|
89
|
-
puts '</sphinx:schema>'
|
90
|
-
|
91
|
-
collection = Mongoid.database.collection(klass.collection.name)
|
92
|
-
collection.find.each do |document_hash|
|
93
|
-
XMLDoc.stream_for_hash(document_hash, klass)
|
94
|
-
end
|
95
|
-
|
96
|
-
puts '</sphinx:docset>'
|
97
|
-
end
|
98
|
-
|
99
|
-
end
|
100
|
-
|
101
|
-
class XMLDoc
|
102
|
-
|
103
|
-
def self.stream_for_hash(hash, klass)
|
104
|
-
sphinx_compatible_id = hash['_id'].to_s.to_i - 100000000000000000000000
|
105
|
-
|
106
|
-
puts "<sphinx:document id=\"#{sphinx_compatible_id}\">"
|
107
|
-
# FIXME: Should we include this?
|
108
|
-
puts '<csphinx-class>'
|
109
|
-
puts MongoidSphinx::MultiAttribute.encode(klass.to_s)
|
110
|
-
puts '</csphinx-class>'
|
111
|
-
puts "<classname>#{klass.to_s}</classname>"
|
112
|
-
|
113
|
-
klass.search_fields.each do |key|
|
114
|
-
value = hash[key.to_s]
|
115
|
-
puts "<#{key}><![CDATA[[#{value}]]></#{key}>"
|
116
|
-
end
|
117
|
-
|
118
|
-
puts '</sphinx:document>'
|
119
|
-
end
|
120
|
-
|
121
|
-
end
|
122
|
-
end
|
123
|
-
end
|
@@ -1,57 +0,0 @@
|
|
1
|
-
# MongoSphinx, a full text indexing extension for MongoDB using
|
2
|
-
# Sphinx.
|
3
|
-
#
|
4
|
-
# This file contains the MongoSphinx::MultiAttribute class.
|
5
|
-
|
6
|
-
# Namespace module for the MongoSphinx gem.
|
7
|
-
|
8
|
-
module MongoidSphinx #:nodoc:
|
9
|
-
|
10
|
-
# Module MultiAttribute implements helpers to translate back and
|
11
|
-
# forth between Ruby Strings and an array of integers suitable for Sphinx
|
12
|
-
# attributes of type "multi".
|
13
|
-
#
|
14
|
-
# Background: Getting an ID as result for a query is OK, but for example to
|
15
|
-
# allow cast safety, we need an aditional attribute. Sphinx supports
|
16
|
-
# attributes which are returned together with the ID, but they behave a
|
17
|
-
# little different than expected: Instead we can use arrays of integers with
|
18
|
-
# ASCII character codes. These values are returned in ascending (!) order of
|
19
|
-
# value (yes, sounds funny but is reasonable from an internal view to
|
20
|
-
# Sphinx). So we mask each byte with 0x0100++ to keep the order...
|
21
|
-
#
|
22
|
-
# Sample:
|
23
|
-
#
|
24
|
-
# MongoSphinx::MultiAttribute.encode('Hello')
|
25
|
-
# => "328,613,876,1132,1391"
|
26
|
-
# MongoSphinx::MultiAttribute.decode('328,613,876,1132,1391')
|
27
|
-
# => "Hello"
|
28
|
-
|
29
|
-
module MultiAttribute
|
30
|
-
|
31
|
-
# Returns an numeric representation of a Ruby String suitable for "multi"
|
32
|
-
# attributes of Sphinx.
|
33
|
-
#
|
34
|
-
# Parameters:
|
35
|
-
#
|
36
|
-
# [str] String to translate
|
37
|
-
|
38
|
-
def self.encode(str)
|
39
|
-
offset = 0
|
40
|
-
return str.bytes.collect { |c| (offset+= 0x0100) + c }.join(',')
|
41
|
-
end
|
42
|
-
|
43
|
-
# Returns the original MongoDB ID created from a Sphinx ID. Only works if
|
44
|
-
# the ID was created from a MongoDB ID before!
|
45
|
-
#
|
46
|
-
# Parameters:
|
47
|
-
#
|
48
|
-
# [multi] Sphinx "multi" attribute to translate back
|
49
|
-
|
50
|
-
def self.decode(multi)
|
51
|
-
offset = 0
|
52
|
-
multi = multi.split(',') if not multi.kind_of? Array
|
53
|
-
|
54
|
-
return multi.collect {|x| (x.to_i-(offset+=0x0100)).chr}.to_s
|
55
|
-
end
|
56
|
-
end
|
57
|
-
end
|