bdimcheff-dm-sphinx-adapter 0.8.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/README.rdoc +200 -0
- data/Rakefile +49 -0
- data/VERSION.yml +4 -0
- data/lib/dm-sphinx-adapter.rb +23 -0
- data/lib/dm-sphinx-adapter/adapter.rb +200 -0
- data/lib/dm-sphinx-adapter/attribute.rb +83 -0
- data/lib/dm-sphinx-adapter/collection.rb +19 -0
- data/lib/dm-sphinx-adapter/index.rb +38 -0
- data/lib/dm-sphinx-adapter/query.rb +68 -0
- data/lib/dm-sphinx-adapter/resource.rb +120 -0
- data/lib/dm-sphinx-adapter/xmlpipe2.rb +94 -0
- data/lib/riddle.rb +28 -0
- data/lib/riddle/client.rb +619 -0
- data/lib/riddle/client/filter.rb +53 -0
- data/lib/riddle/client/message.rb +65 -0
- data/lib/riddle/client/response.rb +84 -0
- data/test/files/model.rb +24 -0
- data/test/files/source.xml +39 -0
- data/test/files/sphinx.conf +36 -0
- data/test/files/test_xmlpipe2.xml +1 -0
- data/test/helper.rb +46 -0
- data/test/test_adapter.rb +68 -0
- data/test/test_attribute.rb +36 -0
- data/test/test_collection.rb +26 -0
- data/test/test_index.rb +30 -0
- data/test/test_query.rb +61 -0
- data/test/test_resource.rb +17 -0
- data/test/test_xmlpipe2.rb +77 -0
- metadata +97 -0
@@ -0,0 +1,83 @@
|
|
1
|
+
require 'date'
|
2
|
+
require 'time'
|
3
|
+
|
4
|
+
module DataMapper
|
5
|
+
module Adapters
|
6
|
+
module Sphinx
|
7
|
+
|
8
|
+
# Sphinx attribute definition.
|
9
|
+
#
|
10
|
+
# You must declare attributes as such if you want to use them for sorting or conditions.
|
11
|
+
#
|
12
|
+
# ==== Notes
|
13
|
+
# The following primitives will be used as sql_attr_* types. Some liberty has been taken to accommodate as
|
14
|
+
# many DM primitives as possible.
|
15
|
+
#
|
16
|
+
# TrueClass:: sql_attr_bool
|
17
|
+
# String:: sql_attr_str2ordinal
|
18
|
+
# DataMapper::Types::Text:: sql_attr_str2ordinal
|
19
|
+
# Float:: sql_attr_float
|
20
|
+
# Integer:: sql_attr_uint
|
21
|
+
# BigDecimal:: sql_attr_float
|
22
|
+
# DateTime:: sql_attr_timestamp
|
23
|
+
# Date:: sql_attr_timestamp
|
24
|
+
# Time:: sql_attr_timestamp
|
25
|
+
# DataMapper::Types::Serial:: sql_attr_uint
|
26
|
+
class Attribute < Property

  # DataMapper types supported as Sphinx attributes.
  TYPES = [
    TrueClass,                # sql_attr_bool
    String,                   # sql_attr_str2ordinal
    DataMapper::Types::Text,  # sql_attr_str2ordinal
    Float,                    # sql_attr_float
    Integer,                  # sql_attr_uint
    BigDecimal,               # sql_attr_float
    DateTime,                 # sql_attr_timestamp
    Date,                     # sql_attr_timestamp
    Time,                     # sql_attr_timestamp
    # Object,
    # Class,
    # DataMapper::Types::Discriminator,
    DataMapper::Types::Serial # sql_attr_uint
  ]

  # Create a riddle client filter from a value.
  #
  # ==== Parameters
  # value<Object>::
  #   The filter value to typecast and include/exclude. Arrays and ranges are passed through as a whole, any
  #   other value is wrapped in a single element array.
  #
  # inclusive<Boolean>::
  #   Include or exclude results matching the filter value. Default: inclusive (true).
  #
  # ==== Returns
  # Riddle::Client::Filter::
  def filter(value, inclusive = true)
    # Riddle takes an "exclude" flag (false by default); this method exposes the positive form and inverts it
    # when building the filter.
    value = typecast(value)
    value = [value] unless value.quacks_like?([Array, Range])
    Riddle::Client::Filter.new(field, value, !inclusive)
  end

  # Typecasts the value into a sphinx primitive. Supports ranges or arrays of values.
  #
  # ==== Notes
  # * Some loss of precision may occur when casting BigDecimal to Float.
  # * Date, DateTime and Time values are converted to integer epoch seconds.
  # * Ranges keep their exclusivity (1...5 stays an exclusive range after typecasting).
  #
  # ==== Parameters
  # value<Object>:: A single value, an Array of values or a Range of values.
  #
  # ==== Returns
  # Object:: The typecast value, with the same Array/Range shape as the input.
  def typecast(value)
    if value.kind_of?(Range)
      # Both endpoints are typecast individually; exclude_end? is carried over so a...b is not widened to a..b.
      Range.new(typecast(value.first), typecast(value.last), value.exclude_end?)
    elsif value.kind_of?(Array)
      value.map{|v| typecast(v)}
    elsif primitive == BigDecimal
      super(value).to_f
    elsif primitive == DateTime
      Time.parse(super(value).to_s).to_i
    elsif primitive == Date
      Time.parse(super(value).to_s).to_i
    elsif primitive == Time
      super(value).to_i
    else
      super(value) # Any other primitive is left to the parent class typecast.
    end
  end

end # Attribute
|
81
|
+
end # Sphinx
|
82
|
+
end # Adapters
|
83
|
+
end # DataMapper
|
@@ -0,0 +1,19 @@
|
|
1
|
+
module DataMapper
|
2
|
+
module Adapters
|
3
|
+
module Sphinx
|
4
|
+
# Array of search matches that also carries the metadata found in the search result hash.
class Collection < Array
  attr_accessor :error, :time, :total, :words

  # ==== Parameters
  # result<Hash>::
  #   Search result with :error, :time, :total, :words and :matches keys. Every entry in :matches gets an :id
  #   key added (copied from its :doc key); the entries are modified in place.
  def initialize(result)
    # TODO: One liner that works in Ruby 1.x now #indexes is #keys?
    @error, @time, @total, @words = result[:error], result[:time], result[:total], result[:words]

    documents = result[:matches].map do |match|
      match[:id] = match[:doc]
      match
    end
    super(documents)
  end

end
|
17
|
+
end # Sphinx
|
18
|
+
end # Adapters
|
19
|
+
end # DataMapper
|
@@ -0,0 +1,38 @@
|
|
1
|
+
module DataMapper
|
2
|
+
module Adapters
|
3
|
+
module Sphinx
|
4
|
+
|
5
|
+
# Sphinx index definition.
|
6
|
+
class Index
  include Assertions

  # The model the index belongs to, the index name (always a Symbol) and the options hash given at creation.
  attr_reader :model, :name, :options

  # ==== Parameters
  # model<DataMapper::Model>:: Your resources model.
  # name<Symbol, String>:: The index name.
  # options<Hash>:: Optional arguments.
  #
  # ==== Options
  # :delta<Boolean>::
  #   Delta index. Delta indexes will be searched last when multiple indexes are defined for a
  #   resource. Default is false.
  def initialize(model, name, options = {})
    assert_kind_of 'model',   model,   Model
    assert_kind_of 'name',    name,    Symbol, String
    assert_kind_of 'options', options, Hash

    @model   = model
    @name    = name.to_sym
    @options = options # Backs the #options reader.
    @delta   = options.fetch(:delta, false)
  end

  # Is the index a delta index.
  #
  # ==== Returns
  # Boolean:: true when the :delta option was truthy, false otherwise.
  def delta?
    !!@delta
  end
end # Index
|
36
|
+
end # Sphinx
|
37
|
+
end # Adapters
|
38
|
+
end # DataMapper
|
@@ -0,0 +1,68 @@
|
|
1
|
+
module DataMapper
|
2
|
+
module Adapters
|
3
|
+
module Sphinx
|
4
|
+
|
5
|
+
# Sphinx extended search query string from DataMapper query.
|
6
|
+
class Query
  include Extlib::Assertions

  # Build the extended Sphinx query terms from a DataMapper::Query object.
  #
  # A query without conditions produces a single '' term, possibly triggering Sphinx's full scan mode.
  #
  # ==== See
  # * http://www.sphinxsearch.com/doc.html#searching
  # * http://www.sphinxsearch.com/doc.html#conf-docinfo
  # * http://www.sphinxsearch.com/doc.html#extended-syntax
  #
  # ==== Raises
  # NotImplementedError:: DataMapper operators that can't be expressed in the extended sphinx query syntax.
  #
  # ==== Parameters
  # query<DataMapper::Query>:: DataMapper query object.
  def initialize(query)
    assert_kind_of 'query', query, DataMapper::Query
    @query = []

    if query.conditions.empty?
      @query << ''
      return
    end

    query.conditions.each do |operator, property, value|
      # Conditions on attributes become filters, which are added elsewhere.
      next if property.kind_of? Sphinx::Attribute

      terms = normalize_value(value)
      # Raw conditions carry the query text in the property slot, so there is no field to look up.
      field = property.field(query.repository.name) unless operator == :raw
      @query << condition_expression(operator, property, field, terms)
    end
  end

  # ==== Returns
  # String:: The extended sphinx query string (all terms joined by a single space).
  def to_s
    @query.join(' ')
  end

  protected

  # Render one condition in the extended sphinx query syntax.
  #
  # ==== Parameters
  # operator<Symbol>:: The DataMapper condition operator.
  # property<Object>:: The condition subject; used verbatim for :raw conditions.
  # field<Object>:: The field name to search in (nil for :raw conditions).
  # terms<Array>:: Escaped query values.
  #
  # ==== Raises
  # NotImplementedError:: For any operator other than :eql, :like, :not, :in and :raw.
  #
  # ==== Returns
  # String:: The query fragment.
  def condition_expression(operator, property, field, terms)
    case operator
    when :eql, :like
      format('@%s "%s"', field.to_s, terms.join(' '))
    when :not
      format('@%s -"%s"', field.to_s, terms.join(' '))
    when :in
      quoted = terms.map { |term| "\"#{term}\"" }
      format('@%s (%s)', field.to_s, quoted.join(' | '))
    when :raw
      "#{property}"
    else
      raise NotImplementedError, "Sphinx: Query fields do not support the #{operator} operator"
    end
  end

  # Normalize and escape DataMapper query value(s) to escaped sphinx query values.
  #
  # ==== Parameters
  # value<String, Array>:: The query value.
  #
  # ==== Returns
  # Array:: An array of one or more query values, each with sphinx query operators backslash escaped.
  def normalize_value(value)
    [value].flatten.map do |item|
      item.to_s.gsub(/[\(\)\|\-!@~"&\/]/) { |special| '\\' + special }
    end
  end
end # Query
|
66
|
+
end # Sphinx
|
67
|
+
end # Adapters
|
68
|
+
end # DataMapper
|
@@ -0,0 +1,120 @@
|
|
1
|
+
module DataMapper
|
2
|
+
module Adapters
|
3
|
+
module Sphinx
|
4
|
+
|
5
|
+
# Declare Sphinx indexes and attributes in your resource.
|
6
|
+
#
|
7
|
+
# class Items
|
8
|
+
# include DataMapper::SphinxResource
|
9
|
+
#
|
10
|
+
# # .. normal properties and such for :default
|
11
|
+
#
|
12
|
+
# repository(:search) do
|
13
|
+
# # Query some_index, some_index_delta in that order.
|
14
|
+
# index :some_index
|
15
|
+
# index :some_index_delta, :delta => true
|
16
|
+
#
|
17
|
+
# # Sortable by some attributes.
|
18
|
+
# attribute :updated_at, DateTime # sql_attr_timestamp
|
19
|
+
# attribute :age, Integer # sql_attr_uint
|
20
|
+
# attribute :deleted, Boolean # sql_attr_bool
|
21
|
+
# end
|
22
|
+
# end
|
23
|
+
module Resource

  # Register extra modules to include in every model that includes this module from now on.
  #
  # ==== Parameters
  # inclusions<Module>:: One or more modules.
  #
  # ==== Returns
  # TrueClass:: Always true.
  def self.append_inclusions(*inclusions)
    extra_inclusions.concat inclusions
    true
  end

  # ==== Returns
  # Array<Module>:: The modules registered through append_inclusions.
  def self.extra_inclusions
    @extra_inclusions ||= []
  end

  def self.included(model) #:nodoc:
    model.send(:include, DataMapper::Resource)
    model.extend ClassMethods if defined?(ClassMethods)
    extra_inclusions.each{|inclusion| model.send(:include, inclusion)}
  end

  module ClassMethods
    def self.extended(model) #:nodoc:
      model.instance_variable_set(:@sphinx_indexes, {})
      model.instance_variable_set(:@sphinx_attributes, {})
    end

    # Defines a sphinx index on the resource.
    #
    # Indexes are naturally ordered, with delta indexes at the end of the list so that duplicate document IDs in
    # delta indexes override your main indexes.
    #
    # ==== See
    # * DataMapper::Adapters::Sphinx::Index
    #
    # ==== Parameters
    # name<Symbol>:: The name of a sphinx index to search for this resource.
    # options<Hash>:: A hash of available index options.
    #
    # ==== Returns
    # DataMapper::Adapters::Sphinx::Index:: The newly declared index.
    def index(name, options = {})
      index   = Index.new(self, name, options)
      indexes = sphinx_indexes(repository_name)
      indexes << index

      # Non-delta indexes first, then by name within each group. A composite sort key is used because chaining
      # comparisons with || does not work in Ruby: <=> returns 0 for equal keys and 0 is truthy.
      indexes.replace(indexes.sort_by{|i| [(i.delta? ? 1 : 0), i.name.to_s]})

      index
    end

    # List of declared sphinx indexes for this model.
    #
    # ==== Returns
    # Array<DataMapper::Adapters::Sphinx::Index>
    def sphinx_indexes(repository_name = default_repository_name)
      @sphinx_indexes[repository_name] ||= []
    end

    # Defines a sphinx attribute on the resource.
    #
    # ==== See
    # DataMapper::Adapters::Sphinx::Attribute
    #
    # ==== Parameters
    # name<Symbol>:: The name of a sphinx attribute to order/restrict by for this resource.
    # type<Class>:: The type to define this attribute as.
    # options<Hash>:: An optional hash of attribute options.
    #
    # ==== Returns
    # DataMapper::Adapters::Sphinx::Attribute:: The newly declared attribute.
    def attribute(name, type, options = {})
      # Attributes are just properties without a getter/setter in the model.
      # This keeps DataMapper::Query happy when building queries.
      attribute = Sphinx::Attribute.new(self, name, type, options)
      properties(repository_name)[attribute.name] = attribute
      attribute
    end

    # List of declared sphinx attributes for this model.
    #
    # ==== Returns
    # Array<DataMapper::Adapters::Sphinx::Attribute>
    def sphinx_attributes(repository_name = default_repository_name)
      properties(repository_name).find_all{|p| p.kind_of? Sphinx::Attribute}
    end

    # List of properties (aka sphinx fields).
    #
    # These are all properties that have not been declared as sphinx attributes.
    #
    # ==== Returns
    # Array:: The properties that are not DataMapper::Adapters::Sphinx::Attribute instances.
    def sphinx_fields(repository_name = default_repository_name)
      properties(repository_name).reject{|p| p.kind_of? Sphinx::Attribute}
    end

  end # ClassMethods
end # Resource
|
114
|
+
end # Sphinx
|
115
|
+
end # Adapters
|
116
|
+
|
117
|
+
# Follow DM naming convention.
|
118
|
+
SphinxResource = Adapters::Sphinx::Resource
|
119
|
+
end # DataMapper
|
120
|
+
|
@@ -0,0 +1,94 @@
|
|
1
|
+
module DataMapper
|
2
|
+
module Adapters
|
3
|
+
module Sphinx
|
4
|
+
require 'builder'
|
5
|
+
|
6
|
+
|
7
|
+
# Sphinx xmlpipe2.
|
8
|
+
#
|
9
|
+
# Full text search data from any DM adapter without having to implement new Sphinx data sources drivers.
|
10
|
+
#
|
11
|
+
# ==== See
|
12
|
+
# * http://www.sphinxsearch.com/docs/current.html#xmlpipe2
|
13
|
+
#
|
14
|
+
#--
|
15
|
+
# TODO:
|
16
|
+
# * Synopsis.
|
17
|
+
module XmlPipe2
  def self.included(model)
    model.extend ClassMethods if defined?(ClassMethods)
  end

  module ClassMethods

    # Write a Sphinx xmlpipe2 XML stream to $stdout.
    #
    # The stream consists of a schema (one field per sphinx field, one attr per sphinx attribute of the
    # destination repository) followed by one document per resource found in the source repository.
    #
    # ==== Parameters
    # source<String>:: The name of the repository to stream from.
    # destination<String>:: The name of the repository to stream to (contains your sphinx definition).
    # query<Hash>:: The conditions with which to find the records to stream.
    #--
    # TODO:
    # * in_memory_adapter doesn't call the super constructor so there is no field_naming_convention set in
    #   DataMapper 0.9.10. Submit a patch or live with rescue and field.name clause?
    # * Keys that aren't called .id?
    # * Composite keys?
    # * Method for schema and documents.
    # * Less poking round in the internals of the :default adapter if I can?
    # * Destination should always be a dm-sphinx-adapter adapter.
    # * Optional schema since it overrides any schema you might define in the sphinx configuration.
    # * Schema default values from DM property default values.
    def xmlpipe2(source, destination = :default, query = {})
      builder = Builder::XmlMarkup.new(:target => $stdout)
      builder.instruct!
      builder.sphinx(:docset, :'xmlns:sphinx' => 'sphinx') do

        builder.sphinx(:schema) do
          sphinx_fields(destination).each do |field|
            # Falls back to the property name when the field name can't be resolved (see TODO above).
            builder.sphinx(:field, :name => (field.field(destination) rescue field.name))
          end
          sphinx_attributes(destination).each do |attr|
            builder.sphinx(:attr, {
              :name => (attr.field(destination) rescue attr.name),
              :type => xmlpipe2_type(attr.primitive)
            })
          end
        end

        # Plain iteration: documents are written as they are visited, nothing is collected.
        all(query.merge(:repository => repository(source))).each do |resource|
          builder.sphinx(:document, :id => resource.id) do
            properties(destination).each do |property|
              # TODO: Pretty sure this isn't the correct way to get and typecast.
              builder.tag!((property.field(destination) rescue property.name)) do |field|
                # Typecast values may be numbers (timestamps, floats); CDATA content must be a String.
                field.cdata!(property.typecast(property.get(resource)).to_s)
              end
            end
          end
        end
      end
    end

    private

    # Maps a property primitive to the xmlpipe2 attribute type name, nil when the primitive has no mapping.
    def xmlpipe2_type(primitive) #:nodoc:
      {
        Integer                 => 'int',
        Float                   => 'float',
        BigDecimal              => 'float',
        DateTime                => 'timestamp',
        Date                    => 'timestamp',
        Time                    => 'timestamp',
        TrueClass               => 'bool',
        String                  => 'str2ordinal',
        DataMapper::Types::Text => 'str2ordinal'
      }[primitive]
    end

  end # ClassMethods
end # XmlPipe2
|
88
|
+
|
89
|
+
# Include XmlPipe2 in all DM::A::SphinxResource models when you require this file.
|
90
|
+
Resource.append_inclusions XmlPipe2
|
91
|
+
end # Sphinx
|
92
|
+
end # Adapters
|
93
|
+
end # DataMapper
|
94
|
+
|
data/lib/riddle.rb
ADDED
@@ -0,0 +1,28 @@
|
|
1
|
+
require 'socket'
|
2
|
+
require 'timeout'
|
3
|
+
|
4
|
+
require 'riddle/client'
|
5
|
+
|
6
|
+
module Riddle #:nodoc:
  class ConnectionError < StandardError; end #:nodoc:

  module Version #:nodoc:
    Major = 0
    Minor = 9
    Tiny  = 8
    # Revision number for RubyForge's sake, taken from what Sphinx
    # outputs to the command line.
    Rev = 1533
    # Release number to mark my own fixes, beyond feature parity with
    # Sphinx itself.
    Release = 2

    # "Major.Minor.Tiny"
    String = "#{Major}.#{Minor}.#{Tiny}"
    # "Major.Minor.Tiny.Rev.Release"; String here is the version string defined on the line above.
    GemVersion = "#{String}.#{Rev}.#{Release}"
  end

  # Backslash escape the characters ( ) | - ! @ ~ " & / in a string.
  #
  # string:: The String to escape.
  #
  # Returns a new, escaped String.
  def self.escape(string)
    string.gsub(/[\(\)\|\-!@~"&\/]/) { |special| '\\' + special }
  end
end
|