bdimcheff-dm-sphinx-adapter 0.8.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,83 @@
1
+ require 'date'
2
+ require 'time'
3
+
4
+ module DataMapper
5
+ module Adapters
6
+ module Sphinx
7
+
8
+ # Sphinx attribute definition.
9
+ #
10
+ # You must declare attributes as such if you want to use them for sorting or conditions.
11
+ #
12
+ # ==== Notes
13
+       # The following primitives will be used as sql_attr_* types. Some liberty has been taken to accommodate as
14
+ # many DM primitives as possible.
15
+ #
16
+ # TrueClass:: sql_attr_bool
17
+ # String:: sql_attr_str2ordinal
18
+ # DataMapper::Types::Text:: sql_attr_str2ordinal
19
+ # Float:: sql_attr_float
20
+ # Integer:: sql_attr_uint
21
+ # BigDecimal:: sql_attr_float
22
+ # DateTime:: sql_attr_timestamp
23
+ # Date:: sql_attr_timestamp
24
+ # Time:: sql_attr_timestamp
25
+ # DataMapper::Types::Serial:: sql_attr_uint
26
+ class Attribute < Property
27
+
28
+ # DataMapper types supported as Sphinx attributes.
29
+ TYPES = [
30
+ TrueClass, # sql_attr_bool
31
+ String, # sql_attr_str2ordinal
32
+ DataMapper::Types::Text, # sql_attr_str2ordinal
33
+ Float, # sql_attr_float
34
+ Integer, # sql_attr_uint
35
+ BigDecimal, # sql_attr_float
36
+ DateTime, # sql_attr_timestamp
37
+ Date, # sql_attr_timestamp
38
+ Time, # sql_attr_timestamp
39
+ # Object,
40
+ # Class,
41
+ # DataMapper::Types::Discriminator,
42
+ DataMapper::Types::Serial # sql_attr_uint
43
+ ]
44
+
45
+ # Create a riddle client filter from a value.
46
+ #
47
+ # ==== Parameters
48
+ # value<Object>::
49
+ # The filter value to typecast and include/exclude.
50
+ #
51
+ # inclusive<Boolean>::
52
+ # Include or exclude results matching the filter value. Default: inclusive (true).
53
+ #
54
+ # ==== Returns
55
+ # Riddle::Client::Filter::
56
+ def filter(value, inclusive = true)
57
+ # Riddle uses exclusive = false as the default which doesn't read well IMO. Nobody says "Yes I don't want
58
+ # these values" you say "No I don't want these values".
59
+ value = typecast(value)
60
+ value = [value] unless value.quacks_like?([Array, Range])
61
+ Riddle::Client::Filter.new(field, value, !inclusive)
62
+ end
63
+
64
+ # Typecasts the value into a sphinx primitive. Supports ranges or arrays of values.
65
+ #
66
+ # ==== Notes
67
+ # Some loss of precision may occur when casting BigDecimal to Float.
68
+ def typecast(value)
69
+ if value.kind_of?(Range) then Range.new(typecast(value.first), typecast(value.last))
70
+ elsif value.kind_of?(Array) then value.map{|v| typecast(v)}
71
+ elsif primitive == BigDecimal then super(value).to_f
72
+ elsif primitive == DateTime then Time.parse(super(value).to_s).to_i
73
+ elsif primitive == Date then Time.parse(super(value).to_s).to_i
74
+ elsif primitive == Time then super(value).to_i
75
+ else
76
+ super(value) # Good luck
77
+ end
78
+ end
79
+
80
+ end # Attribute
81
+ end # Sphinx
82
+ end # Adapters
83
+ end # DataMapper
@@ -0,0 +1,19 @@
1
+ module DataMapper
2
+ module Adapters
3
+ module Sphinx
4
+ class Collection < Array
5
+ attr_accessor :error, :time, :total, :words
6
+
7
+ def initialize(result)
8
+ # TODO: One liner that works in Ruby 1.x now #indexes is #keys?
9
+ @error = result[:error]
10
+ @time = result[:time]
11
+ @total = result[:total]
12
+ @words = result[:words]
13
+ super result[:matches].map{|doc| doc[:id] = doc[:doc]; doc}
14
+ end
15
+
16
+ end
17
+ end # Sphinx
18
+ end # Adapters
19
+ end # DataMapper
@@ -0,0 +1,38 @@
1
+ module DataMapper
2
+ module Adapters
3
+ module Sphinx
4
+
5
+ # Sphinx index definition.
6
+ class Index
7
+ include Assertions
8
+
9
+ # Options.
10
+ attr_reader :model, :name, :options
11
+
12
+ # ==== Parameters
13
+ # model<DataMapper::Model>:: Your resources model.
14
+ # name<Symbol, String>:: The index name.
15
+ # options<Hash>:: Optional arguments.
16
+ #
17
+ # ==== Options
18
+ # :delta<Boolean>::
19
+ # Delta index. Delta indexes will be searched last when multiple indexes are defined for a
20
+ # resource. Default is false.
21
+ def initialize(model, name, options = {})
22
+ assert_kind_of 'model', model, Model
23
+ assert_kind_of 'name', name, Symbol, String
24
+ assert_kind_of 'options', options, Hash
25
+
26
+ @model = model
27
+ @name = name.to_sym
28
+ @delta = options.fetch(:delta, false)
29
+ end
30
+
31
+ # Is the index a delta index.
32
+ def delta?
33
+ !!@delta
34
+ end
35
+ end # Index
36
+ end # Sphinx
37
+ end # Adapters
38
+ end # DataMapper
@@ -0,0 +1,68 @@
1
+ module DataMapper
2
+ module Adapters
3
+ module Sphinx
4
+
5
+ # Sphinx extended search query string from DataMapper query.
6
+ class Query
7
+ include Extlib::Assertions
8
+
9
+ # Initialize a new extended Sphinx query from a DataMapper::Query object.
10
+ #
11
+ # If the query has no conditions an '' empty string will be generated possibly triggering Sphinx's full scan
12
+ # mode.
13
+ #
14
+ # ==== See
15
+ # * http://www.sphinxsearch.com/doc.html#searching
16
+ # * http://www.sphinxsearch.com/doc.html#conf-docinfo
17
+ # * http://www.sphinxsearch.com/doc.html#extended-syntax
18
+ #
19
+ # ==== Raises
20
+ # NotImplementedError:: DataMapper operators that can't be expressed in the extended sphinx query syntax.
21
+ #
22
+ # ==== Parameters
23
+ # query<DataMapper::Query>:: DataMapper query object.
24
+ def initialize(query)
25
+ assert_kind_of 'query', query, DataMapper::Query
26
+ @query = []
27
+
28
+ if query.conditions.empty?
29
+ @query << ''
30
+ else
31
+ query.conditions.each do |operator, property, value|
32
+ next if property.kind_of? Sphinx::Attribute # Filters are added elsewhere.
33
+ normalized = normalize_value(value)
34
+ field = property.field(query.repository.name) unless operator == :raw
35
+ @query << case operator
36
+ when :eql, :like then '@%s "%s"' % [field.to_s, normalized.join(' ')]
37
+ when :not then '@%s -"%s"' % [field.to_s, normalized.join(' ')]
38
+ when :in then '@%s (%s)' % [field.to_s, normalized.map{|v| %{"#{v}"}}.join(' | ')]
39
+ when :raw then "#{property}"
40
+ else raise NotImplementedError.new("Sphinx: Query fields do not support the #{operator} operator")
41
+ end
42
+ end
43
+ end
44
+ end
45
+
46
+ # ==== Returns
47
+ # String:: The extended sphinx query string.
48
+ def to_s
49
+ @query.join(' ')
50
+ end
51
+
52
+ protected
53
+ # Normalize and escape DataMapper query value(s) to escaped sphinx query values.
54
+ #
55
+ # ==== Parameters
56
+ # value<String, Array>:: The query value.
57
+ #
58
+ # ==== Returns
59
+ # Array:: An array of one or more query values.
60
+ def normalize_value(value)
61
+ [value].flatten.map do |v|
62
+ v.to_s.gsub(/[\(\)\|\-!@~"&\/]/){|char| "\\#{char}"}
63
+ end
64
+ end
65
+ end # Query
66
+ end # Sphinx
67
+ end # Adapters
68
+ end # DataMapper
@@ -0,0 +1,120 @@
1
+ module DataMapper
2
+ module Adapters
3
+ module Sphinx
4
+
5
+ # Declare Sphinx indexes and attributes in your resource.
6
+ #
7
+       #   class Items
8
+ # include DataMapper::SphinxResource
9
+ #
10
+ # # .. normal properties and such for :default
11
+ #
12
+ # repository(:search) do
13
+ # # Query some_index, some_index_delta in that order.
14
+ # index :some_index
15
+ # index :some_index_delta, :delta => true
16
+ #
17
+ # # Sortable by some attributes.
18
+ # attribute :updated_at, DateTime # sql_attr_timestamp
19
+ # attribute :age, Integer # sql_attr_uint
20
+ # attribute :deleted, Boolean # sql_attr_bool
21
+ # end
22
+ # end
23
+ module Resource
24
+
25
+ def self.append_inclusions(*inclusions)
26
+ extra_inclusions.concat inclusions
27
+ true
28
+ end
29
+
30
+ def self.extra_inclusions
31
+ @extra_inclusions ||= []
32
+ end
33
+
34
+ def self.included(model) #:nodoc:
35
+ model.send(:include, DataMapper::Resource)
36
+ model.extend ClassMethods if defined?(ClassMethods)
37
+ extra_inclusions.each{|inclusion| model.send(:include, inclusion)}
38
+ end
39
+
40
+ module ClassMethods
41
+ def self.extended(model) #:nodoc:
42
+ model.instance_variable_set(:@sphinx_indexes, {})
43
+ model.instance_variable_set(:@sphinx_attributes, {})
44
+ end
45
+
46
+ # Defines a sphinx index on the resource.
47
+ #
48
+ # Indexes are naturally ordered, with delta indexes at the end of the list so that duplicate document IDs in
49
+ # delta indexes override your main indexes.
50
+ #
51
+ # ==== See
52
+ # * DataMapper::Adapters::Sphinx::Index
53
+ #
54
+ # ==== Parameters
55
+ # name<Symbol>:: The name of a sphinx index to search for this resource.
56
+ # options<Hash>:: A hash of available index options.
57
+ def index(name, options = {})
58
+ index = Index.new(self, name, options)
59
+ indexes = sphinx_indexes(repository_name)
60
+ indexes << index
61
+
62
+ # TODO: I'm such a Ruby nub. In the meantime I've gone back to my Perl roots.
63
+ # This is a Schwartzian transform to sort delta indexes to the bottom and natural sort by name.
64
+ mapped = indexes.map{|i| [(i.delta? ? 1 : 0), i.name, i]}
65
+ sorted = mapped.sort{|a, b| a[0] <=> b[0] || a[1] <=> b[1]}
66
+ indexes.replace(sorted.map{|i| i[2]})
67
+
68
+ index
69
+ end
70
+
71
+ # List of declared sphinx indexes for this model.
72
+ #
73
+ # ==== Returns
74
+ # Array<DataMapper::Adapters::Sphinx::Index>
75
+ def sphinx_indexes(repository_name = default_repository_name)
76
+ @sphinx_indexes[repository_name] ||= []
77
+ end
78
+
79
+ # Defines a sphinx attribute on the resource.
80
+ #
81
+ # ==== See
82
+ # DataMapper::Adapters::Sphinx::Attribute
83
+ #
84
+ # ==== Parameters
85
+ # name<Symbol>:: The name of a sphinx attribute to order/restrict by for this resource.
86
+ # type<Class>:: The type to define this attribute as.
87
+ # options<Hash>:: An optional hash of attribute options.
88
+ def attribute(name, type, options = {})
89
+ # Attributes are just properties without a getter/setter in the model.
90
+ # This keeps DataMapper::Query happy when building queries.
91
+ attribute = Sphinx::Attribute.new(self, name, type, options)
92
+ properties(repository_name)[attribute.name] = attribute
93
+ attribute
94
+ end
95
+
96
+ # List of declared sphinx attributes for this model.
97
+ #
98
+ # ==== Returns
99
+ # Array<DataMapper::Adapters::Sphinx::Attribute>
100
+ def sphinx_attributes(repository_name = default_repository_name)
101
+ properties(repository_name).find_all{|p| p.kind_of? Sphinx::Attribute}
102
+ end
103
+
104
+ # List of properties (aka sphinx fields).
105
+ #
106
+ # This list will be the inverse of properties not declared as attributes.
107
+ # ==== Returns
108
+ def sphinx_fields(repository_name = default_repository_name)
109
+ properties(repository_name).reject{|p| p.kind_of? Sphinx::Attribute}
110
+ end
111
+
112
+ end # ClassMethods
113
+ end # Resource
114
+ end # Sphinx
115
+ end # Adapters
116
+
117
+ # Follow DM naming convention.
118
+ SphinxResource = Adapters::Sphinx::Resource
119
+ end # DataMapper
120
+
@@ -0,0 +1,94 @@
1
+ module DataMapper
2
+ module Adapters
3
+ module Sphinx
4
+ require 'builder'
5
+
6
+
7
+ # Sphinx xmlpipe2.
8
+ #
9
+ # Full text search data from any DM adapter without having to implement new Sphinx data sources drivers.
10
+ #
11
+ # ==== See
12
+ # * http://www.sphinxsearch.com/docs/current.html#xmlpipe2
13
+ #
14
+ #--
15
+ # TODO:
16
+ # * Synopsis.
17
+ module XmlPipe2
18
+ def self.included(model)
19
+ model.extend ClassMethods if defined?(ClassMethods)
20
+ end
21
+
22
+ module ClassMethods
23
+
24
+ # Write a Sphinx xmlpipe2 XML stream to $stdout.
25
+ #
26
+ # ==== Parameters
27
+ # source<String>:: The name of the repository to stream from.
28
+ # destination<String>:: The name of the repository to stream to (contains your sphinx definition).
29
+ # query<Hash>:: The conditions with which to find the records to stream.
30
+ #--
31
+ # TODO:
32
+ # * in_memory_adapter doesn't call the super constructor so there is no field_naming_convention set in
33
+ # DataMapper 0.9.10. Submit a patch or live with rescue and field.name clause?
34
+ # * Keys that aren't called .id?
35
+ # * Composite keys?
36
+ # * Method for schema and documents.
37
+ # * Less poking round in the internals of the :default adapter if I can?
38
+ # * Destination should always be a dm-sphinx-adapter adapter.
39
+ # * Optional schema since it overrides any schema you might define in the sphinx configuration.
40
+ # * Schema default values from DM property default values.
41
+ def xmlpipe2(source, destination = :default, query = {})
42
+ builder = Builder::XmlMarkup.new(:target => $stdout)
43
+ builder.instruct!
44
+ builder.sphinx(:docset, :'xmlns:sphinx' => 'sphinx') do
45
+
46
+ builder.sphinx(:schema) do
47
+ sphinx_fields(destination).each do |field|
48
+ builder.sphinx(:field, :name => (field.field(destination) rescue field.name))
49
+ end
50
+ sphinx_attributes(destination).each do |attr|
51
+ builder.sphinx(:attr, {
52
+ :name => (attr.field(destination) rescue attr.name),
53
+ :type => xmlpipe2_type(attr.primitive)
54
+ })
55
+ end
56
+ end
57
+
58
+ all(query.merge(:repository => repository(source))).map do |resource|
59
+ builder.sphinx(:document, :id => resource.id) do |document|
60
+ properties(destination).each do |property|
61
+ # TODO: Pretty sure this isn't the correct way to get and typecast.
62
+ builder.tag!((property.field(destination) rescue property.name)) do |field|
63
+ field.cdata!(property.typecast(property.get(resource)))
64
+ end
65
+ end
66
+ end
67
+ end
68
+ end
69
+ end
70
+
71
+ private
72
+ def xmlpipe2_type(primitive) #:nodoc:
73
+ {
74
+ Integer => 'int',
75
+ Float => 'float',
76
+ BigDecimal => 'float',
77
+ DateTime => 'timestamp',
78
+ Date => 'timestamp',
79
+ Time => 'timestamp',
80
+ TrueClass => 'bool',
81
+ String => 'str2ordinal',
82
+ DataMapper::Types::Text => 'str2ordinal'
83
+ }[primitive]
84
+ end
85
+
86
+ end # ClassMethods
87
+ end # XmlPipe2
88
+
89
+ # Include XmlPipe2 in all DM::A::SphinxResource models when you require this file.
90
+ Resource.append_inclusions XmlPipe2
91
+ end # Sphinx
92
+ end # Adapters
93
+ end # DataMapper
94
+
@@ -0,0 +1,28 @@
1
+ require 'socket'
2
+ require 'timeout'
3
+
4
+ require 'riddle/client'
5
+
6
module Riddle #:nodoc:
  # Error class declared here; nothing in this block raises it.
  class ConnectionError < StandardError #:nodoc:
  end

  module Version #:nodoc:
    Major = 0
    Minor = 9
    Tiny  = 8

    # Revision number for RubyForge's sake, taken from what Sphinx
    # outputs to the command line.
    Rev = 1533

    # Release number to mark my own fixes, beyond feature parity with
    # Sphinx itself.
    Release = 2

    # Dotted "Major.Minor.Tiny" version.
    String = "#{Major}.#{Minor}.#{Tiny}"

    # Dotted version including revision and release numbers.
    GemVersion = "#{Major}.#{Minor}.#{Tiny}.#{Rev}.#{Release}"
  end

  class << self
    # Prefix each of the characters ( ) | - ! @ ~ " & / in the string with a backslash.
    #
    # ==== Parameters
    # string<String>:: The text to escape.
    #
    # ==== Returns
    # String:: A new, escaped string.
    def escape(string)
      string.gsub(/[\(\)\|\-!@~"&\/]/) { |special| "\\" + special }
    end
  end
end