ultrasphinx 1.5.3 → 1.6
Sign up to get free protection for your applications and to get access to all the features.
- data.tar.gz.sig +0 -0
- data/CHANGELOG +7 -1
- data/Manifest +6 -6
- data/README +3 -3
- data/TODO +2 -1
- data/examples/default.base +1 -0
- data/lib/ultrasphinx.rb +2 -1
- data/lib/ultrasphinx/configure.rb +10 -11
- data/lib/ultrasphinx/core_extensions.rb +2 -1
- data/lib/ultrasphinx/fields.rb +29 -18
- data/lib/ultrasphinx/search.rb +77 -92
- data/lib/ultrasphinx/search/internals.rb +133 -102
- data/lib/ultrasphinx/ultrasphinx.rb +0 -8
- data/test/integration/app/app/models/geo/state.rb +2 -1
- data/test/integration/app/app/models/person/user.rb +2 -1
- data/test/integration/app/config/environment.rb +0 -2
- data/test/integration/app/config/ultrasphinx/development.conf +6 -6
- data/test/integration/app/config/ultrasphinx/development.conf.canonical +6 -6
- data/test/integration/app/test/fixtures/sellers.yml +2 -2
- data/test/integration/app/test/fixtures/users.yml +2 -2
- data/test/integration/search_test.rb +67 -40
- data/test/setup.rb +3 -0
- data/ultrasphinx.gemspec +18 -17
- data/vendor/riddle/MIT-LICENSE +20 -0
- data/vendor/riddle/riddle.rb +15 -0
- data/vendor/riddle/riddle/client.rb +409 -0
- data/vendor/riddle/riddle/client/filter.rb +42 -0
- data/vendor/riddle/riddle/client/message.rb +54 -0
- data/vendor/riddle/riddle/client/response.rb +67 -0
- metadata +22 -16
- metadata.gz.sig +0 -0
- data/test/ts.multi +0 -2
- data/vendor/sphinx/LICENSE +0 -58
- data/vendor/sphinx/README +0 -40
- data/vendor/sphinx/Rakefile +0 -21
- data/vendor/sphinx/init.rb +0 -1
- data/vendor/sphinx/lib/client.rb +0 -647
module Riddle
  class Client
    # Used for querying Sphinx. Wraps a single attribute filter: the
    # attribute name, the allowed values (an Array of ints or a Range),
    # and whether the filter is inclusive or exclusive.
    class Filter
      attr_accessor :attribute, :values, :exclude

      # Attribute name, values (which can be an array or a range), and whether
      # the filter should be exclusive.
      def initialize(attribute, values, exclude=false)
        @attribute = attribute
        @values    = values
        @exclude   = exclude
      end

      # True when this filter excludes (rather than selects) its values.
      def exclude?
        self.exclude
      end

      # Returns the message for this filter to send to the Sphinx service:
      # attribute name, then a type tag and the values (range endpoints or
      # an explicit value list), then the exclusion flag.
      def query_message
        payload = Message.new

        payload.append_string self.attribute
        if self.values.is_a?(Range)
          # Float endpoints use the float-range filter type; anything else
          # is treated as an integer range.
          if self.values.first.is_a?(Float) && self.values.last.is_a?(Float)
            payload.append_int FilterTypes[:float_range]
            payload.append_floats self.values.first, self.values.last
          else
            payload.append_int FilterTypes[:range]
            payload.append_ints self.values.first, self.values.last
          end
        elsif self.values.is_a?(Array)
          payload.append_int FilterTypes[:values]
          payload.append_int self.values.length
          payload.append_ints *self.values
        end
        payload.append_int self.exclude? ? 1 : 0

        payload.to_s
      end
    end
  end
end
|
module Riddle
  class Client
    # This class takes care of the translation of ints, strings and arrays to
    # the format required by the Sphinx service: big-endian 32-bit ints,
    # length-prefixed strings, and count-prefixed arrays.
    class Message
      def initialize
        @message = ""
      end

      # Append raw data (only use if you know what you're doing)
      def append(*args)
        return if args.length == 0

        args.each { |arg| @message << arg }
      end

      # Append a string's length, then the string itself.
      #
      # The length prefix must be the BYTE length, not the character length:
      # Sphinx frames strings by bytes on the wire, so using String#length
      # would under-count for multibyte (e.g. UTF-8) strings and corrupt the
      # request. String#bytesize only exists on Ruby >= 1.8.7, hence the
      # respond_to? guard (falls back to #length, which equals bytesize on
      # pre-multibyte Rubies).
      def append_string(str)
        len = str.respond_to?(:bytesize) ? str.bytesize : str.length
        @message << [len].pack('N') + str
      end

      # Append an integer (32-bit, network byte order)
      def append_int(int)
        @message << [int].pack('N')
      end

      # Append a single-precision float
      def append_float(float)
        @message << [float].pack('f')
      end

      # Append multiple integers
      def append_ints(*ints)
        ints.each { |int| append_int(int) }
      end

      # Append multiple floats
      def append_floats(*floats)
        floats.each { |float| append_float(float) }
      end

      # Append an array of strings - first appends the length of the array,
      # then each item's length and value.
      def append_array(array)
        append_int(array.length)

        array.each { |item| append_string(item) }
      end

      # Returns the entire message
      def to_s
        @message
      end
    end
  end
end
|
module Riddle
  class Client
    # Used to interrogate responses from the Sphinx daemon. Keep in mind none
    # of the methods here check whether the data they're grabbing are what the
    # user expects - it just assumes the user knows what the data stream is
    # made up of.
    class Response
      # Create with the data to interpret
      def initialize(str)
        @str    = str
        @marker = 0
      end

      # Return the next string value in the stream: a 32-bit length prefix
      # followed by that many bytes.
      def next
        size  = next_int
        value = @str[@marker, size]
        @marker += size

        value
      end

      # Return the next integer value from the stream (32-bit, network order)
      def next_int
        value = @str[@marker, 4].unpack('N*').first
        @marker += 4

        value
      end

      # Return the next float value from the stream (single precision)
      def next_float
        value = @str[@marker, 4].unpack('f*').first
        @marker += 4

        value
      end

      # Returns an array of string items - a count followed by that many
      # length-prefixed strings.
      def next_array
        Array.new(next_int) { self.next }
      end

      # Returns an array of int items - a count followed by that many ints.
      def next_int_array
        Array.new(next_int) { next_int }
      end

      # Returns the length of the streamed data
      def length
        @str.length
      end
    end
  end
end
|
metadata
CHANGED
@@ -1,10 +1,10 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
|
-
rubygems_version: 0.9.4
|
3
|
-
specification_version:
|
2
|
+
rubygems_version: 0.9.4.6
|
3
|
+
specification_version: 2
|
4
4
|
name: ultrasphinx
|
5
5
|
version: !ruby/object:Gem::Version
|
6
|
-
version: 1.
|
7
|
-
date: 2007-
|
6
|
+
version: "1.6"
|
7
|
+
date: 2007-11-14 00:00:00 -05:00
|
8
8
|
summary: Ruby on Rails configurator and client to the Sphinx fulltext search engine.
|
9
9
|
require_paths:
|
10
10
|
- lib
|
@@ -16,11 +16,17 @@ autorequire:
|
|
16
16
|
default_executable:
|
17
17
|
bindir: bin
|
18
18
|
has_rdoc: true
|
19
|
-
required_ruby_version: !ruby/object:Gem::
|
19
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
20
20
|
requirements:
|
21
|
-
- - "
|
21
|
+
- - ">="
|
22
22
|
- !ruby/object:Gem::Version
|
23
|
-
version: 0
|
23
|
+
version: "0"
|
24
|
+
version:
|
25
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
26
|
+
requirements:
|
27
|
+
- - ">="
|
28
|
+
- !ruby/object:Gem::Version
|
29
|
+
version: "0"
|
24
30
|
version:
|
25
31
|
platform: ruby
|
26
32
|
signing_key:
|
@@ -177,14 +183,14 @@ files:
|
|
177
183
|
- test/setup.rb
|
178
184
|
- test/test_all.rb
|
179
185
|
- test/test_helper.rb
|
180
|
-
- test/ts.multi
|
181
186
|
- test/unit/parser_test.rb
|
182
187
|
- TODO
|
183
|
-
- vendor/
|
184
|
-
- vendor/
|
185
|
-
- vendor/
|
186
|
-
- vendor/
|
187
|
-
- vendor/
|
188
|
+
- vendor/riddle/MIT-LICENSE
|
189
|
+
- vendor/riddle/riddle/client/filter.rb
|
190
|
+
- vendor/riddle/riddle/client/message.rb
|
191
|
+
- vendor/riddle/riddle/client/response.rb
|
192
|
+
- vendor/riddle/riddle/client.rb
|
193
|
+
- vendor/riddle/riddle.rb
|
188
194
|
- vendor/will_paginate/LICENSE
|
189
195
|
- ultrasphinx.gemspec
|
190
196
|
test_files:
|
@@ -203,9 +209,9 @@ dependencies:
|
|
203
209
|
- !ruby/object:Gem::Dependency
|
204
210
|
name: chronic
|
205
211
|
version_requirement:
|
206
|
-
version_requirements: !ruby/object:Gem::
|
212
|
+
version_requirements: !ruby/object:Gem::Requirement
|
207
213
|
requirements:
|
208
|
-
- - "
|
214
|
+
- - ">="
|
209
215
|
- !ruby/object:Gem::Version
|
210
|
-
version: 0
|
216
|
+
version: "0"
|
211
217
|
version:
|
metadata.gz.sig
CHANGED
Binary file
|
data/test/ts.multi
DELETED
data/vendor/sphinx/LICENSE
DELETED
@@ -1,58 +0,0 @@
|
|
1
|
-
Ruby is copyrighted free software by Yukihiro Matsumoto <matz@netlab.co.jp>.
|
2
|
-
You can redistribute it and/or modify it under either the terms of the GPL
|
3
|
-
(see COPYING.txt file), or the conditions below:
|
4
|
-
|
5
|
-
1. You may make and give away verbatim copies of the source form of the
|
6
|
-
software without restriction, provided that you duplicate all of the
|
7
|
-
original copyright notices and associated disclaimers.
|
8
|
-
|
9
|
-
2. You may modify your copy of the software in any way, provided that
|
10
|
-
you do at least ONE of the following:
|
11
|
-
|
12
|
-
a) place your modifications in the Public Domain or otherwise
|
13
|
-
make them Freely Available, such as by posting said
|
14
|
-
modifications to Usenet or an equivalent medium, or by allowing
|
15
|
-
the author to include your modifications in the software.
|
16
|
-
|
17
|
-
b) use the modified software only within your corporation or
|
18
|
-
organization.
|
19
|
-
|
20
|
-
c) rename any non-standard executables so the names do not conflict
|
21
|
-
with standard executables, which must also be provided.
|
22
|
-
|
23
|
-
d) make other distribution arrangements with the author.
|
24
|
-
|
25
|
-
3. You may distribute the software in object code or executable
|
26
|
-
form, provided that you do at least ONE of the following:
|
27
|
-
|
28
|
-
a) distribute the executables and library files of the software,
|
29
|
-
together with instructions (in the manual page or equivalent)
|
30
|
-
on where to get the original distribution.
|
31
|
-
|
32
|
-
b) accompany the distribution with the machine-readable source of
|
33
|
-
the software.
|
34
|
-
|
35
|
-
c) give non-standard executables non-standard names, with
|
36
|
-
instructions on where to get the original software distribution.
|
37
|
-
|
38
|
-
d) make other distribution arrangements with the author.
|
39
|
-
|
40
|
-
4. You may modify and include the part of the software into any other
|
41
|
-
software (possibly commercial). But some files in the distribution
|
42
|
-
are not written by the author, so that they are not under this terms.
|
43
|
-
|
44
|
-
They are gc.c(partly), utils.c(partly), regex.[ch], st.[ch] and some
|
45
|
-
files under the ./missing directory. See each file for the copying
|
46
|
-
condition.
|
47
|
-
|
48
|
-
5. The scripts and library files supplied as input to or produced as
|
49
|
-
output from the software do not automatically fall under the
|
50
|
-
copyright of the software, but belong to whomever generated them,
|
51
|
-
and may be sold commercially, and may be aggregated with this
|
52
|
-
software.
|
53
|
-
|
54
|
-
6. THIS SOFTWARE IS PROVIDED "AS IS" AND WITHOUT ANY EXPRESS OR
|
55
|
-
IMPLIED WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED
|
56
|
-
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
57
|
-
PURPOSE.
|
58
|
-
|
data/vendor/sphinx/README
DELETED
@@ -1,40 +0,0 @@
|
|
1
|
-
=Sphinx Client API 0.3.0
|
2
|
-
|
3
|
-
Patched for Ultrasphinx.
|
4
|
-
|
5
|
-
This document gives an overview of what is Sphinx itself and how to use in
|
6
|
-
within Ruby on Rails. For more information or documentation,
|
7
|
-
please go to http://www.sphinxsearch.com
|
8
|
-
|
9
|
-
==Sphinx
|
10
|
-
|
11
|
-
Sphinx is a standalone full-text search engine, meant to provide fast,
|
12
|
-
size-efficient and relevant fulltext search functions to other applications.
|
13
|
-
Sphinx was specially designed to integrate well with SQL databases and
|
14
|
-
scripting languages. Currently built-in data sources support fetching data
|
15
|
-
either via direct connection to MySQL, or from an XML pipe.
|
16
|
-
|
17
|
-
Simplest way to communicate with Sphinx is to use <tt>searchd</tt> -
|
18
|
-
a daemon to search through fulltext indices from external software.
|
19
|
-
|
20
|
-
==Documentation
|
21
|
-
|
22
|
-
You can create the documentation by running:
|
23
|
-
|
24
|
-
rake rdoc
|
25
|
-
|
26
|
-
==Latest version
|
27
|
-
|
28
|
-
You can always get latest version from
|
29
|
-
http://kpumuk.info/projects/ror-plugins/sphinx
|
30
|
-
|
31
|
-
==Credits
|
32
|
-
|
33
|
-
Dmytro Shteflyuk <kpumuk@kpumuk.info> http://kpumuk.info
|
34
|
-
|
35
|
-
Special thanks to Alexey Kovyrin <alexey@kovyrin.net> http://blog.kovyrin.net
|
36
|
-
|
37
|
-
==License
|
38
|
-
|
39
|
-
This library is distributed under the terms of the Ruby license.
|
40
|
-
You can freely distribute/modify this library.
|
data/vendor/sphinx/Rakefile
DELETED
@@ -1,21 +0,0 @@
|
|
1
|
-
require 'rake'
|
2
|
-
require 'spec/rake/spectask'
|
3
|
-
require 'rake/rdoctask'
|
4
|
-
|
5
|
-
desc 'Default: run unit tests.'
|
6
|
-
task :default => :spec
|
7
|
-
|
8
|
-
desc 'Test the magic_enum plugin.'
|
9
|
-
Spec::Rake::SpecTask.new(:spec) do |t|
|
10
|
-
t.libs << 'lib'
|
11
|
-
t.pattern = 'spec/*_spec.rb'
|
12
|
-
end
|
13
|
-
|
14
|
-
desc 'Generate documentation for the magic_enum plugin.'
|
15
|
-
Rake::RDocTask.new(:rdoc) do |rdoc|
|
16
|
-
rdoc.rdoc_dir = 'rdoc'
|
17
|
-
rdoc.title = 'MagicEnum'
|
18
|
-
rdoc.options << '--line-numbers' << '--inline-source'
|
19
|
-
rdoc.rdoc_files.include('README')
|
20
|
-
rdoc.rdoc_files.include('lib/**/*.rb')
|
21
|
-
end
|
data/vendor/sphinx/init.rb
DELETED
@@ -1 +0,0 @@
|
|
1
|
-
require File.dirname(__FILE__) + '/lib/client'
|
data/vendor/sphinx/lib/client.rb
DELETED
@@ -1,647 +0,0 @@
|
|
1
|
-
# = client.rb - Sphinx Client API
|
2
|
-
#
|
3
|
-
# Author:: Dmytro Shteflyuk <mailto:kpumuk@kpumuk.info>.
|
4
|
-
# Copyright:: Copyright (c) 2006 - 2007 Dmytro Shteflyuk
|
5
|
-
# License:: Distributes under the same terms as Ruby
|
6
|
-
# Version:: 0.3.0
|
7
|
-
# Website:: http://kpumuk.info/projects/ror-plugins/sphinx
|
8
|
-
#
|
9
|
-
# This library is distributed under the terms of the Ruby license.
|
10
|
-
# You can freely distribute/modify this library.
|
11
|
-
|
12
|
-
# ==Sphinx Client API
|
13
|
-
#
|
14
|
-
# The Sphinx Client API is used to communicate with <tt>searchd</tt>
|
15
|
-
# daemon and get search results from Sphinx.
|
16
|
-
#
|
17
|
-
# ===Usage
|
18
|
-
#
|
19
|
-
# sphinx = Sphinx::Client.new
|
20
|
-
# result = sphinx.Query('test')
|
21
|
-
# ids = result['matches'].map { |id, value| id }.join(',')
|
22
|
-
# posts = Post.find :all, :conditions => "id IN (#{ids})"
|
23
|
-
#
|
24
|
-
# docs = posts.map(&:body)
|
25
|
-
# excerpts = sphinx.BuildExcerpts(docs, 'index', 'test')
|
26
|
-
module Sphinx
|
27
|
-
# :stopdoc:
|
28
|
-
|
29
|
-
class SphinxError < StandardError; end
|
30
|
-
class SphinxArgumentError < SphinxError; end
|
31
|
-
class SphinxConnectError < SphinxError; end
|
32
|
-
class SphinxResponseError < SphinxError; end
|
33
|
-
class SphinxInternalError < SphinxError; end
|
34
|
-
class SphinxTemporaryError < SphinxError; end
|
35
|
-
class SphinxUnknownError < SphinxError; end
|
36
|
-
|
37
|
-
# :startdoc:
|
38
|
-
|
39
|
-
class Client
|
40
|
-
|
41
|
-
# :stopdoc:
|
42
|
-
|
43
|
-
# Known searchd commands
|
44
|
-
|
45
|
-
# search command
|
46
|
-
SEARCHD_COMMAND_SEARCH = 0
|
47
|
-
# excerpt command
|
48
|
-
SEARCHD_COMMAND_EXCERPT = 1
|
49
|
-
# update command
|
50
|
-
SEARCHD_COMMAND_UPDATE = 2
|
51
|
-
|
52
|
-
# Current client-side command implementation versions
|
53
|
-
|
54
|
-
# search command version
|
55
|
-
VER_COMMAND_SEARCH = 0x107
|
56
|
-
# excerpt command version
|
57
|
-
VER_COMMAND_EXCERPT = 0x100
|
58
|
-
# update command version
|
59
|
-
VER_COMMAND_UPDATE = 0x100
|
60
|
-
|
61
|
-
# Known searchd status codes
|
62
|
-
|
63
|
-
# general success, command-specific reply follows
|
64
|
-
SEARCHD_OK = 0
|
65
|
-
# general failure, command-specific reply may follow
|
66
|
-
SEARCHD_ERROR = 1
|
67
|
-
# temporaty failure, client should retry later
|
68
|
-
SEARCHD_RETRY = 2
|
69
|
-
# general success, warning message and command-specific reply follow
|
70
|
-
SEARCHD_WARNING = 3
|
71
|
-
|
72
|
-
# :startdoc:
|
73
|
-
|
74
|
-
# Known match modes
|
75
|
-
|
76
|
-
# match all query words
|
77
|
-
SPH_MATCH_ALL = 0
|
78
|
-
# match any query word
|
79
|
-
SPH_MATCH_ANY = 1
|
80
|
-
# match this exact phrase
|
81
|
-
SPH_MATCH_PHRASE = 2
|
82
|
-
# match this boolean query
|
83
|
-
SPH_MATCH_BOOLEAN = 3
|
84
|
-
# match this extended query
|
85
|
-
SPH_MATCH_EXTENDED = 4
|
86
|
-
|
87
|
-
# Known sort modes
|
88
|
-
|
89
|
-
# sort by document relevance desc, then by date
|
90
|
-
SPH_SORT_RELEVANCE = 0
|
91
|
-
# sort by document date desc, then by relevance desc
|
92
|
-
SPH_SORT_ATTR_DESC = 1
|
93
|
-
# sort by document date asc, then by relevance desc
|
94
|
-
SPH_SORT_ATTR_ASC = 2
|
95
|
-
# sort by time segments (hour/day/week/etc) desc, then by relevance desc
|
96
|
-
SPH_SORT_TIME_SEGMENTS = 3
|
97
|
-
# sort by SQL-like expression (eg. "@relevance DESC, price ASC, @id DESC")
|
98
|
-
SPH_SORT_EXTENDED = 4
|
99
|
-
|
100
|
-
# Known attribute types
|
101
|
-
|
102
|
-
# this attr is just an integer
|
103
|
-
SPH_ATTR_INTEGER = 1
|
104
|
-
# this attr is a timestamp
|
105
|
-
SPH_ATTR_TIMESTAMP = 2
|
106
|
-
|
107
|
-
# Known grouping functions
|
108
|
-
|
109
|
-
# group by day
|
110
|
-
SPH_GROUPBY_DAY = 0
|
111
|
-
# group by week
|
112
|
-
SPH_GROUPBY_WEEK = 1
|
113
|
-
# group by month
|
114
|
-
SPH_GROUPBY_MONTH = 2
|
115
|
-
# group by year
|
116
|
-
SPH_GROUPBY_YEAR = 3
|
117
|
-
# group by attribute value
|
118
|
-
SPH_GROUPBY_ATTR = 4
|
119
|
-
|
120
|
-
# Constructs the <tt>Sphinx::Client</tt> object and sets options to their default values.
|
121
|
-
def initialize
|
122
|
-
@host = 'localhost' # searchd host (default is "localhost")
|
123
|
-
@port = 3312 # searchd port (default is 3312)
|
124
|
-
@offset = 0 # how many records to seek from result-set start (default is 0)
|
125
|
-
@limit = 20 # how many records to return from result-set starting at offset (default is 20)
|
126
|
-
@mode = SPH_MATCH_ALL # query matching mode (default is SPH_MATCH_ALL)
|
127
|
-
@weights = [] # per-field weights (default is 1 for all fields)
|
128
|
-
@sort = SPH_SORT_RELEVANCE # match sorting mode (default is SPH_SORT_RELEVANCE)
|
129
|
-
@sortby = '' # attribute to sort by (defualt is "")
|
130
|
-
@min_id = 0 # min ID to match (default is 0)
|
131
|
-
@max_id = 0xFFFFFFFF # max ID to match (default is UINT_MAX)
|
132
|
-
@filters = [] # search filters
|
133
|
-
@groupby = '' # group-by attribute name
|
134
|
-
@groupfunc = SPH_GROUPBY_DAY # function to pre-process group-by attribute value with
|
135
|
-
@groupsort = '@group desc' # group-by sorting clause (to sort groups in result set with)
|
136
|
-
@maxmatches = 1000 # max matches to retrieve
|
137
|
-
|
138
|
-
@error = '' # last error message
|
139
|
-
@warning = '' # last warning message
|
140
|
-
end
|
141
|
-
|
142
|
-
# Get last error message.
|
143
|
-
def GetLastError
|
144
|
-
@error
|
145
|
-
end
|
146
|
-
|
147
|
-
# Get last warning message.
|
148
|
-
def GetLastWarning
|
149
|
-
@warning
|
150
|
-
end
|
151
|
-
|
152
|
-
# Set searchd server.
|
153
|
-
def SetServer(host, port)
|
154
|
-
assert { host.instance_of? String }
|
155
|
-
assert { port.instance_of? Fixnum }
|
156
|
-
|
157
|
-
@host = host
|
158
|
-
@port = port
|
159
|
-
end
|
160
|
-
|
161
|
-
# Set match offset, count, and max number to retrieve.
|
162
|
-
def SetLimits(offset, limit, max = 0)
|
163
|
-
assert { offset.instance_of? Fixnum }
|
164
|
-
assert { limit.instance_of? Fixnum }
|
165
|
-
assert { max.instance_of? Fixnum }
|
166
|
-
assert { offset >= 0 }
|
167
|
-
assert { limit > 0 }
|
168
|
-
assert { max >= 0 }
|
169
|
-
|
170
|
-
@offset = offset
|
171
|
-
@limit = limit
|
172
|
-
@maxmatches = max if max > 0
|
173
|
-
end
|
174
|
-
|
175
|
-
# Set match mode.
|
176
|
-
def SetMatchMode(mode)
|
177
|
-
assert { mode == SPH_MATCH_ALL \
|
178
|
-
|| mode == SPH_MATCH_ANY \
|
179
|
-
|| mode == SPH_MATCH_PHRASE \
|
180
|
-
|| mode == SPH_MATCH_BOOLEAN \
|
181
|
-
|| mode == SPH_MATCH_EXTENDED }
|
182
|
-
|
183
|
-
@mode = mode
|
184
|
-
end
|
185
|
-
|
186
|
-
# Set matches sorting mode.
|
187
|
-
def SetSortMode(mode, sortby = '')
|
188
|
-
assert { mode == SPH_SORT_RELEVANCE \
|
189
|
-
|| mode == SPH_SORT_ATTR_DESC \
|
190
|
-
|| mode == SPH_SORT_ATTR_ASC \
|
191
|
-
|| mode == SPH_SORT_TIME_SEGMENTS \
|
192
|
-
|| mode == SPH_SORT_EXTENDED }
|
193
|
-
assert { sortby.instance_of? String }
|
194
|
-
assert { mode == SPH_SORT_RELEVANCE || !sortby.empty? }
|
195
|
-
|
196
|
-
@sort = mode
|
197
|
-
@sortby = sortby
|
198
|
-
end
|
199
|
-
|
200
|
-
# Set per-field weights.
|
201
|
-
def SetWeights(weights)
|
202
|
-
assert { weights.instance_of? Array }
|
203
|
-
weights.each do |weight|
|
204
|
-
assert { weight.instance_of? Fixnum }
|
205
|
-
end
|
206
|
-
|
207
|
-
@weights = weights
|
208
|
-
end
|
209
|
-
|
210
|
-
# Set IDs range to match.
|
211
|
-
#
|
212
|
-
# Only match those records where document ID is beetwen <tt>min_id</tt> and <tt>max_id</tt>
|
213
|
-
# (including <tt>min_id</tt> and <tt>max_id</tt>).
|
214
|
-
def SetIDRange(min, max)
|
215
|
-
assert { min.instance_of? Fixnum }
|
216
|
-
assert { max.instance_of? Fixnum }
|
217
|
-
assert { min <= max }
|
218
|
-
|
219
|
-
@min_id = min
|
220
|
-
@max_id = max
|
221
|
-
end
|
222
|
-
|
223
|
-
# Set values filter.
|
224
|
-
#
|
225
|
-
# Only match those records where <tt>attribute</tt> column values
|
226
|
-
# are in specified set.
|
227
|
-
def SetFilter(attribute, values, exclude = false)
|
228
|
-
assert { attribute.instance_of? String }
|
229
|
-
assert { values.instance_of? Array }
|
230
|
-
assert { !values.empty? }
|
231
|
-
|
232
|
-
if values.instance_of?(Array) && values.size > 0
|
233
|
-
values.each do |value|
|
234
|
-
assert { value.instance_of? Fixnum }
|
235
|
-
end
|
236
|
-
|
237
|
-
@filters << { 'attr' => attribute, 'exclude' => exclude, 'values' => values }
|
238
|
-
end
|
239
|
-
end
|
240
|
-
|
241
|
-
# Set range filter.
|
242
|
-
#
|
243
|
-
# Only match those records where <tt>attribute</tt> column value
|
244
|
-
# is beetwen <tt>min</tt> and <tt>max</tt> (including <tt>min</tt> and <tt>max</tt>).
|
245
|
-
def SetFilterRange(attribute, min, max, exclude = false)
|
246
|
-
assert { attribute.instance_of? String }
|
247
|
-
assert { min.instance_of? Fixnum }
|
248
|
-
assert { max.instance_of? Fixnum }
|
249
|
-
assert { min <= max }
|
250
|
-
|
251
|
-
@filters << { 'attr' => attribute, 'exclude' => exclude, 'min' => min, 'max' => max }
|
252
|
-
end
|
253
|
-
|
254
|
-
# Set grouping attribute and function.
|
255
|
-
#
|
256
|
-
# In grouping mode, all matches are assigned to different groups
|
257
|
-
# based on grouping function value.
|
258
|
-
#
|
259
|
-
# Each group keeps track of the total match count, and the best match
|
260
|
-
# (in this group) according to current sorting function.
|
261
|
-
#
|
262
|
-
# The final result set contains one best match per group, with
|
263
|
-
# grouping function value and matches count attached.
|
264
|
-
#
|
265
|
-
# Groups in result set could be sorted by any sorting clause,
|
266
|
-
# including both document attributes and the following special
|
267
|
-
# internal Sphinx attributes:
|
268
|
-
#
|
269
|
-
# * @id - match document ID;
|
270
|
-
# * @weight, @rank, @relevance - match weight;
|
271
|
-
# * @group - groupby function value;
|
272
|
-
# * @count - amount of matches in group.
|
273
|
-
#
|
274
|
-
# the default mode is to sort by groupby value in descending order,
|
275
|
-
# ie. by '@group desc'.
|
276
|
-
#
|
277
|
-
# 'total_found' would contain total amount of matching groups over
|
278
|
-
# the whole index.
|
279
|
-
#
|
280
|
-
# WARNING: grouping is done in fixed memory and thus its results
|
281
|
-
# are only approximate; so there might be more groups reported
|
282
|
-
# in total_found than actually present. @count might also
|
283
|
-
# be underestimated.
|
284
|
-
#
|
285
|
-
# For example, if sorting by relevance and grouping by "published"
|
286
|
-
# attribute with SPH_GROUPBY_DAY function, then the result set will
|
287
|
-
# contain one most relevant match per each day when there were any
|
288
|
-
# matches published, with day number and per-day match count attached,
|
289
|
-
# and sorted by day number in descending order (ie. recent days first).
|
290
|
-
def SetGroupBy(attribute, func, groupsort = '@group desc')
|
291
|
-
assert { attribute.instance_of? String }
|
292
|
-
assert { groupsort.instance_of? String }
|
293
|
-
assert { func == SPH_GROUPBY_DAY \
|
294
|
-
|| func == SPH_GROUPBY_WEEK \
|
295
|
-
|| func == SPH_GROUPBY_MONTH \
|
296
|
-
|| func == SPH_GROUPBY_YEAR \
|
297
|
-
|| func == SPH_GROUPBY_ATTR }
|
298
|
-
|
299
|
-
@groupby = attribute
|
300
|
-
@groupfunc = func
|
301
|
-
@groupsort = groupsort
|
302
|
-
end
|
303
|
-
|
304
|
-
# Connect to searchd server and run given search query.
|
305
|
-
#
|
306
|
-
# * <tt>query</tt> -- query string
|
307
|
-
# * <tt>index</tt> -- index name to query, default is "*" which means to query all indexes
|
308
|
-
#
|
309
|
-
# returns hash which has the following keys on success:
|
310
|
-
#
|
311
|
-
# * <tt>'matches'</tt> -- hash which maps found document_id to ('weight', 'group') hash
|
312
|
-
# * <tt>'total'</tt> -- total amount of matches retrieved (upto SPH_MAX_MATCHES, see sphinx.h)
|
313
|
-
# * <tt>'total_found'</tt> -- total amount of matching documents in index
|
314
|
-
# * <tt>'time'</tt> -- search time
|
315
|
-
# * <tt>'words'</tt> -- hash which maps query terms (stemmed!) to ('docs', 'hits') hash
|
316
|
-
def Query(query, index = '*')
|
317
|
-
sock = self.Connect
|
318
|
-
|
319
|
-
# build request
|
320
|
-
|
321
|
-
# mode and limits
|
322
|
-
req = [@offset, @limit, @mode, @sort].pack('NNNN')
|
323
|
-
req << [@sortby.length].pack('N') + @sortby
|
324
|
-
# query itself
|
325
|
-
req << [query.length].pack('N') + query
|
326
|
-
# weights
|
327
|
-
req << [@weights.length].pack('N')
|
328
|
-
req << @weights.pack('N' * @weights.length)
|
329
|
-
# indexes
|
330
|
-
req << [index.length].pack('N') + index
|
331
|
-
# id range
|
332
|
-
req << [@min_id.to_i, @max_id.to_i].pack('NN')
|
333
|
-
|
334
|
-
# filters
|
335
|
-
req << [@filters.length].pack('N')
|
336
|
-
@filters.each do |filter|
|
337
|
-
req << [filter['attr'].length].pack('N') + filter['attr']
|
338
|
-
|
339
|
-
unless filter['values'].nil?
|
340
|
-
req << [filter['values'].length].pack('N')
|
341
|
-
req << filter['values'].pack('N' * filter['values'].length)
|
342
|
-
else
|
343
|
-
req << [0, filter['min'], filter['max']].pack('NNN')
|
344
|
-
end
|
345
|
-
req << [filter['exclude'] ? 1 : 0].pack('N')
|
346
|
-
end
|
347
|
-
|
348
|
-
# group-by, max matches, sort-by-group flag
|
349
|
-
req << [@groupfunc, @groupby.length].pack('NN') + @groupby
|
350
|
-
req << [@maxmatches].pack('N')
|
351
|
-
req << [@groupsort.length].pack('N') + @groupsort
|
352
|
-
|
353
|
-
# send query, get response
|
354
|
-
len = req.length
|
355
|
-
# add header
|
356
|
-
req = [SEARCHD_COMMAND_SEARCH, VER_COMMAND_SEARCH, len].pack('nnN') + req
|
357
|
-
sock.send(req, 0)
|
358
|
-
|
359
|
-
response = GetResponse(sock, VER_COMMAND_SEARCH)
|
360
|
-
|
361
|
-
# parse response
|
362
|
-
result = {}
|
363
|
-
max = response.length # protection from broken response
|
364
|
-
|
365
|
-
# read schema
|
366
|
-
p = 0
|
367
|
-
fields = []
|
368
|
-
attrs = {}
|
369
|
-
attrs_names_in_order = []
|
370
|
-
|
371
|
-
nfields = response[p, 4].unpack('N*').first; p += 4
|
372
|
-
while nfields > 0 and p < max
|
373
|
-
nfields -= 1
|
374
|
-
len = response[p, 4].unpack('N*').first; p += 4
|
375
|
-
fields << response[p, len]; p += len
|
376
|
-
end
|
377
|
-
result['fields'] = fields
|
378
|
-
|
379
|
-
nattrs = response[p, 4].unpack('N*').first; p += 4
|
380
|
-
while nattrs > 0 && p < max
|
381
|
-
nattrs -= 1
|
382
|
-
len = response[p, 4].unpack('N*').first; p += 4
|
383
|
-
attr = response[p, len]; p += len
|
384
|
-
type = response[p, 4].unpack('N*').first; p += 4
|
385
|
-
attrs[attr] = type
|
386
|
-
attrs_names_in_order << attr
|
387
|
-
end
|
388
|
-
result['attrs'] = attrs
|
389
|
-
|
390
|
-
# read match count
|
391
|
-
count = response[p, 4].unpack('N*').first; p += 4
|
392
|
-
|
393
|
-
# read matches
|
394
|
-
result['matches'], index = {}, 0
|
395
|
-
while count > 0 and p < max
|
396
|
-
count -= 1
|
397
|
-
doc, weight = response[p, 8].unpack('N*N*'); p += 8
|
398
|
-
|
399
|
-
result['matches'][doc] ||= {}
|
400
|
-
result['matches'][doc]['weight'] = weight
|
401
|
-
result['matches'][doc]['index'] = index
|
402
|
-
attrs_names_in_order.each do |attr|
|
403
|
-
val = response[p, 4].unpack('N*').first; p += 4
|
404
|
-
result['matches'][doc]['attrs'] ||= {}
|
405
|
-
result['matches'][doc]['attrs'][attr] = val
|
406
|
-
end
|
407
|
-
index += 1
|
408
|
-
end
|
409
|
-
result['total'], result['total_found'], msecs, words = response[p, 16].unpack('N*N*N*N*'); p += 16
|
410
|
-
result['time'] = '%.3f' % (msecs / 1000.0)
|
411
|
-
|
412
|
-
result['words'] = {}
|
413
|
-
while words > 0 and p < max
|
414
|
-
words -= 1
|
415
|
-
len = response[p, 4].unpack('N*').first; p += 4
|
416
|
-
word = response[p, len]; p += len
|
417
|
-
docs, hits = response[p, 8].unpack('N*N*'); p += 8
|
418
|
-
result['words'][word] = { 'docs' => docs, 'hits' => hits }
|
419
|
-
end
|
420
|
-
|
421
|
-
result
|
422
|
-
end
|
423
|
-
|
424
|
-
# Connect to searchd server and generate excerpts (snippets) from the given
# documents.
#
# * <tt>docs</tt> -- an array of strings which represent the documents' contents
# * <tt>index</tt> -- a string specifying the index whose settings will be used
#   for stemming, lexing and case folding
# * <tt>words</tt> -- a string which contains the words to highlight
# * <tt>opts</tt> is a hash which contains additional optional highlighting parameters.
#
# You can use following parameters:
# * <tt>'before_match'</tt> -- a string to insert before a set of matching words, default is "<b>"
# * <tt>'after_match'</tt> -- a string to insert after a set of matching words, default is "</b>"
# * <tt>'chunk_separator'</tt> -- a string to insert between excerpts chunks, default is " ... "
# * <tt>'limit'</tt> -- max excerpt size in symbols (codepoints), default is 256
# * <tt>'around'</tt> -- how much words to highlight around each match, default is 5
#
# Returns an array of string excerpts on success.
# Raises SphinxResponseError when searchd returns an incomplete reply.
def BuildExcerpts(docs, index, words, opts = {})
  assert { docs.instance_of? Array }
  assert { index.instance_of? String }
  assert { words.instance_of? String }
  assert { opts.instance_of? Hash }

  sock = self.Connect

  # Fill in default options for any the caller did not supply.
  opts['before_match']    ||= '<b>'
  opts['after_match']     ||= '</b>'
  opts['chunk_separator'] ||= ' ... '
  opts['limit']           ||= 256
  opts['around']          ||= 5

  # Build request.
  # v.1.0 request: mode=0, flags=1 (remove spaces).
  req = [0, 1].pack('N2')
  # Index and query words are sent as length-prefixed strings.
  req << [index.length].pack('N') + index
  req << [words.length].pack('N') + words

  # Highlighting options: three length-prefixed strings, then two 32-bit ints.
  req << [opts['before_match'].length].pack('N') + opts['before_match']
  req << [opts['after_match'].length].pack('N') + opts['after_match']
  req << [opts['chunk_separator'].length].pack('N') + opts['chunk_separator']
  req << [opts['limit'].to_i, opts['around'].to_i].pack('NN')

  # Documents: count, then each document body length-prefixed.
  req << [docs.size].pack('N')
  docs.each do |doc|
    assert { doc.instance_of? String }

    req << [doc.length].pack('N') + doc
  end

  # Send query with the searchd command header prepended, then read the reply.
  len = req.length
  req = [SEARCHD_COMMAND_EXCERPT, VER_COMMAND_EXCERPT, len].pack('nnN') + req
  sock.send(req, 0)

  response = GetResponse(sock, VER_COMMAND_EXCERPT)

  # Parse response: one length-prefixed excerpt string per input document.
  p = 0
  res = []
  rlen = response.length
  docs.each do |doc|
    len = response[p, 4].unpack('N*').first; p += 4
    if p + len > rlen
      @error = 'incomplete reply'
      raise SphinxResponseError, @error
    end
    res << response[p, len]; p += len
  end
  return res
end
|
500
|
-
|
501
|
-
# Attribute updates
#
# Update specified attributes on specified documents.
#
# * <tt>index</tt> is a name of the index to be updated
# * <tt>attrs</tt> is an array of attribute name strings.
# * <tt>values</tt> is a hash where key is document id, and value is an array of
#   new attribute values
#
# Returns number of actually updated documents (0 or more) on success.
# Returns -1 on failure.
#
# Usage example:
#    sphinx.UpdateAttributes('index', ['group'], { 123 => [456] })
def UpdateAttributes(index, attrs, values)
  # verify everything
  assert { index.instance_of? String }

  assert { attrs.instance_of? Array }
  attrs.each do |attr|
    assert { attr.instance_of? String }
  end

  assert { values.instance_of? Hash }
  values.each do |id, entry|
    assert { id.instance_of? Fixnum }
    assert { entry.instance_of? Array }
    # Each document must supply exactly one new value per attribute name.
    assert { entry.length == attrs.length }
    entry.each do |v|
      assert { v.instance_of? Fixnum }
    end
  end

  # build request
  # Index name as a length-prefixed string.
  req = [index.length].pack('N') + index

  # Attribute names: count, then each name length-prefixed.
  req << [attrs.length].pack('N')
  attrs.each do |attr|
    req << [attr.length].pack('N') + attr
  end

  # Per-document payload: 32-bit big-endian doc id followed by one
  # 32-bit value per attribute, in the same order as +attrs+.
  req << [values.length].pack('N')
  values.each do |id, entry|
    req << [id].pack('N')
    req << entry.pack('N' * entry.length)
  end

  # connect, send query, get response
  sock = self.Connect
  len = req.length
  req = [SEARCHD_COMMAND_UPDATE, VER_COMMAND_UPDATE, len].pack('nnN') + req # add header
  sock.send(req, 0)

  response = self.GetResponse(sock, VER_COMMAND_UPDATE)

  # parse response
  # First 32-bit word of the body is the count of documents actually updated.
  response[0, 4].unpack('N*').first
end
|
559
|
-
|
560
|
-
protected
|
561
|
-
|
562
|
-
# Connect to searchd server.
#
# Opens a TCP connection to the configured host/port, performs the
# protocol-version handshake, and returns the connected socket.
# Raises SphinxConnectError when the connection cannot be established
# or when the server reports an unsupported protocol version.
def Connect
  socket =
    begin
      TCPSocket.new(@host, @port)
    rescue
      @error = "connection to #{@host}:#{@port} failed"
      raise SphinxConnectError, @error
    end

  # The server greets us with its protocol version as a 32-bit integer.
  server_version = socket.recv(4).unpack('N*').first
  unless server_version >= 1
    socket.close
    @error = "expected searchd protocol version 1+, got version '#{server_version}'"
    raise SphinxConnectError, @error
  end

  # Announce our own protocol version (1) and hand back the live socket.
  socket.send([1].pack('N'), 0)
  socket
end
|
581
|
-
|
582
|
-
# Get and check response packet from searchd server.
#
# Reads the 8-byte reply header (16-bit status, 16-bit version, 32-bit body
# length), then the body itself; closes the socket; validates length and
# status; and returns the response body string (with any warning prefix
# stripped and stashed in @warning).
#
# Raises SphinxResponseError on truncated or empty replies,
# SphinxInternalError on SEARCHD_ERROR, SphinxTemporaryError on
# SEARCHD_RETRY, and SphinxUnknownError on unrecognized status codes.
def GetResponse(sock, client_version)
  header = sock.recv(8)
  status, ver, len = header.unpack('n2N')
  response = ''
  # FIX: a truncated header leaves len nil; the original `left = len` then
  # crashed with NoMethodError on `nil > 0`. Coerce to 0 so we fall through
  # to the zero-sized-response error below instead.
  left = len.to_i
  while left > 0 do
    begin
      chunk = sock.recv(left)
      if chunk
        response << chunk
        left -= chunk.length
      end
    rescue EOFError
      break
    end
  end
  sock.close

  # check response
  read = response.length
  if response.empty? or read != len
    # FIX: `len` is an Integer and 0 is truthy in Ruby (unlike the PHP this
    # was ported from), so the original bare `len ? ... : ...` could never
    # report a zero-sized response. Test for a positive length explicitly.
    @error = (len && len > 0) \
      ? "failed to read searchd response (status=#{status}, ver=#{ver}, len=#{len}, read=#{read})" \
      : 'received zero-sized searchd response'
    raise SphinxResponseError, @error
  end

  # check status
  if (status == SEARCHD_WARNING)
    # Warning replies prefix the body with a length-prefixed warning string.
    wlen = response[0, 4].unpack('N*').first
    @warning = response[4, wlen]
    return response[4 + wlen, response.length - 4 - wlen]
  end

  if status == SEARCHD_ERROR
    @error = 'searchd error: ' + response[4, response.length - 4]
    raise SphinxInternalError, @error
  end

  if status == SEARCHD_RETRY
    @error = 'temporary searchd error: ' + response[4, response.length - 4]
    raise SphinxTemporaryError, @error
  end

  unless status == SEARCHD_OK
    @error = "unknown status code: '#{status}'"
    raise SphinxUnknownError, @error
  end

  # check version
  # An older server command version is survivable: warn, don't fail.
  if ver < client_version
    @warning = "searchd command v.#{ver >> 8}.#{ver & 0xff} older than client's " +
      "v.#{client_version >> 8}.#{client_version & 0xff}, some options might not work"
  end

  return response
end
|
640
|
-
|
641
|
-
# :stopdoc:
# Evaluate the given block only when $DEBUG is enabled, raising a
# RuntimeError when the block returns a falsy value. When $DEBUG is off
# the block is never called, so assertions cost nothing in production.
def assert
  return unless $DEBUG
  raise 'Assertion failed!' unless yield
end
# :startdoc:
|
646
|
-
end
|
647
|
-
end
|