picky 2.6.0 → 2.7.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/lib/picky/analyzer.rb +4 -4
- data/lib/picky/application.rb +6 -7
- data/lib/picky/backend/{backend.rb → base.rb} +31 -14
- data/lib/picky/backend/file/basic.rb +12 -4
- data/lib/picky/backend/file/json.rb +5 -5
- data/lib/picky/backend/file/text.rb +1 -1
- data/lib/picky/backend/files.rb +3 -9
- data/lib/picky/backend/redis/basic.rb +8 -0
- data/lib/picky/backend/redis/list_hash.rb +5 -5
- data/lib/picky/backend/redis/string_hash.rb +5 -5
- data/lib/picky/backend/redis.rb +5 -5
- data/lib/picky/bundle.rb +62 -0
- data/lib/picky/categories.rb +10 -9
- data/lib/picky/categories_indexed.rb +12 -7
- data/lib/picky/categories_indexing.rb +7 -9
- data/lib/picky/category.rb +38 -26
- data/lib/picky/category_indexed.rb +4 -20
- data/lib/picky/category_indexing.rb +71 -68
- data/lib/picky/generators/base.rb +6 -6
- data/lib/picky/generators/partial/substring.rb +28 -26
- data/lib/picky/generators/partial_generator.rb +3 -3
- data/lib/picky/generators/similarity/phonetic.rb +5 -5
- data/lib/picky/generators/similarity_generator.rb +2 -2
- data/lib/picky/generators/weights/logarithmic.rb +3 -3
- data/lib/picky/generators/weights_generator.rb +2 -2
- data/lib/picky/index/base.rb +13 -10
- data/lib/picky/index/base_indexed.rb +2 -0
- data/lib/picky/index/base_indexing.rb +65 -57
- data/lib/picky/indexed/bundle/base.rb +21 -86
- data/lib/picky/indexed/bundle/memory.rb +5 -12
- data/lib/picky/indexed/bundle/redis.rb +42 -0
- data/lib/picky/indexed/wrappers/bundle/wrapper.rb +3 -3
- data/lib/picky/indexers/base.rb +20 -3
- data/lib/picky/indexers/parallel.rb +32 -14
- data/lib/picky/indexers/serial.rb +29 -26
- data/lib/picky/indexes.rb +5 -3
- data/lib/picky/indexes_indexed.rb +3 -15
- data/lib/picky/indexes_indexing.rb +18 -21
- data/lib/picky/indexing/bundle/base.rb +64 -45
- data/lib/picky/indexing/bundle/memory.rb +0 -4
- data/lib/picky/loader.rb +7 -6
- data/lib/picky/query/allocation.rb +3 -3
- data/lib/picky/query/token.rb +5 -1
- data/lib/picky/search.rb +5 -0
- data/lib/picky/sources/base.rb +21 -2
- data/lib/picky/sources/db.rb +0 -7
- data/lib/picky/statistics.rb +9 -12
- data/lib/picky/tokenizers/location.rb +1 -1
- data/lib/tasks/checks.rake +8 -6
- data/lib/tasks/index.rake +14 -20
- data/lib/tasks/server.rake +18 -2
- data/lib/tasks/statistics.rake +27 -14
- data/lib/tasks/todo.rake +2 -2
- data/lib/tasks/try.rake +12 -27
- data/spec/lib/application_spec.rb +1 -1
- data/spec/lib/backend/file/basic_spec.rb +6 -6
- data/spec/lib/backend/file/json_spec.rb +11 -6
- data/spec/lib/backend/file/marshal_spec.rb +11 -6
- data/spec/lib/backend/files_spec.rb +21 -7
- data/spec/lib/backend/redis/basic_spec.rb +6 -0
- data/spec/lib/backend/redis/list_hash_spec.rb +9 -3
- data/spec/lib/backend/redis/string_hash_spec.rb +7 -1
- data/spec/lib/backend/redis_spec.rb +22 -12
- data/spec/lib/categories_indexed_spec.rb +2 -2
- data/spec/lib/category_indexing_spec.rb +12 -33
- data/spec/lib/category_spec.rb +22 -0
- data/spec/lib/index/base_indexing_spec.rb +30 -0
- data/spec/lib/indexed/bundle/memory_spec.rb +13 -20
- data/spec/lib/indexers/base_spec.rb +39 -4
- data/spec/lib/indexers/parallel_spec.rb +2 -10
- data/spec/lib/indexers/serial_spec.rb +11 -26
- data/spec/lib/indexes_class_spec.rb +4 -4
- data/spec/lib/indexes_indexed_spec.rb +2 -2
- data/spec/lib/indexes_indexing_spec.rb +6 -10
- data/spec/lib/indexes_spec.rb +3 -3
- data/spec/lib/indexing/bundle/{super_base_spec.rb → base_spec.rb} +2 -2
- data/spec/lib/indexing/bundle/memory_partial_generation_speed_spec.rb +3 -3
- data/spec/lib/indexing/bundle/memory_spec.rb +16 -14
- data/spec/lib/indexing/bundle/redis_spec.rb +18 -16
- data/spec/lib/query/allocation_spec.rb +1 -1
- data/spec/lib/query/token_spec.rb +5 -7
- data/spec/lib/sources/base_spec.rb +53 -0
- data/spec/lib/sources/db_spec.rb +0 -7
- metadata +11 -12
- data/lib/picky/indexers/solr.rb +0 -56
- data/lib/picky/indexing/bundle/super_base.rb +0 -61
- data/lib/picky/solr/schema_generator.rb +0 -74
- data/lib/tasks/search.rake +0 -9
- data/lib/tasks/shortcuts.rake +0 -32
- data/lib/tasks/solr.rake +0 -36
metadata
CHANGED
@@ -2,7 +2,7 @@
|
|
2
2
|
name: picky
|
3
3
|
version: !ruby/object:Gem::Version
|
4
4
|
prerelease:
|
5
|
-
version: 2.6.0
|
5
|
+
version: 2.7.0
|
6
6
|
platform: ruby
|
7
7
|
authors:
|
8
8
|
- Florian Hanke
|
@@ -10,7 +10,7 @@ autorequire:
|
|
10
10
|
bindir: bin
|
11
11
|
cert_chain: []
|
12
12
|
|
13
|
-
date: 2011-07-
|
13
|
+
date: 2011-07-07 00:00:00 +10:00
|
14
14
|
default_executable: picky
|
15
15
|
dependencies:
|
16
16
|
- !ruby/object:Gem::Dependency
|
@@ -32,7 +32,7 @@ dependencies:
|
|
32
32
|
requirements:
|
33
33
|
- - "="
|
34
34
|
- !ruby/object:Gem::Version
|
35
|
-
version: 2.6.0
|
35
|
+
version: 2.7.0
|
36
36
|
type: :development
|
37
37
|
version_requirements: *id002
|
38
38
|
description: Fast Ruby semantic text search engine with comfortable single field interface.
|
@@ -54,7 +54,7 @@ files:
|
|
54
54
|
- lib/picky/adapters/rack.rb
|
55
55
|
- lib/picky/analyzer.rb
|
56
56
|
- lib/picky/application.rb
|
57
|
-
- lib/picky/backend/backend.rb
|
57
|
+
- lib/picky/backend/base.rb
|
58
58
|
- lib/picky/backend/file/basic.rb
|
59
59
|
- lib/picky/backend/file/json.rb
|
60
60
|
- lib/picky/backend/file/marshal.rb
|
@@ -64,6 +64,7 @@ files:
|
|
64
64
|
- lib/picky/backend/redis/list_hash.rb
|
65
65
|
- lib/picky/backend/redis/string_hash.rb
|
66
66
|
- lib/picky/backend/redis.rb
|
67
|
+
- lib/picky/bundle.rb
|
67
68
|
- lib/picky/calculations/location.rb
|
68
69
|
- lib/picky/categories.rb
|
69
70
|
- lib/picky/categories_indexed.rb
|
@@ -119,14 +120,12 @@ files:
|
|
119
120
|
- lib/picky/indexers/base.rb
|
120
121
|
- lib/picky/indexers/parallel.rb
|
121
122
|
- lib/picky/indexers/serial.rb
|
122
|
-
- lib/picky/indexers/solr.rb
|
123
123
|
- lib/picky/indexes.rb
|
124
124
|
- lib/picky/indexes_indexed.rb
|
125
125
|
- lib/picky/indexes_indexing.rb
|
126
126
|
- lib/picky/indexing/bundle/base.rb
|
127
127
|
- lib/picky/indexing/bundle/memory.rb
|
128
128
|
- lib/picky/indexing/bundle/redis.rb
|
129
|
-
- lib/picky/indexing/bundle/super_base.rb
|
130
129
|
- lib/picky/indexing/wrappers/category/location.rb
|
131
130
|
- lib/picky/interfaces/live_parameters.rb
|
132
131
|
- lib/picky/loader.rb
|
@@ -150,7 +149,6 @@ files:
|
|
150
149
|
- lib/picky/results.rb
|
151
150
|
- lib/picky/search.rb
|
152
151
|
- lib/picky/signals.rb
|
153
|
-
- lib/picky/solr/schema_generator.rb
|
154
152
|
- lib/picky/sources/base.rb
|
155
153
|
- lib/picky/sources/couch.rb
|
156
154
|
- lib/picky/sources/csv.rb
|
@@ -171,10 +169,7 @@ files:
|
|
171
169
|
- lib/tasks/framework.rake
|
172
170
|
- lib/tasks/index.rake
|
173
171
|
- lib/tasks/routes.rake
|
174
|
-
- lib/tasks/search.rake
|
175
172
|
- lib/tasks/server.rake
|
176
|
-
- lib/tasks/shortcuts.rake
|
177
|
-
- lib/tasks/solr.rake
|
178
173
|
- lib/tasks/spec.rake
|
179
174
|
- lib/tasks/statistics.rake
|
180
175
|
- lib/tasks/todo.rake
|
@@ -201,6 +196,7 @@ files:
|
|
201
196
|
- spec/lib/categories_indexed_spec.rb
|
202
197
|
- spec/lib/category_indexed_spec.rb
|
203
198
|
- spec/lib/category_indexing_spec.rb
|
199
|
+
- spec/lib/category_spec.rb
|
204
200
|
- spec/lib/character_substituters/west_european_spec.rb
|
205
201
|
- spec/lib/cores_spec.rb
|
206
202
|
- spec/lib/extensions/array_spec.rb
|
@@ -240,10 +236,10 @@ files:
|
|
240
236
|
- spec/lib/indexes_indexed_spec.rb
|
241
237
|
- spec/lib/indexes_indexing_spec.rb
|
242
238
|
- spec/lib/indexes_spec.rb
|
239
|
+
- spec/lib/indexing/bundle/base_spec.rb
|
243
240
|
- spec/lib/indexing/bundle/memory_partial_generation_speed_spec.rb
|
244
241
|
- spec/lib/indexing/bundle/memory_spec.rb
|
245
242
|
- spec/lib/indexing/bundle/redis_spec.rb
|
246
|
-
- spec/lib/indexing/bundle/super_base_spec.rb
|
247
243
|
- spec/lib/interfaces/live_parameters_spec.rb
|
248
244
|
- spec/lib/loader_spec.rb
|
249
245
|
- spec/lib/loggers/search_spec.rb
|
@@ -263,6 +259,7 @@ files:
|
|
263
259
|
- spec/lib/results_spec.rb
|
264
260
|
- spec/lib/search_spec.rb
|
265
261
|
- spec/lib/solr/schema_generator_spec.rb
|
262
|
+
- spec/lib/sources/base_spec.rb
|
266
263
|
- spec/lib/sources/couch_spec.rb
|
267
264
|
- spec/lib/sources/csv_spec.rb
|
268
265
|
- spec/lib/sources/db_spec.rb
|
@@ -326,6 +323,7 @@ test_files:
|
|
326
323
|
- spec/lib/categories_indexed_spec.rb
|
327
324
|
- spec/lib/category_indexed_spec.rb
|
328
325
|
- spec/lib/category_indexing_spec.rb
|
326
|
+
- spec/lib/category_spec.rb
|
329
327
|
- spec/lib/character_substituters/west_european_spec.rb
|
330
328
|
- spec/lib/cores_spec.rb
|
331
329
|
- spec/lib/extensions/array_spec.rb
|
@@ -365,10 +363,10 @@ test_files:
|
|
365
363
|
- spec/lib/indexes_indexed_spec.rb
|
366
364
|
- spec/lib/indexes_indexing_spec.rb
|
367
365
|
- spec/lib/indexes_spec.rb
|
366
|
+
- spec/lib/indexing/bundle/base_spec.rb
|
368
367
|
- spec/lib/indexing/bundle/memory_partial_generation_speed_spec.rb
|
369
368
|
- spec/lib/indexing/bundle/memory_spec.rb
|
370
369
|
- spec/lib/indexing/bundle/redis_spec.rb
|
371
|
-
- spec/lib/indexing/bundle/super_base_spec.rb
|
372
370
|
- spec/lib/interfaces/live_parameters_spec.rb
|
373
371
|
- spec/lib/loader_spec.rb
|
374
372
|
- spec/lib/loggers/search_spec.rb
|
@@ -388,6 +386,7 @@ test_files:
|
|
388
386
|
- spec/lib/results_spec.rb
|
389
387
|
- spec/lib/search_spec.rb
|
390
388
|
- spec/lib/solr/schema_generator_spec.rb
|
389
|
+
- spec/lib/sources/base_spec.rb
|
391
390
|
- spec/lib/sources/couch_spec.rb
|
392
391
|
- spec/lib/sources/csv_spec.rb
|
393
392
|
- spec/lib/sources/db_spec.rb
|
data/lib/picky/indexers/solr.rb
DELETED
@@ -1,56 +0,0 @@
|
|
1
|
-
# encoding: utf-8
|
2
|
-
#
|
3
|
-
require 'rsolr'
|
4
|
-
|
5
|
-
module Indexers
|
6
|
-
|
7
|
-
# Deprecated. Only here as an example.
|
8
|
-
#
|
9
|
-
class Solr # :nodoc:
|
10
|
-
|
11
|
-
attr_reader :type, :fields, :solr
|
12
|
-
|
13
|
-
# Takes a Configuration::Type.
|
14
|
-
#
|
15
|
-
def initialize type
|
16
|
-
@type = type
|
17
|
-
@fields = type.solr_fields.map(&:name).map(&:to_sym)
|
18
|
-
@solr = RSolr.connect
|
19
|
-
end
|
20
|
-
|
21
|
-
def index
|
22
|
-
timed_exclaim "Indexing solr for #{type.name}:#{fields.join(', ')}"
|
23
|
-
statement = "SELECT indexed_id, #{fields.join(',')} FROM #{type.snapshot_table_name}"
|
24
|
-
|
25
|
-
DB.connect
|
26
|
-
results = DB.connection.execute statement
|
27
|
-
|
28
|
-
return unless results
|
29
|
-
|
30
|
-
type_name = @type.name.to_s
|
31
|
-
|
32
|
-
solr.delete_by_query "type:#{type_name}"
|
33
|
-
solr.commit
|
34
|
-
|
35
|
-
documents = []
|
36
|
-
|
37
|
-
results.each do |indexed_id, *values|
|
38
|
-
values.each &:downcase!
|
39
|
-
documents << hashed(values).merge(id: indexed_id, type: type_name)
|
40
|
-
end
|
41
|
-
|
42
|
-
solr.add documents
|
43
|
-
solr.commit
|
44
|
-
solr.optimize
|
45
|
-
end
|
46
|
-
|
47
|
-
def hashed values
|
48
|
-
result = {}
|
49
|
-
fields.zip(values).each do |field, value|
|
50
|
-
result[field] = value
|
51
|
-
end
|
52
|
-
result
|
53
|
-
end
|
54
|
-
|
55
|
-
end
|
56
|
-
end
|
data/lib/picky/indexing/bundle/super_base.rb
DELETED
@@ -1,61 +0,0 @@
|
|
1
|
-
# TODO Merge into Base, extract common with Indexed::Base.
|
2
|
-
#
|
3
|
-
module Indexing # :nodoc:all
|
4
|
-
# A Bundle is a number of indexes
|
5
|
-
# per [index, category] combination.
|
6
|
-
#
|
7
|
-
# At most, there are three indexes:
|
8
|
-
# * *core* index (always used)
|
9
|
-
# * *weights* index (always used)
|
10
|
-
# * *similarity* index (used with similarity)
|
11
|
-
#
|
12
|
-
# In Picky, indexing is separated from the index
|
13
|
-
# handling itself through a parallel structure.
|
14
|
-
#
|
15
|
-
# Both use methods provided by this base class, but
|
16
|
-
# have very different goals:
|
17
|
-
#
|
18
|
-
# * *Indexing*::*Bundle* is just concerned with creating index files
|
19
|
-
# and providing helper functions to e.g. check the indexes.
|
20
|
-
#
|
21
|
-
# * *Index*::*Bundle* is concerned with loading these index files into
|
22
|
-
# memory and looking up search data as fast as possible.
|
23
|
-
#
|
24
|
-
module Bundle
|
25
|
-
|
26
|
-
class SuperBase
|
27
|
-
|
28
|
-
attr_reader :identifier, :files
|
29
|
-
attr_accessor :index, :weights, :similarity, :configuration, :similarity_strategy
|
30
|
-
|
31
|
-
delegate :clear, :to => :index
|
32
|
-
delegate :[], :[]=, :to => :configuration
|
33
|
-
|
34
|
-
def initialize name, category, similarity_strategy
|
35
|
-
@identifier = "#{category.identifier}:#{name}"
|
36
|
-
@files = Backend::Files.new name, category
|
37
|
-
|
38
|
-
@index = {}
|
39
|
-
@weights = {}
|
40
|
-
@similarity = {}
|
41
|
-
@configuration = {} # A hash with config options.
|
42
|
-
|
43
|
-
@similarity_strategy = similarity_strategy
|
44
|
-
end
|
45
|
-
|
46
|
-
# Get a list of similar texts.
|
47
|
-
#
|
48
|
-
# Note: Does not return itself.
|
49
|
-
#
|
50
|
-
def similar text
|
51
|
-
code = similarity_strategy.encoded text
|
52
|
-
similar_codes = code && @similarity[code]
|
53
|
-
similar_codes.delete text if similar_codes
|
54
|
-
similar_codes || []
|
55
|
-
end
|
56
|
-
|
57
|
-
end
|
58
|
-
|
59
|
-
end
|
60
|
-
|
61
|
-
end
|
data/lib/picky/solr/schema_generator.rb
DELETED
@@ -1,74 +0,0 @@
|
|
1
|
-
module Solr # :nodoc:all
|
2
|
-
|
3
|
-
class SchemaGenerator
|
4
|
-
|
5
|
-
attr_reader :types
|
6
|
-
|
7
|
-
# Takes an array of index type configs.
|
8
|
-
#
|
9
|
-
def initialize configuration
|
10
|
-
@types = configuration.types
|
11
|
-
end
|
12
|
-
|
13
|
-
#
|
14
|
-
#
|
15
|
-
def generate
|
16
|
-
generate_schema_for bound_field_names
|
17
|
-
end
|
18
|
-
|
19
|
-
# Returns a binding with the values needed for the schema xml.
|
20
|
-
#
|
21
|
-
def bound_field_names
|
22
|
-
field_names = combine_field_names
|
23
|
-
binding
|
24
|
-
end
|
25
|
-
|
26
|
-
# TODO
|
27
|
-
#
|
28
|
-
def combine_field_names
|
29
|
-
field_names = []
|
30
|
-
types.each do |type|
|
31
|
-
field_names += type.solr_fields.map(&:name)
|
32
|
-
end
|
33
|
-
field_names.uniq!
|
34
|
-
field_names
|
35
|
-
end
|
36
|
-
|
37
|
-
#
|
38
|
-
#
|
39
|
-
def generate_schema_for binding
|
40
|
-
template_text = read_template
|
41
|
-
result = evaluate_erb template_text, binding
|
42
|
-
write result
|
43
|
-
end
|
44
|
-
|
45
|
-
#
|
46
|
-
#
|
47
|
-
def evaluate_erb text, binding
|
48
|
-
require 'erb'
|
49
|
-
template = ERB.new text
|
50
|
-
template.result binding
|
51
|
-
end
|
52
|
-
|
53
|
-
#
|
54
|
-
#
|
55
|
-
def read_template
|
56
|
-
template_path = File.join PICKY_ROOT, 'solr', 'conf', 'schema.xml.erb'
|
57
|
-
schema = ''
|
58
|
-
File.open(template_path, 'r') do |file|
|
59
|
-
schema = file.read
|
60
|
-
end
|
61
|
-
schema
|
62
|
-
end
|
63
|
-
|
64
|
-
#
|
65
|
-
#
|
66
|
-
def write result
|
67
|
-
schema_path = File.join PICKY_ROOT, 'solr', 'conf', 'schema.xml'
|
68
|
-
File.open(schema_path, 'w') do |file|
|
69
|
-
file << result
|
70
|
-
end
|
71
|
-
end
|
72
|
-
|
73
|
-
end
|
74
|
-
end
|
data/lib/tasks/search.rake
DELETED
@@ -1,9 +0,0 @@
|
|
1
|
-
# Tasks for testing your engine configuration in the terminal.
|
2
|
-
#
|
3
|
-
desc 'Simple terminal search - pass it an URL to search on, e.g. /books.'
|
4
|
-
task :search do
|
5
|
-
puts <<-DEPRECATED
|
6
|
-
Deprecated. New usage:
|
7
|
-
picky search <URL> [<result id amount = 20>]
|
8
|
-
DEPRECATED
|
9
|
-
end
|
data/lib/tasks/shortcuts.rake
DELETED
@@ -1,32 +0,0 @@
|
|
1
|
-
# Shortcut tasks.
|
2
|
-
#
|
3
|
-
|
4
|
-
desc "Generate the index (random order)."
|
5
|
-
task :index => :application do
|
6
|
-
Rake::Task[:'index:randomly'].invoke
|
7
|
-
end
|
8
|
-
|
9
|
-
desc "Try the given text in the indexer/query (index and category optional)."
|
10
|
-
task :try, [:text, :index, :category] => :application do |_, options|
|
11
|
-
text, index, category = options.text, options.index, options.category
|
12
|
-
|
13
|
-
Rake::Task[:'try:both'].invoke text, index, category
|
14
|
-
end
|
15
|
-
|
16
|
-
desc "Application summary."
|
17
|
-
task :stats do
|
18
|
-
Rake::Task[:'stats:app'].invoke
|
19
|
-
end
|
20
|
-
desc "Analyze your indexes (needs rake index)."
|
21
|
-
task :analyze do
|
22
|
-
Rake::Task[:'stats:analyze'].invoke
|
23
|
-
end
|
24
|
-
|
25
|
-
desc "Start the server."
|
26
|
-
task :start do
|
27
|
-
Rake::Task[:'server:start'].invoke
|
28
|
-
end
|
29
|
-
desc "Stop the server."
|
30
|
-
task :stop do
|
31
|
-
Rake::Task[:'server:stop'].invoke
|
32
|
-
end
|
data/lib/tasks/solr.rake
DELETED
@@ -1,36 +0,0 @@
|
|
1
|
-
# sunspot-solr start --solr-home=solr --data-directory=index/development/solr --pid-dir=solr/pids --log-file=log/solr.log
|
2
|
-
|
3
|
-
namespace :solr do
|
4
|
-
|
5
|
-
namespace :schema do
|
6
|
-
task :generate => :application do
|
7
|
-
generator = Solr::SchemaGenerator.new Indexes.configuration
|
8
|
-
generator.generate
|
9
|
-
end
|
10
|
-
end
|
11
|
-
|
12
|
-
|
13
|
-
task :index => :application do
|
14
|
-
Rake::Task[:"solr:start"].invoke
|
15
|
-
sleep 3
|
16
|
-
Indexes.index_solr
|
17
|
-
end
|
18
|
-
|
19
|
-
|
20
|
-
def action name
|
21
|
-
`sunspot-solr #{name} --solr-home=solr --data-directory=index/#{PICKY_ENVIRONMENT}/solr --pid-dir=solr/pids --log-file=log/solr.log`
|
22
|
-
end
|
23
|
-
task :start => :application do
|
24
|
-
Rake::Task['solr:schema:generate'].invoke
|
25
|
-
action :start
|
26
|
-
end
|
27
|
-
task :stop => :application do
|
28
|
-
action :stop
|
29
|
-
end
|
30
|
-
task :restart => :application do
|
31
|
-
action :stop
|
32
|
-
sleep 2
|
33
|
-
action :start
|
34
|
-
end
|
35
|
-
|
36
|
-
end
|