picky 2.6.0 → 2.7.0
Sign up to get free protection for your applications and to get access to all the features.
- data/lib/picky/analyzer.rb +4 -4
- data/lib/picky/application.rb +6 -7
- data/lib/picky/backend/{backend.rb → base.rb} +31 -14
- data/lib/picky/backend/file/basic.rb +12 -4
- data/lib/picky/backend/file/json.rb +5 -5
- data/lib/picky/backend/file/text.rb +1 -1
- data/lib/picky/backend/files.rb +3 -9
- data/lib/picky/backend/redis/basic.rb +8 -0
- data/lib/picky/backend/redis/list_hash.rb +5 -5
- data/lib/picky/backend/redis/string_hash.rb +5 -5
- data/lib/picky/backend/redis.rb +5 -5
- data/lib/picky/bundle.rb +62 -0
- data/lib/picky/categories.rb +10 -9
- data/lib/picky/categories_indexed.rb +12 -7
- data/lib/picky/categories_indexing.rb +7 -9
- data/lib/picky/category.rb +38 -26
- data/lib/picky/category_indexed.rb +4 -20
- data/lib/picky/category_indexing.rb +71 -68
- data/lib/picky/generators/base.rb +6 -6
- data/lib/picky/generators/partial/substring.rb +28 -26
- data/lib/picky/generators/partial_generator.rb +3 -3
- data/lib/picky/generators/similarity/phonetic.rb +5 -5
- data/lib/picky/generators/similarity_generator.rb +2 -2
- data/lib/picky/generators/weights/logarithmic.rb +3 -3
- data/lib/picky/generators/weights_generator.rb +2 -2
- data/lib/picky/index/base.rb +13 -10
- data/lib/picky/index/base_indexed.rb +2 -0
- data/lib/picky/index/base_indexing.rb +65 -57
- data/lib/picky/indexed/bundle/base.rb +21 -86
- data/lib/picky/indexed/bundle/memory.rb +5 -12
- data/lib/picky/indexed/bundle/redis.rb +42 -0
- data/lib/picky/indexed/wrappers/bundle/wrapper.rb +3 -3
- data/lib/picky/indexers/base.rb +20 -3
- data/lib/picky/indexers/parallel.rb +32 -14
- data/lib/picky/indexers/serial.rb +29 -26
- data/lib/picky/indexes.rb +5 -3
- data/lib/picky/indexes_indexed.rb +3 -15
- data/lib/picky/indexes_indexing.rb +18 -21
- data/lib/picky/indexing/bundle/base.rb +64 -45
- data/lib/picky/indexing/bundle/memory.rb +0 -4
- data/lib/picky/loader.rb +7 -6
- data/lib/picky/query/allocation.rb +3 -3
- data/lib/picky/query/token.rb +5 -1
- data/lib/picky/search.rb +5 -0
- data/lib/picky/sources/base.rb +21 -2
- data/lib/picky/sources/db.rb +0 -7
- data/lib/picky/statistics.rb +9 -12
- data/lib/picky/tokenizers/location.rb +1 -1
- data/lib/tasks/checks.rake +8 -6
- data/lib/tasks/index.rake +14 -20
- data/lib/tasks/server.rake +18 -2
- data/lib/tasks/statistics.rake +27 -14
- data/lib/tasks/todo.rake +2 -2
- data/lib/tasks/try.rake +12 -27
- data/spec/lib/application_spec.rb +1 -1
- data/spec/lib/backend/file/basic_spec.rb +6 -6
- data/spec/lib/backend/file/json_spec.rb +11 -6
- data/spec/lib/backend/file/marshal_spec.rb +11 -6
- data/spec/lib/backend/files_spec.rb +21 -7
- data/spec/lib/backend/redis/basic_spec.rb +6 -0
- data/spec/lib/backend/redis/list_hash_spec.rb +9 -3
- data/spec/lib/backend/redis/string_hash_spec.rb +7 -1
- data/spec/lib/backend/redis_spec.rb +22 -12
- data/spec/lib/categories_indexed_spec.rb +2 -2
- data/spec/lib/category_indexing_spec.rb +12 -33
- data/spec/lib/category_spec.rb +22 -0
- data/spec/lib/index/base_indexing_spec.rb +30 -0
- data/spec/lib/indexed/bundle/memory_spec.rb +13 -20
- data/spec/lib/indexers/base_spec.rb +39 -4
- data/spec/lib/indexers/parallel_spec.rb +2 -10
- data/spec/lib/indexers/serial_spec.rb +11 -26
- data/spec/lib/indexes_class_spec.rb +4 -4
- data/spec/lib/indexes_indexed_spec.rb +2 -2
- data/spec/lib/indexes_indexing_spec.rb +6 -10
- data/spec/lib/indexes_spec.rb +3 -3
- data/spec/lib/indexing/bundle/{super_base_spec.rb → base_spec.rb} +2 -2
- data/spec/lib/indexing/bundle/memory_partial_generation_speed_spec.rb +3 -3
- data/spec/lib/indexing/bundle/memory_spec.rb +16 -14
- data/spec/lib/indexing/bundle/redis_spec.rb +18 -16
- data/spec/lib/query/allocation_spec.rb +1 -1
- data/spec/lib/query/token_spec.rb +5 -7
- data/spec/lib/sources/base_spec.rb +53 -0
- data/spec/lib/sources/db_spec.rb +0 -7
- metadata +11 -12
- data/lib/picky/indexers/solr.rb +0 -56
- data/lib/picky/indexing/bundle/super_base.rb +0 -61
- data/lib/picky/solr/schema_generator.rb +0 -74
- data/lib/tasks/search.rake +0 -9
- data/lib/tasks/shortcuts.rake +0 -32
- data/lib/tasks/solr.rake +0 -36
metadata
CHANGED
@@ -2,7 +2,7 @@
|
|
2
2
|
name: picky
|
3
3
|
version: !ruby/object:Gem::Version
|
4
4
|
prerelease:
|
5
|
-
version: 2.6.0
|
5
|
+
version: 2.7.0
|
6
6
|
platform: ruby
|
7
7
|
authors:
|
8
8
|
- Florian Hanke
|
@@ -10,7 +10,7 @@ autorequire:
|
|
10
10
|
bindir: bin
|
11
11
|
cert_chain: []
|
12
12
|
|
13
|
-
date: 2011-07-
|
13
|
+
date: 2011-07-07 00:00:00 +10:00
|
14
14
|
default_executable: picky
|
15
15
|
dependencies:
|
16
16
|
- !ruby/object:Gem::Dependency
|
@@ -32,7 +32,7 @@ dependencies:
|
|
32
32
|
requirements:
|
33
33
|
- - "="
|
34
34
|
- !ruby/object:Gem::Version
|
35
|
-
version: 2.6.0
|
35
|
+
version: 2.7.0
|
36
36
|
type: :development
|
37
37
|
version_requirements: *id002
|
38
38
|
description: Fast Ruby semantic text search engine with comfortable single field interface.
|
@@ -54,7 +54,7 @@ files:
|
|
54
54
|
- lib/picky/adapters/rack.rb
|
55
55
|
- lib/picky/analyzer.rb
|
56
56
|
- lib/picky/application.rb
|
57
|
-
- lib/picky/backend/backend.rb
|
57
|
+
- lib/picky/backend/base.rb
|
58
58
|
- lib/picky/backend/file/basic.rb
|
59
59
|
- lib/picky/backend/file/json.rb
|
60
60
|
- lib/picky/backend/file/marshal.rb
|
@@ -64,6 +64,7 @@ files:
|
|
64
64
|
- lib/picky/backend/redis/list_hash.rb
|
65
65
|
- lib/picky/backend/redis/string_hash.rb
|
66
66
|
- lib/picky/backend/redis.rb
|
67
|
+
- lib/picky/bundle.rb
|
67
68
|
- lib/picky/calculations/location.rb
|
68
69
|
- lib/picky/categories.rb
|
69
70
|
- lib/picky/categories_indexed.rb
|
@@ -119,14 +120,12 @@ files:
|
|
119
120
|
- lib/picky/indexers/base.rb
|
120
121
|
- lib/picky/indexers/parallel.rb
|
121
122
|
- lib/picky/indexers/serial.rb
|
122
|
-
- lib/picky/indexers/solr.rb
|
123
123
|
- lib/picky/indexes.rb
|
124
124
|
- lib/picky/indexes_indexed.rb
|
125
125
|
- lib/picky/indexes_indexing.rb
|
126
126
|
- lib/picky/indexing/bundle/base.rb
|
127
127
|
- lib/picky/indexing/bundle/memory.rb
|
128
128
|
- lib/picky/indexing/bundle/redis.rb
|
129
|
-
- lib/picky/indexing/bundle/super_base.rb
|
130
129
|
- lib/picky/indexing/wrappers/category/location.rb
|
131
130
|
- lib/picky/interfaces/live_parameters.rb
|
132
131
|
- lib/picky/loader.rb
|
@@ -150,7 +149,6 @@ files:
|
|
150
149
|
- lib/picky/results.rb
|
151
150
|
- lib/picky/search.rb
|
152
151
|
- lib/picky/signals.rb
|
153
|
-
- lib/picky/solr/schema_generator.rb
|
154
152
|
- lib/picky/sources/base.rb
|
155
153
|
- lib/picky/sources/couch.rb
|
156
154
|
- lib/picky/sources/csv.rb
|
@@ -171,10 +169,7 @@ files:
|
|
171
169
|
- lib/tasks/framework.rake
|
172
170
|
- lib/tasks/index.rake
|
173
171
|
- lib/tasks/routes.rake
|
174
|
-
- lib/tasks/search.rake
|
175
172
|
- lib/tasks/server.rake
|
176
|
-
- lib/tasks/shortcuts.rake
|
177
|
-
- lib/tasks/solr.rake
|
178
173
|
- lib/tasks/spec.rake
|
179
174
|
- lib/tasks/statistics.rake
|
180
175
|
- lib/tasks/todo.rake
|
@@ -201,6 +196,7 @@ files:
|
|
201
196
|
- spec/lib/categories_indexed_spec.rb
|
202
197
|
- spec/lib/category_indexed_spec.rb
|
203
198
|
- spec/lib/category_indexing_spec.rb
|
199
|
+
- spec/lib/category_spec.rb
|
204
200
|
- spec/lib/character_substituters/west_european_spec.rb
|
205
201
|
- spec/lib/cores_spec.rb
|
206
202
|
- spec/lib/extensions/array_spec.rb
|
@@ -240,10 +236,10 @@ files:
|
|
240
236
|
- spec/lib/indexes_indexed_spec.rb
|
241
237
|
- spec/lib/indexes_indexing_spec.rb
|
242
238
|
- spec/lib/indexes_spec.rb
|
239
|
+
- spec/lib/indexing/bundle/base_spec.rb
|
243
240
|
- spec/lib/indexing/bundle/memory_partial_generation_speed_spec.rb
|
244
241
|
- spec/lib/indexing/bundle/memory_spec.rb
|
245
242
|
- spec/lib/indexing/bundle/redis_spec.rb
|
246
|
-
- spec/lib/indexing/bundle/super_base_spec.rb
|
247
243
|
- spec/lib/interfaces/live_parameters_spec.rb
|
248
244
|
- spec/lib/loader_spec.rb
|
249
245
|
- spec/lib/loggers/search_spec.rb
|
@@ -263,6 +259,7 @@ files:
|
|
263
259
|
- spec/lib/results_spec.rb
|
264
260
|
- spec/lib/search_spec.rb
|
265
261
|
- spec/lib/solr/schema_generator_spec.rb
|
262
|
+
- spec/lib/sources/base_spec.rb
|
266
263
|
- spec/lib/sources/couch_spec.rb
|
267
264
|
- spec/lib/sources/csv_spec.rb
|
268
265
|
- spec/lib/sources/db_spec.rb
|
@@ -326,6 +323,7 @@ test_files:
|
|
326
323
|
- spec/lib/categories_indexed_spec.rb
|
327
324
|
- spec/lib/category_indexed_spec.rb
|
328
325
|
- spec/lib/category_indexing_spec.rb
|
326
|
+
- spec/lib/category_spec.rb
|
329
327
|
- spec/lib/character_substituters/west_european_spec.rb
|
330
328
|
- spec/lib/cores_spec.rb
|
331
329
|
- spec/lib/extensions/array_spec.rb
|
@@ -365,10 +363,10 @@ test_files:
|
|
365
363
|
- spec/lib/indexes_indexed_spec.rb
|
366
364
|
- spec/lib/indexes_indexing_spec.rb
|
367
365
|
- spec/lib/indexes_spec.rb
|
366
|
+
- spec/lib/indexing/bundle/base_spec.rb
|
368
367
|
- spec/lib/indexing/bundle/memory_partial_generation_speed_spec.rb
|
369
368
|
- spec/lib/indexing/bundle/memory_spec.rb
|
370
369
|
- spec/lib/indexing/bundle/redis_spec.rb
|
371
|
-
- spec/lib/indexing/bundle/super_base_spec.rb
|
372
370
|
- spec/lib/interfaces/live_parameters_spec.rb
|
373
371
|
- spec/lib/loader_spec.rb
|
374
372
|
- spec/lib/loggers/search_spec.rb
|
@@ -388,6 +386,7 @@ test_files:
|
|
388
386
|
- spec/lib/results_spec.rb
|
389
387
|
- spec/lib/search_spec.rb
|
390
388
|
- spec/lib/solr/schema_generator_spec.rb
|
389
|
+
- spec/lib/sources/base_spec.rb
|
391
390
|
- spec/lib/sources/couch_spec.rb
|
392
391
|
- spec/lib/sources/csv_spec.rb
|
393
392
|
- spec/lib/sources/db_spec.rb
|
data/lib/picky/indexers/solr.rb
DELETED
@@ -1,56 +0,0 @@
|
|
1
|
-
# encoding: utf-8
|
2
|
-
#
|
3
|
-
require 'rsolr'
|
4
|
-
|
5
|
-
module Indexers
|
6
|
-
|
7
|
-
# Deprecated. Only here as an example.
|
8
|
-
#
|
9
|
-
class Solr # :nodoc:
|
10
|
-
|
11
|
-
attr_reader :type, :fields, :solr
|
12
|
-
|
13
|
-
# Takes a Configuration::Type.
|
14
|
-
#
|
15
|
-
def initialize type
|
16
|
-
@type = type
|
17
|
-
@fields = type.solr_fields.map(&:name).map(&:to_sym)
|
18
|
-
@solr = RSolr.connect
|
19
|
-
end
|
20
|
-
|
21
|
-
def index
|
22
|
-
timed_exclaim "Indexing solr for #{type.name}:#{fields.join(', ')}"
|
23
|
-
statement = "SELECT indexed_id, #{fields.join(',')} FROM #{type.snapshot_table_name}"
|
24
|
-
|
25
|
-
DB.connect
|
26
|
-
results = DB.connection.execute statement
|
27
|
-
|
28
|
-
return unless results
|
29
|
-
|
30
|
-
type_name = @type.name.to_s
|
31
|
-
|
32
|
-
solr.delete_by_query "type:#{type_name}"
|
33
|
-
solr.commit
|
34
|
-
|
35
|
-
documents = []
|
36
|
-
|
37
|
-
results.each do |indexed_id, *values|
|
38
|
-
values.each &:downcase!
|
39
|
-
documents << hashed(values).merge(id: indexed_id, type: type_name)
|
40
|
-
end
|
41
|
-
|
42
|
-
solr.add documents
|
43
|
-
solr.commit
|
44
|
-
solr.optimize
|
45
|
-
end
|
46
|
-
|
47
|
-
def hashed values
|
48
|
-
result = {}
|
49
|
-
fields.zip(values).each do |field, value|
|
50
|
-
result[field] = value
|
51
|
-
end
|
52
|
-
result
|
53
|
-
end
|
54
|
-
|
55
|
-
end
|
56
|
-
end
|
data/lib/picky/indexing/bundle/super_base.rb
DELETED
@@ -1,61 +0,0 @@
|
|
1
|
-
# TODO Merge into Base, extract common with Indexed::Base.
|
2
|
-
#
|
3
|
-
module Indexing # :nodoc:all
|
4
|
-
# A Bundle is a number of indexes
|
5
|
-
# per [index, category] combination.
|
6
|
-
#
|
7
|
-
# At most, there are three indexes:
|
8
|
-
# * *core* index (always used)
|
9
|
-
# * *weights* index (always used)
|
10
|
-
# * *similarity* index (used with similarity)
|
11
|
-
#
|
12
|
-
# In Picky, indexing is separated from the index
|
13
|
-
# handling itself through a parallel structure.
|
14
|
-
#
|
15
|
-
# Both use methods provided by this base class, but
|
16
|
-
# have very different goals:
|
17
|
-
#
|
18
|
-
# * *Indexing*::*Bundle* is just concerned with creating index files
|
19
|
-
# and providing helper functions to e.g. check the indexes.
|
20
|
-
#
|
21
|
-
# * *Index*::*Bundle* is concerned with loading these index files into
|
22
|
-
# memory and looking up search data as fast as possible.
|
23
|
-
#
|
24
|
-
module Bundle
|
25
|
-
|
26
|
-
class SuperBase
|
27
|
-
|
28
|
-
attr_reader :identifier, :files
|
29
|
-
attr_accessor :index, :weights, :similarity, :configuration, :similarity_strategy
|
30
|
-
|
31
|
-
delegate :clear, :to => :index
|
32
|
-
delegate :[], :[]=, :to => :configuration
|
33
|
-
|
34
|
-
def initialize name, category, similarity_strategy
|
35
|
-
@identifier = "#{category.identifier}:#{name}"
|
36
|
-
@files = Backend::Files.new name, category
|
37
|
-
|
38
|
-
@index = {}
|
39
|
-
@weights = {}
|
40
|
-
@similarity = {}
|
41
|
-
@configuration = {} # A hash with config options.
|
42
|
-
|
43
|
-
@similarity_strategy = similarity_strategy
|
44
|
-
end
|
45
|
-
|
46
|
-
# Get a list of similar texts.
|
47
|
-
#
|
48
|
-
# Note: Does not return itself.
|
49
|
-
#
|
50
|
-
def similar text
|
51
|
-
code = similarity_strategy.encoded text
|
52
|
-
similar_codes = code && @similarity[code]
|
53
|
-
similar_codes.delete text if similar_codes
|
54
|
-
similar_codes || []
|
55
|
-
end
|
56
|
-
|
57
|
-
end
|
58
|
-
|
59
|
-
end
|
60
|
-
|
61
|
-
end
|
data/lib/picky/solr/schema_generator.rb
DELETED
@@ -1,74 +0,0 @@
|
|
1
|
-
module Solr # :nodoc:all
|
2
|
-
|
3
|
-
class SchemaGenerator
|
4
|
-
|
5
|
-
attr_reader :types
|
6
|
-
|
7
|
-
# Takes an array of index type configs.
|
8
|
-
#
|
9
|
-
def initialize configuration
|
10
|
-
@types = configuration.types
|
11
|
-
end
|
12
|
-
|
13
|
-
#
|
14
|
-
#
|
15
|
-
def generate
|
16
|
-
generate_schema_for bound_field_names
|
17
|
-
end
|
18
|
-
|
19
|
-
# Returns a binding with the values needed for the schema xml.
|
20
|
-
#
|
21
|
-
def bound_field_names
|
22
|
-
field_names = combine_field_names
|
23
|
-
binding
|
24
|
-
end
|
25
|
-
|
26
|
-
# TODO
|
27
|
-
#
|
28
|
-
def combine_field_names
|
29
|
-
field_names = []
|
30
|
-
types.each do |type|
|
31
|
-
field_names += type.solr_fields.map(&:name)
|
32
|
-
end
|
33
|
-
field_names.uniq!
|
34
|
-
field_names
|
35
|
-
end
|
36
|
-
|
37
|
-
#
|
38
|
-
#
|
39
|
-
def generate_schema_for binding
|
40
|
-
template_text = read_template
|
41
|
-
result = evaluate_erb template_text, binding
|
42
|
-
write result
|
43
|
-
end
|
44
|
-
|
45
|
-
#
|
46
|
-
#
|
47
|
-
def evaluate_erb text, binding
|
48
|
-
require 'erb'
|
49
|
-
template = ERB.new text
|
50
|
-
template.result binding
|
51
|
-
end
|
52
|
-
|
53
|
-
#
|
54
|
-
#
|
55
|
-
def read_template
|
56
|
-
template_path = File.join PICKY_ROOT, 'solr', 'conf', 'schema.xml.erb'
|
57
|
-
schema = ''
|
58
|
-
File.open(template_path, 'r') do |file|
|
59
|
-
schema = file.read
|
60
|
-
end
|
61
|
-
schema
|
62
|
-
end
|
63
|
-
|
64
|
-
#
|
65
|
-
#
|
66
|
-
def write result
|
67
|
-
schema_path = File.join PICKY_ROOT, 'solr', 'conf', 'schema.xml'
|
68
|
-
File.open(schema_path, 'w') do |file|
|
69
|
-
file << result
|
70
|
-
end
|
71
|
-
end
|
72
|
-
|
73
|
-
end
|
74
|
-
end
|
data/lib/tasks/search.rake
DELETED
@@ -1,9 +0,0 @@
|
|
1
|
-
# Tasks for testing your engine configuration in the terminal.
|
2
|
-
#
|
3
|
-
desc 'Simple terminal search - pass it an URL to search on, e.g. /books.'
|
4
|
-
task :search do
|
5
|
-
puts <<-DEPRECATED
|
6
|
-
Deprecated. New usage:
|
7
|
-
picky search <URL> [<result id amount = 20>]
|
8
|
-
DEPRECATED
|
9
|
-
end
|
data/lib/tasks/shortcuts.rake
DELETED
@@ -1,32 +0,0 @@
|
|
1
|
-
# Shortcut tasks.
|
2
|
-
#
|
3
|
-
|
4
|
-
desc "Generate the index (random order)."
|
5
|
-
task :index => :application do
|
6
|
-
Rake::Task[:'index:randomly'].invoke
|
7
|
-
end
|
8
|
-
|
9
|
-
desc "Try the given text in the indexer/query (index and category optional)."
|
10
|
-
task :try, [:text, :index, :category] => :application do |_, options|
|
11
|
-
text, index, category = options.text, options.index, options.category
|
12
|
-
|
13
|
-
Rake::Task[:'try:both'].invoke text, index, category
|
14
|
-
end
|
15
|
-
|
16
|
-
desc "Application summary."
|
17
|
-
task :stats do
|
18
|
-
Rake::Task[:'stats:app'].invoke
|
19
|
-
end
|
20
|
-
desc "Analyze your indexes (needs rake index)."
|
21
|
-
task :analyze do
|
22
|
-
Rake::Task[:'stats:analyze'].invoke
|
23
|
-
end
|
24
|
-
|
25
|
-
desc "Start the server."
|
26
|
-
task :start do
|
27
|
-
Rake::Task[:'server:start'].invoke
|
28
|
-
end
|
29
|
-
desc "Stop the server."
|
30
|
-
task :stop do
|
31
|
-
Rake::Task[:'server:stop'].invoke
|
32
|
-
end
|
data/lib/tasks/solr.rake
DELETED
@@ -1,36 +0,0 @@
|
|
1
|
-
# sunspot-solr start --solr-home=solr --data-directory=index/development/solr --pid-dir=solr/pids --log-file=log/solr.log
|
2
|
-
|
3
|
-
namespace :solr do
|
4
|
-
|
5
|
-
namespace :schema do
|
6
|
-
task :generate => :application do
|
7
|
-
generator = Solr::SchemaGenerator.new Indexes.configuration
|
8
|
-
generator.generate
|
9
|
-
end
|
10
|
-
end
|
11
|
-
|
12
|
-
|
13
|
-
task :index => :application do
|
14
|
-
Rake::Task[:"solr:start"].invoke
|
15
|
-
sleep 3
|
16
|
-
Indexes.index_solr
|
17
|
-
end
|
18
|
-
|
19
|
-
|
20
|
-
def action name
|
21
|
-
`sunspot-solr #{name} --solr-home=solr --data-directory=index/#{PICKY_ENVIRONMENT}/solr --pid-dir=solr/pids --log-file=log/solr.log`
|
22
|
-
end
|
23
|
-
task :start => :application do
|
24
|
-
Rake::Task['solr:schema:generate'].invoke
|
25
|
-
action :start
|
26
|
-
end
|
27
|
-
task :stop => :application do
|
28
|
-
action :stop
|
29
|
-
end
|
30
|
-
task :restart => :application do
|
31
|
-
action :stop
|
32
|
-
sleep 2
|
33
|
-
action :start
|
34
|
-
end
|
35
|
-
|
36
|
-
end
|