picky 4.0.0pre1 → 4.0.0pre2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/aux/picky/cli.rb +6 -2
- data/lib/picky.rb +10 -8
- data/lib/picky/backends/backend.rb +37 -0
- data/lib/picky/backends/file.rb +0 -20
- data/lib/picky/backends/memory.rb +0 -29
- data/lib/picky/backends/redis.rb +74 -15
- data/lib/picky/backends/redis/list.rb +1 -1
- data/lib/picky/backends/sqlite.rb +0 -27
- data/lib/picky/bundle.rb +2 -2
- data/lib/picky/bundle_indexed.rb +1 -1
- data/lib/picky/bundle_indexing.rb +1 -1
- data/lib/picky/categories_indexed.rb +1 -11
- data/lib/picky/category.rb +4 -4
- data/lib/picky/category/location.rb +25 -0
- data/lib/picky/category_realtime.rb +4 -3
- data/lib/picky/console.rb +1 -1
- data/lib/picky/constants.rb +1 -1
- data/lib/picky/ext/maybe_compile.rb +2 -2
- data/lib/picky/extensions/object.rb +3 -2
- data/lib/picky/generators/aliases.rb +7 -2
- data/lib/picky/generators/partial/default.rb +1 -0
- data/lib/picky/generators/similarity/default.rb +1 -0
- data/lib/picky/generators/similarity/phonetic.rb +13 -2
- data/lib/picky/generators/strategy.rb +0 -2
- data/lib/picky/generators/weights/constant.rb +1 -2
- data/lib/picky/generators/weights/default.rb +1 -0
- data/lib/picky/generators/weights/dynamic.rb +1 -1
- data/lib/picky/generators/weights/logarithmic.rb +1 -1
- data/lib/picky/generators/weights/{runtime.rb → stub.rb} +1 -3
- data/lib/picky/index.rb +3 -3
- data/lib/picky/index_indexing.rb +0 -2
- data/lib/picky/index_realtime.rb +1 -1
- data/lib/picky/indexers/base.rb +7 -0
- data/lib/picky/indexers/parallel.rb +2 -4
- data/lib/picky/indexers/serial.rb +2 -0
- data/lib/picky/indexes_indexing.rb +1 -1
- data/lib/picky/interfaces/live_parameters/master_child.rb +175 -0
- data/lib/picky/interfaces/live_parameters/unicorn.rb +37 -0
- data/lib/picky/loader.rb +238 -259
- data/lib/picky/query/allocation.rb +19 -10
- data/lib/picky/query/combination.rb +7 -1
- data/lib/picky/query/combinations.rb +1 -6
- data/lib/picky/query/token.rb +26 -36
- data/lib/picky/results.rb +18 -17
- data/lib/picky/scheduler.rb +2 -1
- data/lib/picky/search.rb +1 -1
- data/lib/picky/sinatra.rb +6 -6
- data/lib/picky/statistics.rb +2 -0
- data/lib/picky/tokenizer.rb +8 -8
- data/lib/picky/wrappers/bundle/calculation.rb +4 -4
- data/lib/picky/wrappers/bundle/location.rb +1 -2
- data/lib/tasks/framework.rake +1 -1
- data/lib/tasks/statistics.rake +1 -1
- data/lib/tasks/try.rake +1 -1
- data/lib/tasks/try.rb +1 -1
- data/spec/aux/picky/cli_spec.rb +12 -12
- data/spec/ext/performant_spec.rb +16 -16
- data/spec/functional/backends/file_spec.rb +78 -7
- data/spec/functional/backends/memory_spec.rb +78 -7
- data/spec/functional/backends/redis_spec.rb +73 -13
- data/spec/functional/dynamic_weights_spec.rb +3 -4
- data/spec/functional/realtime_spec.rb +2 -2
- data/spec/functional/speed_spec.rb +2 -2
- data/spec/functional/terminate_early_spec.rb +3 -3
- data/spec/lib/analytics_spec.rb +1 -1
- data/spec/lib/analyzer_spec.rb +5 -3
- data/spec/lib/categories_indexed_spec.rb +38 -20
- data/spec/lib/category/location_spec.rb +30 -0
- data/spec/lib/character_substituters/west_european_spec.rb +1 -0
- data/spec/lib/extensions/hash_spec.rb +6 -5
- data/spec/lib/extensions/module_spec.rb +6 -6
- data/spec/lib/extensions/object_spec.rb +9 -8
- data/spec/lib/extensions/string_spec.rb +1 -1
- data/spec/lib/generators/similarity/phonetic_spec.rb +11 -0
- data/spec/lib/index_realtime_spec.rb +5 -5
- data/spec/lib/interfaces/{live_parameters_spec.rb → live_parameters/master_child_spec.rb} +26 -26
- data/spec/lib/interfaces/live_parameters/unicorn_spec.rb +160 -0
- data/spec/lib/loader_spec.rb +65 -25
- data/spec/lib/query/allocation_spec.rb +25 -22
- data/spec/lib/query/combinations_spec.rb +13 -36
- data/spec/lib/query/token_spec.rb +144 -131
- data/spec/lib/query/tokens_spec.rb +14 -0
- data/spec/lib/results_spec.rb +14 -8
- data/spec/lib/search_spec.rb +1 -1
- data/spec/lib/sinatra_spec.rb +8 -8
- metadata +28 -91
- data/lib/picky/adapters/rack.rb +0 -34
- data/lib/picky/adapters/rack/base.rb +0 -27
- data/lib/picky/adapters/rack/live_parameters.rb +0 -37
- data/lib/picky/adapters/rack/search.rb +0 -67
- data/lib/picky/application.rb +0 -268
- data/lib/picky/frontend_adapters/rack.rb +0 -161
- data/lib/picky/interfaces/live_parameters.rb +0 -187
- data/lib/picky/sources/base.rb +0 -92
- data/lib/picky/sources/couch.rb +0 -76
- data/lib/picky/sources/csv.rb +0 -83
- data/lib/picky/sources/db.rb +0 -189
- data/lib/picky/sources/delicious.rb +0 -63
- data/lib/picky/sources/mongo.rb +0 -80
- data/lib/picky/wrappers/category/location.rb +0 -38
- data/lib/tasks/routes.rake +0 -8
- data/spec/lib/adapters/rack/base_spec.rb +0 -24
- data/spec/lib/adapters/rack/live_parameters_spec.rb +0 -26
- data/spec/lib/adapters/rack/query_spec.rb +0 -39
- data/spec/lib/application_spec.rb +0 -155
- data/spec/lib/frontend_adapters/rack_spec.rb +0 -294
- data/spec/lib/sources/base_spec.rb +0 -53
- data/spec/lib/sources/couch_spec.rb +0 -114
- data/spec/lib/sources/csv_spec.rb +0 -89
- data/spec/lib/sources/db_spec.rb +0 -125
- data/spec/lib/sources/delicious_spec.rb +0 -94
- data/spec/lib/sources/mongo_spec.rb +0 -50
data/aux/picky/cli.rb
CHANGED
|
@@ -38,7 +38,11 @@ module Picky
|
|
|
38
38
|
def execute name, args, params
|
|
39
39
|
commands = Picky::CLI.mapping.map do |command, object_and_params|
|
|
40
40
|
_, *params = object_and_params
|
|
41
|
-
|
|
41
|
+
ary = []
|
|
42
|
+
ary << " picky"
|
|
43
|
+
ary << command
|
|
44
|
+
ary << params_to_s(params) unless params.empty?
|
|
45
|
+
ary.join ' '
|
|
42
46
|
end.join(?\n)
|
|
43
47
|
|
|
44
48
|
Kernel.puts "Possible commands:\n#{commands}\n"
|
|
@@ -103,7 +107,7 @@ module Picky
|
|
|
103
107
|
# TODO Try to load the other gems and get the commands dynamically.
|
|
104
108
|
#
|
|
105
109
|
@@mapping = {
|
|
106
|
-
:generate => [Generate, :'{client,server,
|
|
110
|
+
:generate => [Generate, :'{client,server,all_in_one}', :'app_directory_name'],
|
|
107
111
|
:help => [Help],
|
|
108
112
|
:live => [Live, 'host:port/path (default: localhost:8080/admin)', 'port (default: 4568)'],
|
|
109
113
|
:search => [Search, :url_or_path, 'amount of ids (default 20)'],
|
data/lib/picky.rb
CHANGED
|
@@ -2,20 +2,22 @@ module Picky
|
|
|
2
2
|
|
|
3
3
|
# External libraries.
|
|
4
4
|
#
|
|
5
|
-
require 'active_support/core_ext'
|
|
6
|
-
require '
|
|
7
|
-
require '
|
|
8
|
-
require '
|
|
9
|
-
require '
|
|
5
|
+
require 'active_support/core_ext/module/delegation'
|
|
6
|
+
require 'active_support/core_ext/logger'
|
|
7
|
+
require 'active_support/core_ext/object/blank'
|
|
8
|
+
require 'active_support/core_ext/enumerable'
|
|
9
|
+
require 'active_support/multibyte'
|
|
10
|
+
require 'yajl'
|
|
10
11
|
require 'procrastinate'
|
|
12
|
+
require 'rack_fast_escape' if defined? Rack
|
|
11
13
|
|
|
12
14
|
# Require the constants.
|
|
13
15
|
#
|
|
14
|
-
|
|
16
|
+
require_relative 'picky/constants'
|
|
15
17
|
|
|
16
18
|
# Loader which handles framework and app loading.
|
|
17
19
|
#
|
|
18
|
-
|
|
20
|
+
require_relative 'picky/loader'
|
|
19
21
|
|
|
20
22
|
# Load the framework
|
|
21
23
|
#
|
|
@@ -23,7 +25,7 @@ module Picky
|
|
|
23
25
|
|
|
24
26
|
# Check if delegators need to be installed.
|
|
25
27
|
#
|
|
26
|
-
|
|
28
|
+
require_relative 'picky/sinatra'
|
|
27
29
|
|
|
28
30
|
# This is only used in the classic project style.
|
|
29
31
|
#
|
|
@@ -22,6 +22,43 @@ module Picky
|
|
|
22
22
|
thing && (thing.respond_to?(:call) && thing.call(*args) || thing)
|
|
23
23
|
end
|
|
24
24
|
|
|
25
|
+
# Returns the total score of the combinations.
|
|
26
|
+
#
|
|
27
|
+
# Default implementation. Override to speed up.
|
|
28
|
+
#
|
|
29
|
+
def weight combinations
|
|
30
|
+
combinations.score
|
|
31
|
+
end
|
|
32
|
+
|
|
33
|
+
# Returns the result ids for the allocation.
|
|
34
|
+
#
|
|
35
|
+
# Sorts the ids by size and & through them in the following order (sizes):
|
|
36
|
+
# 0. [100_000, 400, 30, 2]
|
|
37
|
+
# 1. [2, 30, 400, 100_000]
|
|
38
|
+
# 2. (100_000 & (400 & (30 & 2))) # => result
|
|
39
|
+
#
|
|
40
|
+
# Note: Uses a C-optimized intersection routine (in performant.c)
|
|
41
|
+
# for speed and memory efficiency.
|
|
42
|
+
#
|
|
43
|
+
# Note: In the memory based version we ignore the amount and
|
|
44
|
+
# offset hints.
|
|
45
|
+
# We cannot use the information to speed up the algorithm,
|
|
46
|
+
# unfortunately.
|
|
47
|
+
#
|
|
48
|
+
def ids combinations, _, _
|
|
49
|
+
# Get the ids for each combination.
|
|
50
|
+
#
|
|
51
|
+
id_arrays = combinations.inject([]) do |total, combination|
|
|
52
|
+
total << combination.ids
|
|
53
|
+
end
|
|
54
|
+
|
|
55
|
+
# Call the optimized C algorithm.
|
|
56
|
+
#
|
|
57
|
+
# Note: It orders the passed arrays by size.
|
|
58
|
+
#
|
|
59
|
+
Performant::Array.memory_efficient_intersect id_arrays
|
|
60
|
+
end
|
|
61
|
+
|
|
25
62
|
#
|
|
26
63
|
#
|
|
27
64
|
def to_s
|
data/lib/picky/backends/file.rb
CHANGED
|
@@ -46,26 +46,6 @@ module Picky
|
|
|
46
46
|
JSON.new(bundle.index_path(:realtime))
|
|
47
47
|
end
|
|
48
48
|
|
|
49
|
-
# Currently, the loaded ids are intersected using
|
|
50
|
-
# the fast C-based intersection.
|
|
51
|
-
#
|
|
52
|
-
# However, if we could come up with a clever way
|
|
53
|
-
# to do this faster, it would be most welcome.
|
|
54
|
-
#
|
|
55
|
-
def ids combinations, _, _
|
|
56
|
-
# Get the ids for each combination.
|
|
57
|
-
#
|
|
58
|
-
id_arrays = combinations.inject([]) do |total, combination|
|
|
59
|
-
total << combination.ids
|
|
60
|
-
end
|
|
61
|
-
|
|
62
|
-
# Call the optimized C algorithm.
|
|
63
|
-
#
|
|
64
|
-
# Note: It orders the passed arrays by size.
|
|
65
|
-
#
|
|
66
|
-
Performant::Array.memory_efficient_intersect id_arrays
|
|
67
|
-
end
|
|
68
|
-
|
|
69
49
|
end
|
|
70
50
|
|
|
71
51
|
end
|
|
@@ -45,35 +45,6 @@ module Picky
|
|
|
45
45
|
JSON.new(bundle.index_path(:realtime))
|
|
46
46
|
end
|
|
47
47
|
|
|
48
|
-
# Returns the result ids for the allocation.
|
|
49
|
-
#
|
|
50
|
-
# Sorts the ids by size and & through them in the following order (sizes):
|
|
51
|
-
# 0. [100_000, 400, 30, 2]
|
|
52
|
-
# 1. [2, 30, 400, 100_000]
|
|
53
|
-
# 2. (100_000 & (400 & (30 & 2))) # => result
|
|
54
|
-
#
|
|
55
|
-
# Note: Uses a C-optimized intersection routine (in performant.c)
|
|
56
|
-
# for speed and memory efficiency.
|
|
57
|
-
#
|
|
58
|
-
# Note: In the memory based version we ignore the amount and
|
|
59
|
-
# offset hints.
|
|
60
|
-
# We cannot use the information to speed up the algorithm,
|
|
61
|
-
# unfortunately.
|
|
62
|
-
#
|
|
63
|
-
def ids combinations, _, _
|
|
64
|
-
# Get the ids for each combination.
|
|
65
|
-
#
|
|
66
|
-
id_arrays = combinations.inject([]) do |total, combination|
|
|
67
|
-
total << combination.ids
|
|
68
|
-
end
|
|
69
|
-
|
|
70
|
-
# Call the optimized C algorithm.
|
|
71
|
-
#
|
|
72
|
-
# Note: It orders the passed arrays by size.
|
|
73
|
-
#
|
|
74
|
-
Performant::Array.memory_efficient_intersect id_arrays
|
|
75
|
-
end
|
|
76
|
-
|
|
77
48
|
end
|
|
78
49
|
|
|
79
50
|
end
|
data/lib/picky/backends/redis.rb
CHANGED
|
@@ -12,9 +12,25 @@ module Picky
|
|
|
12
12
|
def initialize options = {}
|
|
13
13
|
super options
|
|
14
14
|
|
|
15
|
-
|
|
15
|
+
maybe_load_hiredis
|
|
16
|
+
check_hiredis_gem
|
|
17
|
+
check_redis_gem
|
|
18
|
+
|
|
16
19
|
@client = options[:client] || ::Redis.new(:db => (options[:db] || 15))
|
|
17
20
|
@immediate = options[:immediate]
|
|
21
|
+
end
|
|
22
|
+
def maybe_load_hiredis
|
|
23
|
+
require 'hiredis'
|
|
24
|
+
rescue LoadError
|
|
25
|
+
# It's ok.
|
|
26
|
+
end
|
|
27
|
+
def check_hiredis_gem
|
|
28
|
+
require 'redis/connection/hiredis'
|
|
29
|
+
rescue LoadError
|
|
30
|
+
# It's ok, the next check will fail if this one does.
|
|
31
|
+
end
|
|
32
|
+
def check_redis_gem
|
|
33
|
+
require 'redis'
|
|
18
34
|
rescue LoadError => e
|
|
19
35
|
warn_gem_missing 'redis', 'the Redis client'
|
|
20
36
|
end
|
|
@@ -24,28 +40,28 @@ module Picky
|
|
|
24
40
|
#
|
|
25
41
|
def create_inverted bundle
|
|
26
42
|
extract_lambda_or(inverted, bundle, client) ||
|
|
27
|
-
List.new(client, "#{bundle.identifier}:inverted", immediate: immediate)
|
|
43
|
+
List.new(client, "#{PICKY_ENVIRONMENT}:#{bundle.identifier}:inverted", immediate: immediate)
|
|
28
44
|
end
|
|
29
45
|
# Returns an object that on #initial, #load returns an object that responds to:
|
|
30
46
|
# [:token] # => 1.23 (a weight)
|
|
31
47
|
#
|
|
32
48
|
def create_weights bundle
|
|
33
49
|
extract_lambda_or(weights, bundle, client) ||
|
|
34
|
-
Float.new(client, "#{bundle.identifier}:weights", immediate: immediate)
|
|
50
|
+
Float.new(client, "#{PICKY_ENVIRONMENT}:#{bundle.identifier}:weights", immediate: immediate)
|
|
35
51
|
end
|
|
36
52
|
# Returns an object that on #initial, #load returns an object that responds to:
|
|
37
53
|
# [:encoded] # => [:original, :original] (an array of original symbols this similarity encoded thing maps to)
|
|
38
54
|
#
|
|
39
55
|
def create_similarity bundle
|
|
40
56
|
extract_lambda_or(similarity, bundle, client) ||
|
|
41
|
-
List.new(client, "#{bundle.identifier}:similarity", immediate: immediate)
|
|
57
|
+
List.new(client, "#{PICKY_ENVIRONMENT}:#{bundle.identifier}:similarity", immediate: immediate)
|
|
42
58
|
end
|
|
43
59
|
# Returns an object that on #initial, #load returns an object that responds to:
|
|
44
60
|
# [:key] # => value (a value for this config key)
|
|
45
61
|
#
|
|
46
62
|
def create_configuration bundle
|
|
47
63
|
extract_lambda_or(configuration, bundle, client) ||
|
|
48
|
-
String.new(client, "#{bundle.identifier}:configuration", immediate: immediate)
|
|
64
|
+
String.new(client, "#{PICKY_ENVIRONMENT}:#{bundle.identifier}:configuration", immediate: immediate)
|
|
49
65
|
end
|
|
50
66
|
# Returns an object that on #initial, #load returns an object that responds to:
|
|
51
67
|
# [id] # => [:sym1, :sym2]
|
|
@@ -89,6 +105,54 @@ module Picky
|
|
|
89
105
|
version_string.split('.').map &:to_i
|
|
90
106
|
end
|
|
91
107
|
|
|
108
|
+
# Returns the total weight for the combinations.
|
|
109
|
+
#
|
|
110
|
+
def weight combinations
|
|
111
|
+
# Note: A nice experiment that generated far too many strings.
|
|
112
|
+
#
|
|
113
|
+
# if redis_with_scripting?
|
|
114
|
+
# @@weight_script = "local sum = 0; for i=1,#(KEYS),2 do local value = redis.call('hget', KEYS[i], KEYS[i+1]); if value then sum = sum + value end end return sum;"
|
|
115
|
+
#
|
|
116
|
+
# require 'digest/sha1'
|
|
117
|
+
# @@weight_sent_once = nil
|
|
118
|
+
#
|
|
119
|
+
# # Scripting version of #ids.
|
|
120
|
+
# #
|
|
121
|
+
# class << self
|
|
122
|
+
# def weight combinations
|
|
123
|
+
# namespaces_keys = combinations.inject([]) do |namespaces_keys, combination|
|
|
124
|
+
# namespaces_keys << "#{combination.bundle.identifier}:weights"
|
|
125
|
+
# namespaces_keys << combination.token.text
|
|
126
|
+
# end
|
|
127
|
+
#
|
|
128
|
+
# # Assume it's using EVALSHA.
|
|
129
|
+
# #
|
|
130
|
+
# begin
|
|
131
|
+
# client.evalsha @@weight_sent_once,
|
|
132
|
+
# namespaces_keys.size,
|
|
133
|
+
# *namespaces_keys
|
|
134
|
+
# rescue RuntimeError => e
|
|
135
|
+
# # Make the server have a SHA-1 for the script.
|
|
136
|
+
# #
|
|
137
|
+
# @@weight_sent_once = Digest::SHA1.hexdigest @@weight_script
|
|
138
|
+
# client.eval @@weight_script,
|
|
139
|
+
# namespaces_keys.size,
|
|
140
|
+
# *namespaces_keys
|
|
141
|
+
# end
|
|
142
|
+
# end
|
|
143
|
+
# end
|
|
144
|
+
# else
|
|
145
|
+
# class << self
|
|
146
|
+
# def weight combinations
|
|
147
|
+
combinations.score
|
|
148
|
+
# end
|
|
149
|
+
# end
|
|
150
|
+
# end
|
|
151
|
+
# # Call the newly installed version.
|
|
152
|
+
# #
|
|
153
|
+
# weight combinations
|
|
154
|
+
end
|
|
155
|
+
|
|
92
156
|
# Returns the result ids for the allocation.
|
|
93
157
|
#
|
|
94
158
|
# Developers wanting to program fast intersection
|
|
@@ -97,9 +161,6 @@ module Picky
|
|
|
97
161
|
#
|
|
98
162
|
# Note: We use the amount and offset hints to speed Redis up.
|
|
99
163
|
#
|
|
100
|
-
# TODO What if it hasn't been dumped?
|
|
101
|
-
# Move this method to the actual backends?
|
|
102
|
-
#
|
|
103
164
|
def ids combinations, amount, offset
|
|
104
165
|
# TODO FIXME This is actually not correct:
|
|
105
166
|
# A dumped/loaded Redis backend should use
|
|
@@ -110,10 +171,10 @@ module Picky
|
|
|
110
171
|
# Just checked once on the first call.
|
|
111
172
|
#
|
|
112
173
|
if redis_with_scripting?
|
|
113
|
-
@@
|
|
174
|
+
@@ids_script = "local intersected = redis.call('zinterstore', ARGV[1], #(KEYS), unpack(KEYS)); if intersected == 0 then redis.call('del', ARGV[1]); return {}; end local results = redis.call('zrange', ARGV[1], tonumber(ARGV[2]), tonumber(ARGV[3])); redis.call('del', ARGV[1]); return results;"
|
|
114
175
|
|
|
115
176
|
require 'digest/sha1'
|
|
116
|
-
@@
|
|
177
|
+
@@ids_sent_once = nil
|
|
117
178
|
|
|
118
179
|
# Scripting version of #ids.
|
|
119
180
|
#
|
|
@@ -126,7 +187,7 @@ module Picky
|
|
|
126
187
|
# Assume it's using EVALSHA.
|
|
127
188
|
#
|
|
128
189
|
begin
|
|
129
|
-
client.evalsha @@
|
|
190
|
+
client.evalsha @@ids_sent_once,
|
|
130
191
|
identifiers.size,
|
|
131
192
|
*identifiers,
|
|
132
193
|
generate_intermediate_result_id,
|
|
@@ -135,8 +196,8 @@ module Picky
|
|
|
135
196
|
rescue RuntimeError => e
|
|
136
197
|
# Make the server have a SHA-1 for the script.
|
|
137
198
|
#
|
|
138
|
-
@@
|
|
139
|
-
client.eval @@
|
|
199
|
+
@@ids_sent_once = Digest::SHA1.hexdigest @@ids_script
|
|
200
|
+
client.eval @@ids_script,
|
|
140
201
|
identifiers.size,
|
|
141
202
|
*identifiers,
|
|
142
203
|
generate_intermediate_result_id,
|
|
@@ -191,8 +252,6 @@ module Picky
|
|
|
191
252
|
end
|
|
192
253
|
end
|
|
193
254
|
else
|
|
194
|
-
# TODO Refactor!
|
|
195
|
-
#
|
|
196
255
|
class << self
|
|
197
256
|
def ids combinations, _, _
|
|
198
257
|
# Get the ids for each combination.
|
|
@@ -51,33 +51,6 @@ module Picky
|
|
|
51
51
|
IntegerKeyArray.new(bundle.index_path(:realtime), self_indexed: self_indexed)
|
|
52
52
|
end
|
|
53
53
|
|
|
54
|
-
# Returns the result ids for the allocation.
|
|
55
|
-
#
|
|
56
|
-
# Sorts the ids by size and & through them in the following order (sizes):
|
|
57
|
-
# 0. [100_000, 400, 30, 2]
|
|
58
|
-
# 1. [2, 30, 400, 100_000]
|
|
59
|
-
# 2. (100_000 & (400 & (30 & 2))) # => result
|
|
60
|
-
#
|
|
61
|
-
# Note: Uses a C-optimized intersection routine (in performant.c)
|
|
62
|
-
# for speed and memory efficiency.
|
|
63
|
-
#
|
|
64
|
-
# Note: In the memory based version we ignore the amount and offset hints.
|
|
65
|
-
# We cannot use the information to speed up the algorithm, unfortunately.
|
|
66
|
-
#
|
|
67
|
-
def ids combinations, _, _
|
|
68
|
-
# Get the ids for each combination.
|
|
69
|
-
#
|
|
70
|
-
id_arrays = combinations.inject([]) do |total, combination|
|
|
71
|
-
total << combination.ids
|
|
72
|
-
end
|
|
73
|
-
|
|
74
|
-
# Call the optimized C algorithm.
|
|
75
|
-
#
|
|
76
|
-
# Note: It orders the passed arrays by size.
|
|
77
|
-
#
|
|
78
|
-
Performant::Array.memory_efficient_intersect id_arrays
|
|
79
|
-
end
|
|
80
|
-
|
|
81
54
|
end
|
|
82
55
|
|
|
83
56
|
end
|
data/lib/picky/bundle.rb
CHANGED
|
@@ -57,7 +57,7 @@ module Picky
|
|
|
57
57
|
reset_backend
|
|
58
58
|
end
|
|
59
59
|
def identifier
|
|
60
|
-
:"#{category.identifier}:#{name}"
|
|
60
|
+
@identifier ||= :"#{category.identifier}:#{name}"
|
|
61
61
|
end
|
|
62
62
|
|
|
63
63
|
# If no specific backend has been set,
|
|
@@ -160,7 +160,7 @@ module Picky
|
|
|
160
160
|
#
|
|
161
161
|
def delete
|
|
162
162
|
@backend_inverted.delete if @backend_inverted.respond_to? :delete
|
|
163
|
-
#
|
|
163
|
+
# THINK about this. Perhaps the strategies should implement the backend methods?
|
|
164
164
|
#
|
|
165
165
|
@backend_weights.delete if @backend_weights.respond_to?(:delete) && @weights_strategy.saved?
|
|
166
166
|
@backend_similarity.delete if @backend_similarity.respond_to? :delete
|
data/lib/picky/bundle_indexed.rb
CHANGED
|
@@ -64,7 +64,7 @@ module Picky
|
|
|
64
64
|
# Loads the weights index.
|
|
65
65
|
#
|
|
66
66
|
def load_weights
|
|
67
|
-
#
|
|
67
|
+
# THINK about this. Perhaps the strategies should implement the backend methods?
|
|
68
68
|
#
|
|
69
69
|
self.weights = @backend_weights.load if @weights_strategy.saved?
|
|
70
70
|
end
|
|
@@ -45,7 +45,7 @@ module Picky
|
|
|
45
45
|
# Dumps the weights index.
|
|
46
46
|
#
|
|
47
47
|
def dump_weights
|
|
48
|
-
#
|
|
48
|
+
# THINK about this. Perhaps the strategies should implement the backend methods? Or only the internal index ones?
|
|
49
49
|
#
|
|
50
50
|
@backend_weights.dump @weights if @weights_strategy.saved?
|
|
51
51
|
end
|
|
@@ -27,18 +27,8 @@ module Picky
|
|
|
27
27
|
# Returns all possible similar tokens for the given token.
|
|
28
28
|
#
|
|
29
29
|
def similar_tokens_for token
|
|
30
|
-
text = token.text
|
|
31
30
|
categories.inject([]) do |result, category|
|
|
32
|
-
|
|
33
|
-
# Note: We could also break off here if not all the available
|
|
34
|
-
# similars are needed.
|
|
35
|
-
# Wait for a concrete case that needs this before taking
|
|
36
|
-
# action.
|
|
37
|
-
#
|
|
38
|
-
while next_token = next_token.next_similar_token(category)
|
|
39
|
-
result << next_token if next_token && next_token.text != text
|
|
40
|
-
end
|
|
41
|
-
result
|
|
31
|
+
result + token.similar_tokens_for(category)
|
|
42
32
|
end
|
|
43
33
|
end
|
|
44
34
|
|
data/lib/picky/category.rb
CHANGED
|
@@ -2,9 +2,9 @@ module Picky
|
|
|
2
2
|
|
|
3
3
|
class Category
|
|
4
4
|
|
|
5
|
+
attr_accessor :exact,
|
|
6
|
+
:partial
|
|
5
7
|
attr_reader :name,
|
|
6
|
-
:exact,
|
|
7
|
-
:partial,
|
|
8
8
|
:prepared,
|
|
9
9
|
:backend
|
|
10
10
|
|
|
@@ -41,7 +41,7 @@ module Picky
|
|
|
41
41
|
|
|
42
42
|
# @symbols = options[:use_symbols] || index.use_symbols? # TODO Symbols.
|
|
43
43
|
|
|
44
|
-
weights = options[:
|
|
44
|
+
weights = options[:weight] || Generators::Weights::Default
|
|
45
45
|
partial = options[:partial] || Generators::Partial::Default
|
|
46
46
|
similarity = options[:similarity] || Generators::Similarity::Default
|
|
47
47
|
|
|
@@ -49,7 +49,7 @@ module Picky
|
|
|
49
49
|
no_similarity = Generators::Similarity::None.new
|
|
50
50
|
|
|
51
51
|
@exact = Bundle.new :exact, self, weights, no_partial, similarity, options
|
|
52
|
-
if partial.use_exact_for_partial?
|
|
52
|
+
if partial.respond_to?(:use_exact_for_partial?) && partial.use_exact_for_partial?
|
|
53
53
|
@partial = Wrappers::Bundle::ExactPartial.new @exact
|
|
54
54
|
else
|
|
55
55
|
@partial = Bundle.new :partial, self, weights, partial, no_similarity, options
|