picky 3.4.3 → 3.5.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/aux/picky/cli.rb +1 -1
- data/lib/picky/backends/memory/json.rb +1 -1
- data/lib/picky/backends/memory/text.rb +2 -2
- data/lib/picky/backends/redis/string.rb +6 -0
- data/lib/picky/bundle.rb +0 -1
- data/lib/picky/bundle_indexing.rb +11 -107
- data/lib/picky/bundle_realtime.rb +16 -8
- data/lib/picky/calculations/location.rb +18 -14
- data/lib/picky/categories.rb +1 -1
- data/lib/picky/category.rb +7 -1
- data/lib/picky/category_indexed.rb +1 -0
- data/lib/picky/category_indexing.rb +17 -17
- data/lib/picky/category_realtime.rb +23 -11
- data/lib/picky/deployment.rb +33 -33
- data/lib/picky/generators/partial/substring.rb +0 -2
- data/lib/picky/generators/similarity/double_metaphone.rb +1 -1
- data/lib/picky/generators/similarity/metaphone.rb +1 -1
- data/lib/picky/generators/similarity/soundex.rb +1 -1
- data/lib/picky/index.rb +22 -5
- data/lib/picky/index_indexing.rb +3 -15
- data/lib/picky/indexers/base.rb +7 -3
- data/lib/picky/indexers/parallel.rb +1 -10
- data/lib/picky/indexers/serial.rb +1 -10
- data/lib/picky/indexes.rb +1 -1
- data/lib/picky/loader.rb +2 -6
- data/lib/picky/query/qualifier_category_mapper.rb +2 -2
- data/lib/picky/query/token.rb +1 -2
- data/lib/picky/query/tokens.rb +6 -0
- data/lib/picky/search.rb +1 -0
- data/lib/picky/sources/couch.rb +1 -1
- data/lib/picky/sources/csv.rb +1 -1
- data/lib/picky/sources/mongo.rb +1 -1
- data/lib/picky/wrappers/bundle/calculation.rb +8 -8
- data/lib/picky/wrappers/bundle/delegators.rb +4 -1
- data/lib/picky/wrappers/bundle/exact_partial.rb +1 -1
- data/lib/picky/wrappers/bundle/location.rb +30 -13
- data/lib/picky/wrappers/category/location.rb +14 -9
- data/lib/tasks/try.rb +2 -2
- data/spec/lib/backends/memory/text_spec.rb +6 -6
- data/spec/lib/bundle_spec.rb +4 -4
- data/spec/lib/calculations/location_spec.rb +27 -29
- data/spec/lib/category_indexed_spec.rb +1 -0
- data/spec/lib/category_indexing_spec.rb +23 -36
- data/spec/lib/category_spec.rb +2 -0
- data/spec/lib/extensions/string_spec.rb +1 -1
- data/spec/lib/generators/partial/infix_spec.rb +2 -2
- data/spec/lib/index_indexing_spec.rb +5 -3
- data/spec/lib/indexed/bundle_spec.rb +2 -2
- data/spec/lib/indexers/base_spec.rb +2 -4
- data/spec/lib/indexers/serial_spec.rb +3 -19
- data/spec/lib/indexing/bundle_partial_generation_speed_spec.rb +42 -42
- data/spec/lib/indexing/bundle_spec.rb +4 -133
- data/spec/lib/query/combination_spec.rb +6 -6
- data/spec/lib/query/token_spec.rb +32 -19
- data/spec/lib/query/tokens_spec.rb +23 -10
- metadata +27 -34
- data/lib/picky/no_source_specified_exception.rb +0 -7
- data/lib/picky/wrappers/sources/base.rb +0 -35
- data/lib/picky/wrappers/sources/location.rb +0 -56
- data/spec/lib/sources/wrappers/base_spec.rb +0 -38
- data/spec/lib/sources/wrappers/location_spec.rb +0 -55
data/aux/picky/cli.rb
CHANGED
@@ -17,7 +17,7 @@ module Picky
|
|
17
17
|
# Loads the index hash from json format.
|
18
18
|
#
|
19
19
|
def load
|
20
|
-
Yajl::Parser.parse ::File.open(cache_path, 'r'), symbolize_keys: true # TODO
|
20
|
+
Yajl::Parser.parse ::File.open(cache_path, 'r') # , symbolize_keys: true # TODO Symbols.
|
21
21
|
end
|
22
22
|
|
23
23
|
# Dumps the index hash in json format.
|
@@ -41,7 +41,7 @@ module Picky
|
|
41
41
|
# * id,data\n
|
42
42
|
# * id,data\n
|
43
43
|
#
|
44
|
-
# Yields an id string and a
|
44
|
+
# Yields an id string and a token.
|
45
45
|
#
|
46
46
|
def retrieve
|
47
47
|
id = nil
|
@@ -49,7 +49,7 @@ module Picky
|
|
49
49
|
::File.open(cache_path, 'r:utf-8') do |file|
|
50
50
|
file.each_line do |line|
|
51
51
|
id, token = line.split ?,, 2
|
52
|
-
yield id, (token.chomp! || token)
|
52
|
+
yield id, (token.chomp! || token)
|
53
53
|
end
|
54
54
|
end
|
55
55
|
end
|
data/lib/picky/bundle.rb
CHANGED
@@ -28,134 +28,38 @@ module Picky
|
|
28
28
|
#
|
29
29
|
class Bundle
|
30
30
|
|
31
|
-
attr_reader :backend
|
32
|
-
:prepared
|
31
|
+
attr_reader :backend
|
33
32
|
|
34
33
|
# When indexing, clear only clears the inverted index.
|
35
34
|
#
|
36
|
-
delegate :clear,
|
37
|
-
|
38
|
-
# Sets up a piece of the index for the given token.
|
39
|
-
#
|
40
|
-
def initialize_inverted_index_for token
|
41
|
-
self.inverted[token] ||= []
|
42
|
-
end
|
43
|
-
|
44
|
-
# Generation
|
45
|
-
#
|
46
|
-
|
47
|
-
# This method
|
48
|
-
# * Loads the base index from the "prepared..." file.
|
49
|
-
# * Generates derived indexes.
|
50
|
-
# * Dumps all the indexes into files.
|
51
|
-
#
|
52
|
-
def generate_caches_from_source
|
53
|
-
load_from_prepared_index_file
|
54
|
-
generate_caches_from_memory
|
55
|
-
end
|
56
|
-
# Generates derived indexes from the index and dumps.
|
57
|
-
#
|
58
|
-
# Note: assumes that there is something in the index
|
59
|
-
#
|
60
|
-
def generate_caches_from_memory
|
61
|
-
cache_from_memory_generation_message
|
62
|
-
generate_derived
|
63
|
-
end
|
64
|
-
def cache_from_memory_generation_message
|
65
|
-
timed_exclaim %Q{"#{identifier}": Caching from intermediate in-memory index.}
|
66
|
-
end
|
67
|
-
|
68
|
-
# Generates the weights and similarity from the main index.
|
69
|
-
#
|
70
|
-
def generate_derived
|
71
|
-
generate_weights
|
72
|
-
generate_similarity
|
73
|
-
end
|
35
|
+
delegate :clear,
|
36
|
+
:to => :inverted
|
74
37
|
|
75
38
|
# "Empties" the index(es) by getting a new empty
|
76
39
|
# internal backend instance.
|
77
40
|
#
|
78
41
|
def empty
|
79
42
|
empty_inverted
|
43
|
+
empty_weights
|
44
|
+
empty_similarity
|
80
45
|
empty_configuration
|
81
46
|
end
|
82
47
|
def empty_inverted
|
83
48
|
@inverted = @backend_inverted.empty
|
84
49
|
end
|
85
|
-
def
|
86
|
-
@
|
87
|
-
end
|
88
|
-
|
89
|
-
# Load the data from the db.
|
90
|
-
#
|
91
|
-
def load_from_prepared_index_file
|
92
|
-
load_from_prepared_index_generation_message
|
93
|
-
retrieve
|
94
|
-
end
|
95
|
-
def load_from_prepared_index_generation_message
|
96
|
-
timed_exclaim %Q{"#{identifier}": Loading prepared data into memory.}
|
50
|
+
def empty_weights
|
51
|
+
@weights = @backend_weights.empty
|
97
52
|
end
|
98
|
-
|
99
|
-
|
100
|
-
# This is in preparation for generating
|
101
|
-
# derived indexes (like weights, similarity)
|
102
|
-
# and later dumping the optimized index.
|
103
|
-
#
|
104
|
-
# TODO Move this out to the category?
|
105
|
-
#
|
106
|
-
# Note: The clean way to do this would be to
|
107
|
-
# self.inverted.values.each &:uniq!
|
108
|
-
#
|
109
|
-
# Note 2:
|
110
|
-
# initialize_inverted_index_for token
|
111
|
-
# id = id.send(format)
|
112
|
-
# next if last_id == id
|
113
|
-
# self.inverted[token] << id
|
114
|
-
# last_id = id
|
115
|
-
#
|
116
|
-
def retrieve
|
117
|
-
format = key_format || :to_i
|
118
|
-
empty_inverted
|
119
|
-
id, last_id = nil, nil
|
120
|
-
prepared.retrieve do |id, token|
|
121
|
-
initialize_inverted_index_for token
|
122
|
-
self.inverted[token] << id.send(format)
|
123
|
-
end
|
124
|
-
self.inverted.values.each &:uniq!
|
53
|
+
def empty_similarity
|
54
|
+
@similarity = @backend_similarity.empty
|
125
55
|
end
|
126
|
-
|
127
|
-
|
128
|
-
#
|
129
|
-
def generate_partial_from exact_inverted_index
|
130
|
-
timed_exclaim %Q{"#{identifier}": Generating partial index for index.}
|
131
|
-
self.inverted = exact_inverted_index
|
132
|
-
self.generate_partial
|
133
|
-
self
|
134
|
-
end
|
135
|
-
|
136
|
-
# Generates a new index (writes its index) using the
|
137
|
-
# partial caching strategy of this bundle.
|
138
|
-
#
|
139
|
-
def generate_partial
|
140
|
-
self.inverted = partial_strategy.generate_from self.inverted
|
141
|
-
end
|
142
|
-
# Generates a new weights index (writes its index) using the
|
143
|
-
# given weight caching strategy.
|
144
|
-
#
|
145
|
-
def generate_weights
|
146
|
-
self.weights = weights_strategy.generate_from self.inverted
|
147
|
-
end
|
148
|
-
# Generates a new similarity index (writes its index) using the
|
149
|
-
# given similarity caching strategy.
|
150
|
-
#
|
151
|
-
def generate_similarity
|
152
|
-
self.similarity = similarity_strategy.generate_from self.inverted
|
56
|
+
def empty_configuration
|
57
|
+
@configuration = @backend_configuration.empty
|
153
58
|
end
|
154
59
|
|
155
60
|
# Saves the indexes in a dump file.
|
156
61
|
#
|
157
62
|
def dump
|
158
|
-
timed_exclaim %Q{"#{identifier}": Dumping data.}
|
159
63
|
dump_inverted
|
160
64
|
dump_similarity
|
161
65
|
dump_weights
|
@@ -32,7 +32,9 @@ module Picky
|
|
32
32
|
|
33
33
|
# Returns a reference to the array where the id has been added.
|
34
34
|
#
|
35
|
-
|
35
|
+
# TODO Rename sym.
|
36
|
+
#
|
37
|
+
def add id, sym, where = :unshift
|
36
38
|
ary = @inverted[sym]
|
37
39
|
|
38
40
|
syms = @realtime_mapping[id]
|
@@ -42,12 +44,12 @@ module Picky
|
|
42
44
|
#
|
43
45
|
ids = if syms.include? sym
|
44
46
|
ids = @inverted[sym]
|
45
|
-
ids.delete id
|
46
|
-
ids.
|
47
|
+
ids.delete id
|
48
|
+
ids.send where, id
|
47
49
|
else
|
48
50
|
syms << sym
|
49
51
|
ids = @inverted[sym] ||= []
|
50
|
-
ids.
|
52
|
+
ids.send where, id
|
51
53
|
end
|
52
54
|
|
53
55
|
# Weights.
|
@@ -60,21 +62,27 @@ module Picky
|
|
60
62
|
similarity = @similarity[encoded] ||= []
|
61
63
|
if similarity.include? sym
|
62
64
|
similarity.delete sym # Not completely correct, as others will also be affected, but meh.
|
63
|
-
similarity.
|
65
|
+
similarity.send where, sym #
|
64
66
|
else
|
65
|
-
similarity.
|
67
|
+
similarity.send where, sym
|
66
68
|
end
|
67
69
|
end
|
68
70
|
end
|
69
71
|
|
70
72
|
# Partializes the text and then adds each.
|
71
73
|
#
|
72
|
-
def add_partialized id, text
|
74
|
+
def add_partialized id, text, where = :unshift
|
73
75
|
self.partial_strategy.each_partial text do |partial_text|
|
74
|
-
add id, partial_text
|
76
|
+
add id, partial_text, where
|
75
77
|
end
|
76
78
|
end
|
77
79
|
|
80
|
+
# Clears the realtime mapping.
|
81
|
+
#
|
82
|
+
def clear_realtime_mapping
|
83
|
+
@realtime_mapping.clear
|
84
|
+
end
|
85
|
+
|
78
86
|
end
|
79
87
|
|
80
88
|
end
|
@@ -11,37 +11,41 @@ module Picky
|
|
11
11
|
#
|
12
12
|
class Location
|
13
13
|
|
14
|
-
attr_reader :
|
14
|
+
attr_reader :anchor,
|
15
|
+
:precision,
|
16
|
+
:grid
|
15
17
|
|
16
|
-
def initialize user_grid, precision = nil
|
17
|
-
@user_grid
|
18
|
-
@precision
|
19
|
-
@grid
|
18
|
+
def initialize user_grid, anchor = 0.0, precision = nil
|
19
|
+
@user_grid = user_grid
|
20
|
+
@precision = precision || 1
|
21
|
+
@grid = @user_grid / (@precision + 0.5)
|
22
|
+
|
23
|
+
self.anchor = anchor
|
20
24
|
end
|
21
25
|
|
22
|
-
def
|
26
|
+
def anchor= value
|
23
27
|
# Add a margin of 1 user grid.
|
24
28
|
#
|
25
|
-
|
29
|
+
value -= @user_grid
|
26
30
|
|
27
31
|
# Add plus 1 grid so that the index key never falls on 0.
|
28
32
|
# Why? to_i maps by default to 0.
|
29
33
|
#
|
30
|
-
|
34
|
+
value -= @grid
|
31
35
|
|
32
|
-
@
|
36
|
+
@anchor = value
|
33
37
|
end
|
34
38
|
|
35
39
|
#
|
36
40
|
#
|
37
41
|
def add_margin length
|
38
|
-
@
|
42
|
+
@anchor -= length
|
39
43
|
end
|
40
44
|
|
41
45
|
#
|
42
46
|
#
|
43
|
-
def
|
44
|
-
range
|
47
|
+
def calculated_range location
|
48
|
+
range calculate(location)
|
45
49
|
end
|
46
50
|
#
|
47
51
|
#
|
@@ -50,8 +54,8 @@ module Picky
|
|
50
54
|
end
|
51
55
|
#
|
52
56
|
#
|
53
|
-
def
|
54
|
-
((location - @
|
57
|
+
def calculate location
|
58
|
+
((location - @anchor) / @grid).floor
|
55
59
|
end
|
56
60
|
|
57
61
|
end
|
data/lib/picky/categories.rb
CHANGED
@@ -40,7 +40,7 @@ module Picky
|
|
40
40
|
# Find a given category in the categories.
|
41
41
|
#
|
42
42
|
def [] category_name
|
43
|
-
category_name = category_name.
|
43
|
+
category_name = category_name.intern
|
44
44
|
category_hash[category_name] || raise_not_found(category_name)
|
45
45
|
end
|
46
46
|
def raise_not_found category_name
|
data/lib/picky/category.rb
CHANGED
@@ -4,7 +4,8 @@ module Picky
|
|
4
4
|
|
5
5
|
attr_reader :name,
|
6
6
|
:exact,
|
7
|
-
:partial
|
7
|
+
:partial,
|
8
|
+
:prepared
|
8
9
|
|
9
10
|
# Mandatory params:
|
10
11
|
# * name: Category name to use as identifier and file names.
|
@@ -20,6 +21,7 @@ module Picky
|
|
20
21
|
# * weights: Query::Weights.new( [:category1, :category2] => +2, ... )
|
21
22
|
# * tokenizer: Use a subclass of Tokenizers::Base that implements #tokens_for and #empty_tokens.
|
22
23
|
# * key_format: What this category's keys are formatted with (default is :to_i)
|
24
|
+
# * use_symbols: Whether to use symbols internally instead of strings.
|
23
25
|
#
|
24
26
|
def initialize name, index, options = {}
|
25
27
|
@name = name
|
@@ -31,6 +33,7 @@ module Picky
|
|
31
33
|
@from = options[:from]
|
32
34
|
@tokenizer = options[:tokenizer]
|
33
35
|
@key_format = options[:key_format]
|
36
|
+
# @symbols = options[:use_symbols] || index.use_symbols? # TODO Symbols.
|
34
37
|
@qualifiers = extract_qualifiers_from options
|
35
38
|
|
36
39
|
weights = options[:weights] || Generators::Weights::Default
|
@@ -46,6 +49,8 @@ module Picky
|
|
46
49
|
else
|
47
50
|
@partial = Bundle.new :partial, self, index.backend, weights, partial, no_similarity, options
|
48
51
|
end
|
52
|
+
|
53
|
+
@prepared = Backends::Memory::Text.new prepared_index_path
|
49
54
|
end
|
50
55
|
|
51
56
|
# Indexes and reloads the category.
|
@@ -58,6 +63,7 @@ module Picky
|
|
58
63
|
def dump
|
59
64
|
exact.dump
|
60
65
|
partial.dump
|
66
|
+
timed_exclaim %Q{"#{identifier}": Generated -> #{index_directory.gsub("#{PICKY_ROOT}/", '')}.}
|
61
67
|
end
|
62
68
|
|
63
69
|
# Index name.
|
@@ -24,9 +24,12 @@ module Picky
|
|
24
24
|
indexer.index [self]
|
25
25
|
end
|
26
26
|
end
|
27
|
+
|
28
|
+
# Empty all the indexes.
|
29
|
+
#
|
27
30
|
def empty
|
28
31
|
exact.empty
|
29
|
-
partial.
|
32
|
+
partial.empty
|
30
33
|
end
|
31
34
|
|
32
35
|
# Take a data snapshot if the source offers it.
|
@@ -44,22 +47,17 @@ module Picky
|
|
44
47
|
# Generates all caches for this category.
|
45
48
|
#
|
46
49
|
def cache
|
47
|
-
|
48
|
-
|
49
|
-
generate_caches_from_memory
|
50
|
+
empty
|
51
|
+
retrieve
|
50
52
|
dump
|
51
|
-
|
53
|
+
clear_realtime_mapping # TODO To call or not to call, that is the question.
|
52
54
|
end
|
53
|
-
|
55
|
+
|
56
|
+
# Retrieves the prepared index data into the indexes and
|
57
|
+
# generates the necessary derived indexes.
|
54
58
|
#
|
55
|
-
def
|
56
|
-
|
57
|
-
end
|
58
|
-
def generate_partial
|
59
|
-
partial.generate_partial_from exact.inverted
|
60
|
-
end
|
61
|
-
def generate_caches_from_memory
|
62
|
-
partial.generate_caches_from_memory
|
59
|
+
def retrieve
|
60
|
+
prepared.retrieve { |id, token| add_tokenized_token id, token, :<< }
|
63
61
|
end
|
64
62
|
|
65
63
|
# Return an appropriate source.
|
@@ -80,12 +78,14 @@ module Picky
|
|
80
78
|
|
81
79
|
# Return the key format.
|
82
80
|
#
|
83
|
-
# If
|
84
|
-
#
|
81
|
+
# If no key_format is defined on the category
|
82
|
+
# and the source has no key format, ask
|
85
83
|
# the index for one.
|
86
84
|
#
|
85
|
+
# Default is to_i.
|
86
|
+
#
|
87
87
|
def key_format
|
88
|
-
source.respond_to?(:key_format) && source.key_format || @key_format ||
|
88
|
+
@key_format ||= source.respond_to?(:key_format) && source.key_format || @index.key_format || :to_i
|
89
89
|
end
|
90
90
|
|
91
91
|
# Where the data is taken from.
|
@@ -13,29 +13,41 @@ module Picky
|
|
13
13
|
# Adds and indexes this category of the
|
14
14
|
# given object.
|
15
15
|
#
|
16
|
-
def add object
|
16
|
+
def add object, where = :unshift
|
17
17
|
tokens, _ = tokenizer.tokenize object.send(from)
|
18
|
-
add_tokenized object.id, tokens
|
18
|
+
add_tokenized object.id, tokens, where
|
19
19
|
end
|
20
20
|
|
21
21
|
# Removes the object's id, and then
|
22
22
|
# adds it again.
|
23
23
|
#
|
24
|
-
def replace object
|
24
|
+
def replace object, where = :unshift
|
25
25
|
remove object.id
|
26
|
-
add object
|
26
|
+
add object, where
|
27
27
|
end
|
28
28
|
|
29
29
|
# For the given id, adds the list of
|
30
30
|
# strings to the index for the given id.
|
31
31
|
#
|
32
|
-
def add_tokenized id, tokens
|
33
|
-
tokens.each
|
34
|
-
|
35
|
-
|
36
|
-
|
37
|
-
|
38
|
-
|
32
|
+
def add_tokenized id, tokens, where = :unshift
|
33
|
+
tokens.each { |text| add_tokenized_token id, text, where }
|
34
|
+
end
|
35
|
+
|
36
|
+
#
|
37
|
+
#
|
38
|
+
def add_tokenized_token id, text, where = :unshift
|
39
|
+
return unless text
|
40
|
+
id = id.send key_format # TODO Speed this up!
|
41
|
+
# text = text.to_sym if @symbols # TODO Symbols.
|
42
|
+
exact.add id, text, where
|
43
|
+
partial.add_partialized id, text, where
|
44
|
+
end
|
45
|
+
|
46
|
+
# Clears the realtime mapping.
|
47
|
+
#
|
48
|
+
def clear_realtime_mapping
|
49
|
+
exact.clear_realtime_mapping
|
50
|
+
partial.clear_realtime_mapping
|
39
51
|
end
|
40
52
|
|
41
53
|
end
|