picky 3.4.3 → 3.5.0
Sign up to get free protection for your applications and to get access to all the features.
- data/aux/picky/cli.rb +1 -1
- data/lib/picky/backends/memory/json.rb +1 -1
- data/lib/picky/backends/memory/text.rb +2 -2
- data/lib/picky/backends/redis/string.rb +6 -0
- data/lib/picky/bundle.rb +0 -1
- data/lib/picky/bundle_indexing.rb +11 -107
- data/lib/picky/bundle_realtime.rb +16 -8
- data/lib/picky/calculations/location.rb +18 -14
- data/lib/picky/categories.rb +1 -1
- data/lib/picky/category.rb +7 -1
- data/lib/picky/category_indexed.rb +1 -0
- data/lib/picky/category_indexing.rb +17 -17
- data/lib/picky/category_realtime.rb +23 -11
- data/lib/picky/deployment.rb +33 -33
- data/lib/picky/generators/partial/substring.rb +0 -2
- data/lib/picky/generators/similarity/double_metaphone.rb +1 -1
- data/lib/picky/generators/similarity/metaphone.rb +1 -1
- data/lib/picky/generators/similarity/soundex.rb +1 -1
- data/lib/picky/index.rb +22 -5
- data/lib/picky/index_indexing.rb +3 -15
- data/lib/picky/indexers/base.rb +7 -3
- data/lib/picky/indexers/parallel.rb +1 -10
- data/lib/picky/indexers/serial.rb +1 -10
- data/lib/picky/indexes.rb +1 -1
- data/lib/picky/loader.rb +2 -6
- data/lib/picky/query/qualifier_category_mapper.rb +2 -2
- data/lib/picky/query/token.rb +1 -2
- data/lib/picky/query/tokens.rb +6 -0
- data/lib/picky/search.rb +1 -0
- data/lib/picky/sources/couch.rb +1 -1
- data/lib/picky/sources/csv.rb +1 -1
- data/lib/picky/sources/mongo.rb +1 -1
- data/lib/picky/wrappers/bundle/calculation.rb +8 -8
- data/lib/picky/wrappers/bundle/delegators.rb +4 -1
- data/lib/picky/wrappers/bundle/exact_partial.rb +1 -1
- data/lib/picky/wrappers/bundle/location.rb +30 -13
- data/lib/picky/wrappers/category/location.rb +14 -9
- data/lib/tasks/try.rb +2 -2
- data/spec/lib/backends/memory/text_spec.rb +6 -6
- data/spec/lib/bundle_spec.rb +4 -4
- data/spec/lib/calculations/location_spec.rb +27 -29
- data/spec/lib/category_indexed_spec.rb +1 -0
- data/spec/lib/category_indexing_spec.rb +23 -36
- data/spec/lib/category_spec.rb +2 -0
- data/spec/lib/extensions/string_spec.rb +1 -1
- data/spec/lib/generators/partial/infix_spec.rb +2 -2
- data/spec/lib/index_indexing_spec.rb +5 -3
- data/spec/lib/indexed/bundle_spec.rb +2 -2
- data/spec/lib/indexers/base_spec.rb +2 -4
- data/spec/lib/indexers/serial_spec.rb +3 -19
- data/spec/lib/indexing/bundle_partial_generation_speed_spec.rb +42 -42
- data/spec/lib/indexing/bundle_spec.rb +4 -133
- data/spec/lib/query/combination_spec.rb +6 -6
- data/spec/lib/query/token_spec.rb +32 -19
- data/spec/lib/query/tokens_spec.rb +23 -10
- metadata +27 -34
- data/lib/picky/no_source_specified_exception.rb +0 -7
- data/lib/picky/wrappers/sources/base.rb +0 -35
- data/lib/picky/wrappers/sources/location.rb +0 -56
- data/spec/lib/sources/wrappers/base_spec.rb +0 -38
- data/spec/lib/sources/wrappers/location_spec.rb +0 -55
data/aux/picky/cli.rb
CHANGED
data/lib/picky/backends/memory/json.rb
CHANGED
@@ -17,7 +17,7 @@ module Picky
|
|
17
17
|
# Loads the index hash from json format.
|
18
18
|
#
|
19
19
|
def load
|
20
|
-
Yajl::Parser.parse ::File.open(cache_path, 'r'), symbolize_keys: true # TODO
|
20
|
+
Yajl::Parser.parse ::File.open(cache_path, 'r') # , symbolize_keys: true # TODO Symbols.
|
21
21
|
end
|
22
22
|
|
23
23
|
# Dumps the index hash in json format.
|
data/lib/picky/backends/memory/text.rb
CHANGED
@@ -41,7 +41,7 @@ module Picky
|
|
41
41
|
# * id,data\n
|
42
42
|
# * id,data\n
|
43
43
|
#
|
44
|
-
# Yields an id string and a
|
44
|
+
# Yields an id string and a token.
|
45
45
|
#
|
46
46
|
def retrieve
|
47
47
|
id = nil
|
@@ -49,7 +49,7 @@ module Picky
|
|
49
49
|
::File.open(cache_path, 'r:utf-8') do |file|
|
50
50
|
file.each_line do |line|
|
51
51
|
id, token = line.split ?,, 2
|
52
|
-
yield id, (token.chomp! || token)
|
52
|
+
yield id, (token.chomp! || token)
|
53
53
|
end
|
54
54
|
end
|
55
55
|
end
|
data/lib/picky/bundle.rb
CHANGED
data/lib/picky/bundle_indexing.rb
CHANGED
@@ -28,134 +28,38 @@ module Picky
|
|
28
28
|
#
|
29
29
|
class Bundle
|
30
30
|
|
31
|
-
attr_reader :backend
|
32
|
-
:prepared
|
31
|
+
attr_reader :backend
|
33
32
|
|
34
33
|
# When indexing, clear only clears the inverted index.
|
35
34
|
#
|
36
|
-
delegate :clear,
|
37
|
-
|
38
|
-
# Sets up a piece of the index for the given token.
|
39
|
-
#
|
40
|
-
def initialize_inverted_index_for token
|
41
|
-
self.inverted[token] ||= []
|
42
|
-
end
|
43
|
-
|
44
|
-
# Generation
|
45
|
-
#
|
46
|
-
|
47
|
-
# This method
|
48
|
-
# * Loads the base index from the "prepared..." file.
|
49
|
-
# * Generates derived indexes.
|
50
|
-
# * Dumps all the indexes into files.
|
51
|
-
#
|
52
|
-
def generate_caches_from_source
|
53
|
-
load_from_prepared_index_file
|
54
|
-
generate_caches_from_memory
|
55
|
-
end
|
56
|
-
# Generates derived indexes from the index and dumps.
|
57
|
-
#
|
58
|
-
# Note: assumes that there is something in the index
|
59
|
-
#
|
60
|
-
def generate_caches_from_memory
|
61
|
-
cache_from_memory_generation_message
|
62
|
-
generate_derived
|
63
|
-
end
|
64
|
-
def cache_from_memory_generation_message
|
65
|
-
timed_exclaim %Q{"#{identifier}": Caching from intermediate in-memory index.}
|
66
|
-
end
|
67
|
-
|
68
|
-
# Generates the weights and similarity from the main index.
|
69
|
-
#
|
70
|
-
def generate_derived
|
71
|
-
generate_weights
|
72
|
-
generate_similarity
|
73
|
-
end
|
35
|
+
delegate :clear,
|
36
|
+
:to => :inverted
|
74
37
|
|
75
38
|
# "Empties" the index(es) by getting a new empty
|
76
39
|
# internal backend instance.
|
77
40
|
#
|
78
41
|
def empty
|
79
42
|
empty_inverted
|
43
|
+
empty_weights
|
44
|
+
empty_similarity
|
80
45
|
empty_configuration
|
81
46
|
end
|
82
47
|
def empty_inverted
|
83
48
|
@inverted = @backend_inverted.empty
|
84
49
|
end
|
85
|
-
def
|
86
|
-
@
|
87
|
-
end
|
88
|
-
|
89
|
-
# Load the data from the db.
|
90
|
-
#
|
91
|
-
def load_from_prepared_index_file
|
92
|
-
load_from_prepared_index_generation_message
|
93
|
-
retrieve
|
94
|
-
end
|
95
|
-
def load_from_prepared_index_generation_message
|
96
|
-
timed_exclaim %Q{"#{identifier}": Loading prepared data into memory.}
|
50
|
+
def empty_weights
|
51
|
+
@weights = @backend_weights.empty
|
97
52
|
end
|
98
|
-
|
99
|
-
|
100
|
-
# This is in preparation for generating
|
101
|
-
# derived indexes (like weights, similarity)
|
102
|
-
# and later dumping the optimized index.
|
103
|
-
#
|
104
|
-
# TODO Move this out to the category?
|
105
|
-
#
|
106
|
-
# Note: The clean way to do this would be to
|
107
|
-
# self.inverted.values.each &:uniq!
|
108
|
-
#
|
109
|
-
# Note 2:
|
110
|
-
# initialize_inverted_index_for token
|
111
|
-
# id = id.send(format)
|
112
|
-
# next if last_id == id
|
113
|
-
# self.inverted[token] << id
|
114
|
-
# last_id = id
|
115
|
-
#
|
116
|
-
def retrieve
|
117
|
-
format = key_format || :to_i
|
118
|
-
empty_inverted
|
119
|
-
id, last_id = nil, nil
|
120
|
-
prepared.retrieve do |id, token|
|
121
|
-
initialize_inverted_index_for token
|
122
|
-
self.inverted[token] << id.send(format)
|
123
|
-
end
|
124
|
-
self.inverted.values.each &:uniq!
|
53
|
+
def empty_similarity
|
54
|
+
@similarity = @backend_similarity.empty
|
125
55
|
end
|
126
|
-
|
127
|
-
|
128
|
-
#
|
129
|
-
def generate_partial_from exact_inverted_index
|
130
|
-
timed_exclaim %Q{"#{identifier}": Generating partial index for index.}
|
131
|
-
self.inverted = exact_inverted_index
|
132
|
-
self.generate_partial
|
133
|
-
self
|
134
|
-
end
|
135
|
-
|
136
|
-
# Generates a new index (writes its index) using the
|
137
|
-
# partial caching strategy of this bundle.
|
138
|
-
#
|
139
|
-
def generate_partial
|
140
|
-
self.inverted = partial_strategy.generate_from self.inverted
|
141
|
-
end
|
142
|
-
# Generates a new weights index (writes its index) using the
|
143
|
-
# given weight caching strategy.
|
144
|
-
#
|
145
|
-
def generate_weights
|
146
|
-
self.weights = weights_strategy.generate_from self.inverted
|
147
|
-
end
|
148
|
-
# Generates a new similarity index (writes its index) using the
|
149
|
-
# given similarity caching strategy.
|
150
|
-
#
|
151
|
-
def generate_similarity
|
152
|
-
self.similarity = similarity_strategy.generate_from self.inverted
|
56
|
+
def empty_configuration
|
57
|
+
@configuration = @backend_configuration.empty
|
153
58
|
end
|
154
59
|
|
155
60
|
# Saves the indexes in a dump file.
|
156
61
|
#
|
157
62
|
def dump
|
158
|
-
timed_exclaim %Q{"#{identifier}": Dumping data.}
|
159
63
|
dump_inverted
|
160
64
|
dump_similarity
|
161
65
|
dump_weights
|
data/lib/picky/bundle_realtime.rb
CHANGED
@@ -32,7 +32,9 @@ module Picky
|
|
32
32
|
|
33
33
|
# Returns a reference to the array where the id has been added.
|
34
34
|
#
|
35
|
-
|
35
|
+
# TODO Rename sym.
|
36
|
+
#
|
37
|
+
def add id, sym, where = :unshift
|
36
38
|
ary = @inverted[sym]
|
37
39
|
|
38
40
|
syms = @realtime_mapping[id]
|
@@ -42,12 +44,12 @@ module Picky
|
|
42
44
|
#
|
43
45
|
ids = if syms.include? sym
|
44
46
|
ids = @inverted[sym]
|
45
|
-
ids.delete id
|
46
|
-
ids.
|
47
|
+
ids.delete id
|
48
|
+
ids.send where, id
|
47
49
|
else
|
48
50
|
syms << sym
|
49
51
|
ids = @inverted[sym] ||= []
|
50
|
-
ids.
|
52
|
+
ids.send where, id
|
51
53
|
end
|
52
54
|
|
53
55
|
# Weights.
|
@@ -60,21 +62,27 @@ module Picky
|
|
60
62
|
similarity = @similarity[encoded] ||= []
|
61
63
|
if similarity.include? sym
|
62
64
|
similarity.delete sym # Not completely correct, as others will also be affected, but meh.
|
63
|
-
similarity.
|
65
|
+
similarity.send where, sym #
|
64
66
|
else
|
65
|
-
similarity.
|
67
|
+
similarity.send where, sym
|
66
68
|
end
|
67
69
|
end
|
68
70
|
end
|
69
71
|
|
70
72
|
# Partializes the text and then adds each.
|
71
73
|
#
|
72
|
-
def add_partialized id, text
|
74
|
+
def add_partialized id, text, where = :unshift
|
73
75
|
self.partial_strategy.each_partial text do |partial_text|
|
74
|
-
add id, partial_text
|
76
|
+
add id, partial_text, where
|
75
77
|
end
|
76
78
|
end
|
77
79
|
|
80
|
+
# Clears the realtime mapping.
|
81
|
+
#
|
82
|
+
def clear_realtime_mapping
|
83
|
+
@realtime_mapping.clear
|
84
|
+
end
|
85
|
+
|
78
86
|
end
|
79
87
|
|
80
88
|
end
|
data/lib/picky/calculations/location.rb
CHANGED
@@ -11,37 +11,41 @@ module Picky
|
|
11
11
|
#
|
12
12
|
class Location
|
13
13
|
|
14
|
-
attr_reader :
|
14
|
+
attr_reader :anchor,
|
15
|
+
:precision,
|
16
|
+
:grid
|
15
17
|
|
16
|
-
def initialize user_grid, precision = nil
|
17
|
-
@user_grid
|
18
|
-
@precision
|
19
|
-
@grid
|
18
|
+
def initialize user_grid, anchor = 0.0, precision = nil
|
19
|
+
@user_grid = user_grid
|
20
|
+
@precision = precision || 1
|
21
|
+
@grid = @user_grid / (@precision + 0.5)
|
22
|
+
|
23
|
+
self.anchor = anchor
|
20
24
|
end
|
21
25
|
|
22
|
-
def
|
26
|
+
def anchor= value
|
23
27
|
# Add a margin of 1 user grid.
|
24
28
|
#
|
25
|
-
|
29
|
+
value -= @user_grid
|
26
30
|
|
27
31
|
# Add plus 1 grid so that the index key never falls on 0.
|
28
32
|
# Why? to_i maps by default to 0.
|
29
33
|
#
|
30
|
-
|
34
|
+
value -= @grid
|
31
35
|
|
32
|
-
@
|
36
|
+
@anchor = value
|
33
37
|
end
|
34
38
|
|
35
39
|
#
|
36
40
|
#
|
37
41
|
def add_margin length
|
38
|
-
@
|
42
|
+
@anchor -= length
|
39
43
|
end
|
40
44
|
|
41
45
|
#
|
42
46
|
#
|
43
|
-
def
|
44
|
-
range
|
47
|
+
def calculated_range location
|
48
|
+
range calculate(location)
|
45
49
|
end
|
46
50
|
#
|
47
51
|
#
|
@@ -50,8 +54,8 @@ module Picky
|
|
50
54
|
end
|
51
55
|
#
|
52
56
|
#
|
53
|
-
def
|
54
|
-
((location - @
|
57
|
+
def calculate location
|
58
|
+
((location - @anchor) / @grid).floor
|
55
59
|
end
|
56
60
|
|
57
61
|
end
|
data/lib/picky/categories.rb
CHANGED
@@ -40,7 +40,7 @@ module Picky
|
|
40
40
|
# Find a given category in the categories.
|
41
41
|
#
|
42
42
|
def [] category_name
|
43
|
-
category_name = category_name.
|
43
|
+
category_name = category_name.intern
|
44
44
|
category_hash[category_name] || raise_not_found(category_name)
|
45
45
|
end
|
46
46
|
def raise_not_found category_name
|
data/lib/picky/category.rb
CHANGED
@@ -4,7 +4,8 @@ module Picky
|
|
4
4
|
|
5
5
|
attr_reader :name,
|
6
6
|
:exact,
|
7
|
-
:partial
|
7
|
+
:partial,
|
8
|
+
:prepared
|
8
9
|
|
9
10
|
# Mandatory params:
|
10
11
|
# * name: Category name to use as identifier and file names.
|
@@ -20,6 +21,7 @@ module Picky
|
|
20
21
|
# * weights: Query::Weights.new( [:category1, :category2] => +2, ... )
|
21
22
|
# * tokenizer: Use a subclass of Tokenizers::Base that implements #tokens_for and #empty_tokens.
|
22
23
|
# * key_format: What this category's keys are formatted with (default is :to_i)
|
24
|
+
# * use_symbols: Whether to use symbols internally instead of strings.
|
23
25
|
#
|
24
26
|
def initialize name, index, options = {}
|
25
27
|
@name = name
|
@@ -31,6 +33,7 @@ module Picky
|
|
31
33
|
@from = options[:from]
|
32
34
|
@tokenizer = options[:tokenizer]
|
33
35
|
@key_format = options[:key_format]
|
36
|
+
# @symbols = options[:use_symbols] || index.use_symbols? # TODO Symbols.
|
34
37
|
@qualifiers = extract_qualifiers_from options
|
35
38
|
|
36
39
|
weights = options[:weights] || Generators::Weights::Default
|
@@ -46,6 +49,8 @@ module Picky
|
|
46
49
|
else
|
47
50
|
@partial = Bundle.new :partial, self, index.backend, weights, partial, no_similarity, options
|
48
51
|
end
|
52
|
+
|
53
|
+
@prepared = Backends::Memory::Text.new prepared_index_path
|
49
54
|
end
|
50
55
|
|
51
56
|
# Indexes and reloads the category.
|
@@ -58,6 +63,7 @@ module Picky
|
|
58
63
|
def dump
|
59
64
|
exact.dump
|
60
65
|
partial.dump
|
66
|
+
timed_exclaim %Q{"#{identifier}": Generated -> #{index_directory.gsub("#{PICKY_ROOT}/", '')}.}
|
61
67
|
end
|
62
68
|
|
63
69
|
# Index name.
|
data/lib/picky/category_indexing.rb
CHANGED
@@ -24,9 +24,12 @@ module Picky
|
|
24
24
|
indexer.index [self]
|
25
25
|
end
|
26
26
|
end
|
27
|
+
|
28
|
+
# Empty all the indexes.
|
29
|
+
#
|
27
30
|
def empty
|
28
31
|
exact.empty
|
29
|
-
partial.
|
32
|
+
partial.empty
|
30
33
|
end
|
31
34
|
|
32
35
|
# Take a data snapshot if the source offers it.
|
@@ -44,22 +47,17 @@ module Picky
|
|
44
47
|
# Generates all caches for this category.
|
45
48
|
#
|
46
49
|
def cache
|
47
|
-
|
48
|
-
|
49
|
-
generate_caches_from_memory
|
50
|
+
empty
|
51
|
+
retrieve
|
50
52
|
dump
|
51
|
-
|
53
|
+
clear_realtime_mapping # TODO To call or not to call, that is the question.
|
52
54
|
end
|
53
|
-
|
55
|
+
|
56
|
+
# Retrieves the prepared index data into the indexes and
|
57
|
+
# generates the necessary derived indexes.
|
54
58
|
#
|
55
|
-
def
|
56
|
-
|
57
|
-
end
|
58
|
-
def generate_partial
|
59
|
-
partial.generate_partial_from exact.inverted
|
60
|
-
end
|
61
|
-
def generate_caches_from_memory
|
62
|
-
partial.generate_caches_from_memory
|
59
|
+
def retrieve
|
60
|
+
prepared.retrieve { |id, token| add_tokenized_token id, token, :<< }
|
63
61
|
end
|
64
62
|
|
65
63
|
# Return an appropriate source.
|
@@ -80,12 +78,14 @@ module Picky
|
|
80
78
|
|
81
79
|
# Return the key format.
|
82
80
|
#
|
83
|
-
# If
|
84
|
-
#
|
81
|
+
# If no key_format is defined on the category
|
82
|
+
# and the source has no key format, ask
|
85
83
|
# the index for one.
|
86
84
|
#
|
85
|
+
# Default is to_i.
|
86
|
+
#
|
87
87
|
def key_format
|
88
|
-
source.respond_to?(:key_format) && source.key_format || @key_format ||
|
88
|
+
@key_format ||= source.respond_to?(:key_format) && source.key_format || @index.key_format || :to_i
|
89
89
|
end
|
90
90
|
|
91
91
|
# Where the data is taken from.
|
data/lib/picky/category_realtime.rb
CHANGED
@@ -13,29 +13,41 @@ module Picky
|
|
13
13
|
# Adds and indexes this category of the
|
14
14
|
# given object.
|
15
15
|
#
|
16
|
-
def add object
|
16
|
+
def add object, where = :unshift
|
17
17
|
tokens, _ = tokenizer.tokenize object.send(from)
|
18
|
-
add_tokenized object.id, tokens
|
18
|
+
add_tokenized object.id, tokens, where
|
19
19
|
end
|
20
20
|
|
21
21
|
# Removes the object's id, and then
|
22
22
|
# adds it again.
|
23
23
|
#
|
24
|
-
def replace object
|
24
|
+
def replace object, where = :unshift
|
25
25
|
remove object.id
|
26
|
-
add object
|
26
|
+
add object, where
|
27
27
|
end
|
28
28
|
|
29
29
|
# For the given id, adds the list of
|
30
30
|
# strings to the index for the given id.
|
31
31
|
#
|
32
|
-
def add_tokenized id, tokens
|
33
|
-
tokens.each
|
34
|
-
|
35
|
-
|
36
|
-
|
37
|
-
|
38
|
-
|
32
|
+
def add_tokenized id, tokens, where = :unshift
|
33
|
+
tokens.each { |text| add_tokenized_token id, text, where }
|
34
|
+
end
|
35
|
+
|
36
|
+
#
|
37
|
+
#
|
38
|
+
def add_tokenized_token id, text, where = :unshift
|
39
|
+
return unless text
|
40
|
+
id = id.send key_format # TODO Speed this up!
|
41
|
+
# text = text.to_sym if @symbols # TODO Symbols.
|
42
|
+
exact.add id, text, where
|
43
|
+
partial.add_partialized id, text, where
|
44
|
+
end
|
45
|
+
|
46
|
+
# Clears the realtime mapping.
|
47
|
+
#
|
48
|
+
def clear_realtime_mapping
|
49
|
+
exact.clear_realtime_mapping
|
50
|
+
partial.clear_realtime_mapping
|
39
51
|
end
|
40
52
|
|
41
53
|
end
|