picky 0.10.5 → 0.11.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/lib/picky/alias_instances.rb +1 -0
- data/lib/picky/application.rb +6 -7
- data/lib/picky/bundle.rb +31 -0
- data/lib/picky/configuration/indexes.rb +30 -41
- data/lib/picky/configuration/type.rb +6 -40
- data/lib/picky/ext/maybe_compile.rb +9 -0
- data/lib/picky/index/bundle.rb +1 -139
- data/lib/picky/{query/combinator.rb → index/categories.rb} +16 -18
- data/lib/picky/index/category.rb +20 -46
- data/lib/picky/index/type.rb +16 -12
- data/lib/picky/index/types.rb +41 -0
- data/lib/picky/index/wrappers/exact_first.rb +5 -1
- data/lib/picky/indexers/base.rb +9 -8
- data/lib/picky/indexing/bundle.rb +152 -0
- data/lib/picky/indexing/categories.rb +36 -0
- data/lib/picky/indexing/category.rb +145 -0
- data/lib/picky/indexing/type.rb +45 -0
- data/lib/picky/indexing/types.rb +74 -0
- data/lib/picky/loader.rb +17 -7
- data/lib/picky/query/base.rb +5 -4
- data/lib/picky/sources/wrappers/base.rb +23 -0
- data/lib/picky/sources/wrappers/location.rb +92 -0
- data/lib/picky/tokenizers/index.rb +4 -1
- data/lib/picky/type.rb +46 -0
- data/lib/picky/types.rb +38 -0
- data/lib/tasks/index.rake +4 -0
- data/project_prototype/Gemfile +1 -1
- data/project_prototype/app/application.rb +12 -12
- data/spec/lib/application_spec.rb +6 -9
- data/spec/lib/configuration/indexes_spec.rb +0 -85
- data/spec/lib/index/bundle_spec.rb +2 -94
- data/spec/lib/index/category_spec.rb +7 -86
- data/spec/lib/index/type_spec.rb +14 -26
- data/spec/lib/index/wrappers/exact_first_spec.rb +12 -12
- data/spec/lib/{index → indexing}/bundle_partial_generation_speed_spec.rb +2 -2
- data/spec/lib/indexing/bundle_spec.rb +174 -0
- data/spec/lib/{query/combinator_spec.rb → indexing/categories_spec.rb} +30 -34
- data/spec/lib/indexing/category_spec.rb +257 -0
- data/spec/lib/indexing/type_spec.rb +32 -0
- data/spec/lib/loader_spec.rb +0 -2
- data/spec/lib/query/base_spec.rb +8 -17
- data/spec/lib/query/full_spec.rb +3 -6
- data/spec/lib/query/live_spec.rb +4 -3
- data/spec/lib/sources/wrappers/base_spec.rb +35 -0
- data/spec/lib/sources/wrappers/location_spec.rb +68 -0
- data/spec/lib/tokenizers/index_spec.rb +2 -5
- metadata +32 -16
- data/lib/picky/configuration/field.rb +0 -73
- data/lib/picky/indexes.rb +0 -179
- data/lib/picky/initializers/ext.rb +0 -1
- data/spec/lib/configuration/field_spec.rb +0 -208
- data/spec/lib/configuration/type_spec.rb +0 -49
data/lib/picky/indexers/base.rb
CHANGED
@@ -7,22 +7,22 @@ module Indexers
|
|
7
7
|
#
|
8
8
|
class Base
|
9
9
|
|
10
|
-
def initialize type,
|
10
|
+
def initialize type, category
|
11
11
|
@type = type
|
12
|
-
@
|
12
|
+
@category = category
|
13
13
|
end
|
14
14
|
|
15
15
|
# Convenience method for getting the right Tokenizer.
|
16
16
|
#
|
17
17
|
def tokenizer
|
18
|
-
@
|
18
|
+
@category.tokenizer
|
19
19
|
end
|
20
20
|
# Convenience methods for user subclasses.
|
21
21
|
#
|
22
22
|
# TODO Duplicate code in Index::Files.
|
23
23
|
#
|
24
24
|
def search_index_file_name
|
25
|
-
@
|
25
|
+
@category.search_index_file_name
|
26
26
|
end
|
27
27
|
|
28
28
|
# Executes the specific strategy.
|
@@ -34,10 +34,10 @@ module Indexers
|
|
34
34
|
# Get the source where the data is taken from.
|
35
35
|
#
|
36
36
|
def source
|
37
|
-
@
|
37
|
+
@category.source || raise_no_source
|
38
38
|
end
|
39
39
|
def raise_no_source
|
40
|
-
raise NoSourceSpecifiedException.new "No source given for index:#{@type.name},
|
40
|
+
raise NoSourceSpecifiedException.new "No source given for index:#{@type.name}, category:#{@category.name}." # TODO field.identifier
|
41
41
|
end
|
42
42
|
|
43
43
|
# Selects the original id (indexed id) and a column to process. The column data is called "token".
|
@@ -54,8 +54,9 @@ module Indexers
|
|
54
54
|
#
|
55
55
|
File.open(search_index_file_name, 'w:binary') do |file|
|
56
56
|
result = []
|
57
|
-
source.harvest(@type, @
|
57
|
+
source.harvest(@type, @category) do |indexed_id, text|
|
58
58
|
tokenizer.tokenize(text).each do |token_text|
|
59
|
+
next unless token_text
|
59
60
|
result << indexed_id << comma << token_text << newline
|
60
61
|
end
|
61
62
|
file.write(result.join) && result.clear if result.size > 100_000
|
@@ -65,7 +66,7 @@ module Indexers
|
|
65
66
|
end
|
66
67
|
|
67
68
|
def indexing_message
|
68
|
-
timed_exclaim "INDEX #{@type.name} #{@
|
69
|
+
timed_exclaim "INDEX #{@type.name} #{@category.name}" #:#{@category.indexed_as}." # TODO field.identifier
|
69
70
|
end
|
70
71
|
|
71
72
|
end
|
@@ -0,0 +1,152 @@
|
|
1
|
+
# encoding: utf-8
|
2
|
+
#
|
3
|
+
module Indexing

  # Bundle used while indexing.
  #
  # Takes care of the chores around an index that are unrelated to
  # answering queries: reading the prepared index file, deriving the
  # partial, similarity and weights indexes, dumping them, and checking
  # the dumped caches.
  #
  # TODO Superclass?
  #
  class Bundle < ::Bundle

    # NOTE(review): @files is not assigned in this class; presumably
    # ::Bundle sets it up - confirm.
    attr_reader   :files
    attr_accessor :partial_strategy, :weights_strategy

    # The first four arguments are handed to ::Bundle unchanged; the
    # partial and weights strategies are kept on this bundle.
    #
    def initialize(name, category, type, similarity_strategy, partial_strategy, weights_strategy)
      super(name, category, type, similarity_strategy)

      @partial_strategy = partial_strategy
      @weights_strategy = weights_strategy
    end

    # Generation
    #

    # Reads the base index from the prepared index file, then generates
    # the derived indexes (weights, similarity) from it.
    #
    def generate_caches_from_source
      load_from_index_file
      generate_caches_from_memory
    end

    # Generates the derived indexes from the index held in memory.
    #
    # Note: assumes that there is something in the index.
    #
    def generate_caches_from_memory
      cache_from_memory_generation_message
      generate_derived
    end

    def cache_from_memory_generation_message
      timed_exclaim "CACHE FROM MEMORY #{identifier}."
    end

    # Derives weights first, then similarity, from the main index.
    #
    def generate_derived
      generate_weights
      generate_similarity
    end

    # Empties this bundle and refills its index from the files.
    #
    def load_from_index_file
      load_from_index_generation_message
      clear
      retrieve
    end

    def load_from_index_generation_message
      timed_exclaim "LOAD INDEX #{identifier}."
    end

    # Adds every (id, token) pair the files yield to the index.
    #
    def retrieve
      files.retrieve do |indexed_id, token_text|
        initialize_index_for token_text
        index[token_text] << indexed_id
      end
    end

    # Makes sure the index has an id list for the given token.
    #
    def initialize_index_for(token)
      index[token] ||= []
    end

    # Generators.
    #
    # TODO Move somewhere more fitting.
    #

    # Replaces the index with the one generated from it using the
    # partial strategy.
    #
    def generate_partial
      self.index = Cacher::PartialGenerator.new(index).generate(partial_strategy)
    end

    # Takes over the given exact index, generates the partial index
    # from it and returns this bundle.
    #
    def generate_partial_from(exact_index)
      timed_exclaim "PARTIAL GENERATE #{identifier}."
      self.index = exact_index
      generate_partial
      self
    end

    # Generates the similarity index from the index using the
    # similarity strategy.
    #
    def generate_similarity
      self.similarity = Cacher::SimilarityGenerator.new(index).generate(similarity_strategy)
    end

    # Generates the weights index from the index using the weights
    # strategy.
    #
    def generate_weights
      self.weights = Cacher::WeightsGenerator.new(index).generate(weights_strategy)
    end

    # Dumps index, similarity and weights (in that order) via the files.
    #
    def dump
      dump_index
      dump_similarity
      dump_weights
    end

    def dump_index
      timed_exclaim "DUMP INDEX #{identifier}."
      files.dump_index(index)
    end

    def dump_similarity
      timed_exclaim "DUMP SIMILARITY #{identifier}."
      files.dump_similarity(similarity)
    end

    def dump_weights
      timed_exclaim "DUMP WEIGHTS #{identifier}."
      files.dump_weights(weights)
    end

    # Warns about every cache the files report as small, then raises
    # for the first cache (index, similarity, weights) that is not ok.
    #
    def raise_unless_cache_exists
      # All warnings are printed before anything is raised.
      warn_cache_small(:index)      if files.index_cache_small?
      warn_cache_small(:similarity) if files.similarity_cache_small?
      warn_cache_small(:weights)    if files.weights_cache_small?

      raise_cache_missing(:index)      unless files.index_cache_ok?
      raise_cache_missing(:similarity) unless files.similarity_cache_ok?
      raise_cache_missing(:weights)    unless files.weights_cache_ok?
    end

    def warn_cache_small(what)
      puts "#{what} cache for #{identifier} smaller than 16 bytes."
    end

    # Raises a RuntimeError naming the missing cache.
    #
    def raise_cache_missing(what)
      raise "#{what} cache for #{identifier} missing."
    end

  end

end
|
@@ -0,0 +1,36 @@
|
|
1
|
+
module Indexing

  # Holds the indexing categories of a type.
  #
  class Categories

    attr_reader :categories

    # NOTE(review): each_delegate is defined elsewhere in the project;
    # presumably it calls the listed method on each element of
    # #categories - confirm.
    #
    each_delegate :index,
                  :cache,
                  :generate_caches,
                  :backup_caches,
                  :restore_caches,
                  :check_caches,
                  :clear_caches,
                  :create_directory_structure,
                  :to => :categories

    def initialize
      @categories = []
    end

    # Appends a category.
    #
    # Returns the underlying categories array.
    #
    def <<(category)
      categories << category
    end

    # Finds the first category whose name equals the given name.
    #
    # category_name - anything responding to #to_sym.
    #
    # Returns the category, or nil if there is none with that name.
    # (Iterating with #each and returning from inside the block made a
    # miss return the whole categories array, which is truthy.)
    #
    def find(category_name)
      category_name = category_name.to_sym

      categories.find { |category| category.name == category_name }
    end

  end

end
|
@@ -0,0 +1,145 @@
|
|
1
|
+
module Indexing

  # A single category of an index type, as seen by the indexer.
  #
  # Owns an exact and a partial indexing bundle and knows where its
  # prepared index file is located.
  #
  class Category

    # Note: #source is defined explicitly below, since it falls back to
    # the source of the type.
    #
    attr_reader :name, :type, :indexed_as, :virtual, :tokenizer, :exact, :partial

    # name    - the name of the category.
    # type    - the type this category belongs to (must respond to #name
    #           and #source).
    # options - :source, :tokenizer, :indexer, :as, :virtual, :partial,
    #           :weights, :similarity, :exact_indexing_bundle,
    #           :partial_indexing_bundle. Each one replaces the
    #           corresponding default below.
    #
    # TODO Dup the options?
    #
    def initialize(name, type, options = {})
      @name = name
      @type = type

      @source = options[:source]

      @tokenizer     = options[:tokenizer] || Tokenizers::Index.default
      @indexer_class = options[:indexer]   || Indexers::Default
      @indexed_as    = options[:as]        || name
      @virtual       = options[:virtual]   || false # TODO What is this again?

      # TODO Push into Bundle.
      #
      partial_strategy    = options[:partial]    || Cacher::Partial::Default
      weights_strategy    = options[:weights]    || Cacher::Weights::Default
      similarity_strategy = options[:similarity] || Cacher::Similarity::Default

      # The exact bundle gets no partial strategy, the partial bundle no
      # similarity strategy.
      #
      @exact   = options[:exact_indexing_bundle]   || Bundle.new(:exact,   self, type, similarity_strategy, Cacher::Partial::None.new, weights_strategy)
      @partial = options[:partial_indexing_bundle] || Bundle.new(:partial, self, type, Cacher::Similarity::None.new, partial_strategy, weights_strategy)

      # @remove       = options[:remove]        || false
      # @filter       = options[:filter]        || true

      @options = options # TODO Remove?
    end

    # "<type name> <category name>", memoized.
    #
    # TODO Move to initializer?
    #
    def identifier
      @identifier ||= "#{type.name} #{name}"
    end

    # The source given for this category, else the source of the type.
    #
    # Note: Most of the time the source of the type is used.
    #
    def source
      @source || type.source
    end

    # TODO Spec.
    #
    def backup_caches
      timed_exclaim "Backing up #{identifier}."
      exact.backup
      partial.backup
    end
    def restore_caches
      timed_exclaim "Restoring #{identifier}."
      exact.restore
      partial.restore
    end
    def check_caches
      timed_exclaim "Checking #{identifier}."
      exact.raise_unless_cache_exists
      partial.raise_unless_cache_exists
    end
    def clear_caches
      timed_exclaim "Deleting #{identifier}."
      exact.delete
      partial.delete
    end
    def create_directory_structure
      timed_exclaim "Creating directory structure for #{identifier}."
      exact.create_directory
      partial.create_directory
    end

    # Used for testing.
    #
    # TODO Remove?
    #
    def generate_indexes_from_exact_index
      generate_derived_exact
      generate_partial
      generate_derived_partial
    end
    def generate_derived_exact
      exact.generate_derived
    end
    def generate_derived_partial
      partial.generate_derived
    end

    # Generates all caches for this category.
    #
    def cache
      prepare_cache_directory
      generate_caches
    end

    # Order matters: the partial bundle is generated from the index of
    # the exact bundle, so the exact bundle has to be loaded first.
    #
    def generate_caches
      generate_caches_from_source
      generate_partial
      generate_caches_from_memory
      dump_caches
      timed_exclaim "CACHE FINISHED #{identifier}."
    end
    def generate_caches_from_source
      exact.generate_caches_from_source
    end
    def generate_partial
      partial.generate_partial_from exact.index
    end
    def generate_caches_from_memory
      partial.generate_caches_from_memory
    end
    def dump_caches
      exact.dump
      partial.dump
    end

    # TODO Partially move to type. Duplicate Code in indexers/field.rb.
    #
    def search_index_root
      File.join PICKY_ROOT, 'index'
    end

    # <PICKY_ROOT>/index/<PICKY_ENVIRONMENT>/<type name>
    #
    def cache_directory
      File.join search_index_root, PICKY_ENVIRONMENT, type.name.to_s
    end

    # The file the indexer writes the prepared data of this category to.
    #
    def search_index_file_name
      File.join cache_directory, "prepared_#{name}_index.txt"
    end

    # Makes sure the cache directory exists, then runs the indexer.
    #
    def index
      prepare_cache_directory
      indexer.index
    end
    def prepare_cache_directory
      FileUtils.mkdir_p cache_directory
    end

    # The indexer for this category, instantiated once with the type
    # and this category.
    #
    def indexer
      @indexer ||= @indexer_class.new(type, self)
    end

    def virtual?
      !!virtual
    end

  end

end
|
@@ -0,0 +1,45 @@
|
|
1
|
+
module Indexing

  # An index type as seen by the indexer: a name, a source, and the
  # categories that get indexed for it.
  #
  class Type

    attr_reader :name, :source, :categories, :after_indexing

    # Connecting to the backend is left to the source.
    #
    delegate :connect_backend, to: :source

    # Indexing and cache handling is left to the categories.
    #
    delegate :index,
             :cache,
             :generate_caches,
             :backup_caches,
             :restore_caches,
             :check_caches,
             :clear_caches,
             :create_directory_structure,
             to: :categories

    # name    - the name of the type.
    # source  - the source the data is taken from.
    # options - only :after_indexing is read here.
    #
    def initialize(name, source, options = {})
      @name           = name
      @source         = source
      @after_indexing = options[:after_indexing]
      @categories     = Categories.new
    end

    # Builds a category with the given name and options for this type
    # and appends it to the categories.
    #
    # TODO Spec. Doc.
    #
    def add_category(name, options = {})
      category = Category.new(name, self, options)
      categories << category
    end

    # Indexing.
    #

    # Has the source take a snapshot for this type.
    #
    def take_snapshot
      source.take_snapshot(self)
    end

  end

end
|
@@ -0,0 +1,74 @@
|
|
1
|
+
module Indexing

  # Registry of all indexing types.
  #
  class Types

    attr_reader :types

    # NOTE(review): each_delegate is defined elsewhere in the project;
    # presumably it calls the listed method on each registered type -
    # confirm.
    #
    each_delegate :take_snapshot,
                  :generate_caches,
                  :backup_caches,
                  :restore_caches,
                  :check_caches,
                  :clear_caches,
                  :create_directory_structure,
                  :to => :types

    def initialize
      clear
    end

    # Forgets all registered types.
    #
    # TODO Spec.
    #
    def clear
      @types = []
    end

    # Adds a type to the registry.
    #
    # TODO Spec. Superclass?
    #
    def register(type)
      self.types << type
    end

    # Runs the indexers in parallel (index + cache).
    #
    # randomly - passed on to Cores.forked as the :randomly option and
    #            reported in the log message (default: true).
    #
    # TODO Spec.
    #
    def index(randomly = true)
      take_snapshot

      # Run in parallel.
      #
      timed_exclaim "INDEXING USING #{Cores.max_processors} PROCESSORS, IN #{randomly ? 'RANDOM' : 'GIVEN'} ORDER."
      Cores.forked(self.types, { randomly: randomly }) do |type|
        type.index
        type.cache
      end
      timed_exclaim "INDEXING FINISHED."
    end

    # Runs only the indexing step of the given category, if it exists.
    #
    # TODO Spec
    #
    def generate_index_only(type_name, field_name)
      found = find type_name, field_name
      found.index if found
    end

    # Generates only the caches of the given category, if it exists.
    #
    # Note: This looked up an undefined local (field_name) instead of
    # its category_name parameter and so always raised a NameError.
    #
    def generate_cache_only(type_name, category_name)
      found = find type_name, category_name
      found.generate_caches if found
    end

    # Finds a category by type name and category name.
    #
    # All types with the given name are tried in registration order.
    #
    # Returns the first category found, or nil. The explicit nil is
    # needed since #each returns the types array itself, which callers
    # checking "if found" would mistake for a hit.
    #
    # TODO Spec
    #
    def find(type_name, category_name)
      type_name = type_name.to_sym

      types.each do |type|
        next unless type.name == type_name

        found = type.categories.find category_name
        return found if found
      end

      nil
    end

  end
end
|
data/lib/picky/loader.rb
CHANGED
@@ -84,7 +84,7 @@ module Loader
|
|
84
84
|
def self.load_framework
|
85
85
|
# Load compiled C code.
|
86
86
|
#
|
87
|
-
require_relative '
|
87
|
+
require_relative 'ext/maybe_compile'
|
88
88
|
|
89
89
|
# Load extensions.
|
90
90
|
#
|
@@ -166,9 +166,23 @@ module Loader
|
|
166
166
|
|
167
167
|
# Index types.
|
168
168
|
#
|
169
|
+
load_relative 'bundle'
|
170
|
+
|
171
|
+
load_relative 'indexing/bundle'
|
172
|
+
load_relative 'indexing/category'
|
173
|
+
load_relative 'indexing/categories'
|
174
|
+
load_relative 'indexing/type'
|
175
|
+
load_relative 'indexing/types'
|
176
|
+
|
169
177
|
load_relative 'index/bundle'
|
170
178
|
load_relative 'index/category'
|
179
|
+
load_relative 'index/categories'
|
171
180
|
load_relative 'index/type'
|
181
|
+
load_relative 'index/types'
|
182
|
+
|
183
|
+
load_relative 'types'
|
184
|
+
load_relative 'alias_instances'
|
185
|
+
load_relative 'type'
|
172
186
|
|
173
187
|
load_relative 'index/wrappers/exact_first'
|
174
188
|
|
@@ -193,7 +207,6 @@ module Loader
|
|
193
207
|
|
194
208
|
load_relative 'query/qualifiers'
|
195
209
|
load_relative 'query/weigher'
|
196
|
-
load_relative 'query/combinator'
|
197
210
|
|
198
211
|
load_relative 'query/weights'
|
199
212
|
|
@@ -219,14 +232,11 @@ module Loader
|
|
219
232
|
load_relative 'sources/delicious'
|
220
233
|
load_relative 'sources/couch'
|
221
234
|
|
222
|
-
|
223
|
-
|
224
|
-
load_relative 'indexes'
|
235
|
+
load_relative 'sources/wrappers/base'
|
236
|
+
load_relative 'sources/wrappers/location'
|
225
237
|
|
226
238
|
# Configuration.
|
227
239
|
#
|
228
|
-
load_relative 'configuration/field'
|
229
|
-
load_relative 'configuration/type'
|
230
240
|
load_relative 'configuration/indexes'
|
231
241
|
|
232
242
|
# ... in Application.
|
data/lib/picky/query/base.rb
CHANGED
@@ -17,10 +17,11 @@ module Query
|
|
17
17
|
# * tokenizer: Tokenizers::Query.default by default.
|
18
18
|
# * weights: A hash of weights, or a Query::Weights object.
|
19
19
|
#
|
20
|
-
def initialize *
|
21
|
-
options = Hash ===
|
22
|
-
|
23
|
-
|
20
|
+
def initialize *index_type_definitions
|
21
|
+
options = Hash === index_type_definitions.last ? index_type_definitions.pop : {}
|
22
|
+
indexes = index_type_definitions.map &:index
|
23
|
+
|
24
|
+
@weigher = options[:weigher] || Weigher.new(indexes)
|
24
25
|
@tokenizer = options[:tokenizer] || Tokenizers::Query.default
|
25
26
|
weights = options[:weights] || Weights.new
|
26
27
|
@weights = Hash === weights ? Weights.new(weights) : weights
|
@@ -0,0 +1,23 @@
|
|
1
|
+
module Sources

  module Wrappers

    # Base class for source wrappers: keeps the wrapped backend and
    # forwards the source methods to it unless a subclass overrides them.
    #
    class Base

      attr_reader :backend

      # Delegation to the backend is the default for all of these.
      #
      delegate :harvest, :connect_backend, :take_snapshot, to: :backend

      # backend - the source to wrap.
      #
      def initialize(backend)
        @backend = backend
      end

    end

  end

end
|
@@ -0,0 +1,92 @@
|
|
1
|
+
module Sources

  module Wrappers

    # Wraps a source whose harvested values are locations (anything
    # responding to #to_f) and yields, instead of each raw location, the
    # integer grid positions around it.
    #
    class Location < Base

      attr_reader :precision, :grid

      # backend - the source to wrap.
      # options - :grid (required): the grid size given by the user.
      #           :precision (optional, default 1), see #extract_precision.
      #
      # The internally used grid is user grid / (precision + 0.5).
      #
      # TODO Save min and grid!
      #
      def initialize(backend, options = {})
        super backend

        @user_grid = extract_user_grid options
        @precision = extract_precision options

        @grid = @user_grid / (@precision + 0.5)
      end

      # Returns the :grid option.
      #
      # Raises a RuntimeError with an explanatory message if it is
      # missing (instead of a bare, message-less raise).
      #
      def extract_user_grid(options)
        options[:grid] || raise("#{self.class.name}: the :grid option is required, e.g. :grid => 1.5.")
      end

      # Returns the :precision option.
      #
      # Each location is put onto 2 * precision + 1 grid positions
      # (see #locations_for).
      #
      # 1 is low (up to 16.6% error), 5 is very high (up to 5% error).
      #
      # We don't recommend using values higher than 5.
      #
      # Default is 1.
      #
      def extract_precision(options)
        options[:precision] || 1
      end

      # Resets the running minimum to +Infinity, so that the first
      # harvested location always replaces it.
      #
      def reset
        @min = 1.0/0
      end

      # Yields (id, grid position as String) pairs for the given type
      # and field.
      #
      # Works in two passes: the backend is harvested completely to
      # find the minimum location, then each location is translated
      # into its grid positions relative to that minimum.
      #
      def harvest(type, field)
        reset

        # Cache. TODO Make option?
        #
        locations = []

        # Gather the minimum.
        #
        backend.harvest type, field do |indexed_id, location|
          location = location.to_f
          @min = location if location < @min
          locations << [indexed_id, location]
        end

        # Add a margin.
        #
        marginize

        # Recalculate locations.
        #
        locations.each do |indexed_id, location|
          locations_for(location).each do |new_location|
            yield indexed_id, new_location.to_s
          end
        end
      end

      # Moves the minimum down by one user grid.
      #
      def marginize
        @min -= @user_grid
      end

      # Puts the location onto multiple places on the grid.
      #
      # Returns an array of consecutive integers: the grid position of
      # the location (relative to the minimum), plus precision
      # positions on either side.
      #
      # NOTE(review): assumes precision is an Integer - confirm.
      #
      def locations_for(location)
        new_location = ((location - @min) / grid).floor

        min_location = new_location - precision
        max_location = new_location + precision

        (min_location..max_location).to_a
      end

    end

  end

end
|