picky 0.10.5 → 0.11.0
Sign up to get free protection for your applications and to get access to all the features.
- data/lib/picky/alias_instances.rb +1 -0
- data/lib/picky/application.rb +6 -7
- data/lib/picky/bundle.rb +31 -0
- data/lib/picky/configuration/indexes.rb +30 -41
- data/lib/picky/configuration/type.rb +6 -40
- data/lib/picky/ext/maybe_compile.rb +9 -0
- data/lib/picky/index/bundle.rb +1 -139
- data/lib/picky/{query/combinator.rb → index/categories.rb} +16 -18
- data/lib/picky/index/category.rb +20 -46
- data/lib/picky/index/type.rb +16 -12
- data/lib/picky/index/types.rb +41 -0
- data/lib/picky/index/wrappers/exact_first.rb +5 -1
- data/lib/picky/indexers/base.rb +9 -8
- data/lib/picky/indexing/bundle.rb +152 -0
- data/lib/picky/indexing/categories.rb +36 -0
- data/lib/picky/indexing/category.rb +145 -0
- data/lib/picky/indexing/type.rb +45 -0
- data/lib/picky/indexing/types.rb +74 -0
- data/lib/picky/loader.rb +17 -7
- data/lib/picky/query/base.rb +5 -4
- data/lib/picky/sources/wrappers/base.rb +23 -0
- data/lib/picky/sources/wrappers/location.rb +92 -0
- data/lib/picky/tokenizers/index.rb +4 -1
- data/lib/picky/type.rb +46 -0
- data/lib/picky/types.rb +38 -0
- data/lib/tasks/index.rake +4 -0
- data/project_prototype/Gemfile +1 -1
- data/project_prototype/app/application.rb +12 -12
- data/spec/lib/application_spec.rb +6 -9
- data/spec/lib/configuration/indexes_spec.rb +0 -85
- data/spec/lib/index/bundle_spec.rb +2 -94
- data/spec/lib/index/category_spec.rb +7 -86
- data/spec/lib/index/type_spec.rb +14 -26
- data/spec/lib/index/wrappers/exact_first_spec.rb +12 -12
- data/spec/lib/{index → indexing}/bundle_partial_generation_speed_spec.rb +2 -2
- data/spec/lib/indexing/bundle_spec.rb +174 -0
- data/spec/lib/{query/combinator_spec.rb → indexing/categories_spec.rb} +30 -34
- data/spec/lib/indexing/category_spec.rb +257 -0
- data/spec/lib/indexing/type_spec.rb +32 -0
- data/spec/lib/loader_spec.rb +0 -2
- data/spec/lib/query/base_spec.rb +8 -17
- data/spec/lib/query/full_spec.rb +3 -6
- data/spec/lib/query/live_spec.rb +4 -3
- data/spec/lib/sources/wrappers/base_spec.rb +35 -0
- data/spec/lib/sources/wrappers/location_spec.rb +68 -0
- data/spec/lib/tokenizers/index_spec.rb +2 -5
- metadata +32 -16
- data/lib/picky/configuration/field.rb +0 -73
- data/lib/picky/indexes.rb +0 -179
- data/lib/picky/initializers/ext.rb +0 -1
- data/spec/lib/configuration/field_spec.rb +0 -208
- data/spec/lib/configuration/type_spec.rb +0 -49
data/lib/picky/indexers/base.rb
CHANGED
@@ -7,22 +7,22 @@ module Indexers
|
|
7
7
|
#
|
8
8
|
class Base
|
9
9
|
|
10
|
-
def initialize type,
|
10
|
+
def initialize type, category
|
11
11
|
@type = type
|
12
|
-
@
|
12
|
+
@category = category
|
13
13
|
end
|
14
14
|
|
15
15
|
# Convenience method for getting the right Tokenizer.
|
16
16
|
#
|
17
17
|
def tokenizer
|
18
|
-
@
|
18
|
+
@category.tokenizer
|
19
19
|
end
|
20
20
|
# Convenience methods for user subclasses.
|
21
21
|
#
|
22
22
|
# TODO Duplicate code in Index::Files.
|
23
23
|
#
|
24
24
|
def search_index_file_name
|
25
|
-
@
|
25
|
+
@category.search_index_file_name
|
26
26
|
end
|
27
27
|
|
28
28
|
# Executes the specific strategy.
|
@@ -34,10 +34,10 @@ module Indexers
|
|
34
34
|
# Get the source where the data is taken from.
|
35
35
|
#
|
36
36
|
def source
|
37
|
-
@
|
37
|
+
@category.source || raise_no_source
|
38
38
|
end
|
39
39
|
def raise_no_source
|
40
|
-
raise NoSourceSpecifiedException.new "No source given for index:#{@type.name},
|
40
|
+
raise NoSourceSpecifiedException.new "No source given for index:#{@type.name}, category:#{@category.name}." # TODO field.identifier
|
41
41
|
end
|
42
42
|
|
43
43
|
# Selects the original id (indexed id) and a column to process. The column data is called "token".
|
@@ -54,8 +54,9 @@ module Indexers
|
|
54
54
|
#
|
55
55
|
File.open(search_index_file_name, 'w:binary') do |file|
|
56
56
|
result = []
|
57
|
-
source.harvest(@type, @
|
57
|
+
source.harvest(@type, @category) do |indexed_id, text|
|
58
58
|
tokenizer.tokenize(text).each do |token_text|
|
59
|
+
next unless token_text
|
59
60
|
result << indexed_id << comma << token_text << newline
|
60
61
|
end
|
61
62
|
file.write(result.join) && result.clear if result.size > 100_000
|
@@ -65,7 +66,7 @@ module Indexers
|
|
65
66
|
end
|
66
67
|
|
67
68
|
def indexing_message
|
68
|
-
timed_exclaim "INDEX #{@type.name} #{@
|
69
|
+
timed_exclaim "INDEX #{@type.name} #{@category.name}" #:#{@category.indexed_as}." # TODO field.identifier
|
69
70
|
end
|
70
71
|
|
71
72
|
end
|
@@ -0,0 +1,152 @@
|
|
1
|
+
# encoding: utf-8
|
2
|
+
#
|
3
|
+
module Indexing
|
4
|
+
|
5
|
+
# This is the indexing bundle.
|
6
|
+
# It does all menial tasks that have nothing to do
|
7
|
+
# with the actual index running etc.
|
8
|
+
#
|
9
|
+
# TODO Superclass?
|
10
|
+
#
|
11
|
+
class Bundle < ::Bundle
|
12
|
+
|
13
|
+
attr_accessor :partial_strategy, :weights_strategy
|
14
|
+
attr_reader :files
|
15
|
+
|
16
|
+
# Path is in which directory the cache is located.
|
17
|
+
#
|
18
|
+
def initialize name, category, type, similarity_strategy, partial_strategy, weights_strategy
|
19
|
+
super name, category, type, similarity_strategy
|
20
|
+
|
21
|
+
@partial_strategy = partial_strategy
|
22
|
+
@weights_strategy = weights_strategy
|
23
|
+
end
|
24
|
+
|
25
|
+
# Generation
|
26
|
+
#
|
27
|
+
|
28
|
+
# This method
|
29
|
+
# * loads the base index from the db
|
30
|
+
# * generates derived indexes
|
31
|
+
# * dumps all the indexes into files
|
32
|
+
#
|
33
|
+
def generate_caches_from_source
|
34
|
+
load_from_index_file
|
35
|
+
generate_caches_from_memory
|
36
|
+
end
|
37
|
+
# Generates derived indexes from the index and dumps.
|
38
|
+
#
|
39
|
+
# Note: assumes that there is something in the index
|
40
|
+
#
|
41
|
+
def generate_caches_from_memory
|
42
|
+
cache_from_memory_generation_message
|
43
|
+
generate_derived
|
44
|
+
end
|
45
|
+
def cache_from_memory_generation_message
|
46
|
+
timed_exclaim "CACHE FROM MEMORY #{identifier}."
|
47
|
+
end
|
48
|
+
|
49
|
+
# Generates the weights and similarity from the main index.
|
50
|
+
#
|
51
|
+
def generate_derived
|
52
|
+
generate_weights
|
53
|
+
generate_similarity
|
54
|
+
end
|
55
|
+
|
56
|
+
# Load the data from the db.
|
57
|
+
#
|
58
|
+
def load_from_index_file
|
59
|
+
load_from_index_generation_message
|
60
|
+
clear
|
61
|
+
retrieve
|
62
|
+
end
|
63
|
+
def load_from_index_generation_message
|
64
|
+
timed_exclaim "LOAD INDEX #{identifier}."
|
65
|
+
end
|
66
|
+
# Retrieves the data into the index.
|
67
|
+
#
|
68
|
+
def retrieve
|
69
|
+
files.retrieve do |id, token|
|
70
|
+
initialize_index_for token
|
71
|
+
index[token] << id
|
72
|
+
end
|
73
|
+
end
|
74
|
+
def initialize_index_for token
|
75
|
+
index[token] ||= []
|
76
|
+
end
|
77
|
+
|
78
|
+
# Generators.
|
79
|
+
#
|
80
|
+
# TODO Move somewhere more fitting.
|
81
|
+
#
|
82
|
+
|
83
|
+
# Generates a new index (writes its index) using the
|
84
|
+
# given partial caching strategy.
|
85
|
+
#
|
86
|
+
def generate_partial
|
87
|
+
generator = Cacher::PartialGenerator.new self.index
|
88
|
+
self.index = generator.generate self.partial_strategy
|
89
|
+
end
|
90
|
+
def generate_partial_from exact_index
|
91
|
+
timed_exclaim "PARTIAL GENERATE #{identifier}."
|
92
|
+
self.index = exact_index
|
93
|
+
self.generate_partial
|
94
|
+
self
|
95
|
+
end
|
96
|
+
# Generates a new similarity index (writes its index) using the
|
97
|
+
# given similarity caching strategy.
|
98
|
+
#
|
99
|
+
def generate_similarity
|
100
|
+
generator = Cacher::SimilarityGenerator.new self.index
|
101
|
+
self.similarity = generator.generate self.similarity_strategy
|
102
|
+
end
|
103
|
+
# Generates a new weights index (writes its index) using the
|
104
|
+
# given weight caching strategy.
|
105
|
+
#
|
106
|
+
def generate_weights
|
107
|
+
generator = Cacher::WeightsGenerator.new self.index
|
108
|
+
self.weights = generator.generate self.weights_strategy
|
109
|
+
end
|
110
|
+
|
111
|
+
# Saves the index in a dump file.
|
112
|
+
#
|
113
|
+
def dump
|
114
|
+
dump_index
|
115
|
+
dump_similarity
|
116
|
+
dump_weights
|
117
|
+
end
|
118
|
+
def dump_index
|
119
|
+
timed_exclaim "DUMP INDEX #{identifier}."
|
120
|
+
files.dump_index index
|
121
|
+
end
|
122
|
+
def dump_similarity
|
123
|
+
timed_exclaim "DUMP SIMILARITY #{identifier}."
|
124
|
+
files.dump_similarity similarity
|
125
|
+
end
|
126
|
+
def dump_weights
|
127
|
+
timed_exclaim "DUMP WEIGHTS #{identifier}."
|
128
|
+
files.dump_weights weights
|
129
|
+
end
|
130
|
+
|
131
|
+
# Alerts the user if an index is missing.
|
132
|
+
#
|
133
|
+
def raise_unless_cache_exists
|
134
|
+
warn_cache_small :index if files.index_cache_small?
|
135
|
+
warn_cache_small :similarity if files.similarity_cache_small?
|
136
|
+
warn_cache_small :weights if files.weights_cache_small?
|
137
|
+
|
138
|
+
raise_cache_missing :index unless files.index_cache_ok?
|
139
|
+
raise_cache_missing :similarity unless files.similarity_cache_ok?
|
140
|
+
raise_cache_missing :weights unless files.weights_cache_ok?
|
141
|
+
end
|
142
|
+
def warn_cache_small what
|
143
|
+
puts "#{what} cache for #{identifier} smaller than 16 bytes."
|
144
|
+
end
|
145
|
+
# Raises an appropriate error message.
|
146
|
+
#
|
147
|
+
def raise_cache_missing what
|
148
|
+
raise "#{what} cache for #{identifier} missing."
|
149
|
+
end
|
150
|
+
|
151
|
+
end
|
152
|
+
end
|
@@ -0,0 +1,36 @@
|
|
1
|
+
module Indexing
|
2
|
+
|
3
|
+
class Categories
|
4
|
+
|
5
|
+
attr_reader :categories
|
6
|
+
|
7
|
+
each_delegate :index,
|
8
|
+
:cache,
|
9
|
+
:generate_caches,
|
10
|
+
:backup_caches,
|
11
|
+
:restore_caches,
|
12
|
+
:check_caches,
|
13
|
+
:clear_caches,
|
14
|
+
:create_directory_structure,
|
15
|
+
:to => :categories
|
16
|
+
|
17
|
+
def initialize
|
18
|
+
@categories = []
|
19
|
+
end
|
20
|
+
|
21
|
+
def << category
|
22
|
+
categories << category
|
23
|
+
end
|
24
|
+
|
25
|
+
# Finds the category with the given name.
#
# The name is converted with to_sym before comparing, so both
# strings and symbols can be passed in.
#
# Returns the first category whose name matches, or nil if there is none.
#
# Note: The previous each-based loop returned the whole categories
# array (the return value of each) when nothing matched, which is
# truthy and misled callers that check "if found".
#
def find category_name
  category_name = category_name.to_sym

  categories.find { |category| category.name == category_name }
end
|
33
|
+
|
34
|
+
end
|
35
|
+
|
36
|
+
end
|
@@ -0,0 +1,145 @@
|
|
1
|
+
module Indexing
|
2
|
+
|
3
|
+
class Category
|
4
|
+
|
5
|
+
attr_reader :name, :type, :indexed_as, :virtual, :tokenizer, :source, :exact, :partial
|
6
|
+
|
7
|
+
# TODO Dup the options?
|
8
|
+
#
|
9
|
+
def initialize name, type, options = {}
|
10
|
+
@name = name
|
11
|
+
@type = type
|
12
|
+
|
13
|
+
@source = options[:source]
|
14
|
+
|
15
|
+
@tokenizer = options[:tokenizer] || Tokenizers::Index.default
|
16
|
+
@indexer_class = options[:indexer] || Indexers::Default
|
17
|
+
@indexed_as = options[:as] || name
|
18
|
+
@virtual = options[:virtual] || false # TODO What is this again?
|
19
|
+
|
20
|
+
# TODO Push into Bundle.
|
21
|
+
#
|
22
|
+
partial = options[:partial] || Cacher::Partial::Default
|
23
|
+
weights = options[:weights] || Cacher::Weights::Default
|
24
|
+
similarity = options[:similarity] || Cacher::Similarity::Default
|
25
|
+
|
26
|
+
@exact = options[:exact_indexing_bundle] || Bundle.new(:exact, self, type, similarity, Cacher::Partial::None.new, weights)
|
27
|
+
@partial = options[:partial_indexing_bundle] || Bundle.new(:partial, self, type, Cacher::Similarity::None.new, partial, weights)
|
28
|
+
|
29
|
+
# @remove = options[:remove] || false
|
30
|
+
# @filter = options[:filter] || true
|
31
|
+
|
32
|
+
@options = options # TODO Remove?
|
33
|
+
end
|
34
|
+
|
35
|
+
# TODO Move to initializer?
|
36
|
+
#
|
37
|
+
def identifier
|
38
|
+
@identifier ||= "#{type.name} #{name}"
|
39
|
+
end
|
40
|
+
|
41
|
+
# Note: Most of the time the source of the type is used.
|
42
|
+
#
|
43
|
+
def source
|
44
|
+
@source || type.source
|
45
|
+
end
|
46
|
+
|
47
|
+
# TODO Spec.
|
48
|
+
#
|
49
|
+
def backup_caches
|
50
|
+
timed_exclaim "Backing up #{identifier}."
|
51
|
+
exact.backup
|
52
|
+
partial.backup
|
53
|
+
end
|
54
|
+
def restore_caches
|
55
|
+
timed_exclaim "Restoring #{identifier}."
|
56
|
+
exact.restore
|
57
|
+
partial.restore
|
58
|
+
end
|
59
|
+
def check_caches
|
60
|
+
timed_exclaim "Checking #{identifier}."
|
61
|
+
exact.raise_unless_cache_exists
|
62
|
+
partial.raise_unless_cache_exists
|
63
|
+
end
|
64
|
+
def clear_caches
|
65
|
+
timed_exclaim "Deleting #{identifier}."
|
66
|
+
exact.delete
|
67
|
+
partial.delete
|
68
|
+
end
|
69
|
+
def create_directory_structure
|
70
|
+
timed_exclaim "Creating directory structure for #{identifier}."
|
71
|
+
exact.create_directory
|
72
|
+
partial.create_directory
|
73
|
+
end
|
74
|
+
|
75
|
+
# Used for testing.
|
76
|
+
#
|
77
|
+
# TODO Remove?
|
78
|
+
#
|
79
|
+
def generate_indexes_from_exact_index
|
80
|
+
generate_derived_exact
|
81
|
+
generate_partial
|
82
|
+
generate_derived_partial
|
83
|
+
end
|
84
|
+
def generate_derived_exact
|
85
|
+
exact.generate_derived
|
86
|
+
end
|
87
|
+
def generate_derived_partial
|
88
|
+
partial.generate_derived
|
89
|
+
end
|
90
|
+
|
91
|
+
# Generates all caches for this category.
|
92
|
+
#
|
93
|
+
def cache
|
94
|
+
prepare_cache_directory
|
95
|
+
generate_caches
|
96
|
+
end
|
97
|
+
def generate_caches
|
98
|
+
generate_caches_from_source
|
99
|
+
generate_partial
|
100
|
+
generate_caches_from_memory
|
101
|
+
dump_caches
|
102
|
+
timed_exclaim "CACHE FINISHED #{identifier}."
|
103
|
+
end
|
104
|
+
def generate_caches_from_source
|
105
|
+
exact.generate_caches_from_source
|
106
|
+
end
|
107
|
+
def generate_partial
|
108
|
+
partial.generate_partial_from exact.index
|
109
|
+
end
|
110
|
+
def generate_caches_from_memory
|
111
|
+
partial.generate_caches_from_memory
|
112
|
+
end
|
113
|
+
def dump_caches
|
114
|
+
exact.dump
|
115
|
+
partial.dump
|
116
|
+
end
|
117
|
+
|
118
|
+
# TODO Partially move to type. Duplicate Code in indexers/field.rb.
|
119
|
+
#
|
120
|
+
def search_index_root
|
121
|
+
File.join PICKY_ROOT, 'index'
|
122
|
+
end
|
123
|
+
def cache_directory
|
124
|
+
File.join search_index_root, PICKY_ENVIRONMENT, type.name.to_s
|
125
|
+
end
|
126
|
+
def search_index_file_name
|
127
|
+
File.join cache_directory, "prepared_#{name}_index.txt"
|
128
|
+
end
|
129
|
+
def index
|
130
|
+
prepare_cache_directory
|
131
|
+
indexer.index
|
132
|
+
end
|
133
|
+
def prepare_cache_directory
|
134
|
+
FileUtils.mkdir_p cache_directory
|
135
|
+
end
|
136
|
+
def indexer
|
137
|
+
@indexer || @indexer = @indexer_class.new(type, self)
|
138
|
+
end
|
139
|
+
def virtual?
|
140
|
+
!!virtual
|
141
|
+
end
|
142
|
+
|
143
|
+
end
|
144
|
+
|
145
|
+
end
|
@@ -0,0 +1,45 @@
|
|
1
|
+
module Indexing
|
2
|
+
|
3
|
+
class Type
|
4
|
+
|
5
|
+
attr_reader :name, :source, :categories, :after_indexing
|
6
|
+
|
7
|
+
# Delegators for indexing.
|
8
|
+
#
|
9
|
+
delegate :connect_backend,
|
10
|
+
:to => :source
|
11
|
+
|
12
|
+
delegate :index,
|
13
|
+
:cache,
|
14
|
+
:generate_caches,
|
15
|
+
:backup_caches,
|
16
|
+
:restore_caches,
|
17
|
+
:check_caches,
|
18
|
+
:clear_caches,
|
19
|
+
:create_directory_structure,
|
20
|
+
:to => :categories
|
21
|
+
|
22
|
+
def initialize name, source, options = {}
|
23
|
+
@name = name
|
24
|
+
@source = source
|
25
|
+
|
26
|
+
@after_indexing = options[:after_indexing]
|
27
|
+
|
28
|
+
@categories = Categories.new
|
29
|
+
end
|
30
|
+
|
31
|
+
# TODO Spec. Doc.
|
32
|
+
#
|
33
|
+
def add_category name, options = {}
|
34
|
+
categories << Category.new(name, self, options)
|
35
|
+
end
|
36
|
+
|
37
|
+
# Indexing.
|
38
|
+
#
|
39
|
+
def take_snapshot
|
40
|
+
source.take_snapshot self
|
41
|
+
end
|
42
|
+
|
43
|
+
end
|
44
|
+
|
45
|
+
end
|
@@ -0,0 +1,74 @@
|
|
1
|
+
module Indexing
|
2
|
+
|
3
|
+
class Types
|
4
|
+
|
5
|
+
attr_reader :types
|
6
|
+
|
7
|
+
each_delegate :take_snapshot,
|
8
|
+
:generate_caches,
|
9
|
+
:backup_caches,
|
10
|
+
:restore_caches,
|
11
|
+
:check_caches,
|
12
|
+
:clear_caches,
|
13
|
+
:create_directory_structure,
|
14
|
+
:to => :types
|
15
|
+
|
16
|
+
def initialize
|
17
|
+
clear
|
18
|
+
end
|
19
|
+
|
20
|
+
# TODO Spec.
|
21
|
+
#
|
22
|
+
def clear
|
23
|
+
@types = []
|
24
|
+
end
|
25
|
+
|
26
|
+
# TODO Spec. Superclass?
|
27
|
+
#
|
28
|
+
def register type
|
29
|
+
self.types << type
|
30
|
+
end
|
31
|
+
|
32
|
+
# Runs the indexers in parallel (index + cache).
|
33
|
+
#
|
34
|
+
# TODO Spec.
|
35
|
+
#
|
36
|
+
def index randomly = true
|
37
|
+
take_snapshot
|
38
|
+
|
39
|
+
# Run in parallel.
|
40
|
+
#
|
41
|
+
timed_exclaim "INDEXING USING #{Cores.max_processors} PROCESSORS, IN #{randomly ? 'RANDOM' : 'GIVEN'} ORDER."
|
42
|
+
Cores.forked self.types, { randomly: randomly } do |type|
|
43
|
+
type.index
|
44
|
+
type.cache
|
45
|
+
end
|
46
|
+
timed_exclaim "INDEXING FINISHED."
|
47
|
+
end
|
48
|
+
|
49
|
+
# TODO Spec
|
50
|
+
#
|
51
|
+
def generate_index_only type_name, field_name
|
52
|
+
found = find type_name, field_name
|
53
|
+
found.index if found
|
54
|
+
end
|
55
|
+
# Generates only the caches of the category identified by
# the given type name and category name.
#
# Does nothing (and returns nil) if find yields nothing.
#
# Note: This used to pass the undefined local "field_name" to find,
# which raised a NameError on every call.
#
def generate_cache_only type_name, category_name
  found = find type_name, category_name
  found.generate_caches if found
end
|
59
|
+
|
60
|
+
# TODO Spec
|
61
|
+
#
|
62
|
+
# Finds a category by type name and category name.
#
# The type name is converted with to_sym before comparing.
# The category lookup is delegated to the categories of each
# type with a matching name.
#
# Returns the first truthy lookup result, or nil if nothing is found.
#
# Note: Without the explicit nil, the method returned the types
# array (the return value of each) on a miss, which is truthy and
# made callers like generate_index_only call methods on an Array.
#
def find type_name, category_name
  type_name = type_name.to_sym

  types.each do |type|
    next unless type.name == type_name

    found = type.categories.find category_name
    return found if found
  end

  nil
end
|
72
|
+
|
73
|
+
end
|
74
|
+
end
|
data/lib/picky/loader.rb
CHANGED
@@ -84,7 +84,7 @@ module Loader
|
|
84
84
|
def self.load_framework
|
85
85
|
# Load compiled C code.
|
86
86
|
#
|
87
|
-
require_relative '
|
87
|
+
require_relative 'ext/maybe_compile'
|
88
88
|
|
89
89
|
# Load extensions.
|
90
90
|
#
|
@@ -166,9 +166,23 @@ module Loader
|
|
166
166
|
|
167
167
|
# Index types.
|
168
168
|
#
|
169
|
+
load_relative 'bundle'
|
170
|
+
|
171
|
+
load_relative 'indexing/bundle'
|
172
|
+
load_relative 'indexing/category'
|
173
|
+
load_relative 'indexing/categories'
|
174
|
+
load_relative 'indexing/type'
|
175
|
+
load_relative 'indexing/types'
|
176
|
+
|
169
177
|
load_relative 'index/bundle'
|
170
178
|
load_relative 'index/category'
|
179
|
+
load_relative 'index/categories'
|
171
180
|
load_relative 'index/type'
|
181
|
+
load_relative 'index/types'
|
182
|
+
|
183
|
+
load_relative 'types'
|
184
|
+
load_relative 'alias_instances'
|
185
|
+
load_relative 'type'
|
172
186
|
|
173
187
|
load_relative 'index/wrappers/exact_first'
|
174
188
|
|
@@ -193,7 +207,6 @@ module Loader
|
|
193
207
|
|
194
208
|
load_relative 'query/qualifiers'
|
195
209
|
load_relative 'query/weigher'
|
196
|
-
load_relative 'query/combinator'
|
197
210
|
|
198
211
|
load_relative 'query/weights'
|
199
212
|
|
@@ -219,14 +232,11 @@ module Loader
|
|
219
232
|
load_relative 'sources/delicious'
|
220
233
|
load_relative 'sources/couch'
|
221
234
|
|
222
|
-
|
223
|
-
|
224
|
-
load_relative 'indexes'
|
235
|
+
load_relative 'sources/wrappers/base'
|
236
|
+
load_relative 'sources/wrappers/location'
|
225
237
|
|
226
238
|
# Configuration.
|
227
239
|
#
|
228
|
-
load_relative 'configuration/field'
|
229
|
-
load_relative 'configuration/type'
|
230
240
|
load_relative 'configuration/indexes'
|
231
241
|
|
232
242
|
# ... in Application.
|
data/lib/picky/query/base.rb
CHANGED
@@ -17,10 +17,11 @@ module Query
|
|
17
17
|
# * tokenizer: Tokenizers::Query.default by default.
|
18
18
|
# * weights: A hash of weights, or a Query::Weights object.
|
19
19
|
#
|
20
|
-
def initialize *
|
21
|
-
options = Hash ===
|
22
|
-
|
23
|
-
|
20
|
+
def initialize *index_type_definitions
|
21
|
+
options = Hash === index_type_definitions.last ? index_type_definitions.pop : {}
|
22
|
+
indexes = index_type_definitions.map &:index
|
23
|
+
|
24
|
+
@weigher = options[:weigher] || Weigher.new(indexes)
|
24
25
|
@tokenizer = options[:tokenizer] || Tokenizers::Query.default
|
25
26
|
weights = options[:weights] || Weights.new
|
26
27
|
@weights = Hash === weights ? Weights.new(weights) : weights
|
@@ -0,0 +1,23 @@
|
|
1
|
+
module Sources
|
2
|
+
|
3
|
+
module Wrappers
|
4
|
+
|
5
|
+
class Base
|
6
|
+
|
7
|
+
attr_reader :backend
|
8
|
+
|
9
|
+
# Wraps a backend
|
10
|
+
#
|
11
|
+
def initialize backend
|
12
|
+
@backend = backend
|
13
|
+
end
|
14
|
+
|
15
|
+
# Default is delegation for all methods
|
16
|
+
#
|
17
|
+
delegate :harvest, :connect_backend, :take_snapshot, :to => :backend
|
18
|
+
|
19
|
+
end
|
20
|
+
|
21
|
+
end
|
22
|
+
|
23
|
+
end
|
@@ -0,0 +1,92 @@
|
|
1
|
+
module Sources
|
2
|
+
|
3
|
+
module Wrappers
|
4
|
+
|
5
|
+
class Location < Base
|
6
|
+
|
7
|
+
attr_reader :precision, :grid
|
8
|
+
|
9
|
+
# TODO Save min and grid!
|
10
|
+
#
|
11
|
+
def initialize backend, options = {}
|
12
|
+
super backend
|
13
|
+
|
14
|
+
@user_grid = extract_user_grid options
|
15
|
+
@precision = extract_precision options
|
16
|
+
|
17
|
+
@grid = @user_grid / (@precision + 0.5)
|
18
|
+
end
|
19
|
+
|
20
|
+
#
|
21
|
+
#
|
22
|
+
# Extracts the user defined grid size from the options.
#
# Returns the value of the :grid option.
#
# Raises a RuntimeError with an explanatory message if no :grid
# option is given (a bare raise only reported "unhandled exception").
#
def extract_user_grid options
  options[:grid] || raise("Sources::Wrappers::Location needs a :grid option, but none was given.")
end
|
25
|
+
# Extracts the precision: the number of extra grid cells a location is put into on each side.
|
26
|
+
# Precision is given in a value.
|
27
|
+
# 1 is low (up to 16.6% error), 5 is very high (up to 5% error).
|
28
|
+
#
|
29
|
+
# We don't recommend using values higher than 5.
|
30
|
+
#
|
31
|
+
# Default is 1.
|
32
|
+
#
|
33
|
+
def extract_precision options
|
34
|
+
options[:precision] || 1
|
35
|
+
end
|
36
|
+
|
37
|
+
def reset
|
38
|
+
@min = 1.0/0
|
39
|
+
end
|
40
|
+
|
41
|
+
# Yield the data (id, text for id) for the given type and field.
|
42
|
+
#
|
43
|
+
def harvest type, field
|
44
|
+
reset
|
45
|
+
|
46
|
+
# Cache. TODO Make option?
|
47
|
+
#
|
48
|
+
locations = []
|
49
|
+
|
50
|
+
# Gather min/max.
|
51
|
+
#
|
52
|
+
backend.harvest type, field do |indexed_id, location|
|
53
|
+
location = location.to_f
|
54
|
+
@min = location if location < @min
|
55
|
+
locations << [indexed_id, location]
|
56
|
+
end
|
57
|
+
|
58
|
+
# Add a margin.
|
59
|
+
#
|
60
|
+
marginize
|
61
|
+
|
62
|
+
# Recalculate locations.
|
63
|
+
#
|
64
|
+
locations.each do |indexed_id, location|
|
65
|
+
locations_for(location).each do |new_location|
|
66
|
+
yield indexed_id, new_location.to_s
|
67
|
+
end
|
68
|
+
end
|
69
|
+
end
|
70
|
+
|
71
|
+
def marginize
|
72
|
+
@min -= @user_grid
|
73
|
+
end
|
74
|
+
|
75
|
+
# Put location onto multiple places on a grid.
|
76
|
+
#
|
77
|
+
# Note: Always returns an integer.
|
78
|
+
#
|
79
|
+
def locations_for location
|
80
|
+
new_location = ((location - @min) / grid).floor
|
81
|
+
|
82
|
+
min_location = new_location - precision
|
83
|
+
max_location = new_location + precision
|
84
|
+
|
85
|
+
(min_location..max_location).to_a
|
86
|
+
end
|
87
|
+
|
88
|
+
end
|
89
|
+
|
90
|
+
end
|
91
|
+
|
92
|
+
end
|