picky 2.1.2 → 2.2.0
Sign up to get free protection for your applications and to get access to all the features.
- data/{lib → aux}/picky/cli.rb +50 -38
- data/bin/picky +1 -1
- data/lib/picky/application.rb +5 -2
- data/lib/picky/index/base.rb +88 -25
- data/lib/picky/index/memory.rb +8 -8
- data/lib/picky/index/redis.rb +8 -8
- data/lib/picky/index_bundle.rb +2 -2
- data/lib/picky/indexing/indexes.rb +6 -6
- data/lib/picky/internals/calculations/location.rb +54 -42
- data/lib/picky/internals/index/backend.rb +21 -21
- data/lib/picky/internals/index/file/text.rb +11 -11
- data/lib/picky/internals/index/files.rb +6 -6
- data/lib/picky/internals/index/redis.rb +14 -14
- data/lib/picky/internals/indexed/bundle/base.rb +2 -2
- data/lib/picky/internals/indexed/bundle/redis.rb +3 -3
- data/lib/picky/internals/indexed/category.rb +8 -9
- data/lib/picky/internals/indexed/wrappers/bundle/calculation.rb +25 -23
- data/lib/picky/internals/indexed/wrappers/bundle/location.rb +36 -34
- data/lib/picky/internals/indexed/wrappers/bundle/wrapper.rb +35 -33
- data/lib/picky/internals/indexed/wrappers/category/location.rb +27 -0
- data/lib/picky/internals/indexers/base.rb +28 -0
- data/lib/picky/internals/indexers/parallel.rb +64 -0
- data/lib/picky/internals/indexers/serial.rb +20 -29
- data/lib/picky/internals/indexing/bundle/base.rb +2 -2
- data/lib/picky/internals/indexing/bundle/super_base.rb +3 -3
- data/lib/picky/internals/indexing/category.rb +30 -27
- data/lib/picky/internals/indexing/index.rb +82 -27
- data/lib/picky/internals/indexing/wrappers/category/location.rb +27 -0
- data/lib/picky/internals/query/indexes.rb +1 -1
- data/lib/picky/internals/query/qualifiers.rb +7 -6
- data/lib/picky/internals/query/weights.rb +6 -0
- data/lib/picky/internals/shared/category.rb +52 -0
- data/lib/picky/internals/tokenizers/base.rb +1 -1
- data/lib/picky/internals/tokenizers/location.rb +54 -0
- data/lib/picky/loader.rb +16 -3
- data/lib/picky/no_source_specified_exception.rb +3 -0
- data/lib/picky/search.rb +44 -5
- data/lib/picky/sources/base.rb +2 -2
- data/lib/picky/sources/couch.rb +1 -1
- data/lib/picky/sources/csv.rb +1 -1
- data/lib/picky/sources/db.rb +9 -9
- data/lib/picky/sources/delicious.rb +1 -1
- data/lib/picky/sources/wrappers/base.rb +12 -13
- data/lib/picky/sources/wrappers/location.rb +24 -54
- data/lib/tasks/search.rake +4 -5
- data/lib/tasks/todo.rake +1 -1
- data/spec/{lib → aux/picky}/cli_spec.rb +13 -8
- data/spec/lib/application_spec.rb +21 -16
- data/spec/lib/index/base_spec.rb +74 -27
- data/spec/lib/index/redis_spec.rb +1 -1
- data/spec/lib/index_bundle_spec.rb +1 -1
- data/spec/lib/indexing/indexes_spec.rb +5 -5
- data/spec/lib/internals/calculations/location_spec.rb +14 -3
- data/spec/lib/internals/index/files_spec.rb +2 -3
- data/spec/lib/internals/index/redis_spec.rb +122 -49
- data/spec/lib/internals/indexed/bundle/memory_spec.rb +4 -6
- data/spec/lib/internals/indexed/bundle/redis_spec.rb +2 -3
- data/spec/lib/internals/indexed/wrappers/bundle/calculation_spec.rb +3 -3
- data/spec/lib/internals/indexed/wrappers/bundle/wrapper_spec.rb +3 -3
- data/spec/lib/internals/indexers/parallel_spec.rb +36 -0
- data/spec/lib/internals/indexers/serial_spec.rb +6 -14
- data/spec/lib/internals/indexing/bundle/memory_partial_generation_speed_spec.rb +2 -3
- data/spec/lib/internals/indexing/bundle/memory_spec.rb +5 -6
- data/spec/lib/internals/indexing/bundle/redis_spec.rb +5 -6
- data/spec/lib/internals/indexing/category_spec.rb +21 -6
- data/spec/lib/internals/indexing/index_spec.rb +43 -7
- data/spec/lib/query/indexes_spec.rb +1 -1
- data/spec/lib/search_spec.rb +51 -2
- data/spec/lib/sources/couch_spec.rb +6 -6
- data/spec/lib/sources/csv_spec.rb +4 -4
- data/spec/lib/sources/db_spec.rb +13 -14
- data/spec/lib/sources/delicious_spec.rb +3 -3
- data/spec/lib/sources/wrappers/base_spec.rb +9 -10
- data/spec/lib/sources/wrappers/location_spec.rb +11 -23
- metadata +14 -15
- data/lib/picky/auxiliary/terminal.rb +0 -219
- data/lib/picky/internals/configuration/index.rb +0 -67
- data/lib/picky/internals/indexers/no_source_specified_error.rb +0 -7
- data/lib/picky/internals/indexing/categories.rb +0 -46
- data/spec/lib/auxiliary/terminal_spec.rb +0 -150
- data/spec/lib/internals/configuration/index_spec.rb +0 -80
- data/spec/lib/internals/indexing/categories_spec.rb +0 -49
@@ -4,10 +4,9 @@ module Internals
|
|
4
4
|
|
5
5
|
class Category
|
6
6
|
|
7
|
-
|
7
|
+
include Internals::Shared::Category
|
8
8
|
|
9
|
-
|
10
|
-
delegate :source, :source=, :tokenizer, :tokenizer=, :to => :indexer
|
9
|
+
attr_reader :name, :index, :exact, :partial
|
11
10
|
|
12
11
|
# Mandatory params:
|
13
12
|
# * name: Category name to use as identifier and file names.
|
@@ -16,26 +15,20 @@ module Internals
|
|
16
15
|
# Options:
|
17
16
|
# * partial: Partial::None.new, Partial::Substring.new(from:start_char, to:up_to_char) (defaults from:-3, to:-1)
|
18
17
|
# * similarity: Similarity::None.new (default), Similarity::DoubleMetaphone.new(amount_of_similarly_linked_words)
|
19
|
-
# * source: Use if the category should use a different source.
|
20
18
|
# * from: The source category identifier to take the data from.
|
21
19
|
#
|
22
20
|
# Advanced Options:
|
23
|
-
#
|
21
|
+
# * source: Use if the category should use a different source.
|
24
22
|
# * weights: Query::Weights.new( [:category1, :category2] => +2, ... )
|
25
23
|
# * tokenizer: Use a subclass of Tokenizers::Base that implements #tokens_for and #empty_tokens.
|
26
24
|
#
|
27
|
-
# TODO Should source be not optional, or taken from the index?
|
28
|
-
#
|
29
25
|
def initialize name, index, options = {}
|
30
|
-
@name
|
31
|
-
@
|
32
|
-
|
33
|
-
# Now we have enough info to combine the index and the category.
|
34
|
-
#
|
35
|
-
@configuration = Configuration::Index.new index, self
|
26
|
+
@name = name
|
27
|
+
@index = index
|
36
28
|
|
37
|
-
@
|
38
|
-
@
|
29
|
+
@source = options[:source]
|
30
|
+
@from = options[:from]
|
31
|
+
@tokenizer = options[:tokenizer]
|
39
32
|
|
40
33
|
# TODO Push into Bundle. At least the weights.
|
41
34
|
#
|
@@ -44,20 +37,30 @@ module Internals
|
|
44
37
|
similarity = options[:similarity] || Generators::Similarity::Default
|
45
38
|
|
46
39
|
bundle_class = options[:indexing_bundle_class] || Bundle::Memory
|
47
|
-
@exact = bundle_class.new(:exact,
|
48
|
-
@partial = bundle_class.new(:partial,
|
40
|
+
@exact = bundle_class.new(:exact, self, similarity, Generators::Partial::None.new, weights)
|
41
|
+
@partial = bundle_class.new(:partial, self, Generators::Similarity::None.new, partial, weights)
|
49
42
|
end
|
50
43
|
|
51
|
-
|
52
|
-
|
53
|
-
|
54
|
-
|
55
|
-
|
56
|
-
|
57
|
-
#
|
58
|
-
|
44
|
+
# Return an appropriate source.
|
45
|
+
#
|
46
|
+
def source
|
47
|
+
@source || @index.source
|
48
|
+
end
|
49
|
+
# The indexer is lazily generated and cached.
|
50
|
+
#
|
51
|
+
def indexer
|
52
|
+
@indexer ||= Indexers::Serial.new self
|
53
|
+
end
|
54
|
+
# Returns an appropriate tokenizer.
|
55
|
+
# If one isn't set on this category, will try the index,
|
56
|
+
# and finally the default index tokenizer.
|
57
|
+
#
|
58
|
+
def tokenizer
|
59
|
+
@tokenizer || @index.tokenizer || Tokenizers::Index.default
|
59
60
|
end
|
60
61
|
|
62
|
+
# Where the data is taken from.
|
63
|
+
#
|
61
64
|
def from
|
62
65
|
@from || name
|
63
66
|
end
|
@@ -83,14 +86,14 @@ Category(#{name} from #{from}):
|
|
83
86
|
partial.delete
|
84
87
|
end
|
85
88
|
|
86
|
-
def index
|
89
|
+
def index!
|
87
90
|
prepare_index_directory
|
88
91
|
indexer.index
|
89
92
|
end
|
90
93
|
|
91
94
|
# Generates all caches for this category.
|
92
95
|
#
|
93
|
-
def cache
|
96
|
+
def cache!
|
94
97
|
prepare_index_directory
|
95
98
|
configure
|
96
99
|
generate_caches
|
@@ -6,60 +6,115 @@ module Internals
|
|
6
6
|
|
7
7
|
class Index
|
8
8
|
|
9
|
-
attr_reader :name, :
|
9
|
+
attr_reader :name, :categories, :after_indexing, :bundle_class, :tokenizer
|
10
10
|
|
11
11
|
# Delegators for indexing.
|
12
12
|
#
|
13
13
|
delegate :connect_backend,
|
14
14
|
:to => :source
|
15
15
|
|
16
|
-
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
|
21
|
-
|
22
|
-
|
23
|
-
|
24
|
-
|
25
|
-
|
26
|
-
|
27
|
-
@
|
28
|
-
@source = source
|
29
|
-
|
16
|
+
each_delegate :backup_caches,
|
17
|
+
:cache!,
|
18
|
+
:check_caches,
|
19
|
+
:clear_caches,
|
20
|
+
:create_directory_structure,
|
21
|
+
:generate_caches,
|
22
|
+
:restore_caches,
|
23
|
+
:to => :categories
|
24
|
+
|
25
|
+
def initialize name, options = {}
|
26
|
+
@name = name
|
27
|
+
@source = options[:source]
|
30
28
|
@after_indexing = options[:after_indexing]
|
31
29
|
@bundle_class = options[:indexing_bundle_class] # TODO This should actually be a fixed parameter.
|
30
|
+
@tokenizer = options[:tokenizer]
|
32
31
|
|
33
|
-
@categories =
|
32
|
+
@categories = []
|
34
33
|
end
|
35
34
|
|
36
35
|
# TODO Spec. Doc.
|
37
36
|
#
|
38
37
|
def define_category category_name, options = {}
|
39
|
-
options = default_category_options.merge options
|
40
|
-
|
41
38
|
new_category = Category.new category_name, self, options
|
39
|
+
new_category = yield new_category if block_given?
|
42
40
|
categories << new_category
|
43
41
|
new_category
|
44
42
|
end
|
45
43
|
|
46
|
-
#
|
47
|
-
#
|
48
|
-
|
44
|
+
# TODO Spec. Doc.
|
45
|
+
#
|
46
|
+
def define_indexing options = {}
|
47
|
+
@tokenizer = Internals::Tokenizers::Index.new options
|
48
|
+
end
|
49
|
+
|
50
|
+
#
|
51
|
+
#
|
52
|
+
def define_source source
|
53
|
+
@source = source
|
54
|
+
end
|
55
|
+
def source
|
56
|
+
@source || raise_no_source
|
57
|
+
end
|
58
|
+
def raise_no_source
|
59
|
+
raise NoSourceSpecifiedException.new(<<-NO_SOURCE
|
60
|
+
|
61
|
+
|
62
|
+
No source given for index #{name}. An index needs a source.
|
63
|
+
Example:
|
64
|
+
Index::Memory.new(:with_source) do
|
65
|
+
source Sources::CSV.new(:title, file: 'data/books.csv')
|
66
|
+
category :title
|
67
|
+
category :author
|
68
|
+
end
|
69
|
+
|
70
|
+
NO_SOURCE
|
71
|
+
)
|
72
|
+
end
|
73
|
+
|
74
|
+
#
|
49
75
|
#
|
50
|
-
def
|
51
|
-
|
52
|
-
|
53
|
-
|
54
|
-
|
76
|
+
def find category_name
|
77
|
+
category_name = category_name.to_sym
|
78
|
+
|
79
|
+
categories.each do |category|
|
80
|
+
next unless category.name == category_name
|
81
|
+
return category
|
82
|
+
end
|
83
|
+
|
84
|
+
raise %Q{Index category "#{category_name}" not found. Possible categories: "#{categories.map(&:name).join('", "')}".}
|
85
|
+
end
|
86
|
+
|
87
|
+
# Decides whether to use a parallel indexer or whether to
|
88
|
+
# delegate to each category to index themselves.
|
89
|
+
#
|
90
|
+
def index!
|
91
|
+
if source.respond_to?(:each)
|
92
|
+
warn %Q{\n\033[1mWarning\033[m, source for index "#{name}" is empty: #{source} (responds true to empty?).\n} if source.respond_to?(:empty?) && source.empty?
|
93
|
+
index_parallel
|
94
|
+
else
|
95
|
+
categories.each &:index!
|
96
|
+
end
|
97
|
+
end
|
98
|
+
# Indexes the categories in parallel.
|
99
|
+
#
|
100
|
+
# Only use where the category does not have a non-#each source defined.
|
101
|
+
#
|
102
|
+
def index_parallel
|
103
|
+
indexer = Indexers::Parallel.new self
|
104
|
+
categories.first.prepare_index_directory # TODO Unnice.
|
105
|
+
indexer.index
|
55
106
|
end
|
56
107
|
|
57
108
|
# Indexing.
|
58
109
|
#
|
110
|
+
# Note: If it is an each source we do not take a snapshot.
|
111
|
+
#
|
59
112
|
def take_snapshot
|
60
|
-
source.take_snapshot self
|
113
|
+
source.take_snapshot self unless source.respond_to? :each
|
61
114
|
end
|
62
115
|
|
116
|
+
#
|
117
|
+
#
|
63
118
|
def to_s
|
64
119
|
<<-INDEX
|
65
120
|
Indexing(#{name}):
|
@@ -0,0 +1,27 @@
|
|
1
|
+
module Internals
|
2
|
+
module Indexing
|
3
|
+
module Wrappers
|
4
|
+
module Category
|
5
|
+
|
6
|
+
module Location
|
7
|
+
|
8
|
+
def self.install_on category, grid, precision = 1
|
9
|
+
new_source = Sources::Wrappers::Location.new category.source, grid, precision
|
10
|
+
|
11
|
+
category.class_eval do
|
12
|
+
def tokenizer
|
13
|
+
@tokenizer ||= Internals::Tokenizers::Index.new
|
14
|
+
end
|
15
|
+
define_method :source do
|
16
|
+
new_source
|
17
|
+
end
|
18
|
+
end
|
19
|
+
|
20
|
+
end
|
21
|
+
|
22
|
+
end
|
23
|
+
|
24
|
+
end
|
25
|
+
end
|
26
|
+
end
|
27
|
+
end
|
@@ -22,7 +22,7 @@ module Internals
|
|
22
22
|
#
|
23
23
|
def initialize *index_definitions, combinations_type
|
24
24
|
@combinations_type = combinations_type
|
25
|
-
@indexes = index_definitions.map
|
25
|
+
@indexes = index_definitions.map(&:internal_indexed)
|
26
26
|
end
|
27
27
|
|
28
28
|
# Returns a number of prepared (sorted, reduced etc.) allocations for the given tokens.
|
@@ -35,8 +35,6 @@ module Internals
|
|
35
35
|
#
|
36
36
|
class Qualifiers # :nodoc:all
|
37
37
|
|
38
|
-
include Singleton
|
39
|
-
|
40
38
|
attr_reader :qualifiers, :normalization_mapping
|
41
39
|
|
42
40
|
delegate :<<, :to => :qualifiers
|
@@ -47,13 +45,16 @@ module Internals
|
|
47
45
|
@qualifiers = []
|
48
46
|
@normalization_mapping = {}
|
49
47
|
end
|
50
|
-
|
48
|
+
def self.instance
|
49
|
+
@instanec ||= new
|
50
|
+
end
|
51
|
+
|
51
52
|
# TODO Spec.
|
52
53
|
#
|
53
54
|
def self.add name, qualifiers
|
54
55
|
instance << Qualifier.new(name, qualifiers)
|
55
56
|
end
|
56
|
-
|
57
|
+
|
57
58
|
# Uses the qualifiers to prepare (optimize) the qualifier handling.
|
58
59
|
#
|
59
60
|
def prepare
|
@@ -75,7 +76,7 @@ module Internals
|
|
75
76
|
end
|
76
77
|
|
77
78
|
end
|
78
|
-
|
79
|
+
|
79
80
|
end
|
80
|
-
|
81
|
+
|
81
82
|
end
|
@@ -4,6 +4,8 @@ module Query
|
|
4
4
|
#
|
5
5
|
class Weights # :nodoc:all
|
6
6
|
|
7
|
+
attr_reader :weights
|
8
|
+
|
7
9
|
#
|
8
10
|
#
|
9
11
|
def initialize weights = {}
|
@@ -46,6 +48,10 @@ module Query
|
|
46
48
|
@weights.empty?
|
47
49
|
end
|
48
50
|
|
51
|
+
def == other
|
52
|
+
@weights == other.weights
|
53
|
+
end
|
54
|
+
|
49
55
|
# Prints out a nice representation of the configured weights.
|
50
56
|
#
|
51
57
|
def to_s
|
@@ -0,0 +1,52 @@
|
|
1
|
+
module Internals
|
2
|
+
module Shared
|
3
|
+
|
4
|
+
module Category
|
5
|
+
|
6
|
+
def index_name
|
7
|
+
index.name
|
8
|
+
end
|
9
|
+
def category_name
|
10
|
+
name
|
11
|
+
end
|
12
|
+
|
13
|
+
# Path and partial filename of a specific index on this category.
|
14
|
+
#
|
15
|
+
def index_path bundle_name, type
|
16
|
+
"#{index_directory}/#{name}_#{bundle_name}_#{type}"
|
17
|
+
end
|
18
|
+
|
19
|
+
#
|
20
|
+
#
|
21
|
+
def prepared_index_path
|
22
|
+
@prepared_index_path ||= "#{index_directory}/prepared_#{name}_index"
|
23
|
+
end
|
24
|
+
def prepared_index_file &block
|
25
|
+
@prepared_index_file ||= Internals::Index::File::Text.new prepared_index_path
|
26
|
+
@prepared_index_file.open_for_indexing &block
|
27
|
+
end
|
28
|
+
|
29
|
+
# Identifier for internal use.
|
30
|
+
#
|
31
|
+
def identifier
|
32
|
+
@identifier ||= "#{index.name}:#{name}"
|
33
|
+
end
|
34
|
+
def to_s
|
35
|
+
"#{index.name} #{name}"
|
36
|
+
end
|
37
|
+
|
38
|
+
# The index directory for this category.
|
39
|
+
#
|
40
|
+
def index_directory
|
41
|
+
@index_directory ||= "#{PICKY_ROOT}/index/#{PICKY_ENVIRONMENT}/#{index.name}"
|
42
|
+
end
|
43
|
+
# Creates the index directory including all necessary paths above it.
|
44
|
+
#
|
45
|
+
def prepare_index_directory
|
46
|
+
FileUtils.mkdir_p index_directory
|
47
|
+
end
|
48
|
+
|
49
|
+
end
|
50
|
+
|
51
|
+
end
|
52
|
+
end
|
@@ -169,7 +169,7 @@ Case sensitive? #{@case_sensitive ? "Yes." : "-"}
|
|
169
169
|
# Defaults.
|
170
170
|
#
|
171
171
|
splits_text_on options[:splits_text_on] || /\s/
|
172
|
-
reject_token_if &(options[:reject_token_if] || :blank?)
|
172
|
+
reject_token_if &(options[:reject_token_if] || options[:rejects_token_if] || :blank?) # TODO Decide on using an s or not.
|
173
173
|
end
|
174
174
|
|
175
175
|
# Default preprocessing hook.
|
@@ -0,0 +1,54 @@
|
|
1
|
+
module Internals
|
2
|
+
|
3
|
+
module Tokenizers
|
4
|
+
|
5
|
+
|
6
|
+
class Location < Base
|
7
|
+
|
8
|
+
attr_reader :calculation
|
9
|
+
|
10
|
+
def initialize options = {}
|
11
|
+
super options
|
12
|
+
|
13
|
+
grid = options[:grid]
|
14
|
+
precision = options[:precision] || 1
|
15
|
+
|
16
|
+
@calculation = Internals::Calculations::Location.new grid, precision
|
17
|
+
|
18
|
+
@minimum = 1.0 / 0
|
19
|
+
|
20
|
+
@locations = []
|
21
|
+
end
|
22
|
+
|
23
|
+
# TODO Work on this!
|
24
|
+
#
|
25
|
+
def tokenize text
|
26
|
+
|
27
|
+
# Gather min/max.
|
28
|
+
#
|
29
|
+
source.harvest category do |indexed_id, location|
|
30
|
+
location = location.to_f
|
31
|
+
minimum = location if location < minimum
|
32
|
+
locations << [indexed_id, location]
|
33
|
+
end
|
34
|
+
|
35
|
+
calculation.minimum = minimum
|
36
|
+
|
37
|
+
# Recalculate locations.
|
38
|
+
#
|
39
|
+
locations.each do |indexed_id, location|
|
40
|
+
calculation.recalculated_range(location).each do |new_location|
|
41
|
+
yield indexed_id, new_location.to_s
|
42
|
+
end
|
43
|
+
end
|
44
|
+
|
45
|
+
# TODO Move to the right place.
|
46
|
+
#
|
47
|
+
category.exact[:location_minimum] = minimum
|
48
|
+
end
|
49
|
+
|
50
|
+
end
|
51
|
+
|
52
|
+
end
|
53
|
+
|
54
|
+
end
|
data/lib/picky/loader.rb
CHANGED
@@ -109,8 +109,9 @@ module Loader # :nodoc:all
|
|
109
109
|
|
110
110
|
# Index generation strategies.
|
111
111
|
#
|
112
|
-
load_internals 'indexers/
|
112
|
+
load_internals 'indexers/base'
|
113
113
|
load_internals 'indexers/serial'
|
114
|
+
load_internals 'indexers/parallel'
|
114
115
|
|
115
116
|
# Generators.
|
116
117
|
#
|
@@ -146,6 +147,10 @@ module Loader # :nodoc:all
|
|
146
147
|
load_internals 'generators/weights_generator'
|
147
148
|
load_internals 'generators/similarity_generator'
|
148
149
|
|
150
|
+
# Shared index elements.
|
151
|
+
#
|
152
|
+
load_internals 'shared/category'
|
153
|
+
|
149
154
|
# Index store handling.
|
150
155
|
#
|
151
156
|
load_internals 'index/backend'
|
@@ -169,9 +174,11 @@ module Loader # :nodoc:all
|
|
169
174
|
load_internals 'indexing/bundle/memory'
|
170
175
|
load_internals 'indexing/bundle/redis'
|
171
176
|
load_internals 'indexing/category'
|
172
|
-
load_internals 'indexing/categories'
|
177
|
+
# load_internals 'indexing/categories'
|
173
178
|
load_internals 'indexing/index'
|
174
179
|
|
180
|
+
load_internals 'indexing/wrappers/category/location'
|
181
|
+
|
175
182
|
load_internals 'indexed/bundle/base'
|
176
183
|
load_internals 'indexed/bundle/memory'
|
177
184
|
load_internals 'indexed/bundle/redis'
|
@@ -187,6 +194,8 @@ module Loader # :nodoc:all
|
|
187
194
|
load_internals 'indexed/wrappers/bundle/calculation'
|
188
195
|
load_internals 'indexed/wrappers/bundle/location'
|
189
196
|
|
197
|
+
load_internals 'indexed/wrappers/category/location'
|
198
|
+
|
190
199
|
# Tokens.
|
191
200
|
#
|
192
201
|
load_internals 'query/token'
|
@@ -216,7 +225,7 @@ module Loader # :nodoc:all
|
|
216
225
|
|
217
226
|
# Configuration.
|
218
227
|
#
|
219
|
-
load_internals 'configuration/index'
|
228
|
+
# load_internals 'configuration/index'
|
220
229
|
|
221
230
|
# Adapters.
|
222
231
|
#
|
@@ -236,6 +245,10 @@ module Loader # :nodoc:all
|
|
236
245
|
#
|
237
246
|
load_relative 'rack/harakiri'
|
238
247
|
|
248
|
+
# Errors.
|
249
|
+
#
|
250
|
+
load_relative 'no_source_specified_exception'
|
251
|
+
|
239
252
|
# Load analyzer.
|
240
253
|
#
|
241
254
|
load_relative 'analyzer'
|
data/lib/picky/search.rb
CHANGED
@@ -25,13 +25,52 @@ class Search
|
|
25
25
|
#
|
26
26
|
# TODO Add identifiers_to_remove (rename) and reduce_allocations_to_amount (rename).
|
27
27
|
#
|
28
|
+
# It is also possible to define the tokenizer and weights like so.
|
29
|
+
# Example:
|
30
|
+
# Search.new(index1, index2, index3) do
|
31
|
+
# searching removes_characters: /[^a-z]/, etc.
|
32
|
+
# weights [:author, :title] => +3, [:title, :isbn] => +1
|
33
|
+
# end
|
34
|
+
#
|
28
35
|
def initialize *index_definitions
|
29
36
|
options = Hash === index_definitions.last ? index_definitions.pop : {}
|
30
37
|
|
31
|
-
@indexes
|
32
|
-
|
33
|
-
|
34
|
-
|
38
|
+
@indexes = Internals::Query::Indexes.new *index_definitions, combinations_type_for(index_definitions)
|
39
|
+
searching options[:tokenizer]
|
40
|
+
boost options[:weights]
|
41
|
+
|
42
|
+
instance_eval(&Proc.new) if block_given?
|
43
|
+
end
|
44
|
+
|
45
|
+
# TODO Doc. Spec.
|
46
|
+
#
|
47
|
+
# Example:
|
48
|
+
# Search.new(index1, index2, index3) do
|
49
|
+
# searching removes_characters: /[^a-z]/, etc.
|
50
|
+
# weights [:author, :title] => +3, [:title, :isbn] => +1
|
51
|
+
# end
|
52
|
+
#
|
53
|
+
def searching options
|
54
|
+
@tokenizer = if options.respond_to?(:tokenize)
|
55
|
+
options
|
56
|
+
else
|
57
|
+
options && Internals::Tokenizers::Query.new(options)
|
58
|
+
end
|
59
|
+
end
|
60
|
+
def tokenizer
|
61
|
+
@tokenizer || Internals::Tokenizers::Query.default
|
62
|
+
end
|
63
|
+
# TODO Doc. Spec.
|
64
|
+
#
|
65
|
+
# Example:
|
66
|
+
# Search.new(index1, index2, index3) do
|
67
|
+
# searching removes_characters: /[^a-z]/, etc.
|
68
|
+
# boost [:author, :title] => +3, [:title, :isbn] => +1
|
69
|
+
# end
|
70
|
+
#
|
71
|
+
def boost options
|
72
|
+
weights = options || Query::Weights.new
|
73
|
+
@weights = Hash === weights ? Query::Weights.new(weights) : weights
|
35
74
|
end
|
36
75
|
|
37
76
|
# Returns the right combinations strategy for
|
@@ -110,7 +149,7 @@ class Search
|
|
110
149
|
# * text: The text to tokenize.
|
111
150
|
#
|
112
151
|
def tokenized text
|
113
|
-
|
152
|
+
tokenizer.tokenize text
|
114
153
|
end
|
115
154
|
|
116
155
|
# Gets sorted allocations for the tokens.
|
data/lib/picky/sources/base.rb
CHANGED
@@ -42,7 +42,7 @@ module Sources
|
|
42
42
|
|
43
43
|
# Called by the indexer when gathering data.
|
44
44
|
#
|
45
|
-
# Yields the data (id, text for id) for the given
|
45
|
+
# Yields the data (id, text for id) for the given category.
|
46
46
|
#
|
47
47
|
# When implementing or overriding your own,
|
48
48
|
# be sure to <tt>yield(id, text_for_id)</tt> (or <tt>block.call(id, text_for_id)</tt>)
|
@@ -50,7 +50,7 @@ module Sources
|
|
50
50
|
#
|
51
51
|
# Note: Since harvest needs to be implemented, it has no default implementation.
|
52
52
|
#
|
53
|
-
# def harvest
|
53
|
+
# def harvest category # :yields: id, text_for_id
|
54
54
|
#
|
55
55
|
# end
|
56
56
|
|
data/lib/picky/sources/couch.rb
CHANGED
data/lib/picky/sources/csv.rb
CHANGED
data/lib/picky/sources/db.rb
CHANGED
@@ -123,19 +123,19 @@ module Sources
|
|
123
123
|
|
124
124
|
# Harvests the data to index in chunks.
|
125
125
|
#
|
126
|
-
def harvest
|
126
|
+
def harvest category, &block
|
127
127
|
connect_backend
|
128
128
|
|
129
|
-
(0..count(index)).step(chunksize) do |offset|
|
130
|
-
get_data
|
129
|
+
(0..count(category.index)).step(chunksize) do |offset|
|
130
|
+
get_data category, offset, &block
|
131
131
|
end
|
132
132
|
end
|
133
133
|
|
134
134
|
# Gets the data from the backend.
|
135
135
|
#
|
136
|
-
def get_data
|
136
|
+
def get_data category, offset, &block # :nodoc:
|
137
137
|
|
138
|
-
select_statement = harvest_statement_with_offset
|
138
|
+
select_statement = harvest_statement_with_offset category, offset
|
139
139
|
|
140
140
|
# TODO Rewrite ASAP.
|
141
141
|
#
|
@@ -155,8 +155,8 @@ module Sources
|
|
155
155
|
|
156
156
|
# Builds a harvest statement for getting data to index.
|
157
157
|
#
|
158
|
-
def harvest_statement_with_offset
|
159
|
-
statement = harvest_statement
|
158
|
+
def harvest_statement_with_offset category, offset
|
159
|
+
statement = harvest_statement category
|
160
160
|
|
161
161
|
statement += statement.include?('WHERE') ? ' AND' : ' WHERE'
|
162
162
|
|
@@ -165,8 +165,8 @@ module Sources
|
|
165
165
|
|
166
166
|
# The harvest statement used to pull data from the snapshot table.
|
167
167
|
#
|
168
|
-
def harvest_statement
|
169
|
-
"SELECT id, #{category.from} FROM #{snapshot_table_name(index)} st"
|
168
|
+
def harvest_statement category
|
169
|
+
"SELECT id, #{category.from} FROM #{snapshot_table_name(category.index)} st"
|
170
170
|
end
|
171
171
|
|
172
172
|
# The amount of records that are loaded each chunk.
|