picky 2.1.2 → 2.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/{lib → aux}/picky/cli.rb +50 -38
- data/bin/picky +1 -1
- data/lib/picky/application.rb +5 -2
- data/lib/picky/index/base.rb +88 -25
- data/lib/picky/index/memory.rb +8 -8
- data/lib/picky/index/redis.rb +8 -8
- data/lib/picky/index_bundle.rb +2 -2
- data/lib/picky/indexing/indexes.rb +6 -6
- data/lib/picky/internals/calculations/location.rb +54 -42
- data/lib/picky/internals/index/backend.rb +21 -21
- data/lib/picky/internals/index/file/text.rb +11 -11
- data/lib/picky/internals/index/files.rb +6 -6
- data/lib/picky/internals/index/redis.rb +14 -14
- data/lib/picky/internals/indexed/bundle/base.rb +2 -2
- data/lib/picky/internals/indexed/bundle/redis.rb +3 -3
- data/lib/picky/internals/indexed/category.rb +8 -9
- data/lib/picky/internals/indexed/wrappers/bundle/calculation.rb +25 -23
- data/lib/picky/internals/indexed/wrappers/bundle/location.rb +36 -34
- data/lib/picky/internals/indexed/wrappers/bundle/wrapper.rb +35 -33
- data/lib/picky/internals/indexed/wrappers/category/location.rb +27 -0
- data/lib/picky/internals/indexers/base.rb +28 -0
- data/lib/picky/internals/indexers/parallel.rb +64 -0
- data/lib/picky/internals/indexers/serial.rb +20 -29
- data/lib/picky/internals/indexing/bundle/base.rb +2 -2
- data/lib/picky/internals/indexing/bundle/super_base.rb +3 -3
- data/lib/picky/internals/indexing/category.rb +30 -27
- data/lib/picky/internals/indexing/index.rb +82 -27
- data/lib/picky/internals/indexing/wrappers/category/location.rb +27 -0
- data/lib/picky/internals/query/indexes.rb +1 -1
- data/lib/picky/internals/query/qualifiers.rb +7 -6
- data/lib/picky/internals/query/weights.rb +6 -0
- data/lib/picky/internals/shared/category.rb +52 -0
- data/lib/picky/internals/tokenizers/base.rb +1 -1
- data/lib/picky/internals/tokenizers/location.rb +54 -0
- data/lib/picky/loader.rb +16 -3
- data/lib/picky/no_source_specified_exception.rb +3 -0
- data/lib/picky/search.rb +44 -5
- data/lib/picky/sources/base.rb +2 -2
- data/lib/picky/sources/couch.rb +1 -1
- data/lib/picky/sources/csv.rb +1 -1
- data/lib/picky/sources/db.rb +9 -9
- data/lib/picky/sources/delicious.rb +1 -1
- data/lib/picky/sources/wrappers/base.rb +12 -13
- data/lib/picky/sources/wrappers/location.rb +24 -54
- data/lib/tasks/search.rake +4 -5
- data/lib/tasks/todo.rake +1 -1
- data/spec/{lib → aux/picky}/cli_spec.rb +13 -8
- data/spec/lib/application_spec.rb +21 -16
- data/spec/lib/index/base_spec.rb +74 -27
- data/spec/lib/index/redis_spec.rb +1 -1
- data/spec/lib/index_bundle_spec.rb +1 -1
- data/spec/lib/indexing/indexes_spec.rb +5 -5
- data/spec/lib/internals/calculations/location_spec.rb +14 -3
- data/spec/lib/internals/index/files_spec.rb +2 -3
- data/spec/lib/internals/index/redis_spec.rb +122 -49
- data/spec/lib/internals/indexed/bundle/memory_spec.rb +4 -6
- data/spec/lib/internals/indexed/bundle/redis_spec.rb +2 -3
- data/spec/lib/internals/indexed/wrappers/bundle/calculation_spec.rb +3 -3
- data/spec/lib/internals/indexed/wrappers/bundle/wrapper_spec.rb +3 -3
- data/spec/lib/internals/indexers/parallel_spec.rb +36 -0
- data/spec/lib/internals/indexers/serial_spec.rb +6 -14
- data/spec/lib/internals/indexing/bundle/memory_partial_generation_speed_spec.rb +2 -3
- data/spec/lib/internals/indexing/bundle/memory_spec.rb +5 -6
- data/spec/lib/internals/indexing/bundle/redis_spec.rb +5 -6
- data/spec/lib/internals/indexing/category_spec.rb +21 -6
- data/spec/lib/internals/indexing/index_spec.rb +43 -7
- data/spec/lib/query/indexes_spec.rb +1 -1
- data/spec/lib/search_spec.rb +51 -2
- data/spec/lib/sources/couch_spec.rb +6 -6
- data/spec/lib/sources/csv_spec.rb +4 -4
- data/spec/lib/sources/db_spec.rb +13 -14
- data/spec/lib/sources/delicious_spec.rb +3 -3
- data/spec/lib/sources/wrappers/base_spec.rb +9 -10
- data/spec/lib/sources/wrappers/location_spec.rb +11 -23
- metadata +14 -15
- data/lib/picky/auxiliary/terminal.rb +0 -219
- data/lib/picky/internals/configuration/index.rb +0 -67
- data/lib/picky/internals/indexers/no_source_specified_error.rb +0 -7
- data/lib/picky/internals/indexing/categories.rb +0 -46
- data/spec/lib/auxiliary/terminal_spec.rb +0 -150
- data/spec/lib/internals/configuration/index_spec.rb +0 -80
- data/spec/lib/internals/indexing/categories_spec.rb +0 -49
@@ -4,10 +4,9 @@ module Internals
|
|
4
4
|
|
5
5
|
class Category
|
6
6
|
|
7
|
-
|
7
|
+
include Internals::Shared::Category
|
8
8
|
|
9
|
-
|
10
|
-
delegate :source, :source=, :tokenizer, :tokenizer=, :to => :indexer
|
9
|
+
attr_reader :name, :index, :exact, :partial
|
11
10
|
|
12
11
|
# Mandatory params:
|
13
12
|
# * name: Category name to use as identifier and file names.
|
@@ -16,26 +15,20 @@ module Internals
|
|
16
15
|
# Options:
|
17
16
|
# * partial: Partial::None.new, Partial::Substring.new(from:start_char, to:up_to_char) (defaults from:-3, to:-1)
|
18
17
|
# * similarity: Similarity::None.new (default), Similarity::DoubleMetaphone.new(amount_of_similarly_linked_words)
|
19
|
-
# * source: Use if the category should use a different source.
|
20
18
|
# * from: The source category identifier to take the data from.
|
21
19
|
#
|
22
20
|
# Advanced Options:
|
23
|
-
#
|
21
|
+
# * source: Use if the category should use a different source.
|
24
22
|
# * weights: Query::Weights.new( [:category1, :category2] => +2, ... )
|
25
23
|
# * tokenizer: Use a subclass of Tokenizers::Base that implements #tokens_for and #empty_tokens.
|
26
24
|
#
|
27
|
-
# TODO Should source be not optional, or taken from the index?
|
28
|
-
#
|
29
25
|
def initialize name, index, options = {}
|
30
|
-
@name
|
31
|
-
@
|
32
|
-
|
33
|
-
# Now we have enough info to combine the index and the category.
|
34
|
-
#
|
35
|
-
@configuration = Configuration::Index.new index, self
|
26
|
+
@name = name
|
27
|
+
@index = index
|
36
28
|
|
37
|
-
@
|
38
|
-
@
|
29
|
+
@source = options[:source]
|
30
|
+
@from = options[:from]
|
31
|
+
@tokenizer = options[:tokenizer]
|
39
32
|
|
40
33
|
# TODO Push into Bundle. At least the weights.
|
41
34
|
#
|
@@ -44,20 +37,30 @@ module Internals
|
|
44
37
|
similarity = options[:similarity] || Generators::Similarity::Default
|
45
38
|
|
46
39
|
bundle_class = options[:indexing_bundle_class] || Bundle::Memory
|
47
|
-
@exact = bundle_class.new(:exact,
|
48
|
-
@partial = bundle_class.new(:partial,
|
40
|
+
@exact = bundle_class.new(:exact, self, similarity, Generators::Partial::None.new, weights)
|
41
|
+
@partial = bundle_class.new(:partial, self, Generators::Similarity::None.new, partial, weights)
|
49
42
|
end
|
50
43
|
|
51
|
-
|
52
|
-
|
53
|
-
|
54
|
-
|
55
|
-
|
56
|
-
|
57
|
-
#
|
58
|
-
|
44
|
+
# Return an appropriate source.
|
45
|
+
#
|
46
|
+
def source
|
47
|
+
@source || @index.source
|
48
|
+
end
|
49
|
+
# The indexer is lazily generated and cached.
|
50
|
+
#
|
51
|
+
def indexer
|
52
|
+
@indexer ||= Indexers::Serial.new self
|
53
|
+
end
|
54
|
+
# Returns an appropriate tokenizer.
|
55
|
+
# If one isn't set on this category, will try the index,
|
56
|
+
# and finally the default index tokenizer.
|
57
|
+
#
|
58
|
+
def tokenizer
|
59
|
+
@tokenizer || @index.tokenizer || Tokenizers::Index.default
|
59
60
|
end
|
60
61
|
|
62
|
+
# Where the data is taken from.
|
63
|
+
#
|
61
64
|
def from
|
62
65
|
@from || name
|
63
66
|
end
|
@@ -83,14 +86,14 @@ Category(#{name} from #{from}):
|
|
83
86
|
partial.delete
|
84
87
|
end
|
85
88
|
|
86
|
-
def index
|
89
|
+
def index!
|
87
90
|
prepare_index_directory
|
88
91
|
indexer.index
|
89
92
|
end
|
90
93
|
|
91
94
|
# Generates all caches for this category.
|
92
95
|
#
|
93
|
-
def cache
|
96
|
+
def cache!
|
94
97
|
prepare_index_directory
|
95
98
|
configure
|
96
99
|
generate_caches
|
@@ -6,60 +6,115 @@ module Internals
|
|
6
6
|
|
7
7
|
class Index
|
8
8
|
|
9
|
-
attr_reader :name, :
|
9
|
+
attr_reader :name, :categories, :after_indexing, :bundle_class, :tokenizer
|
10
10
|
|
11
11
|
# Delegators for indexing.
|
12
12
|
#
|
13
13
|
delegate :connect_backend,
|
14
14
|
:to => :source
|
15
15
|
|
16
|
-
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
|
21
|
-
|
22
|
-
|
23
|
-
|
24
|
-
|
25
|
-
|
26
|
-
|
27
|
-
@
|
28
|
-
@source = source
|
29
|
-
|
16
|
+
each_delegate :backup_caches,
|
17
|
+
:cache!,
|
18
|
+
:check_caches,
|
19
|
+
:clear_caches,
|
20
|
+
:create_directory_structure,
|
21
|
+
:generate_caches,
|
22
|
+
:restore_caches,
|
23
|
+
:to => :categories
|
24
|
+
|
25
|
+
def initialize name, options = {}
|
26
|
+
@name = name
|
27
|
+
@source = options[:source]
|
30
28
|
@after_indexing = options[:after_indexing]
|
31
29
|
@bundle_class = options[:indexing_bundle_class] # TODO This should actually be a fixed parameter.
|
30
|
+
@tokenizer = options[:tokenizer]
|
32
31
|
|
33
|
-
@categories =
|
32
|
+
@categories = []
|
34
33
|
end
|
35
34
|
|
36
35
|
# TODO Spec. Doc.
|
37
36
|
#
|
38
37
|
def define_category category_name, options = {}
|
39
|
-
options = default_category_options.merge options
|
40
|
-
|
41
38
|
new_category = Category.new category_name, self, options
|
39
|
+
new_category = yield new_category if block_given?
|
42
40
|
categories << new_category
|
43
41
|
new_category
|
44
42
|
end
|
45
43
|
|
46
|
-
#
|
47
|
-
#
|
48
|
-
|
44
|
+
# TODO Spec. Doc.
|
45
|
+
#
|
46
|
+
def define_indexing options = {}
|
47
|
+
@tokenizer = Internals::Tokenizers::Index.new options
|
48
|
+
end
|
49
|
+
|
50
|
+
#
|
51
|
+
#
|
52
|
+
def define_source source
|
53
|
+
@source = source
|
54
|
+
end
|
55
|
+
def source
|
56
|
+
@source || raise_no_source
|
57
|
+
end
|
58
|
+
def raise_no_source
|
59
|
+
raise NoSourceSpecifiedException.new(<<-NO_SOURCE
|
60
|
+
|
61
|
+
|
62
|
+
No source given for index #{name}. An index needs a source.
|
63
|
+
Example:
|
64
|
+
Index::Memory.new(:with_source) do
|
65
|
+
source Sources::CSV.new(:title, file: 'data/books.csv')
|
66
|
+
category :title
|
67
|
+
category :author
|
68
|
+
end
|
69
|
+
|
70
|
+
NO_SOURCE
|
71
|
+
)
|
72
|
+
end
|
73
|
+
|
74
|
+
#
|
49
75
|
#
|
50
|
-
def
|
51
|
-
|
52
|
-
|
53
|
-
|
54
|
-
|
76
|
+
def find category_name
|
77
|
+
category_name = category_name.to_sym
|
78
|
+
|
79
|
+
categories.each do |category|
|
80
|
+
next unless category.name == category_name
|
81
|
+
return category
|
82
|
+
end
|
83
|
+
|
84
|
+
raise %Q{Index category "#{category_name}" not found. Possible categories: "#{categories.map(&:name).join('", "')}".}
|
85
|
+
end
|
86
|
+
|
87
|
+
# Decides whether to use a parallel indexer or whether to
|
88
|
+
# delegate to each category to index themselves.
|
89
|
+
#
|
90
|
+
def index!
|
91
|
+
if source.respond_to?(:each)
|
92
|
+
warn %Q{\n\033[1mWarning\033[m, source for index "#{name}" is empty: #{source} (responds true to empty?).\n} if source.respond_to?(:empty?) && source.empty?
|
93
|
+
index_parallel
|
94
|
+
else
|
95
|
+
categories.each &:index!
|
96
|
+
end
|
97
|
+
end
|
98
|
+
# Indexes the categories in parallel.
|
99
|
+
#
|
100
|
+
# Only use where the category does not have a non-#each source defined.
|
101
|
+
#
|
102
|
+
def index_parallel
|
103
|
+
indexer = Indexers::Parallel.new self
|
104
|
+
categories.first.prepare_index_directory # TODO Unnice.
|
105
|
+
indexer.index
|
55
106
|
end
|
56
107
|
|
57
108
|
# Indexing.
|
58
109
|
#
|
110
|
+
# Note: If it is an each source we do not take a snapshot.
|
111
|
+
#
|
59
112
|
def take_snapshot
|
60
|
-
source.take_snapshot self
|
113
|
+
source.take_snapshot self unless source.respond_to? :each
|
61
114
|
end
|
62
115
|
|
116
|
+
#
|
117
|
+
#
|
63
118
|
def to_s
|
64
119
|
<<-INDEX
|
65
120
|
Indexing(#{name}):
|
@@ -0,0 +1,27 @@
|
|
1
|
+
module Internals
|
2
|
+
module Indexing
|
3
|
+
module Wrappers
|
4
|
+
module Category
|
5
|
+
|
6
|
+
module Location
|
7
|
+
|
8
|
+
def self.install_on category, grid, precision = 1
|
9
|
+
new_source = Sources::Wrappers::Location.new category.source, grid, precision
|
10
|
+
|
11
|
+
category.class_eval do
|
12
|
+
def tokenizer
|
13
|
+
@tokenizer ||= Internals::Tokenizers::Index.new
|
14
|
+
end
|
15
|
+
define_method :source do
|
16
|
+
new_source
|
17
|
+
end
|
18
|
+
end
|
19
|
+
|
20
|
+
end
|
21
|
+
|
22
|
+
end
|
23
|
+
|
24
|
+
end
|
25
|
+
end
|
26
|
+
end
|
27
|
+
end
|
@@ -22,7 +22,7 @@ module Internals
|
|
22
22
|
#
|
23
23
|
def initialize *index_definitions, combinations_type
|
24
24
|
@combinations_type = combinations_type
|
25
|
-
@indexes = index_definitions.map
|
25
|
+
@indexes = index_definitions.map(&:internal_indexed)
|
26
26
|
end
|
27
27
|
|
28
28
|
# Returns a number of prepared (sorted, reduced etc.) allocations for the given tokens.
|
@@ -35,8 +35,6 @@ module Internals
|
|
35
35
|
#
|
36
36
|
class Qualifiers # :nodoc:all
|
37
37
|
|
38
|
-
include Singleton
|
39
|
-
|
40
38
|
attr_reader :qualifiers, :normalization_mapping
|
41
39
|
|
42
40
|
delegate :<<, :to => :qualifiers
|
@@ -47,13 +45,16 @@ module Internals
|
|
47
45
|
@qualifiers = []
|
48
46
|
@normalization_mapping = {}
|
49
47
|
end
|
50
|
-
|
48
|
+
def self.instance
|
49
|
+
@instanec ||= new
|
50
|
+
end
|
51
|
+
|
51
52
|
# TODO Spec.
|
52
53
|
#
|
53
54
|
def self.add name, qualifiers
|
54
55
|
instance << Qualifier.new(name, qualifiers)
|
55
56
|
end
|
56
|
-
|
57
|
+
|
57
58
|
# Uses the qualifiers to prepare (optimize) the qualifier handling.
|
58
59
|
#
|
59
60
|
def prepare
|
@@ -75,7 +76,7 @@ module Internals
|
|
75
76
|
end
|
76
77
|
|
77
78
|
end
|
78
|
-
|
79
|
+
|
79
80
|
end
|
80
|
-
|
81
|
+
|
81
82
|
end
|
@@ -4,6 +4,8 @@ module Query
|
|
4
4
|
#
|
5
5
|
class Weights # :nodoc:all
|
6
6
|
|
7
|
+
attr_reader :weights
|
8
|
+
|
7
9
|
#
|
8
10
|
#
|
9
11
|
def initialize weights = {}
|
@@ -46,6 +48,10 @@ module Query
|
|
46
48
|
@weights.empty?
|
47
49
|
end
|
48
50
|
|
51
|
+
def == other
|
52
|
+
@weights == other.weights
|
53
|
+
end
|
54
|
+
|
49
55
|
# Prints out a nice representation of the configured weights.
|
50
56
|
#
|
51
57
|
def to_s
|
@@ -0,0 +1,52 @@
|
|
1
|
+
module Internals
|
2
|
+
module Shared
|
3
|
+
|
4
|
+
module Category
|
5
|
+
|
6
|
+
def index_name
|
7
|
+
index.name
|
8
|
+
end
|
9
|
+
def category_name
|
10
|
+
name
|
11
|
+
end
|
12
|
+
|
13
|
+
# Path and partial filename of a specific index on this category.
|
14
|
+
#
|
15
|
+
def index_path bundle_name, type
|
16
|
+
"#{index_directory}/#{name}_#{bundle_name}_#{type}"
|
17
|
+
end
|
18
|
+
|
19
|
+
#
|
20
|
+
#
|
21
|
+
def prepared_index_path
|
22
|
+
@prepared_index_path ||= "#{index_directory}/prepared_#{name}_index"
|
23
|
+
end
|
24
|
+
def prepared_index_file &block
|
25
|
+
@prepared_index_file ||= Internals::Index::File::Text.new prepared_index_path
|
26
|
+
@prepared_index_file.open_for_indexing &block
|
27
|
+
end
|
28
|
+
|
29
|
+
# Identifier for internal use.
|
30
|
+
#
|
31
|
+
def identifier
|
32
|
+
@identifier ||= "#{index.name}:#{name}"
|
33
|
+
end
|
34
|
+
def to_s
|
35
|
+
"#{index.name} #{name}"
|
36
|
+
end
|
37
|
+
|
38
|
+
# The index directory for this category.
|
39
|
+
#
|
40
|
+
def index_directory
|
41
|
+
@index_directory ||= "#{PICKY_ROOT}/index/#{PICKY_ENVIRONMENT}/#{index.name}"
|
42
|
+
end
|
43
|
+
# Creates the index directory including all necessary paths above it.
|
44
|
+
#
|
45
|
+
def prepare_index_directory
|
46
|
+
FileUtils.mkdir_p index_directory
|
47
|
+
end
|
48
|
+
|
49
|
+
end
|
50
|
+
|
51
|
+
end
|
52
|
+
end
|
@@ -169,7 +169,7 @@ Case sensitive? #{@case_sensitive ? "Yes." : "-"}
|
|
169
169
|
# Defaults.
|
170
170
|
#
|
171
171
|
splits_text_on options[:splits_text_on] || /\s/
|
172
|
-
reject_token_if &(options[:reject_token_if] || :blank?)
|
172
|
+
reject_token_if &(options[:reject_token_if] || options[:rejects_token_if] || :blank?) # TODO Decide on using an s or not.
|
173
173
|
end
|
174
174
|
|
175
175
|
# Default preprocessing hook.
|
@@ -0,0 +1,54 @@
|
|
1
|
+
module Internals
|
2
|
+
|
3
|
+
module Tokenizers
|
4
|
+
|
5
|
+
|
6
|
+
class Location < Base
|
7
|
+
|
8
|
+
attr_reader :calculation
|
9
|
+
|
10
|
+
def initialize options = {}
|
11
|
+
super options
|
12
|
+
|
13
|
+
grid = options[:grid]
|
14
|
+
precision = options[:precision] || 1
|
15
|
+
|
16
|
+
@calculation = Internals::Calculations::Location.new grid, precision
|
17
|
+
|
18
|
+
@minimum = 1.0 / 0
|
19
|
+
|
20
|
+
@locations = []
|
21
|
+
end
|
22
|
+
|
23
|
+
# TODO Work on this!
|
24
|
+
#
|
25
|
+
def tokenize text
|
26
|
+
|
27
|
+
# Gather min/max.
|
28
|
+
#
|
29
|
+
source.harvest category do |indexed_id, location|
|
30
|
+
location = location.to_f
|
31
|
+
minimum = location if location < minimum
|
32
|
+
locations << [indexed_id, location]
|
33
|
+
end
|
34
|
+
|
35
|
+
calculation.minimum = minimum
|
36
|
+
|
37
|
+
# Recalculate locations.
|
38
|
+
#
|
39
|
+
locations.each do |indexed_id, location|
|
40
|
+
calculation.recalculated_range(location).each do |new_location|
|
41
|
+
yield indexed_id, new_location.to_s
|
42
|
+
end
|
43
|
+
end
|
44
|
+
|
45
|
+
# TODO Move to the right place.
|
46
|
+
#
|
47
|
+
category.exact[:location_minimum] = minimum
|
48
|
+
end
|
49
|
+
|
50
|
+
end
|
51
|
+
|
52
|
+
end
|
53
|
+
|
54
|
+
end
|
data/lib/picky/loader.rb
CHANGED
@@ -109,8 +109,9 @@ module Loader # :nodoc:all
|
|
109
109
|
|
110
110
|
# Index generation strategies.
|
111
111
|
#
|
112
|
-
load_internals 'indexers/
|
112
|
+
load_internals 'indexers/base'
|
113
113
|
load_internals 'indexers/serial'
|
114
|
+
load_internals 'indexers/parallel'
|
114
115
|
|
115
116
|
# Generators.
|
116
117
|
#
|
@@ -146,6 +147,10 @@ module Loader # :nodoc:all
|
|
146
147
|
load_internals 'generators/weights_generator'
|
147
148
|
load_internals 'generators/similarity_generator'
|
148
149
|
|
150
|
+
# Shared index elements.
|
151
|
+
#
|
152
|
+
load_internals 'shared/category'
|
153
|
+
|
149
154
|
# Index store handling.
|
150
155
|
#
|
151
156
|
load_internals 'index/backend'
|
@@ -169,9 +174,11 @@ module Loader # :nodoc:all
|
|
169
174
|
load_internals 'indexing/bundle/memory'
|
170
175
|
load_internals 'indexing/bundle/redis'
|
171
176
|
load_internals 'indexing/category'
|
172
|
-
load_internals 'indexing/categories'
|
177
|
+
# load_internals 'indexing/categories'
|
173
178
|
load_internals 'indexing/index'
|
174
179
|
|
180
|
+
load_internals 'indexing/wrappers/category/location'
|
181
|
+
|
175
182
|
load_internals 'indexed/bundle/base'
|
176
183
|
load_internals 'indexed/bundle/memory'
|
177
184
|
load_internals 'indexed/bundle/redis'
|
@@ -187,6 +194,8 @@ module Loader # :nodoc:all
|
|
187
194
|
load_internals 'indexed/wrappers/bundle/calculation'
|
188
195
|
load_internals 'indexed/wrappers/bundle/location'
|
189
196
|
|
197
|
+
load_internals 'indexed/wrappers/category/location'
|
198
|
+
|
190
199
|
# Tokens.
|
191
200
|
#
|
192
201
|
load_internals 'query/token'
|
@@ -216,7 +225,7 @@ module Loader # :nodoc:all
|
|
216
225
|
|
217
226
|
# Configuration.
|
218
227
|
#
|
219
|
-
load_internals 'configuration/index'
|
228
|
+
# load_internals 'configuration/index'
|
220
229
|
|
221
230
|
# Adapters.
|
222
231
|
#
|
@@ -236,6 +245,10 @@ module Loader # :nodoc:all
|
|
236
245
|
#
|
237
246
|
load_relative 'rack/harakiri'
|
238
247
|
|
248
|
+
# Errors.
|
249
|
+
#
|
250
|
+
load_relative 'no_source_specified_exception'
|
251
|
+
|
239
252
|
# Load analyzer.
|
240
253
|
#
|
241
254
|
load_relative 'analyzer'
|
data/lib/picky/search.rb
CHANGED
@@ -25,13 +25,52 @@ class Search
|
|
25
25
|
#
|
26
26
|
# TODO Add identifiers_to_remove (rename) and reduce_allocations_to_amount (rename).
|
27
27
|
#
|
28
|
+
# It is also possible to define the tokenizer and weights like so.
|
29
|
+
# Example:
|
30
|
+
# Search.new(index1, index2, index3) do
|
31
|
+
# searching removes_characters: /[^a-z]/, etc.
|
32
|
+
# boost [:author, :title] => +3, [:title, :isbn] => +1
|
33
|
+
# end
|
34
|
+
#
|
28
35
|
def initialize *index_definitions
|
29
36
|
options = Hash === index_definitions.last ? index_definitions.pop : {}
|
30
37
|
|
31
|
-
@indexes
|
32
|
-
|
33
|
-
|
34
|
-
|
38
|
+
@indexes = Internals::Query::Indexes.new *index_definitions, combinations_type_for(index_definitions)
|
39
|
+
searching options[:tokenizer]
|
40
|
+
boost options[:weights]
|
41
|
+
|
42
|
+
instance_eval(&Proc.new) if block_given?
|
43
|
+
end
|
44
|
+
|
45
|
+
# TODO Doc. Spec.
|
46
|
+
#
|
47
|
+
# Example:
|
48
|
+
# Search.new(index1, index2, index3) do
|
49
|
+
# searching removes_characters: /[^a-z]/, etc.
|
50
|
+
# boost [:author, :title] => +3, [:title, :isbn] => +1
|
51
|
+
# end
|
52
|
+
#
|
53
|
+
def searching options
|
54
|
+
@tokenizer = if options.respond_to?(:tokenize)
|
55
|
+
options
|
56
|
+
else
|
57
|
+
options && Internals::Tokenizers::Query.new(options)
|
58
|
+
end
|
59
|
+
end
|
60
|
+
def tokenizer
|
61
|
+
@tokenizer || Internals::Tokenizers::Query.default
|
62
|
+
end
|
63
|
+
# TODO Doc. Spec.
|
64
|
+
#
|
65
|
+
# Example:
|
66
|
+
# Search.new(index1, index2, index3) do
|
67
|
+
# searching removes_characters: /[^a-z]/, etc.
|
68
|
+
# boost [:author, :title] => +3, [:title, :isbn] => +1
|
69
|
+
# end
|
70
|
+
#
|
71
|
+
def boost options
|
72
|
+
weights = options || Query::Weights.new
|
73
|
+
@weights = Hash === weights ? Query::Weights.new(weights) : weights
|
35
74
|
end
|
36
75
|
|
37
76
|
# Returns the right combinations strategy for
|
@@ -110,7 +149,7 @@ class Search
|
|
110
149
|
# * text: The text to tokenize.
|
111
150
|
#
|
112
151
|
def tokenized text
|
113
|
-
|
152
|
+
tokenizer.tokenize text
|
114
153
|
end
|
115
154
|
|
116
155
|
# Gets sorted allocations for the tokens.
|
data/lib/picky/sources/base.rb
CHANGED
@@ -42,7 +42,7 @@ module Sources
|
|
42
42
|
|
43
43
|
# Called by the indexer when gathering data.
|
44
44
|
#
|
45
|
-
# Yields the data (id, text for id) for the given
|
45
|
+
# Yields the data (id, text for id) for the given category.
|
46
46
|
#
|
47
47
|
# When implementing or overriding your own,
|
48
48
|
# be sure to <tt>yield(id, text_for_id)</tt> (or <tt>block.call(id, text_for_id)</tt>)
|
@@ -50,7 +50,7 @@ module Sources
|
|
50
50
|
#
|
51
51
|
# Note: Since harvest needs to be implemented, it has no default implementation.
|
52
52
|
#
|
53
|
-
# def harvest
|
53
|
+
# def harvest category # :yields: id, text_for_id
|
54
54
|
#
|
55
55
|
# end
|
56
56
|
|
data/lib/picky/sources/couch.rb
CHANGED
data/lib/picky/sources/csv.rb
CHANGED
data/lib/picky/sources/db.rb
CHANGED
@@ -123,19 +123,19 @@ module Sources
|
|
123
123
|
|
124
124
|
# Harvests the data to index in chunks.
|
125
125
|
#
|
126
|
-
def harvest
|
126
|
+
def harvest category, &block
|
127
127
|
connect_backend
|
128
128
|
|
129
|
-
(0..count(index)).step(chunksize) do |offset|
|
130
|
-
get_data
|
129
|
+
(0..count(category.index)).step(chunksize) do |offset|
|
130
|
+
get_data category, offset, &block
|
131
131
|
end
|
132
132
|
end
|
133
133
|
|
134
134
|
# Gets the data from the backend.
|
135
135
|
#
|
136
|
-
def get_data
|
136
|
+
def get_data category, offset, &block # :nodoc:
|
137
137
|
|
138
|
-
select_statement = harvest_statement_with_offset
|
138
|
+
select_statement = harvest_statement_with_offset category, offset
|
139
139
|
|
140
140
|
# TODO Rewrite ASAP.
|
141
141
|
#
|
@@ -155,8 +155,8 @@ module Sources
|
|
155
155
|
|
156
156
|
# Builds a harvest statement for getting data to index.
|
157
157
|
#
|
158
|
-
def harvest_statement_with_offset
|
159
|
-
statement = harvest_statement
|
158
|
+
def harvest_statement_with_offset category, offset
|
159
|
+
statement = harvest_statement category
|
160
160
|
|
161
161
|
statement += statement.include?('WHERE') ? ' AND' : ' WHERE'
|
162
162
|
|
@@ -165,8 +165,8 @@ module Sources
|
|
165
165
|
|
166
166
|
# The harvest statement used to pull data from the snapshot table.
|
167
167
|
#
|
168
|
-
def harvest_statement
|
169
|
-
"SELECT id, #{category.from} FROM #{snapshot_table_name(index)} st"
|
168
|
+
def harvest_statement category
|
169
|
+
"SELECT id, #{category.from} FROM #{snapshot_table_name(category.index)} st"
|
170
170
|
end
|
171
171
|
|
172
172
|
# The amount of records that are loaded each chunk.
|