picky 2.6.0 → 2.7.0
Sign up to get free protection for your applications and to get access to all the features.
- data/lib/picky/analyzer.rb +4 -4
- data/lib/picky/application.rb +6 -7
- data/lib/picky/backend/{backend.rb → base.rb} +31 -14
- data/lib/picky/backend/file/basic.rb +12 -4
- data/lib/picky/backend/file/json.rb +5 -5
- data/lib/picky/backend/file/text.rb +1 -1
- data/lib/picky/backend/files.rb +3 -9
- data/lib/picky/backend/redis/basic.rb +8 -0
- data/lib/picky/backend/redis/list_hash.rb +5 -5
- data/lib/picky/backend/redis/string_hash.rb +5 -5
- data/lib/picky/backend/redis.rb +5 -5
- data/lib/picky/bundle.rb +62 -0
- data/lib/picky/categories.rb +10 -9
- data/lib/picky/categories_indexed.rb +12 -7
- data/lib/picky/categories_indexing.rb +7 -9
- data/lib/picky/category.rb +38 -26
- data/lib/picky/category_indexed.rb +4 -20
- data/lib/picky/category_indexing.rb +71 -68
- data/lib/picky/generators/base.rb +6 -6
- data/lib/picky/generators/partial/substring.rb +28 -26
- data/lib/picky/generators/partial_generator.rb +3 -3
- data/lib/picky/generators/similarity/phonetic.rb +5 -5
- data/lib/picky/generators/similarity_generator.rb +2 -2
- data/lib/picky/generators/weights/logarithmic.rb +3 -3
- data/lib/picky/generators/weights_generator.rb +2 -2
- data/lib/picky/index/base.rb +13 -10
- data/lib/picky/index/base_indexed.rb +2 -0
- data/lib/picky/index/base_indexing.rb +65 -57
- data/lib/picky/indexed/bundle/base.rb +21 -86
- data/lib/picky/indexed/bundle/memory.rb +5 -12
- data/lib/picky/indexed/bundle/redis.rb +42 -0
- data/lib/picky/indexed/wrappers/bundle/wrapper.rb +3 -3
- data/lib/picky/indexers/base.rb +20 -3
- data/lib/picky/indexers/parallel.rb +32 -14
- data/lib/picky/indexers/serial.rb +29 -26
- data/lib/picky/indexes.rb +5 -3
- data/lib/picky/indexes_indexed.rb +3 -15
- data/lib/picky/indexes_indexing.rb +18 -21
- data/lib/picky/indexing/bundle/base.rb +64 -45
- data/lib/picky/indexing/bundle/memory.rb +0 -4
- data/lib/picky/loader.rb +7 -6
- data/lib/picky/query/allocation.rb +3 -3
- data/lib/picky/query/token.rb +5 -1
- data/lib/picky/search.rb +5 -0
- data/lib/picky/sources/base.rb +21 -2
- data/lib/picky/sources/db.rb +0 -7
- data/lib/picky/statistics.rb +9 -12
- data/lib/picky/tokenizers/location.rb +1 -1
- data/lib/tasks/checks.rake +8 -6
- data/lib/tasks/index.rake +14 -20
- data/lib/tasks/server.rake +18 -2
- data/lib/tasks/statistics.rake +27 -14
- data/lib/tasks/todo.rake +2 -2
- data/lib/tasks/try.rake +12 -27
- data/spec/lib/application_spec.rb +1 -1
- data/spec/lib/backend/file/basic_spec.rb +6 -6
- data/spec/lib/backend/file/json_spec.rb +11 -6
- data/spec/lib/backend/file/marshal_spec.rb +11 -6
- data/spec/lib/backend/files_spec.rb +21 -7
- data/spec/lib/backend/redis/basic_spec.rb +6 -0
- data/spec/lib/backend/redis/list_hash_spec.rb +9 -3
- data/spec/lib/backend/redis/string_hash_spec.rb +7 -1
- data/spec/lib/backend/redis_spec.rb +22 -12
- data/spec/lib/categories_indexed_spec.rb +2 -2
- data/spec/lib/category_indexing_spec.rb +12 -33
- data/spec/lib/category_spec.rb +22 -0
- data/spec/lib/index/base_indexing_spec.rb +30 -0
- data/spec/lib/indexed/bundle/memory_spec.rb +13 -20
- data/spec/lib/indexers/base_spec.rb +39 -4
- data/spec/lib/indexers/parallel_spec.rb +2 -10
- data/spec/lib/indexers/serial_spec.rb +11 -26
- data/spec/lib/indexes_class_spec.rb +4 -4
- data/spec/lib/indexes_indexed_spec.rb +2 -2
- data/spec/lib/indexes_indexing_spec.rb +6 -10
- data/spec/lib/indexes_spec.rb +3 -3
- data/spec/lib/indexing/bundle/{super_base_spec.rb → base_spec.rb} +2 -2
- data/spec/lib/indexing/bundle/memory_partial_generation_speed_spec.rb +3 -3
- data/spec/lib/indexing/bundle/memory_spec.rb +16 -14
- data/spec/lib/indexing/bundle/redis_spec.rb +18 -16
- data/spec/lib/query/allocation_spec.rb +1 -1
- data/spec/lib/query/token_spec.rb +5 -7
- data/spec/lib/sources/base_spec.rb +53 -0
- data/spec/lib/sources/db_spec.rb +0 -7
- metadata +11 -12
- data/lib/picky/indexers/solr.rb +0 -56
- data/lib/picky/indexing/bundle/super_base.rb +0 -61
- data/lib/picky/solr/schema_generator.rb +0 -74
- data/lib/tasks/search.rake +0 -9
- data/lib/tasks/shortcuts.rake +0 -32
- data/lib/tasks/solr.rake +0 -36
data/lib/picky/category.rb
CHANGED
@@ -1,7 +1,6 @@
|
|
1
1
|
class Category
|
2
2
|
|
3
|
-
attr_reader :name
|
4
|
-
:index
|
3
|
+
attr_reader :name
|
5
4
|
|
6
5
|
# Mandatory params:
|
7
6
|
# * name: Category name to use as identifier and file names.
|
@@ -31,12 +30,12 @@ class Category
|
|
31
30
|
|
32
31
|
# TODO Push into Bundle. At least the weights.
|
33
32
|
#
|
34
|
-
partial = options[:partial] || Generators::Partial::Default
|
35
33
|
weights = options[:weights] || Generators::Weights::Default
|
34
|
+
partial = options[:partial] || Generators::Partial::Default
|
36
35
|
similarity = options[:similarity] || Generators::Similarity::Default
|
37
36
|
|
38
|
-
@indexing_exact = index.indexing_bundle_class.new(:exact, self,
|
39
|
-
@indexing_partial = index.indexing_bundle_class.new(:partial, self, Generators::Similarity::None.new
|
37
|
+
@indexing_exact = index.indexing_bundle_class.new(:exact, self, weights, Generators::Partial::None.new, similarity)
|
38
|
+
@indexing_partial = index.indexing_bundle_class.new(:partial, self, weights, partial, Generators::Similarity::None.new)
|
40
39
|
|
41
40
|
# Indexed.
|
42
41
|
#
|
@@ -55,6 +54,12 @@ class Category
|
|
55
54
|
Query::Qualifiers.add(name, generate_qualifiers_from(options) || [name])
|
56
55
|
end
|
57
56
|
|
57
|
+
# TODO Move to Index.
|
58
|
+
#
|
59
|
+
def generate_qualifiers_from options
|
60
|
+
options[:qualifiers] || options[:qualifier] && [options[:qualifier]]
|
61
|
+
end
|
62
|
+
|
58
63
|
# Indexes and reloads the category.
|
59
64
|
#
|
60
65
|
def reindex
|
@@ -74,10 +79,10 @@ class Category
|
|
74
79
|
@index.name
|
75
80
|
end
|
76
81
|
|
77
|
-
#
|
82
|
+
# The category itself just yields itself.
|
78
83
|
#
|
79
|
-
def
|
80
|
-
|
84
|
+
def each_category
|
85
|
+
yield self
|
81
86
|
end
|
82
87
|
|
83
88
|
# Path and partial filename of the prepared index on this category.
|
@@ -85,9 +90,20 @@ class Category
|
|
85
90
|
def prepared_index_path
|
86
91
|
@prepared_index_path ||= "#{index_directory}/prepared_#{name}_index"
|
87
92
|
end
|
93
|
+
# Get an opened index file.
|
94
|
+
#
|
95
|
+
# Note: If you don't use it with the block, do not forget to close it.
|
96
|
+
#
|
88
97
|
def prepared_index_file &block
|
89
98
|
@prepared_index_file ||= Backend::File::Text.new prepared_index_path
|
90
|
-
@prepared_index_file.
|
99
|
+
@prepared_index_file.open &block
|
100
|
+
end
|
101
|
+
# Creates the index directory including all necessary paths above it.
|
102
|
+
#
|
103
|
+
# Note: Interface method called by any indexers.
|
104
|
+
#
|
105
|
+
def prepare_index_directory
|
106
|
+
FileUtils.mkdir_p index_directory
|
91
107
|
end
|
92
108
|
|
93
109
|
# The index directory for this category.
|
@@ -96,30 +112,26 @@ class Category
|
|
96
112
|
@index_directory ||= "#{PICKY_ROOT}/index/#{PICKY_ENVIRONMENT}/#{@index.name}"
|
97
113
|
end
|
98
114
|
|
99
|
-
#
|
115
|
+
# Path and partial filename of a specific subindex on this category.
|
100
116
|
#
|
101
|
-
|
102
|
-
|
117
|
+
# Subindexes are:
|
118
|
+
# * inverted index
|
119
|
+
# * weights index
|
120
|
+
# * partial index
|
121
|
+
# * similarity index
|
122
|
+
#
|
123
|
+
def index_path bundle_name, type
|
124
|
+
"#{index_directory}/#{name}_#{bundle_name}_#{type}"
|
103
125
|
end
|
104
126
|
|
105
|
-
# Identifier for
|
106
|
-
#
|
107
|
-
# TODO What internal use?
|
127
|
+
# Identifier for technical output.
|
108
128
|
#
|
109
129
|
def identifier
|
110
|
-
@identifier ||= "#{
|
111
|
-
end
|
112
|
-
|
113
|
-
def to_info
|
114
|
-
<<-CATEGORY
|
115
|
-
Category(#{name}):
|
116
|
-
Exact:
|
117
|
-
#{exact.indented_to_s(4)}
|
118
|
-
Partial:
|
119
|
-
#{partial.indented_to_s(4)}
|
120
|
-
CATEGORY
|
130
|
+
@identifier ||= "#{PICKY_ENVIRONMENT}:#{index_name}:#{name}"
|
121
131
|
end
|
122
132
|
|
133
|
+
#
|
134
|
+
#
|
123
135
|
def to_s
|
124
136
|
"Category(#{name})"
|
125
137
|
end
|
@@ -4,12 +4,6 @@ class Category
|
|
4
4
|
|
5
5
|
attr_reader :indexed_exact
|
6
6
|
|
7
|
-
# TODO Move to Index.
|
8
|
-
#
|
9
|
-
def generate_qualifiers_from options
|
10
|
-
options[:qualifiers] || options[:qualifier] && [options[:qualifier]]
|
11
|
-
end
|
12
|
-
|
13
7
|
# Loads the index from cache.
|
14
8
|
#
|
15
9
|
def load_from_cache
|
@@ -19,18 +13,6 @@ class Category
|
|
19
13
|
end
|
20
14
|
alias reload load_from_cache
|
21
15
|
|
22
|
-
# Loads, analyzes, and clears the index.
|
23
|
-
#
|
24
|
-
# Note: The idea is not to run this while the search engine is running.
|
25
|
-
#
|
26
|
-
def analyze collector
|
27
|
-
collector[identifier] = {
|
28
|
-
:exact => Analyzer.new.analyze(indexed_exact),
|
29
|
-
:partial => Analyzer.new.analyze(indexed_partial)
|
30
|
-
}
|
31
|
-
collector
|
32
|
-
end
|
33
|
-
|
34
16
|
# Gets the weight for this token's text.
|
35
17
|
#
|
36
18
|
def weight token
|
@@ -49,13 +31,15 @@ class Category
|
|
49
31
|
token.partial? ? indexed_partial : indexed_exact
|
50
32
|
end
|
51
33
|
|
52
|
-
# The partial strategy defines whether to
|
34
|
+
# The partial strategy defines whether to
|
35
|
+
# really use the partial index.
|
53
36
|
#
|
54
37
|
def indexed_partial
|
55
38
|
@partial_strategy.use_exact_for_partial? ? @indexed_exact : @indexed_partial
|
56
39
|
end
|
57
40
|
|
58
|
-
#
|
41
|
+
# Returns a combination for the token,
|
42
|
+
# or nil, if there is none.
|
59
43
|
#
|
60
44
|
def combination_for token
|
61
45
|
weight(token) && Query::Combination.new(token, self)
|
@@ -14,8 +14,56 @@ class Category
|
|
14
14
|
cache
|
15
15
|
end
|
16
16
|
|
17
|
+
# Indexes, creates the "prepared_..." file.
|
18
|
+
#
|
19
|
+
def prepare
|
20
|
+
with_data_snapshot do
|
21
|
+
indexer.index [self]
|
22
|
+
end
|
23
|
+
end
|
24
|
+
|
25
|
+
# Take a data snapshot if the source offers it.
|
26
|
+
#
|
27
|
+
def with_data_snapshot
|
28
|
+
if source.respond_to? :with_snapshot
|
29
|
+
source.with_snapshot(@index) do
|
30
|
+
yield
|
31
|
+
end
|
32
|
+
else
|
33
|
+
yield
|
34
|
+
end
|
35
|
+
end
|
36
|
+
|
37
|
+
# Generates all caches for this category.
|
38
|
+
#
|
39
|
+
def cache
|
40
|
+
configure
|
41
|
+
generate_caches_from_source
|
42
|
+
generate_partial
|
43
|
+
generate_caches_from_memory
|
44
|
+
dump_caches
|
45
|
+
timed_exclaim %Q{"#{identifier}": Caching finished.}
|
46
|
+
end
|
47
|
+
# Generate the cache data.
|
48
|
+
#
|
49
|
+
def generate_caches_from_source
|
50
|
+
indexing_exact.generate_caches_from_source
|
51
|
+
end
|
52
|
+
def generate_partial
|
53
|
+
indexing_partial.generate_partial_from indexing_exact.inverted
|
54
|
+
end
|
55
|
+
def generate_caches_from_memory
|
56
|
+
indexing_partial.generate_caches_from_memory
|
57
|
+
end
|
58
|
+
def dump_caches
|
59
|
+
indexing_exact.dump
|
60
|
+
indexing_partial.dump
|
61
|
+
end
|
62
|
+
|
17
63
|
# Return an appropriate source.
|
18
64
|
#
|
65
|
+
# If we have no explicit source, we'll check the index for one.
|
66
|
+
#
|
19
67
|
def source
|
20
68
|
@source || @index.source
|
21
69
|
end
|
@@ -39,45 +87,33 @@ class Category
|
|
39
87
|
|
40
88
|
# The indexer is lazily generated and cached.
|
41
89
|
#
|
90
|
+
# TODO Really cache?
|
91
|
+
#
|
42
92
|
def indexer
|
43
93
|
@indexer ||= source.respond_to?(:each) ? Indexers::Parallel.new(self) : Indexers::Serial.new(self)
|
44
94
|
end
|
45
95
|
|
46
|
-
# TODO This is a hack to get the parallel indexer working.
|
47
|
-
#
|
48
|
-
def categories
|
49
|
-
[self]
|
50
|
-
end
|
51
|
-
|
52
96
|
# Returns an appropriate tokenizer.
|
53
97
|
# If one isn't set on this category, will try the index,
|
54
98
|
# and finally the default index tokenizer.
|
55
99
|
#
|
56
100
|
def tokenizer
|
57
|
-
@tokenizer || @index.tokenizer
|
101
|
+
@tokenizer || @index.tokenizer
|
58
102
|
end
|
59
103
|
|
60
|
-
#
|
61
|
-
#
|
104
|
+
# We need to set what formatting method should be used.
|
105
|
+
# Uses the one defined in the indexer.
|
62
106
|
#
|
63
|
-
|
64
|
-
timed_exclaim "Backing up #{identifier}."
|
65
|
-
indexing_exact.backup
|
66
|
-
indexing_partial.backup
|
67
|
-
end
|
68
|
-
|
69
|
-
# Restore the caches.
|
70
|
-
# (Revert with backup_caches)
|
107
|
+
# TODO Make this more dynamic.
|
71
108
|
#
|
72
|
-
def
|
73
|
-
|
74
|
-
|
75
|
-
indexing_partial.restore
|
109
|
+
def configure
|
110
|
+
indexing_exact[:key_format] = self.key_format
|
111
|
+
indexing_partial[:key_format] = self.key_format
|
76
112
|
end
|
77
113
|
|
78
114
|
# Checks the caches for existence.
|
79
115
|
#
|
80
|
-
def
|
116
|
+
def check
|
81
117
|
timed_exclaim "Checking #{identifier}."
|
82
118
|
indexing_exact.raise_unless_cache_exists
|
83
119
|
indexing_partial.raise_unless_cache_exists
|
@@ -85,61 +121,28 @@ class Category
|
|
85
121
|
|
86
122
|
# Deletes the caches.
|
87
123
|
#
|
88
|
-
def
|
124
|
+
def clear
|
89
125
|
timed_exclaim "Deleting #{identifier}."
|
90
126
|
indexing_exact.delete
|
91
127
|
indexing_partial.delete
|
92
128
|
end
|
93
129
|
|
94
|
-
#
|
95
|
-
#
|
96
|
-
#
|
97
|
-
# TODO Make this more dynamic.
|
98
|
-
#
|
99
|
-
def configure
|
100
|
-
indexing_exact[:key_format] = self.key_format
|
101
|
-
indexing_partial[:key_format] = self.key_format
|
102
|
-
end
|
103
|
-
|
104
|
-
# Indexes, creates the "prepared_..." file.
|
105
|
-
#
|
106
|
-
# TODO This step could already prepare the id (if a
|
107
|
-
# per category key_format is not really needed).
|
108
|
-
#
|
109
|
-
def prepare
|
110
|
-
prepare_index_directory
|
111
|
-
indexer.index
|
112
|
-
end
|
113
|
-
|
114
|
-
# Generates all caches for this category.
|
130
|
+
# Backup the caches.
|
131
|
+
# (Revert with restore_caches)
|
115
132
|
#
|
116
|
-
def
|
117
|
-
|
118
|
-
|
133
|
+
def backup
|
134
|
+
timed_exclaim "Backing up #{identifier}."
|
135
|
+
indexing_exact.backup
|
136
|
+
indexing_partial.backup
|
119
137
|
end
|
120
138
|
|
121
|
-
#
|
139
|
+
# Restore the caches.
|
140
|
+
# (Revert with backup_caches)
|
122
141
|
#
|
123
|
-
def
|
124
|
-
|
125
|
-
|
126
|
-
|
127
|
-
generate_caches_from_memory
|
128
|
-
dump_caches
|
129
|
-
timed_exclaim %Q{"#{identifier}": Caching finished.}
|
130
|
-
end
|
131
|
-
def generate_caches_from_source
|
132
|
-
indexing_exact.generate_caches_from_source
|
133
|
-
end
|
134
|
-
def generate_partial
|
135
|
-
indexing_partial.generate_partial_from indexing_exact.index
|
136
|
-
end
|
137
|
-
def generate_caches_from_memory
|
138
|
-
indexing_partial.generate_caches_from_memory
|
139
|
-
end
|
140
|
-
def dump_caches
|
141
|
-
indexing_exact.dump
|
142
|
-
indexing_partial.dump
|
142
|
+
def restore
|
143
|
+
timed_exclaim "Restoring #{identifier}."
|
144
|
+
indexing_exact.restore
|
145
|
+
indexing_partial.restore
|
143
146
|
end
|
144
147
|
|
145
148
|
end
|
@@ -3,13 +3,13 @@ module Generators # :nodoc:all
|
|
3
3
|
# A cache generator holds an index.
|
4
4
|
#
|
5
5
|
class Base
|
6
|
-
|
7
|
-
attr_reader :
|
8
|
-
|
9
|
-
def initialize
|
10
|
-
@
|
6
|
+
|
7
|
+
attr_reader :inverted
|
8
|
+
|
9
|
+
def initialize inverted
|
10
|
+
@inverted = inverted
|
11
11
|
end
|
12
|
-
|
12
|
+
|
13
13
|
end
|
14
14
|
|
15
15
|
end
|
@@ -1,16 +1,16 @@
|
|
1
1
|
module Generators
|
2
2
|
|
3
3
|
module Partial
|
4
|
-
|
4
|
+
|
5
5
|
# Generates the right substrings for use in the substring strategy.
|
6
6
|
#
|
7
7
|
class SubstringGenerator
|
8
|
-
|
8
|
+
|
9
9
|
attr_reader :from, :to
|
10
|
-
|
10
|
+
|
11
11
|
def initialize from, to
|
12
12
|
@from, @to = from, to
|
13
|
-
|
13
|
+
|
14
14
|
if @to.zero?
|
15
15
|
def each_subtoken token, &block
|
16
16
|
token.each_subtoken @from, &block
|
@@ -20,11 +20,11 @@ module Generators
|
|
20
20
|
token[0..@to].intern.each_subtoken @from, &block
|
21
21
|
end
|
22
22
|
end
|
23
|
-
|
23
|
+
|
24
24
|
end
|
25
|
-
|
25
|
+
|
26
26
|
end
|
27
|
-
|
27
|
+
|
28
28
|
# The subtoken partial strategy.
|
29
29
|
#
|
30
30
|
# If given "florian"
|
@@ -32,7 +32,7 @@ module Generators
|
|
32
32
|
# (Depending on what the given from value is, the example is with option from: 1)
|
33
33
|
#
|
34
34
|
class Substring < Strategy
|
35
|
-
|
35
|
+
|
36
36
|
# The from option signifies where in the symbol it
|
37
37
|
# will start in generating the subtokens.
|
38
38
|
#
|
@@ -51,48 +51,50 @@ module Generators
|
|
51
51
|
to = options[:to] || -1
|
52
52
|
@generator = SubstringGenerator.new from, to
|
53
53
|
end
|
54
|
-
|
54
|
+
|
55
55
|
# Delegator to generator#from.
|
56
56
|
#
|
57
57
|
def from
|
58
58
|
@generator.from
|
59
59
|
end
|
60
|
-
|
60
|
+
|
61
61
|
# Delegator to generator#to.
|
62
62
|
#
|
63
63
|
def to
|
64
64
|
@generator.to
|
65
65
|
end
|
66
|
-
|
67
|
-
# Generates a partial index from the given index.
|
66
|
+
|
67
|
+
# Generates a partial index from the given inverted index.
|
68
68
|
#
|
69
|
-
def generate_from
|
69
|
+
def generate_from inverted
|
70
70
|
result = {}
|
71
|
-
|
71
|
+
|
72
72
|
# Generate for each key token the subtokens.
|
73
73
|
#
|
74
74
|
i = 0
|
75
|
-
|
75
|
+
j = 0
|
76
|
+
inverted.each_key do |token|
|
76
77
|
i += 1
|
77
78
|
if i == 5000
|
78
|
-
|
79
|
+
j += 1
|
80
|
+
timed_exclaim %Q{#{"%8i" % (i*j)} generated (current token: "#{token}").}
|
79
81
|
i = 0
|
80
82
|
end
|
81
|
-
generate_for token,
|
83
|
+
generate_for token, inverted, result
|
82
84
|
end
|
83
|
-
|
85
|
+
|
84
86
|
# Remove duplicate ids.
|
85
87
|
#
|
86
88
|
# THINK If it is unique for a subtoken, it is
|
87
89
|
# unique for all derived longer tokens.
|
88
90
|
#
|
89
91
|
result.each_value &:uniq!
|
90
|
-
|
92
|
+
|
91
93
|
result
|
92
94
|
end
|
93
|
-
|
95
|
+
|
94
96
|
private
|
95
|
-
|
97
|
+
|
96
98
|
# To each shortened token of :test
|
97
99
|
# :test, :tes, :te, :t
|
98
100
|
# add all ids of :test
|
@@ -101,18 +103,18 @@ module Generators
|
|
101
103
|
#
|
102
104
|
# THINK Could be improved by appending the aforegoing ids?
|
103
105
|
#
|
104
|
-
def generate_for token,
|
106
|
+
def generate_for token, inverted, result
|
105
107
|
@generator.each_subtoken(token) do |subtoken|
|
106
108
|
if result[subtoken]
|
107
|
-
result[subtoken] +=
|
109
|
+
result[subtoken] += inverted[token] # unique
|
108
110
|
else
|
109
|
-
result[subtoken] =
|
111
|
+
result[subtoken] = inverted[token].dup
|
110
112
|
end
|
111
113
|
end
|
112
114
|
end
|
113
|
-
|
115
|
+
|
114
116
|
end
|
115
|
-
|
117
|
+
|
116
118
|
end
|
117
119
|
|
118
120
|
end
|
@@ -3,11 +3,11 @@ module Generators
|
|
3
3
|
# The partial generator uses a subtoken(downto:1) generator as default.
|
4
4
|
#
|
5
5
|
class PartialGenerator < Base
|
6
|
-
|
7
|
-
# Generate a partial index based on the given index.
|
6
|
+
|
7
|
+
# Generate a partial index based on the given inverted index.
|
8
8
|
#
|
9
9
|
def generate strategy = Partial::Substring.new(from: 1)
|
10
|
-
strategy.generate_from self.
|
10
|
+
strategy.generate_from self.inverted
|
11
11
|
end
|
12
12
|
|
13
13
|
end
|
@@ -26,8 +26,8 @@ module Generators
|
|
26
26
|
# In the following form:
|
27
27
|
# [:meier, :mueller, :peter, :pater] => { MR: [:meier], MLR: [:mueller], PTR: [:peter, :pater] }
|
28
28
|
#
|
29
|
-
def generate_from
|
30
|
-
hash = hashify
|
29
|
+
def generate_from inverted
|
30
|
+
hash = hashify inverted.keys
|
31
31
|
sort hash
|
32
32
|
end
|
33
33
|
|
@@ -35,12 +35,12 @@ module Generators
|
|
35
35
|
|
36
36
|
# Sorts the index values in place.
|
37
37
|
#
|
38
|
-
def sort
|
39
|
-
|
38
|
+
def sort hash
|
39
|
+
hash.each_pair.each do |code, ary|
|
40
40
|
ary.sort_by_levenshtein! code
|
41
41
|
ary.slice! amount, ary.size # size is not perfectly correct, but anyway
|
42
42
|
end
|
43
|
-
|
43
|
+
hash
|
44
44
|
end
|
45
45
|
|
46
46
|
# Hashifies a list of symbols.
|
@@ -4,10 +4,10 @@ module Generators
|
|
4
4
|
#
|
5
5
|
class SimilarityGenerator < Base
|
6
6
|
|
7
|
-
# Generate a similarity index based on the given index.
|
7
|
+
# Generate a similarity index based on the given inverted index.
|
8
8
|
#
|
9
9
|
def generate strategy = Similarity::None.new
|
10
|
-
strategy.generate_from self.
|
10
|
+
strategy.generate_from self.inverted
|
11
11
|
end
|
12
12
|
|
13
13
|
end
|
@@ -9,10 +9,10 @@ module Generators
|
|
9
9
|
#
|
10
10
|
class Logarithmic < Strategy
|
11
11
|
|
12
|
-
# Generates a partial index from the given index.
|
12
|
+
# Generates a partial index from the given inverted index.
|
13
13
|
#
|
14
|
-
def generate_from
|
15
|
-
|
14
|
+
def generate_from inverted
|
15
|
+
inverted.inject({}) do |hash, text_ids|
|
16
16
|
text, ids = *text_ids
|
17
17
|
weight = weight_for ids.size
|
18
18
|
hash[text] ||= weight.round(2) if weight
|
@@ -4,10 +4,10 @@ module Generators
|
|
4
4
|
#
|
5
5
|
class WeightsGenerator < Base
|
6
6
|
|
7
|
-
# Generate a weights index based on the given index.
|
7
|
+
# Generate a weights index based on the given inverted index.
|
8
8
|
#
|
9
9
|
def generate strategy = Weights::Logarithmic.new
|
10
|
-
strategy.generate_from self.
|
10
|
+
strategy.generate_from self.inverted
|
11
11
|
end
|
12
12
|
|
13
13
|
end
|
data/lib/picky/index/base.rb
CHANGED
@@ -89,6 +89,7 @@ module Index
|
|
89
89
|
:categories
|
90
90
|
|
91
91
|
delegate :[],
|
92
|
+
:each_category,
|
92
93
|
:to => :categories
|
93
94
|
|
94
95
|
# Create a new index with a given source.
|
@@ -381,16 +382,6 @@ SOURCE
|
|
381
382
|
) unless source.respond_to?(:each) || source.respond_to?(:harvest)
|
382
383
|
end
|
383
384
|
|
384
|
-
def method_name
|
385
|
-
|
386
|
-
end
|
387
|
-
|
388
|
-
#
|
389
|
-
#
|
390
|
-
def to_s
|
391
|
-
"#{self.class}(#{name}, result_id: #{result_identifier}, source: #{source}, categories: #{categories})"
|
392
|
-
end
|
393
|
-
|
394
385
|
def to_stats # :nodoc:
|
395
386
|
stats = <<-INDEX
|
396
387
|
#{name} (#{self.class}):
|
@@ -401,6 +392,18 @@ INDEX
|
|
401
392
|
stats
|
402
393
|
end
|
403
394
|
|
395
|
+
# Identifier used for technical output.
|
396
|
+
#
|
397
|
+
def identifier
|
398
|
+
"#{PICKY_ENVIRONMENT}:#{name}"
|
399
|
+
end
|
400
|
+
|
401
|
+
#
|
402
|
+
#
|
403
|
+
def to_s
|
404
|
+
"#{self.class}(#{name}, result_id: #{result_identifier}, source: #{source}, categories: #{categories})"
|
405
|
+
end
|
406
|
+
|
404
407
|
end
|
405
408
|
|
406
409
|
end
|