picky 2.6.0 → 2.7.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/lib/picky/analyzer.rb +4 -4
- data/lib/picky/application.rb +6 -7
- data/lib/picky/backend/{backend.rb → base.rb} +31 -14
- data/lib/picky/backend/file/basic.rb +12 -4
- data/lib/picky/backend/file/json.rb +5 -5
- data/lib/picky/backend/file/text.rb +1 -1
- data/lib/picky/backend/files.rb +3 -9
- data/lib/picky/backend/redis/basic.rb +8 -0
- data/lib/picky/backend/redis/list_hash.rb +5 -5
- data/lib/picky/backend/redis/string_hash.rb +5 -5
- data/lib/picky/backend/redis.rb +5 -5
- data/lib/picky/bundle.rb +62 -0
- data/lib/picky/categories.rb +10 -9
- data/lib/picky/categories_indexed.rb +12 -7
- data/lib/picky/categories_indexing.rb +7 -9
- data/lib/picky/category.rb +38 -26
- data/lib/picky/category_indexed.rb +4 -20
- data/lib/picky/category_indexing.rb +71 -68
- data/lib/picky/generators/base.rb +6 -6
- data/lib/picky/generators/partial/substring.rb +28 -26
- data/lib/picky/generators/partial_generator.rb +3 -3
- data/lib/picky/generators/similarity/phonetic.rb +5 -5
- data/lib/picky/generators/similarity_generator.rb +2 -2
- data/lib/picky/generators/weights/logarithmic.rb +3 -3
- data/lib/picky/generators/weights_generator.rb +2 -2
- data/lib/picky/index/base.rb +13 -10
- data/lib/picky/index/base_indexed.rb +2 -0
- data/lib/picky/index/base_indexing.rb +65 -57
- data/lib/picky/indexed/bundle/base.rb +21 -86
- data/lib/picky/indexed/bundle/memory.rb +5 -12
- data/lib/picky/indexed/bundle/redis.rb +42 -0
- data/lib/picky/indexed/wrappers/bundle/wrapper.rb +3 -3
- data/lib/picky/indexers/base.rb +20 -3
- data/lib/picky/indexers/parallel.rb +32 -14
- data/lib/picky/indexers/serial.rb +29 -26
- data/lib/picky/indexes.rb +5 -3
- data/lib/picky/indexes_indexed.rb +3 -15
- data/lib/picky/indexes_indexing.rb +18 -21
- data/lib/picky/indexing/bundle/base.rb +64 -45
- data/lib/picky/indexing/bundle/memory.rb +0 -4
- data/lib/picky/loader.rb +7 -6
- data/lib/picky/query/allocation.rb +3 -3
- data/lib/picky/query/token.rb +5 -1
- data/lib/picky/search.rb +5 -0
- data/lib/picky/sources/base.rb +21 -2
- data/lib/picky/sources/db.rb +0 -7
- data/lib/picky/statistics.rb +9 -12
- data/lib/picky/tokenizers/location.rb +1 -1
- data/lib/tasks/checks.rake +8 -6
- data/lib/tasks/index.rake +14 -20
- data/lib/tasks/server.rake +18 -2
- data/lib/tasks/statistics.rake +27 -14
- data/lib/tasks/todo.rake +2 -2
- data/lib/tasks/try.rake +12 -27
- data/spec/lib/application_spec.rb +1 -1
- data/spec/lib/backend/file/basic_spec.rb +6 -6
- data/spec/lib/backend/file/json_spec.rb +11 -6
- data/spec/lib/backend/file/marshal_spec.rb +11 -6
- data/spec/lib/backend/files_spec.rb +21 -7
- data/spec/lib/backend/redis/basic_spec.rb +6 -0
- data/spec/lib/backend/redis/list_hash_spec.rb +9 -3
- data/spec/lib/backend/redis/string_hash_spec.rb +7 -1
- data/spec/lib/backend/redis_spec.rb +22 -12
- data/spec/lib/categories_indexed_spec.rb +2 -2
- data/spec/lib/category_indexing_spec.rb +12 -33
- data/spec/lib/category_spec.rb +22 -0
- data/spec/lib/index/base_indexing_spec.rb +30 -0
- data/spec/lib/indexed/bundle/memory_spec.rb +13 -20
- data/spec/lib/indexers/base_spec.rb +39 -4
- data/spec/lib/indexers/parallel_spec.rb +2 -10
- data/spec/lib/indexers/serial_spec.rb +11 -26
- data/spec/lib/indexes_class_spec.rb +4 -4
- data/spec/lib/indexes_indexed_spec.rb +2 -2
- data/spec/lib/indexes_indexing_spec.rb +6 -10
- data/spec/lib/indexes_spec.rb +3 -3
- data/spec/lib/indexing/bundle/{super_base_spec.rb → base_spec.rb} +2 -2
- data/spec/lib/indexing/bundle/memory_partial_generation_speed_spec.rb +3 -3
- data/spec/lib/indexing/bundle/memory_spec.rb +16 -14
- data/spec/lib/indexing/bundle/redis_spec.rb +18 -16
- data/spec/lib/query/allocation_spec.rb +1 -1
- data/spec/lib/query/token_spec.rb +5 -7
- data/spec/lib/sources/base_spec.rb +53 -0
- data/spec/lib/sources/db_spec.rb +0 -7
- metadata +11 -12
- data/lib/picky/indexers/solr.rb +0 -56
- data/lib/picky/indexing/bundle/super_base.rb +0 -61
- data/lib/picky/solr/schema_generator.rb +0 -74
- data/lib/tasks/search.rake +0 -9
- data/lib/tasks/shortcuts.rake +0 -32
- data/lib/tasks/solr.rake +0 -36
data/lib/picky/category.rb
CHANGED
@@ -1,7 +1,6 @@
|
|
1
1
|
class Category
|
2
2
|
|
3
|
-
attr_reader :name
|
4
|
-
:index
|
3
|
+
attr_reader :name
|
5
4
|
|
6
5
|
# Mandatory params:
|
7
6
|
# * name: Category name to use as identifier and file names.
|
@@ -31,12 +30,12 @@ class Category
|
|
31
30
|
|
32
31
|
# TODO Push into Bundle. At least the weights.
|
33
32
|
#
|
34
|
-
partial = options[:partial] || Generators::Partial::Default
|
35
33
|
weights = options[:weights] || Generators::Weights::Default
|
34
|
+
partial = options[:partial] || Generators::Partial::Default
|
36
35
|
similarity = options[:similarity] || Generators::Similarity::Default
|
37
36
|
|
38
|
-
@indexing_exact = index.indexing_bundle_class.new(:exact, self,
|
39
|
-
@indexing_partial = index.indexing_bundle_class.new(:partial, self, Generators::Similarity::None.new
|
37
|
+
@indexing_exact = index.indexing_bundle_class.new(:exact, self, weights, Generators::Partial::None.new, similarity)
|
38
|
+
@indexing_partial = index.indexing_bundle_class.new(:partial, self, weights, partial, Generators::Similarity::None.new)
|
40
39
|
|
41
40
|
# Indexed.
|
42
41
|
#
|
@@ -55,6 +54,12 @@ class Category
|
|
55
54
|
Query::Qualifiers.add(name, generate_qualifiers_from(options) || [name])
|
56
55
|
end
|
57
56
|
|
57
|
+
# TODO Move to Index.
|
58
|
+
#
|
59
|
+
def generate_qualifiers_from options
|
60
|
+
options[:qualifiers] || options[:qualifier] && [options[:qualifier]]
|
61
|
+
end
|
62
|
+
|
58
63
|
# Indexes and reloads the category.
|
59
64
|
#
|
60
65
|
def reindex
|
@@ -74,10 +79,10 @@ class Category
|
|
74
79
|
@index.name
|
75
80
|
end
|
76
81
|
|
77
|
-
#
|
82
|
+
# The category itself just yields itself.
|
78
83
|
#
|
79
|
-
def
|
80
|
-
|
84
|
+
def each_category
|
85
|
+
yield self
|
81
86
|
end
|
82
87
|
|
83
88
|
# Path and partial filename of the prepared index on this category.
|
@@ -85,9 +90,20 @@ class Category
|
|
85
90
|
def prepared_index_path
|
86
91
|
@prepared_index_path ||= "#{index_directory}/prepared_#{name}_index"
|
87
92
|
end
|
93
|
+
# Get an opened index file.
|
94
|
+
#
|
95
|
+
# Note: If you don't use it with the block, do not forget to close it.
|
96
|
+
#
|
88
97
|
def prepared_index_file &block
|
89
98
|
@prepared_index_file ||= Backend::File::Text.new prepared_index_path
|
90
|
-
@prepared_index_file.
|
99
|
+
@prepared_index_file.open &block
|
100
|
+
end
|
101
|
+
# Creates the index directory including all necessary paths above it.
|
102
|
+
#
|
103
|
+
# Note: Interface method called by any indexers.
|
104
|
+
#
|
105
|
+
def prepare_index_directory
|
106
|
+
FileUtils.mkdir_p index_directory
|
91
107
|
end
|
92
108
|
|
93
109
|
# The index directory for this category.
|
@@ -96,30 +112,26 @@ class Category
|
|
96
112
|
@index_directory ||= "#{PICKY_ROOT}/index/#{PICKY_ENVIRONMENT}/#{@index.name}"
|
97
113
|
end
|
98
114
|
|
99
|
-
#
|
115
|
+
# Path and partial filename of a specific subindex on this category.
|
100
116
|
#
|
101
|
-
|
102
|
-
|
117
|
+
# Subindexes are:
|
118
|
+
# * inverted index
|
119
|
+
# * weights index
|
120
|
+
# * partial index
|
121
|
+
# * similarity index
|
122
|
+
#
|
123
|
+
def index_path bundle_name, type
|
124
|
+
"#{index_directory}/#{name}_#{bundle_name}_#{type}"
|
103
125
|
end
|
104
126
|
|
105
|
-
# Identifier for
|
106
|
-
#
|
107
|
-
# TODO What internal use?
|
127
|
+
# Identifier for technical output.
|
108
128
|
#
|
109
129
|
def identifier
|
110
|
-
@identifier ||= "#{
|
111
|
-
end
|
112
|
-
|
113
|
-
def to_info
|
114
|
-
<<-CATEGORY
|
115
|
-
Category(#{name}):
|
116
|
-
Exact:
|
117
|
-
#{exact.indented_to_s(4)}
|
118
|
-
Partial:
|
119
|
-
#{partial.indented_to_s(4)}
|
120
|
-
CATEGORY
|
130
|
+
@identifier ||= "#{PICKY_ENVIRONMENT}:#{index_name}:#{name}"
|
121
131
|
end
|
122
132
|
|
133
|
+
#
|
134
|
+
#
|
123
135
|
def to_s
|
124
136
|
"Category(#{name})"
|
125
137
|
end
|
@@ -4,12 +4,6 @@ class Category
|
|
4
4
|
|
5
5
|
attr_reader :indexed_exact
|
6
6
|
|
7
|
-
# TODO Move to Index.
|
8
|
-
#
|
9
|
-
def generate_qualifiers_from options
|
10
|
-
options[:qualifiers] || options[:qualifier] && [options[:qualifier]]
|
11
|
-
end
|
12
|
-
|
13
7
|
# Loads the index from cache.
|
14
8
|
#
|
15
9
|
def load_from_cache
|
@@ -19,18 +13,6 @@ class Category
|
|
19
13
|
end
|
20
14
|
alias reload load_from_cache
|
21
15
|
|
22
|
-
# Loads, analyzes, and clears the index.
|
23
|
-
#
|
24
|
-
# Note: The idea is not to run this while the search engine is running.
|
25
|
-
#
|
26
|
-
def analyze collector
|
27
|
-
collector[identifier] = {
|
28
|
-
:exact => Analyzer.new.analyze(indexed_exact),
|
29
|
-
:partial => Analyzer.new.analyze(indexed_partial)
|
30
|
-
}
|
31
|
-
collector
|
32
|
-
end
|
33
|
-
|
34
16
|
# Gets the weight for this token's text.
|
35
17
|
#
|
36
18
|
def weight token
|
@@ -49,13 +31,15 @@ class Category
|
|
49
31
|
token.partial? ? indexed_partial : indexed_exact
|
50
32
|
end
|
51
33
|
|
52
|
-
# The partial strategy defines whether to
|
34
|
+
# The partial strategy defines whether to
|
35
|
+
# really use the partial index.
|
53
36
|
#
|
54
37
|
def indexed_partial
|
55
38
|
@partial_strategy.use_exact_for_partial? ? @indexed_exact : @indexed_partial
|
56
39
|
end
|
57
40
|
|
58
|
-
#
|
41
|
+
# Returns a combination for the token,
|
42
|
+
# or nil, if there is none.
|
59
43
|
#
|
60
44
|
def combination_for token
|
61
45
|
weight(token) && Query::Combination.new(token, self)
|
@@ -14,8 +14,56 @@ class Category
|
|
14
14
|
cache
|
15
15
|
end
|
16
16
|
|
17
|
+
# Indexes, creates the "prepared_..." file.
|
18
|
+
#
|
19
|
+
def prepare
|
20
|
+
with_data_snapshot do
|
21
|
+
indexer.index [self]
|
22
|
+
end
|
23
|
+
end
|
24
|
+
|
25
|
+
# Take a data snapshot if the source offers it.
|
26
|
+
#
|
27
|
+
def with_data_snapshot
|
28
|
+
if source.respond_to? :with_snapshot
|
29
|
+
source.with_snapshot(@index) do
|
30
|
+
yield
|
31
|
+
end
|
32
|
+
else
|
33
|
+
yield
|
34
|
+
end
|
35
|
+
end
|
36
|
+
|
37
|
+
# Generates all caches for this category.
|
38
|
+
#
|
39
|
+
def cache
|
40
|
+
configure
|
41
|
+
generate_caches_from_source
|
42
|
+
generate_partial
|
43
|
+
generate_caches_from_memory
|
44
|
+
dump_caches
|
45
|
+
timed_exclaim %Q{"#{identifier}": Caching finished.}
|
46
|
+
end
|
47
|
+
# Generate the cache data.
|
48
|
+
#
|
49
|
+
def generate_caches_from_source
|
50
|
+
indexing_exact.generate_caches_from_source
|
51
|
+
end
|
52
|
+
def generate_partial
|
53
|
+
indexing_partial.generate_partial_from indexing_exact.inverted
|
54
|
+
end
|
55
|
+
def generate_caches_from_memory
|
56
|
+
indexing_partial.generate_caches_from_memory
|
57
|
+
end
|
58
|
+
def dump_caches
|
59
|
+
indexing_exact.dump
|
60
|
+
indexing_partial.dump
|
61
|
+
end
|
62
|
+
|
17
63
|
# Return an appropriate source.
|
18
64
|
#
|
65
|
+
# If we have no explicit source, we'll check the index for one.
|
66
|
+
#
|
19
67
|
def source
|
20
68
|
@source || @index.source
|
21
69
|
end
|
@@ -39,45 +87,33 @@ class Category
|
|
39
87
|
|
40
88
|
# The indexer is lazily generated and cached.
|
41
89
|
#
|
90
|
+
# TODO Really cache?
|
91
|
+
#
|
42
92
|
def indexer
|
43
93
|
@indexer ||= source.respond_to?(:each) ? Indexers::Parallel.new(self) : Indexers::Serial.new(self)
|
44
94
|
end
|
45
95
|
|
46
|
-
# TODO This is a hack to get the parallel indexer working.
|
47
|
-
#
|
48
|
-
def categories
|
49
|
-
[self]
|
50
|
-
end
|
51
|
-
|
52
96
|
# Returns an appropriate tokenizer.
|
53
97
|
# If one isn't set on this category, will try the index,
|
54
98
|
# and finally the default index tokenizer.
|
55
99
|
#
|
56
100
|
def tokenizer
|
57
|
-
@tokenizer || @index.tokenizer
|
101
|
+
@tokenizer || @index.tokenizer
|
58
102
|
end
|
59
103
|
|
60
|
-
#
|
61
|
-
#
|
104
|
+
# We need to set what formatting method should be used.
|
105
|
+
# Uses the one defined in the indexer.
|
62
106
|
#
|
63
|
-
|
64
|
-
timed_exclaim "Backing up #{identifier}."
|
65
|
-
indexing_exact.backup
|
66
|
-
indexing_partial.backup
|
67
|
-
end
|
68
|
-
|
69
|
-
# Restore the caches.
|
70
|
-
# (Revert with backup_caches)
|
107
|
+
# TODO Make this more dynamic.
|
71
108
|
#
|
72
|
-
def
|
73
|
-
|
74
|
-
|
75
|
-
indexing_partial.restore
|
109
|
+
def configure
|
110
|
+
indexing_exact[:key_format] = self.key_format
|
111
|
+
indexing_partial[:key_format] = self.key_format
|
76
112
|
end
|
77
113
|
|
78
114
|
# Checks the caches for existence.
|
79
115
|
#
|
80
|
-
def
|
116
|
+
def check
|
81
117
|
timed_exclaim "Checking #{identifier}."
|
82
118
|
indexing_exact.raise_unless_cache_exists
|
83
119
|
indexing_partial.raise_unless_cache_exists
|
@@ -85,61 +121,28 @@ class Category
|
|
85
121
|
|
86
122
|
# Deletes the caches.
|
87
123
|
#
|
88
|
-
def
|
124
|
+
def clear
|
89
125
|
timed_exclaim "Deleting #{identifier}."
|
90
126
|
indexing_exact.delete
|
91
127
|
indexing_partial.delete
|
92
128
|
end
|
93
129
|
|
94
|
-
#
|
95
|
-
#
|
96
|
-
#
|
97
|
-
# TODO Make this more dynamic.
|
98
|
-
#
|
99
|
-
def configure
|
100
|
-
indexing_exact[:key_format] = self.key_format
|
101
|
-
indexing_partial[:key_format] = self.key_format
|
102
|
-
end
|
103
|
-
|
104
|
-
# Indexes, creates the "prepared_..." file.
|
105
|
-
#
|
106
|
-
# TODO This step could already prepare the id (if a
|
107
|
-
# per category key_format is not really needed).
|
108
|
-
#
|
109
|
-
def prepare
|
110
|
-
prepare_index_directory
|
111
|
-
indexer.index
|
112
|
-
end
|
113
|
-
|
114
|
-
# Generates all caches for this category.
|
130
|
+
# Backup the caches.
|
131
|
+
# (Revert with restore_caches)
|
115
132
|
#
|
116
|
-
def
|
117
|
-
|
118
|
-
|
133
|
+
def backup
|
134
|
+
timed_exclaim "Backing up #{identifier}."
|
135
|
+
indexing_exact.backup
|
136
|
+
indexing_partial.backup
|
119
137
|
end
|
120
138
|
|
121
|
-
#
|
139
|
+
# Restore the caches.
|
140
|
+
# (Revert with backup_caches)
|
122
141
|
#
|
123
|
-
def
|
124
|
-
|
125
|
-
|
126
|
-
|
127
|
-
generate_caches_from_memory
|
128
|
-
dump_caches
|
129
|
-
timed_exclaim %Q{"#{identifier}": Caching finished.}
|
130
|
-
end
|
131
|
-
def generate_caches_from_source
|
132
|
-
indexing_exact.generate_caches_from_source
|
133
|
-
end
|
134
|
-
def generate_partial
|
135
|
-
indexing_partial.generate_partial_from indexing_exact.index
|
136
|
-
end
|
137
|
-
def generate_caches_from_memory
|
138
|
-
indexing_partial.generate_caches_from_memory
|
139
|
-
end
|
140
|
-
def dump_caches
|
141
|
-
indexing_exact.dump
|
142
|
-
indexing_partial.dump
|
142
|
+
def restore
|
143
|
+
timed_exclaim "Restoring #{identifier}."
|
144
|
+
indexing_exact.restore
|
145
|
+
indexing_partial.restore
|
143
146
|
end
|
144
147
|
|
145
148
|
end
|
@@ -3,13 +3,13 @@ module Generators # :nodoc:all
|
|
3
3
|
# A cache generator holds an index.
|
4
4
|
#
|
5
5
|
class Base
|
6
|
-
|
7
|
-
attr_reader :
|
8
|
-
|
9
|
-
def initialize
|
10
|
-
@
|
6
|
+
|
7
|
+
attr_reader :inverted
|
8
|
+
|
9
|
+
def initialize inverted
|
10
|
+
@inverted = inverted
|
11
11
|
end
|
12
|
-
|
12
|
+
|
13
13
|
end
|
14
14
|
|
15
15
|
end
|
@@ -1,16 +1,16 @@
|
|
1
1
|
module Generators
|
2
2
|
|
3
3
|
module Partial
|
4
|
-
|
4
|
+
|
5
5
|
# Generates the right substrings for use in the substring strategy.
|
6
6
|
#
|
7
7
|
class SubstringGenerator
|
8
|
-
|
8
|
+
|
9
9
|
attr_reader :from, :to
|
10
|
-
|
10
|
+
|
11
11
|
def initialize from, to
|
12
12
|
@from, @to = from, to
|
13
|
-
|
13
|
+
|
14
14
|
if @to.zero?
|
15
15
|
def each_subtoken token, &block
|
16
16
|
token.each_subtoken @from, &block
|
@@ -20,11 +20,11 @@ module Generators
|
|
20
20
|
token[0..@to].intern.each_subtoken @from, &block
|
21
21
|
end
|
22
22
|
end
|
23
|
-
|
23
|
+
|
24
24
|
end
|
25
|
-
|
25
|
+
|
26
26
|
end
|
27
|
-
|
27
|
+
|
28
28
|
# The subtoken partial strategy.
|
29
29
|
#
|
30
30
|
# If given "florian"
|
@@ -32,7 +32,7 @@ module Generators
|
|
32
32
|
# (Depending on what the given from value is, the example is with option from: 1)
|
33
33
|
#
|
34
34
|
class Substring < Strategy
|
35
|
-
|
35
|
+
|
36
36
|
# The from option signifies where in the symbol it
|
37
37
|
# will start in generating the subtokens.
|
38
38
|
#
|
@@ -51,48 +51,50 @@ module Generators
|
|
51
51
|
to = options[:to] || -1
|
52
52
|
@generator = SubstringGenerator.new from, to
|
53
53
|
end
|
54
|
-
|
54
|
+
|
55
55
|
# Delegator to generator#from.
|
56
56
|
#
|
57
57
|
def from
|
58
58
|
@generator.from
|
59
59
|
end
|
60
|
-
|
60
|
+
|
61
61
|
# Delegator to generator#to.
|
62
62
|
#
|
63
63
|
def to
|
64
64
|
@generator.to
|
65
65
|
end
|
66
|
-
|
67
|
-
# Generates a partial index from the given index.
|
66
|
+
|
67
|
+
# Generates a partial index from the given inverted index.
|
68
68
|
#
|
69
|
-
def generate_from
|
69
|
+
def generate_from inverted
|
70
70
|
result = {}
|
71
|
-
|
71
|
+
|
72
72
|
# Generate for each key token the subtokens.
|
73
73
|
#
|
74
74
|
i = 0
|
75
|
-
|
75
|
+
j = 0
|
76
|
+
inverted.each_key do |token|
|
76
77
|
i += 1
|
77
78
|
if i == 5000
|
78
|
-
|
79
|
+
j += 1
|
80
|
+
timed_exclaim %Q{#{"%8i" % (i*j)} generated (current token: "#{token}").}
|
79
81
|
i = 0
|
80
82
|
end
|
81
|
-
generate_for token,
|
83
|
+
generate_for token, inverted, result
|
82
84
|
end
|
83
|
-
|
85
|
+
|
84
86
|
# Remove duplicate ids.
|
85
87
|
#
|
86
88
|
# THINK If it is unique for a subtoken, it is
|
87
89
|
# unique for all derived longer tokens.
|
88
90
|
#
|
89
91
|
result.each_value &:uniq!
|
90
|
-
|
92
|
+
|
91
93
|
result
|
92
94
|
end
|
93
|
-
|
95
|
+
|
94
96
|
private
|
95
|
-
|
97
|
+
|
96
98
|
# To each shortened token of :test
|
97
99
|
# :test, :tes, :te, :t
|
98
100
|
# add all ids of :test
|
@@ -101,18 +103,18 @@ module Generators
|
|
101
103
|
#
|
102
104
|
# THINK Could be improved by appending the aforegoing ids?
|
103
105
|
#
|
104
|
-
def generate_for token,
|
106
|
+
def generate_for token, inverted, result
|
105
107
|
@generator.each_subtoken(token) do |subtoken|
|
106
108
|
if result[subtoken]
|
107
|
-
result[subtoken] +=
|
109
|
+
result[subtoken] += inverted[token] # unique
|
108
110
|
else
|
109
|
-
result[subtoken] =
|
111
|
+
result[subtoken] = inverted[token].dup
|
110
112
|
end
|
111
113
|
end
|
112
114
|
end
|
113
|
-
|
115
|
+
|
114
116
|
end
|
115
|
-
|
117
|
+
|
116
118
|
end
|
117
119
|
|
118
120
|
end
|
@@ -3,11 +3,11 @@ module Generators
|
|
3
3
|
# The partial generator uses a subtoken(downto:1) generator as default.
|
4
4
|
#
|
5
5
|
class PartialGenerator < Base
|
6
|
-
|
7
|
-
# Generate a partial index based on the given index.
|
6
|
+
|
7
|
+
# Generate a partial index based on the given inverted index.
|
8
8
|
#
|
9
9
|
def generate strategy = Partial::Substring.new(from: 1)
|
10
|
-
strategy.generate_from self.
|
10
|
+
strategy.generate_from self.inverted
|
11
11
|
end
|
12
12
|
|
13
13
|
end
|
@@ -26,8 +26,8 @@ module Generators
|
|
26
26
|
# In the following form:
|
27
27
|
# [:meier, :mueller, :peter, :pater] => { MR: [:meier], MLR: [:mueller], PTR: [:peter, :pater] }
|
28
28
|
#
|
29
|
-
def generate_from
|
30
|
-
hash = hashify
|
29
|
+
def generate_from inverted
|
30
|
+
hash = hashify inverted.keys
|
31
31
|
sort hash
|
32
32
|
end
|
33
33
|
|
@@ -35,12 +35,12 @@ module Generators
|
|
35
35
|
|
36
36
|
# Sorts the index values in place.
|
37
37
|
#
|
38
|
-
def sort
|
39
|
-
|
38
|
+
def sort hash
|
39
|
+
hash.each_pair.each do |code, ary|
|
40
40
|
ary.sort_by_levenshtein! code
|
41
41
|
ary.slice! amount, ary.size # size is not perfectly correct, but anyway
|
42
42
|
end
|
43
|
-
|
43
|
+
hash
|
44
44
|
end
|
45
45
|
|
46
46
|
# Hashifies a list of symbols.
|
@@ -4,10 +4,10 @@ module Generators
|
|
4
4
|
#
|
5
5
|
class SimilarityGenerator < Base
|
6
6
|
|
7
|
-
# Generate a similarity index based on the given index.
|
7
|
+
# Generate a similarity index based on the given inverted index.
|
8
8
|
#
|
9
9
|
def generate strategy = Similarity::None.new
|
10
|
-
strategy.generate_from self.
|
10
|
+
strategy.generate_from self.inverted
|
11
11
|
end
|
12
12
|
|
13
13
|
end
|
@@ -9,10 +9,10 @@ module Generators
|
|
9
9
|
#
|
10
10
|
class Logarithmic < Strategy
|
11
11
|
|
12
|
-
# Generates a partial index from the given index.
|
12
|
+
# Generates a partial index from the given inverted index.
|
13
13
|
#
|
14
|
-
def generate_from
|
15
|
-
|
14
|
+
def generate_from inverted
|
15
|
+
inverted.inject({}) do |hash, text_ids|
|
16
16
|
text, ids = *text_ids
|
17
17
|
weight = weight_for ids.size
|
18
18
|
hash[text] ||= weight.round(2) if weight
|
@@ -4,10 +4,10 @@ module Generators
|
|
4
4
|
#
|
5
5
|
class WeightsGenerator < Base
|
6
6
|
|
7
|
-
# Generate a weights index based on the given index.
|
7
|
+
# Generate a weights index based on the given inverted index.
|
8
8
|
#
|
9
9
|
def generate strategy = Weights::Logarithmic.new
|
10
|
-
strategy.generate_from self.
|
10
|
+
strategy.generate_from self.inverted
|
11
11
|
end
|
12
12
|
|
13
13
|
end
|
data/lib/picky/index/base.rb
CHANGED
@@ -89,6 +89,7 @@ module Index
|
|
89
89
|
:categories
|
90
90
|
|
91
91
|
delegate :[],
|
92
|
+
:each_category,
|
92
93
|
:to => :categories
|
93
94
|
|
94
95
|
# Create a new index with a given source.
|
@@ -381,16 +382,6 @@ SOURCE
|
|
381
382
|
) unless source.respond_to?(:each) || source.respond_to?(:harvest)
|
382
383
|
end
|
383
384
|
|
384
|
-
def method_name
|
385
|
-
|
386
|
-
end
|
387
|
-
|
388
|
-
#
|
389
|
-
#
|
390
|
-
def to_s
|
391
|
-
"#{self.class}(#{name}, result_id: #{result_identifier}, source: #{source}, categories: #{categories})"
|
392
|
-
end
|
393
|
-
|
394
385
|
def to_stats # :nodoc:
|
395
386
|
stats = <<-INDEX
|
396
387
|
#{name} (#{self.class}):
|
@@ -401,6 +392,18 @@ INDEX
|
|
401
392
|
stats
|
402
393
|
end
|
403
394
|
|
395
|
+
# Identifier used for technical output.
|
396
|
+
#
|
397
|
+
def identifier
|
398
|
+
"#{PICKY_ENVIRONMENT}:#{name}"
|
399
|
+
end
|
400
|
+
|
401
|
+
#
|
402
|
+
#
|
403
|
+
def to_s
|
404
|
+
"#{self.class}(#{name}, result_id: #{result_identifier}, source: #{source}, categories: #{categories})"
|
405
|
+
end
|
406
|
+
|
404
407
|
end
|
405
408
|
|
406
409
|
end
|