picky 2.5.2 → 2.6.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/lib/picky/adapters/rack/base.rb +23 -0
- data/lib/picky/adapters/rack/live_parameters.rb +33 -0
- data/lib/picky/adapters/rack/query.rb +65 -0
- data/lib/picky/adapters/rack.rb +30 -0
- data/lib/picky/application.rb +5 -5
- data/lib/picky/backend/backend.rb +108 -0
- data/lib/picky/backend/file/basic.rb +101 -0
- data/lib/picky/backend/file/json.rb +34 -0
- data/lib/picky/backend/file/marshal.rb +34 -0
- data/lib/picky/backend/file/text.rb +56 -0
- data/lib/picky/backend/files.rb +30 -0
- data/lib/picky/backend/redis/basic.rb +85 -0
- data/lib/picky/backend/redis/list_hash.rb +49 -0
- data/lib/picky/backend/redis/string_hash.rb +40 -0
- data/lib/picky/backend/redis.rb +40 -0
- data/lib/picky/calculations/location.rb +57 -0
- data/lib/picky/categories.rb +62 -0
- data/lib/picky/categories_indexed.rb +93 -0
- data/lib/picky/categories_indexing.rb +12 -0
- data/lib/picky/category.rb +127 -0
- data/lib/picky/category_indexed.rb +64 -0
- data/lib/picky/category_indexing.rb +145 -0
- data/lib/picky/{internals/ext → ext}/maybe_compile.rb +0 -0
- data/lib/picky/{internals/ext → ext}/ruby19/extconf.rb +0 -0
- data/lib/picky/{internals/ext → ext}/ruby19/performant.c +0 -0
- data/lib/picky/{internals/extensions → extensions}/array.rb +0 -0
- data/lib/picky/extensions/class.rb +11 -0
- data/lib/picky/{internals/extensions → extensions}/hash.rb +0 -0
- data/lib/picky/{internals/extensions → extensions}/module.rb +0 -0
- data/lib/picky/{internals/extensions → extensions}/object.rb +0 -0
- data/lib/picky/{internals/extensions → extensions}/symbol.rb +0 -0
- data/lib/picky/frontend_adapters/rack.rb +146 -0
- data/lib/picky/generators/aliases.rb +3 -3
- data/lib/picky/generators/base.rb +15 -0
- data/lib/picky/generators/partial/default.rb +5 -0
- data/lib/picky/generators/partial/none.rb +31 -0
- data/lib/picky/generators/partial/strategy.rb +25 -0
- data/lib/picky/generators/partial/substring.rb +118 -0
- data/lib/picky/generators/partial_generator.rb +15 -0
- data/lib/picky/generators/similarity/default.rb +7 -0
- data/lib/picky/generators/similarity/double_metaphone.rb +28 -0
- data/lib/picky/generators/similarity/metaphone.rb +28 -0
- data/lib/picky/generators/similarity/none.rb +31 -0
- data/lib/picky/generators/similarity/phonetic.rb +65 -0
- data/lib/picky/generators/similarity/soundex.rb +28 -0
- data/lib/picky/generators/similarity/strategy.rb +9 -0
- data/lib/picky/generators/similarity_generator.rb +15 -0
- data/lib/picky/generators/strategy.rb +14 -0
- data/lib/picky/generators/weights/default.rb +7 -0
- data/lib/picky/generators/weights/logarithmic.rb +39 -0
- data/lib/picky/generators/weights/strategy.rb +9 -0
- data/lib/picky/generators/weights_generator.rb +15 -0
- data/lib/picky/{internals/helpers → helpers}/measuring.rb +0 -0
- data/lib/picky/index/base.rb +119 -104
- data/lib/picky/index/base_indexed.rb +27 -0
- data/lib/picky/index/base_indexing.rb +119 -0
- data/lib/picky/index/memory.rb +6 -18
- data/lib/picky/index/redis.rb +6 -18
- data/lib/picky/indexed/bundle/base.rb +110 -0
- data/lib/picky/indexed/bundle/memory.rb +91 -0
- data/lib/picky/indexed/bundle/redis.rb +45 -0
- data/lib/picky/indexed/wrappers/bundle/calculation.rb +35 -0
- data/lib/picky/indexed/wrappers/bundle/location.rb +42 -0
- data/lib/picky/indexed/wrappers/bundle/wrapper.rb +43 -0
- data/lib/picky/indexed/wrappers/category/location.rb +25 -0
- data/lib/picky/indexed/wrappers/exact_first.rb +55 -0
- data/lib/picky/{internals/indexers → indexers}/base.rb +0 -0
- data/lib/picky/{internals/indexers → indexers}/parallel.rb +0 -0
- data/lib/picky/{internals/indexers → indexers}/serial.rb +0 -0
- data/lib/picky/{internals/indexers → indexers}/solr.rb +0 -0
- data/lib/picky/indexes.rb +73 -0
- data/lib/picky/indexes_indexed.rb +29 -0
- data/lib/picky/indexes_indexing.rb +49 -0
- data/lib/picky/indexing/bundle/base.rb +212 -0
- data/lib/picky/indexing/bundle/memory.rb +25 -0
- data/lib/picky/indexing/bundle/redis.rb +24 -0
- data/lib/picky/indexing/bundle/super_base.rb +61 -0
- data/lib/picky/indexing/wrappers/category/location.rb +25 -0
- data/lib/picky/interfaces/live_parameters.rb +8 -8
- data/lib/picky/loader.rb +89 -95
- data/lib/picky/{internals/performant.rb → performant.rb} +0 -0
- data/lib/picky/query/allocation.rb +84 -0
- data/lib/picky/query/allocations.rb +114 -0
- data/lib/picky/query/combination.rb +76 -0
- data/lib/picky/query/combinations/base.rb +70 -0
- data/lib/picky/query/combinations/memory.rb +48 -0
- data/lib/picky/query/combinations/redis.rb +86 -0
- data/lib/picky/query/indexes.rb +195 -0
- data/lib/picky/query/qualifiers.rb +76 -0
- data/lib/picky/query/token.rb +198 -0
- data/lib/picky/query/tokens.rb +103 -0
- data/lib/picky/{internals/query → query}/weights.rb +0 -0
- data/lib/picky/results.rb +1 -1
- data/lib/picky/search.rb +6 -6
- data/lib/picky/{internals/solr → solr}/schema_generator.rb +0 -0
- data/lib/picky/sources/db.rb +7 -7
- data/lib/picky/sources/wrappers/location.rb +2 -2
- data/lib/picky/tokenizers/base.rb +224 -0
- data/lib/picky/tokenizers/index.rb +30 -0
- data/lib/picky/tokenizers/location.rb +49 -0
- data/lib/picky/tokenizers/query.rb +55 -0
- data/lib/tasks/index.rake +4 -3
- data/lib/tasks/try.rake +2 -2
- data/spec/lib/{internals/adapters → adapters}/rack/base_spec.rb +1 -1
- data/spec/lib/{internals/adapters → adapters}/rack/live_parameters_spec.rb +1 -1
- data/spec/lib/{internals/adapters → adapters}/rack/query_spec.rb +1 -1
- data/spec/lib/application_spec.rb +3 -3
- data/spec/lib/{internals/index → backend}/file/basic_spec.rb +1 -1
- data/spec/lib/{internals/index → backend}/file/json_spec.rb +1 -1
- data/spec/lib/{internals/index → backend}/file/marshal_spec.rb +1 -1
- data/spec/lib/{internals/index → backend}/file/text_spec.rb +1 -1
- data/spec/lib/{internals/index → backend}/files_spec.rb +3 -3
- data/spec/lib/{internals/index → backend}/redis/basic_spec.rb +1 -1
- data/spec/lib/{internals/index → backend}/redis/list_hash_spec.rb +1 -1
- data/spec/lib/{internals/index → backend}/redis/string_hash_spec.rb +1 -1
- data/spec/lib/{internals/index → backend}/redis_spec.rb +11 -5
- data/spec/lib/{internals/calculations → calculations}/location_spec.rb +1 -1
- data/spec/lib/{internals/indexed/categories_spec.rb → categories_indexed_spec.rb} +10 -10
- data/spec/lib/{internals/indexed/category_spec.rb → category_indexed_spec.rb} +12 -12
- data/spec/lib/{internals/indexing/category_spec.rb → category_indexing_spec.rb} +10 -10
- data/spec/lib/{internals/cores_spec.rb → cores_spec.rb} +0 -0
- data/spec/lib/{internals/extensions → extensions}/array_spec.rb +0 -0
- data/spec/lib/{internals/extensions → extensions}/hash_spec.rb +0 -0
- data/spec/lib/{internals/extensions → extensions}/module_spec.rb +0 -0
- data/spec/lib/{internals/extensions → extensions}/object_spec.rb +0 -0
- data/spec/lib/{internals/extensions → extensions}/symbol_spec.rb +0 -0
- data/spec/lib/{internals/frontend_adapters → frontend_adapters}/rack_spec.rb +10 -10
- data/spec/lib/generators/aliases_spec.rb +3 -3
- data/spec/lib/{internals/generators → generators}/cacher_strategy_spec.rb +1 -1
- data/spec/lib/{internals/generators → generators}/partial/default_spec.rb +3 -3
- data/spec/lib/{internals/generators → generators}/partial/none_spec.rb +2 -2
- data/spec/lib/{internals/generators → generators}/partial/substring_spec.rb +1 -1
- data/spec/lib/{internals/generators → generators}/partial_generator_spec.rb +3 -3
- data/spec/lib/{internals/generators → generators}/similarity/double_metaphone_spec.rb +1 -1
- data/spec/lib/{internals/generators → generators}/similarity/metaphone_spec.rb +1 -1
- data/spec/lib/{internals/generators → generators}/similarity/none_spec.rb +1 -1
- data/spec/lib/{internals/generators → generators}/similarity/phonetic_spec.rb +1 -1
- data/spec/lib/{internals/generators → generators}/similarity/soundex_spec.rb +1 -1
- data/spec/lib/{internals/generators → generators}/similarity_generator_spec.rb +2 -2
- data/spec/lib/{internals/generators → generators}/weights/logarithmic_spec.rb +1 -1
- data/spec/lib/{internals/generators → generators}/weights_generator_spec.rb +5 -5
- data/spec/lib/{internals/helpers → helpers}/measuring_spec.rb +0 -0
- data/spec/lib/{internals/indexed/index_spec.rb → index/base_indexed_spec.rb} +5 -5
- data/spec/lib/{internals/indexing/index_spec.rb → index/base_indexing_spec.rb} +6 -19
- data/spec/lib/index/base_spec.rb +10 -53
- data/spec/lib/{internals/indexed → indexed}/bundle/memory_spec.rb +5 -5
- data/spec/lib/{internals/indexed → indexed}/bundle/redis_spec.rb +4 -4
- data/spec/lib/{internals/indexed → indexed}/wrappers/bundle/calculation_spec.rb +1 -1
- data/spec/lib/{internals/indexed → indexed}/wrappers/bundle/wrapper_spec.rb +1 -1
- data/spec/lib/{internals/indexed → indexed}/wrappers/exact_first_spec.rb +7 -7
- data/spec/lib/{internals/indexers → indexers}/base_spec.rb +0 -0
- data/spec/lib/{internals/indexers → indexers}/parallel_spec.rb +0 -0
- data/spec/lib/{internals/indexers → indexers}/serial_spec.rb +0 -0
- data/spec/lib/indexes_class_spec.rb +30 -0
- data/spec/lib/{indexed/indexes_spec.rb → indexes_indexed_spec.rb} +1 -1
- data/spec/lib/{indexing/indexes_spec.rb → indexes_indexing_spec.rb} +8 -8
- data/spec/lib/{internals/indexing/indexes_spec.rb → indexes_spec.rb} +15 -12
- data/spec/lib/{internals/indexing → indexing}/bundle/memory_partial_generation_speed_spec.rb +4 -4
- data/spec/lib/{internals/indexing → indexing}/bundle/memory_spec.rb +3 -3
- data/spec/lib/{internals/indexing → indexing}/bundle/redis_spec.rb +3 -3
- data/spec/lib/{internals/indexing → indexing}/bundle/super_base_spec.rb +2 -2
- data/spec/lib/{internals/interfaces → interfaces}/live_parameters_spec.rb +0 -0
- data/spec/lib/query/allocation_spec.rb +1 -1
- data/spec/lib/query/allocations_spec.rb +1 -1
- data/spec/lib/query/combination_spec.rb +5 -5
- data/spec/lib/query/combinations/base_spec.rb +1 -1
- data/spec/lib/query/combinations/memory_spec.rb +1 -1
- data/spec/lib/query/combinations/redis_spec.rb +1 -1
- data/spec/lib/query/indexes_spec.rb +1 -1
- data/spec/lib/query/qualifiers_spec.rb +4 -4
- data/spec/lib/query/token_spec.rb +3 -3
- data/spec/lib/query/tokens_spec.rb +32 -32
- data/spec/lib/search_spec.rb +5 -5
- data/spec/lib/{internals/solr → solr}/schema_generator_spec.rb +0 -0
- data/spec/lib/sources/db_spec.rb +4 -8
- data/spec/lib/sources/wrappers/location_spec.rb +1 -1
- data/spec/lib/{internals/tokenizers → tokenizers}/base_spec.rb +1 -1
- data/spec/lib/{internals/tokenizers → tokenizers}/index_spec.rb +1 -1
- data/spec/lib/{internals/tokenizers → tokenizers}/query_spec.rb +1 -1
- metadata +214 -215
- data/lib/picky/aliases.rb +0 -4
- data/lib/picky/index_bundle.rb +0 -48
- data/lib/picky/indexed/indexes.rb +0 -59
- data/lib/picky/indexing/indexes.rb +0 -87
- data/lib/picky/internals/adapters/rack/base.rb +0 -27
- data/lib/picky/internals/adapters/rack/live_parameters.rb +0 -37
- data/lib/picky/internals/adapters/rack/query.rb +0 -69
- data/lib/picky/internals/adapters/rack.rb +0 -34
- data/lib/picky/internals/calculations/location.rb +0 -59
- data/lib/picky/internals/frontend_adapters/rack.rb +0 -150
- data/lib/picky/internals/generators/base.rb +0 -19
- data/lib/picky/internals/generators/partial/default.rb +0 -7
- data/lib/picky/internals/generators/partial/none.rb +0 -35
- data/lib/picky/internals/generators/partial/strategy.rb +0 -29
- data/lib/picky/internals/generators/partial/substring.rb +0 -122
- data/lib/picky/internals/generators/partial_generator.rb +0 -19
- data/lib/picky/internals/generators/similarity/default.rb +0 -9
- data/lib/picky/internals/generators/similarity/double_metaphone.rb +0 -32
- data/lib/picky/internals/generators/similarity/metaphone.rb +0 -32
- data/lib/picky/internals/generators/similarity/none.rb +0 -35
- data/lib/picky/internals/generators/similarity/phonetic.rb +0 -69
- data/lib/picky/internals/generators/similarity/soundex.rb +0 -32
- data/lib/picky/internals/generators/similarity/strategy.rb +0 -11
- data/lib/picky/internals/generators/similarity_generator.rb +0 -19
- data/lib/picky/internals/generators/strategy.rb +0 -18
- data/lib/picky/internals/generators/weights/default.rb +0 -9
- data/lib/picky/internals/generators/weights/logarithmic.rb +0 -43
- data/lib/picky/internals/generators/weights/strategy.rb +0 -11
- data/lib/picky/internals/generators/weights_generator.rb +0 -19
- data/lib/picky/internals/index/backend.rb +0 -112
- data/lib/picky/internals/index/file/basic.rb +0 -105
- data/lib/picky/internals/index/file/json.rb +0 -38
- data/lib/picky/internals/index/file/marshal.rb +0 -38
- data/lib/picky/internals/index/file/text.rb +0 -60
- data/lib/picky/internals/index/files.rb +0 -34
- data/lib/picky/internals/index/redis/basic.rb +0 -89
- data/lib/picky/internals/index/redis/list_hash.rb +0 -53
- data/lib/picky/internals/index/redis/string_hash.rb +0 -44
- data/lib/picky/internals/index/redis.rb +0 -44
- data/lib/picky/internals/indexed/bundle/base.rb +0 -114
- data/lib/picky/internals/indexed/bundle/memory.rb +0 -95
- data/lib/picky/internals/indexed/bundle/redis.rb +0 -49
- data/lib/picky/internals/indexed/categories.rb +0 -140
- data/lib/picky/internals/indexed/category.rb +0 -111
- data/lib/picky/internals/indexed/index.rb +0 -63
- data/lib/picky/internals/indexed/wrappers/bundle/calculation.rb +0 -37
- data/lib/picky/internals/indexed/wrappers/bundle/location.rb +0 -44
- data/lib/picky/internals/indexed/wrappers/bundle/wrapper.rb +0 -45
- data/lib/picky/internals/indexed/wrappers/category/location.rb +0 -27
- data/lib/picky/internals/indexed/wrappers/exact_first.rb +0 -59
- data/lib/picky/internals/indexing/bundle/base.rb +0 -216
- data/lib/picky/internals/indexing/bundle/memory.rb +0 -29
- data/lib/picky/internals/indexing/bundle/redis.rb +0 -28
- data/lib/picky/internals/indexing/bundle/super_base.rb +0 -65
- data/lib/picky/internals/indexing/category.rb +0 -153
- data/lib/picky/internals/indexing/index.rb +0 -142
- data/lib/picky/internals/indexing/wrappers/category/location.rb +0 -27
- data/lib/picky/internals/query/allocation.rb +0 -88
- data/lib/picky/internals/query/allocations.rb +0 -118
- data/lib/picky/internals/query/combination.rb +0 -80
- data/lib/picky/internals/query/combinations/base.rb +0 -74
- data/lib/picky/internals/query/combinations/memory.rb +0 -52
- data/lib/picky/internals/query/combinations/redis.rb +0 -90
- data/lib/picky/internals/query/indexes.rb +0 -199
- data/lib/picky/internals/query/qualifiers.rb +0 -82
- data/lib/picky/internals/query/token.rb +0 -202
- data/lib/picky/internals/query/tokens.rb +0 -109
- data/lib/picky/internals/shared/category.rb +0 -52
- data/lib/picky/internals/tokenizers/base.rb +0 -228
- data/lib/picky/internals/tokenizers/index.rb +0 -34
- data/lib/picky/internals/tokenizers/location.rb +0 -54
- data/lib/picky/internals/tokenizers/query.rb +0 -59
- data/lib/picky/internals.rb +0 -2
- data/spec/lib/aliases_spec.rb +0 -9
- data/spec/lib/index_bundle_spec.rb +0 -69
|
@@ -1,109 +0,0 @@
|
|
|
1
|
-
# encoding: utf-8
|
|
2
|
-
#
|
|
3
|
-
module Internals
|
|
4
|
-
|
|
5
|
-
#
|
|
6
|
-
#
|
|
7
|
-
module Query
|
|
8
|
-
|
|
9
|
-
# This class primarily handles switching through similar token constellations.
|
|
10
|
-
#
|
|
11
|
-
class Tokens # :nodoc:all
|
|
12
|
-
|
|
13
|
-
# Basically delegates to its internal tokens array.
|
|
14
|
-
#
|
|
15
|
-
self.delegate *[Enumerable.instance_methods, :slice!, :[], :uniq!, :last, :reject!, :length, :size, :empty?, :each, :exit, { :to => :@tokens }].flatten
|
|
16
|
-
|
|
17
|
-
# Create a new Tokens object with the array of tokens passed in.
|
|
18
|
-
#
|
|
19
|
-
def initialize tokens = []
|
|
20
|
-
@tokens = tokens
|
|
21
|
-
end
|
|
22
|
-
|
|
23
|
-
# Creates a new Tokens object from a number of Strings.
|
|
24
|
-
#
|
|
25
|
-
# Options:
|
|
26
|
-
# * downcase: Whether to downcase the passed strings (default is true)
|
|
27
|
-
#
|
|
28
|
-
def self.processed words, downcase = true
|
|
29
|
-
new words.collect! { |word| Token.processed word, downcase }
|
|
30
|
-
end
|
|
31
|
-
|
|
32
|
-
# Tokenizes each token.
|
|
33
|
-
#
|
|
34
|
-
# Note: Passed tokenizer needs to offer #normalize(text).
|
|
35
|
-
#
|
|
36
|
-
def tokenize_with tokenizer
|
|
37
|
-
@tokens.each { |token| token.tokenize_with(tokenizer) }
|
|
38
|
-
end
|
|
39
|
-
|
|
40
|
-
# Generates an array in the form of
|
|
41
|
-
# [
|
|
42
|
-
# [combination], # of token 1
|
|
43
|
-
# [combination, combination, combination], # of token 2
|
|
44
|
-
# [combination, combination] # of token 3
|
|
45
|
-
# ]
|
|
46
|
-
#
|
|
47
|
-
def possible_combinations_in index
|
|
48
|
-
@tokens.inject([]) do |combinations, token|
|
|
49
|
-
possible_combinations = token.possible_combinations_in index
|
|
50
|
-
|
|
51
|
-
# TODO Could move the ignore_unassigned_tokens here!
|
|
52
|
-
#
|
|
53
|
-
# Note: Optimization for ignoring tokens that allocate to nothing and
|
|
54
|
-
# can be ignored.
|
|
55
|
-
# For example in a special search, where "florian" is not
|
|
56
|
-
# mapped to any category.
|
|
57
|
-
#
|
|
58
|
-
possible_combinations ? combinations << possible_combinations : combinations
|
|
59
|
-
end
|
|
60
|
-
end
|
|
61
|
-
|
|
62
|
-
# Makes the last of the tokens partial.
|
|
63
|
-
#
|
|
64
|
-
def partialize_last
|
|
65
|
-
@tokens.last.partial = true unless empty?
|
|
66
|
-
end
|
|
67
|
-
|
|
68
|
-
# Caps the tokens to the maximum.
|
|
69
|
-
#
|
|
70
|
-
def cap maximum
|
|
71
|
-
@tokens.slice!(maximum..-1) if cap?(maximum)
|
|
72
|
-
end
|
|
73
|
-
def cap? maximum
|
|
74
|
-
@tokens.size > maximum
|
|
75
|
-
end
|
|
76
|
-
|
|
77
|
-
# Rejects blank tokens.
|
|
78
|
-
#
|
|
79
|
-
def reject
|
|
80
|
-
@tokens.reject! &:blank?
|
|
81
|
-
end
|
|
82
|
-
|
|
83
|
-
# Returns a solr query.
|
|
84
|
-
#
|
|
85
|
-
def to_solr_query
|
|
86
|
-
@tokens.map(&:to_solr).join ' '
|
|
87
|
-
end
|
|
88
|
-
|
|
89
|
-
#
|
|
90
|
-
#
|
|
91
|
-
def originals
|
|
92
|
-
@tokens.map(&:original)
|
|
93
|
-
end
|
|
94
|
-
|
|
95
|
-
def == other
|
|
96
|
-
self.tokens == other.tokens
|
|
97
|
-
end
|
|
98
|
-
|
|
99
|
-
# Just join the token original texts.
|
|
100
|
-
#
|
|
101
|
-
def to_s
|
|
102
|
-
originals.join ' '
|
|
103
|
-
end
|
|
104
|
-
|
|
105
|
-
end
|
|
106
|
-
|
|
107
|
-
end
|
|
108
|
-
|
|
109
|
-
end
|
|
@@ -1,52 +0,0 @@
|
|
|
1
|
-
module Internals
|
|
2
|
-
module Shared
|
|
3
|
-
|
|
4
|
-
module Category
|
|
5
|
-
|
|
6
|
-
def index_name
|
|
7
|
-
index.name
|
|
8
|
-
end
|
|
9
|
-
def category_name
|
|
10
|
-
name
|
|
11
|
-
end
|
|
12
|
-
|
|
13
|
-
# Path and partial filename of a specific index on this category.
|
|
14
|
-
#
|
|
15
|
-
def index_path bundle_name, type
|
|
16
|
-
"#{index_directory}/#{name}_#{bundle_name}_#{type}"
|
|
17
|
-
end
|
|
18
|
-
|
|
19
|
-
#
|
|
20
|
-
#
|
|
21
|
-
def prepared_index_path
|
|
22
|
-
@prepared_index_path ||= "#{index_directory}/prepared_#{name}_index"
|
|
23
|
-
end
|
|
24
|
-
def prepared_index_file &block
|
|
25
|
-
@prepared_index_file ||= Internals::Index::File::Text.new prepared_index_path
|
|
26
|
-
@prepared_index_file.open_for_indexing &block
|
|
27
|
-
end
|
|
28
|
-
|
|
29
|
-
# Identifier for internal use.
|
|
30
|
-
#
|
|
31
|
-
def identifier
|
|
32
|
-
@identifier ||= "#{index.name}:#{name}"
|
|
33
|
-
end
|
|
34
|
-
def to_s
|
|
35
|
-
"#{index.name} #{name}"
|
|
36
|
-
end
|
|
37
|
-
|
|
38
|
-
# The index directory for this category.
|
|
39
|
-
#
|
|
40
|
-
def index_directory
|
|
41
|
-
@index_directory ||= "#{PICKY_ROOT}/index/#{PICKY_ENVIRONMENT}/#{index.name}"
|
|
42
|
-
end
|
|
43
|
-
# Creates the index directory including all necessary paths above it.
|
|
44
|
-
#
|
|
45
|
-
def prepare_index_directory
|
|
46
|
-
FileUtils.mkdir_p index_directory
|
|
47
|
-
end
|
|
48
|
-
|
|
49
|
-
end
|
|
50
|
-
|
|
51
|
-
end
|
|
52
|
-
end
|
|
@@ -1,228 +0,0 @@
|
|
|
1
|
-
module Internals
|
|
2
|
-
|
|
3
|
-
module Tokenizers # :nodoc:all
|
|
4
|
-
|
|
5
|
-
# Defines tokenizing processes used both in indexing and querying.
|
|
6
|
-
#
|
|
7
|
-
class Base
|
|
8
|
-
|
|
9
|
-
# TODO Move EMPTY_STRING top level.
|
|
10
|
-
#
|
|
11
|
-
EMPTY_STRING = ''.freeze
|
|
12
|
-
|
|
13
|
-
def to_s
|
|
14
|
-
reject_condition_location = @reject_condition.to_s[/:(\d+) \(lambda\)/, 1]
|
|
15
|
-
<<-TOKENIZER
|
|
16
|
-
Removes characters: #{@removes_characters_regexp ? "/#{@removes_characters_regexp.source}/" : '-'}
|
|
17
|
-
Stopwords: #{@remove_stopwords_regexp ? "/#{@remove_stopwords_regexp.source}/" : '-'}
|
|
18
|
-
Splits text on: #{@splits_text_on.respond_to?(:source) ? "/#{@splits_text_on.source}/" : (@splits_text_on ? @splits_text_on : '-')}
|
|
19
|
-
Removes chars after split: #{@removes_characters_after_splitting_regexp ? "/#{@removes_characters_after_splitting_regexp.source}/" : '-'}
|
|
20
|
-
Normalizes words: #{@normalizes_words_regexp_replaces ? @normalizes_words_regexp_replaces : '-'}
|
|
21
|
-
Rejects tokens? #{reject_condition_location ? "Yes, see line #{reject_condition_location} in app/application.rb" : '-'}
|
|
22
|
-
Substitutes chars? #{@substituter ? "Yes, using #{@substituter}." : '-' }
|
|
23
|
-
Case sensitive? #{@case_sensitive ? "Yes." : "-"}
|
|
24
|
-
TOKENIZER
|
|
25
|
-
end
|
|
26
|
-
|
|
27
|
-
# Stopwords.
|
|
28
|
-
#
|
|
29
|
-
# We only allow regexps (even if string would be okay
|
|
30
|
-
# too for gsub! - it's too hard to understand)
|
|
31
|
-
#
|
|
32
|
-
def stopwords regexp
|
|
33
|
-
check_argument_in __method__, Regexp, regexp
|
|
34
|
-
@remove_stopwords_regexp = regexp
|
|
35
|
-
end
|
|
36
|
-
def remove_stopwords text
|
|
37
|
-
text.gsub! @remove_stopwords_regexp, EMPTY_STRING if @remove_stopwords_regexp
|
|
38
|
-
text
|
|
39
|
-
end
|
|
40
|
-
@@non_single_stopword_regexp = /^\b[\w:]+?\b[\.\*\~]?\s?$/
|
|
41
|
-
def remove_non_single_stopwords text
|
|
42
|
-
return text if text.match @@non_single_stopword_regexp
|
|
43
|
-
remove_stopwords text
|
|
44
|
-
end
|
|
45
|
-
|
|
46
|
-
# Illegals.
|
|
47
|
-
#
|
|
48
|
-
# We only allow regexps (even if string would be okay
|
|
49
|
-
# too for gsub! - it's too hard to understand)
|
|
50
|
-
#
|
|
51
|
-
def removes_characters regexp
|
|
52
|
-
check_argument_in __method__, Regexp, regexp
|
|
53
|
-
@removes_characters_regexp = regexp
|
|
54
|
-
end
|
|
55
|
-
def remove_illegals text
|
|
56
|
-
text.gsub! @removes_characters_regexp, EMPTY_STRING if @removes_characters_regexp
|
|
57
|
-
text
|
|
58
|
-
end
|
|
59
|
-
|
|
60
|
-
# Splitting.
|
|
61
|
-
#
|
|
62
|
-
# We allow Strings and Regexps.
|
|
63
|
-
# Note: We do not test against to_str since symbols do not work with String#split.
|
|
64
|
-
#
|
|
65
|
-
def splits_text_on regexp_or_string
|
|
66
|
-
raise ArgumentError.new "#{__method__} takes a Regexp or String as argument, not a #{regexp_or_string.class}." unless Regexp === regexp_or_string || String === regexp_or_string
|
|
67
|
-
@splits_text_on = regexp_or_string
|
|
68
|
-
end
|
|
69
|
-
def split text
|
|
70
|
-
text.split @splits_text_on
|
|
71
|
-
end
|
|
72
|
-
|
|
73
|
-
# Normalizing.
|
|
74
|
-
#
|
|
75
|
-
# We only allow arrays.
|
|
76
|
-
#
|
|
77
|
-
def normalizes_words regexp_replaces
|
|
78
|
-
raise ArgumentError.new "#{__method__} takes an Array of replaces as argument, not a #{regexp_replaces.class}." unless regexp_replaces.respond_to?(:to_ary)
|
|
79
|
-
@normalizes_words_regexp_replaces = regexp_replaces
|
|
80
|
-
end
|
|
81
|
-
def normalize_with_patterns text
|
|
82
|
-
return text unless @normalizes_words_regexp_replaces
|
|
83
|
-
|
|
84
|
-
@normalizes_words_regexp_replaces.each do |regex, replace|
|
|
85
|
-
# This should be sufficient
|
|
86
|
-
#
|
|
87
|
-
text.gsub!(regex, replace) and break
|
|
88
|
-
end
|
|
89
|
-
remove_after_normalizing_illegals text
|
|
90
|
-
text
|
|
91
|
-
end
|
|
92
|
-
|
|
93
|
-
# Illegal after normalizing.
|
|
94
|
-
#
|
|
95
|
-
# We only allow regexps (even if string would be okay
|
|
96
|
-
# too for gsub! - it's too hard to understand)
|
|
97
|
-
#
|
|
98
|
-
def removes_characters_after_splitting regexp
|
|
99
|
-
check_argument_in __method__, Regexp, regexp
|
|
100
|
-
@removes_characters_after_splitting_regexp = regexp
|
|
101
|
-
end
|
|
102
|
-
def remove_after_normalizing_illegals text
|
|
103
|
-
text.gsub! @removes_characters_after_splitting_regexp, EMPTY_STRING if @removes_characters_after_splitting_regexp
|
|
104
|
-
end
|
|
105
|
-
|
|
106
|
-
# Substitute Characters with this substituter.
|
|
107
|
-
#
|
|
108
|
-
# Default is European Character substitution.
|
|
109
|
-
#
|
|
110
|
-
def substitutes_characters_with substituter = CharacterSubstituters::WestEuropean.new
|
|
111
|
-
raise ArgumentError.new "The substitutes_characters_with option needs a character substituter, which responds to #substitute." unless substituter.respond_to?(:substitute)
|
|
112
|
-
@substituter = substituter
|
|
113
|
-
end
|
|
114
|
-
def substitute_characters text
|
|
115
|
-
substituter?? substituter.substitute(text) : text
|
|
116
|
-
end
|
|
117
|
-
|
|
118
|
-
# Reject tokens after tokenizing based on the given criteria.
|
|
119
|
-
#
|
|
120
|
-
# Note: Currently only for indexing.
|
|
121
|
-
#
|
|
122
|
-
def reject_token_if &condition
|
|
123
|
-
@reject_condition = condition
|
|
124
|
-
end
|
|
125
|
-
def reject tokens
|
|
126
|
-
tokens.reject! &@reject_condition
|
|
127
|
-
end
|
|
128
|
-
|
|
129
|
-
def case_sensitive case_sensitive
|
|
130
|
-
@case_sensitive = case_sensitive
|
|
131
|
-
end
|
|
132
|
-
def downcase?
|
|
133
|
-
!@case_sensitive
|
|
134
|
-
end
|
|
135
|
-
|
|
136
|
-
# Checks if the right argument type has been given.
|
|
137
|
-
#
|
|
138
|
-
def check_argument_in method, type, argument, &condition
|
|
139
|
-
raise ArgumentError.new "Application##{method} takes a #{type} as argument, not a #{argument.class}." unless type === argument
|
|
140
|
-
end
|
|
141
|
-
|
|
142
|
-
|
|
143
|
-
# Returns a number of tokens, generated from the given text.
|
|
144
|
-
#
|
|
145
|
-
# Note:
|
|
146
|
-
# * preprocess, pretokenize are hooks
|
|
147
|
-
#
|
|
148
|
-
def tokenize text
|
|
149
|
-
text = preprocess text # processing the text
|
|
150
|
-
return empty_tokens if text.blank?
|
|
151
|
-
words = pretokenize text # splitting and preparations for tokenizing
|
|
152
|
-
return empty_tokens if words.empty?
|
|
153
|
-
tokens = tokens_for words # creating tokens / strings
|
|
154
|
-
process tokens # processing tokens / strings
|
|
155
|
-
end
|
|
156
|
-
|
|
157
|
-
attr_reader :substituter
|
|
158
|
-
alias substituter? substituter
|
|
159
|
-
|
|
160
|
-
def initialize options = {}
|
|
161
|
-
removes_characters options[:removes_characters] if options[:removes_characters]
|
|
162
|
-
contracts_expressions *options[:contracts_expressions] if options[:contracts_expressions]
|
|
163
|
-
stopwords options[:stopwords] if options[:stopwords]
|
|
164
|
-
normalizes_words options[:normalizes_words] if options[:normalizes_words]
|
|
165
|
-
removes_characters_after_splitting options[:removes_characters_after_splitting] if options[:removes_characters_after_splitting]
|
|
166
|
-
substitutes_characters_with options[:substitutes_characters_with] if options[:substitutes_characters_with]
|
|
167
|
-
case_sensitive options[:case_sensitive] unless options[:case_sensitive].nil?
|
|
168
|
-
|
|
169
|
-
# Defaults.
|
|
170
|
-
#
|
|
171
|
-
splits_text_on options[:splits_text_on] || /\s/
|
|
172
|
-
reject_token_if &(options[:reject_token_if] || options[:rejects_token_if] || :blank?) # TODO Decide on using an s or not.
|
|
173
|
-
end
|
|
174
|
-
|
|
175
|
-
# Default preprocessing hook.
|
|
176
|
-
#
|
|
177
|
-
# Does:
|
|
178
|
-
# 1. Character substitution.
|
|
179
|
-
# 2. Remove illegal expressions.
|
|
180
|
-
# 3. Remove non-single stopwords. (Stopwords that occur with other words)
|
|
181
|
-
#
|
|
182
|
-
def preprocess text
|
|
183
|
-
text = substitute_characters text
|
|
184
|
-
remove_illegals text
|
|
185
|
-
# We do not remove single stopwords e.g. in the indexer for
|
|
186
|
-
# an entirely different reason than in the query tokenizer.
|
|
187
|
-
# An indexed thing with just name "UND" (a possible stopword)
|
|
188
|
-
# should not lose its name.
|
|
189
|
-
#
|
|
190
|
-
remove_non_single_stopwords text
|
|
191
|
-
text
|
|
192
|
-
end
|
|
193
|
-
# Pretokenizing.
|
|
194
|
-
#
|
|
195
|
-
# Does:
|
|
196
|
-
# 1. Split the text into words.
|
|
197
|
-
# 2. Normalize each word.
|
|
198
|
-
#
|
|
199
|
-
def pretokenize text
|
|
200
|
-
words = split text
|
|
201
|
-
words.collect! do |word|
|
|
202
|
-
normalize_with_patterns word
|
|
203
|
-
word
|
|
204
|
-
end
|
|
205
|
-
end
|
|
206
|
-
# Basic postprocessing (overridden in both query/index tokenizers).
|
|
207
|
-
#
|
|
208
|
-
def process tokens
|
|
209
|
-
reject tokens # Reject any tokens that don't meet criteria
|
|
210
|
-
tokens
|
|
211
|
-
end
|
|
212
|
-
|
|
213
|
-
# # Converts words into real tokens.
|
|
214
|
-
# #
|
|
215
|
-
# def tokens_for words
|
|
216
|
-
# Internals::Query::Tokens.new words.collect! { |word| token_for word }
|
|
217
|
-
# end
|
|
218
|
-
# Turns non-blank text into symbols.
|
|
219
|
-
#
|
|
220
|
-
def symbolize text
|
|
221
|
-
text.blank? ? nil : text.to_sym
|
|
222
|
-
end
|
|
223
|
-
|
|
224
|
-
end
|
|
225
|
-
|
|
226
|
-
end
|
|
227
|
-
|
|
228
|
-
end
|
|
@@ -1,34 +0,0 @@
|
|
|
1
|
-
module Internals
|
|
2
|
-
|
|
3
|
-
module Tokenizers
|
|
4
|
-
|
|
5
|
-
# The base indexing tokenizer.
|
|
6
|
-
#
|
|
7
|
-
# Override in indexing subclasses and define in configuration.
|
|
8
|
-
#
|
|
9
|
-
class Index < Base
|
|
10
|
-
|
|
11
|
-
def self.default= new_default
|
|
12
|
-
@default = new_default
|
|
13
|
-
end
|
|
14
|
-
def self.default
|
|
15
|
-
@default ||= new
|
|
16
|
-
end
|
|
17
|
-
|
|
18
|
-
# Does not actually return a token, but a
|
|
19
|
-
# symbol "token".
|
|
20
|
-
#
|
|
21
|
-
def tokens_for words
|
|
22
|
-
words.collect! { |word| word.downcase! if downcase?; word.to_sym }
|
|
23
|
-
end
|
|
24
|
-
# Returns empty tokens.
|
|
25
|
-
#
|
|
26
|
-
def empty_tokens
|
|
27
|
-
[]
|
|
28
|
-
end
|
|
29
|
-
|
|
30
|
-
end
|
|
31
|
-
|
|
32
|
-
end
|
|
33
|
-
|
|
34
|
-
end
|
|
@@ -1,54 +0,0 @@
|
|
|
1
|
-
module Internals
|
|
2
|
-
|
|
3
|
-
module Tokenizers
|
|
4
|
-
|
|
5
|
-
|
|
6
|
-
class Location < Base
|
|
7
|
-
|
|
8
|
-
attr_reader :calculation
|
|
9
|
-
|
|
10
|
-
def initialize options = {}
|
|
11
|
-
super options
|
|
12
|
-
|
|
13
|
-
grid = options[:grid]
|
|
14
|
-
precision = options[:precision] || 1
|
|
15
|
-
|
|
16
|
-
@calculation = Internals::Calculations::Location.new grid, precision
|
|
17
|
-
|
|
18
|
-
@minimum = 1.0 / 0
|
|
19
|
-
|
|
20
|
-
@locations = []
|
|
21
|
-
end
|
|
22
|
-
|
|
23
|
-
# TODO Work on this!
|
|
24
|
-
#
|
|
25
|
-
def tokenize text
|
|
26
|
-
|
|
27
|
-
# Gather min/max.
|
|
28
|
-
#
|
|
29
|
-
source.harvest category do |indexed_id, location|
|
|
30
|
-
location = location.to_f
|
|
31
|
-
minimum = location if location < minimum
|
|
32
|
-
locations << [indexed_id, location]
|
|
33
|
-
end
|
|
34
|
-
|
|
35
|
-
calculation.minimum = minimum
|
|
36
|
-
|
|
37
|
-
# Recalculate locations.
|
|
38
|
-
#
|
|
39
|
-
locations.each do |indexed_id, location|
|
|
40
|
-
calculation.recalculated_range(location).each do |new_location|
|
|
41
|
-
yield indexed_id, new_location.to_s
|
|
42
|
-
end
|
|
43
|
-
end
|
|
44
|
-
|
|
45
|
-
# TODO Move to the right place.
|
|
46
|
-
#
|
|
47
|
-
category.exact[:location_minimum] = minimum
|
|
48
|
-
end
|
|
49
|
-
|
|
50
|
-
end
|
|
51
|
-
|
|
52
|
-
end
|
|
53
|
-
|
|
54
|
-
end
|
|
@@ -1,59 +0,0 @@
|
|
|
1
|
-
# encoding: utf-8
|
|
2
|
-
#
|
|
3
|
-
module Internals
|
|
4
|
-
|
|
5
|
-
module Tokenizers
|
|
6
|
-
|
|
7
|
-
# There are a few class methods that you can use to configure how a query works.
|
|
8
|
-
#
|
|
9
|
-
# removes_characters regexp
|
|
10
|
-
# illegal_after_normalizing regexp
|
|
11
|
-
# stopwords regexp
|
|
12
|
-
# contracts_expressions regexp, to_string
|
|
13
|
-
# splits_text_on regexp
|
|
14
|
-
# normalizes_words [[/regexp1/, 'replacement1'], [/regexp2/, 'replacement2']]
|
|
15
|
-
#
|
|
16
|
-
class Query < Base
|
|
17
|
-
|
|
18
|
-
def self.default= new_default
|
|
19
|
-
@default = new_default
|
|
20
|
-
end
|
|
21
|
-
def self.default
|
|
22
|
-
@default ||= new
|
|
23
|
-
end
|
|
24
|
-
|
|
25
|
-
attr_reader :maximum_tokens
|
|
26
|
-
|
|
27
|
-
def initialize options = {}
|
|
28
|
-
super options
|
|
29
|
-
@maximum_tokens = options[:maximum_tokens] || 5
|
|
30
|
-
end
|
|
31
|
-
|
|
32
|
-
# Let each token process itself.
|
|
33
|
-
# Reject, limit, and partialize tokens.
|
|
34
|
-
#
|
|
35
|
-
# In querying we work with real tokens (in indexing it's just symbols).
|
|
36
|
-
#
|
|
37
|
-
def process tokens
|
|
38
|
-
tokens.reject # Reject any tokens that don't meet criteria.
|
|
39
|
-
tokens.cap maximum_tokens # Cut off superfluous tokens.
|
|
40
|
-
tokens.partialize_last # Set certain tokens as partial.
|
|
41
|
-
tokens
|
|
42
|
-
end
|
|
43
|
-
|
|
44
|
-
# Converts words into real tokens.
|
|
45
|
-
#
|
|
46
|
-
def tokens_for words
|
|
47
|
-
Internals::Query::Tokens.processed words, downcase?
|
|
48
|
-
end
|
|
49
|
-
# Returns a tokens object.
|
|
50
|
-
#
|
|
51
|
-
def empty_tokens
|
|
52
|
-
Internals::Query::Tokens.new
|
|
53
|
-
end
|
|
54
|
-
|
|
55
|
-
end
|
|
56
|
-
|
|
57
|
-
end
|
|
58
|
-
|
|
59
|
-
end
|
data/lib/picky/internals.rb
DELETED
data/spec/lib/aliases_spec.rb
DELETED
|
@@ -1,69 +0,0 @@
|
|
|
1
|
-
# encoding: utf-8
|
|
2
|
-
#
|
|
3
|
-
require 'spec_helper'
|
|
4
|
-
|
|
5
|
-
describe IndexBundle do
|
|
6
|
-
|
|
7
|
-
let(:some_index) { stub :index, :name => :some_index, :internal_indexed => :indexed_index, :internal_indexing => :indexing_index }
|
|
8
|
-
let(:indexes) { described_class.new }
|
|
9
|
-
let(:indexed) { stub :indexed, :register => nil }
|
|
10
|
-
let(:indexing) { stub :indexing, :register => nil }
|
|
11
|
-
|
|
12
|
-
before(:each) do
|
|
13
|
-
indexes.stub! :indexing => indexing
|
|
14
|
-
indexes.stub! :indexed => indexed
|
|
15
|
-
end
|
|
16
|
-
|
|
17
|
-
def self.it_delegates method, receiver
|
|
18
|
-
it "delegates #{method} to #{receiver}" do
|
|
19
|
-
indexes.send(receiver).should_receive(method.to_sym).once
|
|
20
|
-
|
|
21
|
-
indexes.send method
|
|
22
|
-
end
|
|
23
|
-
end
|
|
24
|
-
|
|
25
|
-
describe 'delegation' do
|
|
26
|
-
it_delegates :reload, :indexed
|
|
27
|
-
it_delegates :load_from_cache, :indexed
|
|
28
|
-
|
|
29
|
-
it_delegates :check_caches, :indexing
|
|
30
|
-
it_delegates :find, :indexing
|
|
31
|
-
it_delegates :index, :indexing
|
|
32
|
-
it_delegates :index_for_tests, :indexing
|
|
33
|
-
end
|
|
34
|
-
|
|
35
|
-
describe '[]' do
|
|
36
|
-
before(:each) do
|
|
37
|
-
indexes.register some_index
|
|
38
|
-
end
|
|
39
|
-
it 'takes strings' do
|
|
40
|
-
indexes['some_index'].should == some_index
|
|
41
|
-
end
|
|
42
|
-
it 'takes symbols' do
|
|
43
|
-
indexes[:some_index].should == some_index
|
|
44
|
-
end
|
|
45
|
-
end
|
|
46
|
-
|
|
47
|
-
describe 'register' do
|
|
48
|
-
it 'registers with the indexes' do
|
|
49
|
-
indexes.register some_index
|
|
50
|
-
|
|
51
|
-
indexes.indexes.should == [some_index]
|
|
52
|
-
end
|
|
53
|
-
it 'registers with the index map' do
|
|
54
|
-
indexes.register some_index
|
|
55
|
-
|
|
56
|
-
indexes[some_index.name].should == some_index
|
|
57
|
-
end
|
|
58
|
-
it 'registers with the indexing' do
|
|
59
|
-
indexing.should_receive(:register).once.with :indexing_index
|
|
60
|
-
|
|
61
|
-
indexes.register some_index
|
|
62
|
-
end
|
|
63
|
-
it 'registers with the indexed' do
|
|
64
|
-
indexed.should_receive(:register).once.with :indexed_index
|
|
65
|
-
|
|
66
|
-
indexes.register some_index
|
|
67
|
-
end
|
|
68
|
-
end
|
|
69
|
-
end
|