picky 1.4.1 → 1.4.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/lib/picky/{alias_instances.rb → aliases.rb} +1 -3
- data/lib/picky/application.rb +18 -19
- data/lib/picky/cores.rb +1 -1
- data/lib/picky/generators/aliases.rb +3 -0
- data/lib/picky/index/base.rb +179 -0
- data/lib/picky/index/memory.rb +28 -0
- data/lib/picky/index/redis.rb +28 -0
- data/lib/picky/{indexes_api.rb → index_bundle.rb} +16 -16
- data/lib/picky/indexed/indexes.rb +11 -7
- data/lib/picky/indexing/indexes.rb +14 -8
- data/lib/picky/internals/adapters/rack/base.rb +27 -0
- data/lib/picky/internals/adapters/rack/live_parameters.rb +37 -0
- data/lib/picky/internals/adapters/rack/query.rb +63 -0
- data/lib/picky/internals/adapters/rack.rb +34 -0
- data/lib/picky/{calculations → internals/calculations}/location.rb +0 -0
- data/lib/picky/{cli.rb → internals/cli.rb} +0 -0
- data/lib/picky/{configuration → internals/configuration}/index.rb +8 -2
- data/lib/picky/{ext → internals/ext}/maybe_compile.rb +0 -0
- data/lib/picky/{ext → internals/ext}/ruby19/extconf.rb +0 -0
- data/lib/picky/{ext → internals/ext}/ruby19/performant.c +0 -0
- data/lib/picky/{extensions → internals/extensions}/array.rb +0 -0
- data/lib/picky/{extensions → internals/extensions}/hash.rb +0 -0
- data/lib/picky/{extensions → internals/extensions}/module.rb +0 -0
- data/lib/picky/{extensions → internals/extensions}/object.rb +0 -0
- data/lib/picky/{extensions → internals/extensions}/symbol.rb +0 -0
- data/lib/picky/internals/frontend_adapters/rack.rb +154 -0
- data/lib/picky/internals/generators/base.rb +19 -0
- data/lib/picky/internals/generators/partial/default.rb +7 -0
- data/lib/picky/internals/generators/partial/none.rb +35 -0
- data/lib/picky/internals/generators/partial/strategy.rb +29 -0
- data/lib/picky/internals/generators/partial/substring.rb +122 -0
- data/lib/picky/internals/generators/partial_generator.rb +19 -0
- data/lib/picky/internals/generators/similarity/default.rb +9 -0
- data/lib/picky/internals/generators/similarity/double_levenshtone.rb +81 -0
- data/lib/picky/internals/generators/similarity/none.rb +35 -0
- data/lib/picky/internals/generators/similarity/strategy.rb +11 -0
- data/lib/picky/internals/generators/similarity_generator.rb +19 -0
- data/lib/picky/internals/generators/strategy.rb +18 -0
- data/lib/picky/internals/generators/weights/default.rb +9 -0
- data/lib/picky/internals/generators/weights/logarithmic.rb +43 -0
- data/lib/picky/internals/generators/weights/strategy.rb +11 -0
- data/lib/picky/internals/generators/weights_generator.rb +19 -0
- data/lib/picky/{helpers → internals/helpers}/measuring.rb +0 -0
- data/lib/picky/internals/index/backend.rb +113 -0
- data/lib/picky/internals/index/file/basic.rb +101 -0
- data/lib/picky/internals/index/file/json.rb +38 -0
- data/lib/picky/internals/index/file/marshal.rb +38 -0
- data/lib/picky/internals/index/file/text.rb +60 -0
- data/lib/picky/internals/index/files.rb +24 -0
- data/lib/picky/internals/index/redis/basic.rb +77 -0
- data/lib/picky/internals/index/redis/list_hash.rb +46 -0
- data/lib/picky/internals/index/redis/string_hash.rb +35 -0
- data/lib/picky/internals/index/redis.rb +44 -0
- data/lib/picky/internals/indexed/bundle/base.rb +72 -0
- data/lib/picky/internals/indexed/bundle/memory.rb +69 -0
- data/lib/picky/internals/indexed/bundle/redis.rb +70 -0
- data/lib/picky/internals/indexed/categories.rb +135 -0
- data/lib/picky/internals/indexed/category.rb +90 -0
- data/lib/picky/internals/indexed/index.rb +57 -0
- data/lib/picky/{indexed → internals/indexed}/wrappers/bundle/calculation.rb +0 -0
- data/lib/picky/{indexed → internals/indexed}/wrappers/bundle/location.rb +4 -2
- data/lib/picky/{indexed → internals/indexed}/wrappers/bundle/wrapper.rb +1 -1
- data/lib/picky/internals/indexed/wrappers/exact_first.rb +65 -0
- data/lib/picky/{indexers → internals/indexers}/no_source_specified_error.rb +0 -0
- data/lib/picky/{indexers → internals/indexers}/serial.rb +2 -2
- data/lib/picky/{indexers → internals/indexers}/solr.rb +0 -0
- data/lib/picky/internals/indexing/bundle/base.rb +219 -0
- data/lib/picky/internals/indexing/bundle/memory.rb +25 -0
- data/lib/picky/internals/indexing/bundle/redis.rb +28 -0
- data/lib/picky/internals/indexing/bundle/super_base.rb +65 -0
- data/lib/picky/internals/indexing/categories.rb +42 -0
- data/lib/picky/internals/indexing/category.rb +120 -0
- data/lib/picky/internals/indexing/index.rb +67 -0
- data/lib/picky/{performant.rb → internals/performant.rb} +0 -0
- data/lib/picky/internals/query/allocation.rb +88 -0
- data/lib/picky/internals/query/allocations.rb +137 -0
- data/lib/picky/internals/query/combination.rb +80 -0
- data/lib/picky/internals/query/combinations/base.rb +84 -0
- data/lib/picky/internals/query/combinations/memory.rb +58 -0
- data/lib/picky/internals/query/combinations/redis.rb +59 -0
- data/lib/picky/internals/query/indexes.rb +180 -0
- data/lib/picky/internals/query/qualifiers.rb +81 -0
- data/lib/picky/internals/query/token.rb +215 -0
- data/lib/picky/internals/query/tokens.rb +89 -0
- data/lib/picky/{query → internals/query}/weights.rb +0 -0
- data/lib/picky/internals/results/base.rb +106 -0
- data/lib/picky/internals/results/full.rb +17 -0
- data/lib/picky/internals/results/live.rb +17 -0
- data/lib/picky/{solr → internals/solr}/schema_generator.rb +0 -0
- data/lib/picky/internals/tokenizers/base.rb +166 -0
- data/lib/picky/internals/tokenizers/index.rb +63 -0
- data/lib/picky/internals/tokenizers/query.rb +79 -0
- data/lib/picky/loader.rb +148 -112
- data/lib/picky/query/base.rb +57 -26
- data/lib/picky/query/full.rb +1 -1
- data/lib/picky/query/live.rb +1 -1
- data/lib/picky/sources/db.rb +27 -6
- data/lib/tasks/index.rake +3 -3
- data/lib/tasks/try.rake +2 -2
- data/spec/lib/aliases_spec.rb +9 -0
- data/spec/lib/application_spec.rb +3 -3
- data/spec/lib/generators/aliases_spec.rb +1 -0
- data/spec/lib/{index_api_spec.rb → index/base_spec.rb} +7 -7
- data/spec/lib/index_bundle_spec.rb +71 -0
- data/spec/lib/indexed/indexes_spec.rb +61 -0
- data/spec/lib/indexing/indexes_spec.rb +94 -24
- data/spec/lib/{adapters → internals/adapters}/rack/base_spec.rb +2 -2
- data/spec/lib/{adapters → internals/adapters}/rack/live_parameters_spec.rb +2 -2
- data/spec/lib/{adapters → internals/adapters}/rack/query_spec.rb +2 -2
- data/spec/lib/{calculations → internals/calculations}/location_spec.rb +0 -0
- data/spec/lib/{cli_spec.rb → internals/cli_spec.rb} +4 -1
- data/spec/lib/{configuration → internals/configuration}/index_spec.rb +1 -1
- data/spec/lib/{cores_spec.rb → internals/cores_spec.rb} +0 -0
- data/spec/lib/{extensions → internals/extensions}/array_spec.rb +0 -0
- data/spec/lib/{extensions → internals/extensions}/hash_spec.rb +0 -0
- data/spec/lib/{extensions → internals/extensions}/module_spec.rb +0 -0
- data/spec/lib/{extensions → internals/extensions}/object_spec.rb +0 -0
- data/spec/lib/{extensions → internals/extensions}/symbol_spec.rb +0 -0
- data/spec/lib/{frontend_adapters → internals/frontend_adapters}/rack_spec.rb +11 -11
- data/spec/lib/{cacher → internals/generators}/cacher_strategy_spec.rb +2 -2
- data/spec/lib/internals/generators/partial/default_spec.rb +17 -0
- data/spec/lib/internals/generators/partial/none_spec.rb +17 -0
- data/spec/lib/{cacher → internals/generators}/partial/substring_spec.rb +26 -27
- data/spec/lib/{cacher → internals/generators}/partial_generator_spec.rb +5 -5
- data/spec/lib/{cacher → internals/generators}/similarity/double_levenshtone_spec.rb +4 -4
- data/spec/lib/{cacher → internals/generators}/similarity/none_spec.rb +2 -2
- data/spec/lib/{cacher → internals/generators}/similarity_generator_spec.rb +4 -4
- data/spec/lib/{cacher → internals/generators}/weights/logarithmic_spec.rb +2 -2
- data/spec/lib/internals/generators/weights_generator_spec.rb +21 -0
- data/spec/lib/{helpers → internals/helpers}/measuring_spec.rb +0 -0
- data/spec/lib/{index → internals/index}/file/basic_spec.rb +2 -2
- data/spec/lib/{index → internals/index}/file/json_spec.rb +2 -2
- data/spec/lib/{index → internals/index}/file/marshal_spec.rb +2 -2
- data/spec/lib/{index → internals/index}/file/text_spec.rb +2 -2
- data/spec/lib/{index → internals/index}/files_spec.rb +2 -2
- data/spec/lib/{indexed/bundle_spec.rb → internals/indexed/bundle/memory_spec.rb} +4 -5
- data/spec/lib/{indexed → internals/indexed}/categories_spec.rb +13 -13
- data/spec/lib/{indexed → internals/indexed}/category_spec.rb +59 -32
- data/spec/lib/{indexed → internals/indexed}/index_spec.rb +5 -5
- data/spec/lib/{indexed → internals/indexed}/wrappers/bundle/calculation_spec.rb +0 -0
- data/spec/lib/{indexed → internals/indexed}/wrappers/bundle/wrapper_spec.rb +0 -0
- data/spec/lib/{indexed → internals/indexed}/wrappers/exact_first_spec.rb +5 -5
- data/spec/lib/{indexers → internals/indexers}/serial_spec.rb +0 -0
- data/spec/lib/{indexing/bundle_partial_generation_speed_spec.rb → internals/indexing/bundle/memory_partial_generation_speed_spec.rb} +3 -3
- data/spec/lib/{indexing/bundle_spec.rb → internals/indexing/bundle/memory_spec.rb} +3 -3
- data/spec/lib/{index/bundle_spec.rb → internals/indexing/bundle/super_base_spec.rb} +9 -3
- data/spec/lib/{indexing → internals/indexing}/category_spec.rb +3 -3
- data/spec/lib/{indexing → internals/indexing}/index_spec.rb +3 -3
- data/spec/lib/internals/indexing/indexes_spec.rb +36 -0
- data/spec/lib/{interfaces → internals/interfaces}/live_parameters_spec.rb +0 -0
- data/spec/lib/internals/results/base_spec.rb +105 -0
- data/spec/lib/internals/results/full_spec.rb +78 -0
- data/spec/lib/internals/results/live_spec.rb +88 -0
- data/spec/lib/{solr → internals/solr}/schema_generator_spec.rb +0 -0
- data/spec/lib/{tokenizers → internals/tokenizers}/base_spec.rb +3 -3
- data/spec/lib/{tokenizers → internals/tokenizers}/index_spec.rb +9 -9
- data/spec/lib/{tokenizers → internals/tokenizers}/query_spec.rb +11 -11
- data/spec/lib/query/allocation_spec.rb +12 -12
- data/spec/lib/query/allocations_spec.rb +19 -19
- data/spec/lib/query/base_spec.rb +28 -4
- data/spec/lib/query/combination_spec.rb +8 -9
- data/spec/lib/query/combinations/base_spec.rb +116 -0
- data/spec/lib/query/{combinations_spec.rb → combinations/memory_spec.rb} +14 -14
- data/spec/lib/query/combinations/redis_spec.rb +132 -0
- data/spec/lib/query/full_spec.rb +2 -2
- data/spec/lib/query/indexes_spec.rb +81 -0
- data/spec/lib/query/live_spec.rb +3 -3
- data/spec/lib/query/qualifiers_spec.rb +6 -6
- data/spec/lib/query/token_spec.rb +38 -38
- data/spec/lib/query/tokens_spec.rb +35 -35
- data/spec/lib/sources/db_spec.rb +23 -18
- metadata +212 -181
- data/lib/picky/adapters/rack/base.rb +0 -23
- data/lib/picky/adapters/rack/live_parameters.rb +0 -33
- data/lib/picky/adapters/rack/query.rb +0 -59
- data/lib/picky/adapters/rack.rb +0 -28
- data/lib/picky/cacher/convenience.rb +0 -3
- data/lib/picky/cacher/generator.rb +0 -15
- data/lib/picky/cacher/partial/default.rb +0 -5
- data/lib/picky/cacher/partial/none.rb +0 -31
- data/lib/picky/cacher/partial/strategy.rb +0 -21
- data/lib/picky/cacher/partial/substring.rb +0 -118
- data/lib/picky/cacher/partial_generator.rb +0 -15
- data/lib/picky/cacher/similarity/default.rb +0 -7
- data/lib/picky/cacher/similarity/double_levenshtone.rb +0 -77
- data/lib/picky/cacher/similarity/none.rb +0 -31
- data/lib/picky/cacher/similarity/strategy.rb +0 -9
- data/lib/picky/cacher/similarity_generator.rb +0 -15
- data/lib/picky/cacher/strategy.rb +0 -12
- data/lib/picky/cacher/weights/default.rb +0 -7
- data/lib/picky/cacher/weights/logarithmic.rb +0 -39
- data/lib/picky/cacher/weights/strategy.rb +0 -9
- data/lib/picky/cacher/weights_generator.rb +0 -15
- data/lib/picky/frontend_adapters/rack.rb +0 -150
- data/lib/picky/index/bundle.rb +0 -54
- data/lib/picky/index/file/basic.rb +0 -97
- data/lib/picky/index/file/json.rb +0 -34
- data/lib/picky/index/file/marshal.rb +0 -34
- data/lib/picky/index/file/text.rb +0 -56
- data/lib/picky/index/files.rb +0 -118
- data/lib/picky/index_api.rb +0 -175
- data/lib/picky/indexed/bundle.rb +0 -54
- data/lib/picky/indexed/categories.rb +0 -131
- data/lib/picky/indexed/category.rb +0 -85
- data/lib/picky/indexed/index.rb +0 -39
- data/lib/picky/indexed/wrappers/exact_first.rb +0 -61
- data/lib/picky/indexing/bundle.rb +0 -213
- data/lib/picky/indexing/categories.rb +0 -38
- data/lib/picky/indexing/category.rb +0 -117
- data/lib/picky/indexing/index.rb +0 -55
- data/lib/picky/query/allocation.rb +0 -82
- data/lib/picky/query/allocations.rb +0 -130
- data/lib/picky/query/combination.rb +0 -74
- data/lib/picky/query/combinations.rb +0 -105
- data/lib/picky/query/qualifiers.rb +0 -77
- data/lib/picky/query/token.rb +0 -202
- data/lib/picky/query/tokens.rb +0 -86
- data/lib/picky/query/weigher.rb +0 -165
- data/lib/picky/results/base.rb +0 -102
- data/lib/picky/results/full.rb +0 -13
- data/lib/picky/results/live.rb +0 -13
- data/lib/picky/tokenizers/base.rb +0 -161
- data/lib/picky/tokenizers/index.rb +0 -58
- data/lib/picky/tokenizers/query.rb +0 -74
- data/spec/lib/cacher/partial/default_spec.rb +0 -15
- data/spec/lib/cacher/partial/none_spec.rb +0 -17
- data/spec/lib/cacher/weights_generator_spec.rb +0 -21
- data/spec/lib/results/base_spec.rb +0 -257
- data/spec/lib/results/live_spec.rb +0 -15
|
@@ -0,0 +1,79 @@
|
|
|
1
|
+
# encoding: utf-8
|
|
2
|
+
#
|
|
3
|
+
module Internals
|
|
4
|
+
|
|
5
|
+
module Tokenizers
|
|
6
|
+
|
|
7
|
+
# There are a few class methods that you can use to configure how a query works.
|
|
8
|
+
#
|
|
9
|
+
# removes_characters regexp
|
|
10
|
+
# illegal_after_normalizing regexp
|
|
11
|
+
# stopwords regexp
|
|
12
|
+
# contracts_expressions regexp, to_string
|
|
13
|
+
# splits_text_on regexp
|
|
14
|
+
# normalizes_words [[/regexp1/, 'replacement1'], [/regexp2/, 'replacement2']]
|
|
15
|
+
#
|
|
16
|
+
class Query < Base
|
|
17
|
+
|
|
18
|
+
def self.default= new_default
|
|
19
|
+
@default = new_default
|
|
20
|
+
end
|
|
21
|
+
def self.default
|
|
22
|
+
@default ||= new
|
|
23
|
+
end
|
|
24
|
+
|
|
25
|
+
attr_reader :maximum_tokens
|
|
26
|
+
|
|
27
|
+
def initialize options = {}
|
|
28
|
+
super options
|
|
29
|
+
@maximum_tokens = options[:maximum_tokens] || 5
|
|
30
|
+
end
|
|
31
|
+
|
|
32
|
+
def preprocess text
|
|
33
|
+
remove_illegals text # Remove illegal characters
|
|
34
|
+
remove_non_single_stopwords text # remove stop words
|
|
35
|
+
text
|
|
36
|
+
end
|
|
37
|
+
|
|
38
|
+
# Split the text and put some back together.
|
|
39
|
+
#
|
|
40
|
+
# TODO Make the same as in indexing?
|
|
41
|
+
#
|
|
42
|
+
def pretokenize text
|
|
43
|
+
split text
|
|
44
|
+
end
|
|
45
|
+
|
|
46
|
+
# Let each token process itself.
|
|
47
|
+
# Reject, limit, and partialize tokens.
|
|
48
|
+
#
|
|
49
|
+
def process tokens
|
|
50
|
+
tokens.tokenize_with self
|
|
51
|
+
tokens.reject # Reject any tokens that don't meet criteria
|
|
52
|
+
tokens.cap maximum_tokens # Cut off superfluous tokens
|
|
53
|
+
tokens.partialize_last # Set certain tokens as partial
|
|
54
|
+
tokens
|
|
55
|
+
end
|
|
56
|
+
|
|
57
|
+
# Called by the token.
|
|
58
|
+
#
|
|
59
|
+
# TODO Perhaps move to Normalizer?
|
|
60
|
+
#
|
|
61
|
+
def normalize text
|
|
62
|
+
text = substitute_characters text # Substitute special characters
|
|
63
|
+
text.downcase! # Downcase all text
|
|
64
|
+
normalize_with_patterns text # normalize
|
|
65
|
+
text.to_sym # symbolize
|
|
66
|
+
end
|
|
67
|
+
|
|
68
|
+
# Returns a token for a word.
|
|
69
|
+
# The basic query tokenizer uses new tokens.
|
|
70
|
+
#
|
|
71
|
+
def token_for word
|
|
72
|
+
Internals::Query::Token.processed word
|
|
73
|
+
end
|
|
74
|
+
|
|
75
|
+
end
|
|
76
|
+
|
|
77
|
+
end
|
|
78
|
+
|
|
79
|
+
end
|
data/lib/picky/loader.rb
CHANGED
|
@@ -28,6 +28,9 @@ module Loader # :nodoc:all
|
|
|
28
28
|
def self.load_relative filename_without_rb
|
|
29
29
|
load File.join(File.dirname(__FILE__), "#{filename_without_rb}.rb")
|
|
30
30
|
end
|
|
31
|
+
def self.load_internals filename_without_rb
|
|
32
|
+
load File.join(File.dirname(__FILE__), "internals/#{filename_without_rb}.rb")
|
|
33
|
+
end
|
|
31
34
|
|
|
32
35
|
def self.load_user filename
|
|
33
36
|
load File.join(PICKY_ROOT, "#{filename}.rb")
|
|
@@ -74,151 +77,196 @@ module Loader # :nodoc:all
|
|
|
74
77
|
|
|
75
78
|
# TODO Rewrite
|
|
76
79
|
#
|
|
77
|
-
Query::Qualifiers.instance.prepare
|
|
80
|
+
Internals::Query::Qualifiers.instance.prepare
|
|
78
81
|
|
|
79
82
|
exclaim "Application #{Application.apps.map(&:name).join(', ')} loaded."
|
|
80
83
|
end
|
|
81
84
|
|
|
82
|
-
# Loads the framework.
|
|
85
|
+
# Loads the internal parts of the framework.
|
|
86
|
+
# (Not for the user)
|
|
83
87
|
#
|
|
84
|
-
def self.
|
|
88
|
+
def self.load_framework_internals
|
|
85
89
|
# Load compiled C code.
|
|
86
90
|
#
|
|
87
|
-
|
|
91
|
+
load_internals 'ext/maybe_compile'
|
|
88
92
|
|
|
89
93
|
# Load extensions.
|
|
90
94
|
#
|
|
91
|
-
|
|
92
|
-
|
|
93
|
-
|
|
94
|
-
|
|
95
|
-
|
|
96
|
-
|
|
97
|
-
# Load harakiri.
|
|
98
|
-
#
|
|
99
|
-
load_relative 'rack/harakiri'
|
|
95
|
+
load_internals 'extensions/object'
|
|
96
|
+
load_internals 'extensions/array'
|
|
97
|
+
load_internals 'extensions/symbol'
|
|
98
|
+
load_internals 'extensions/module'
|
|
99
|
+
load_internals 'extensions/hash'
|
|
100
100
|
|
|
101
101
|
# Requiring Helpers
|
|
102
102
|
#
|
|
103
|
-
|
|
104
|
-
|
|
105
|
-
# Character Substituters
|
|
106
|
-
#
|
|
107
|
-
load_relative 'character_substituters/west_european'
|
|
103
|
+
load_internals 'helpers/measuring'
|
|
108
104
|
|
|
109
105
|
# Calculations.
|
|
110
106
|
#
|
|
111
|
-
|
|
107
|
+
load_internals 'calculations/location'
|
|
112
108
|
|
|
113
|
-
# Signal handling
|
|
114
|
-
#
|
|
115
|
-
load_relative 'signals'
|
|
116
|
-
|
|
117
|
-
# Various.
|
|
118
|
-
#
|
|
119
|
-
load_relative 'loggers/search'
|
|
120
|
-
|
|
121
109
|
# Index generation strategies.
|
|
122
110
|
#
|
|
123
|
-
|
|
124
|
-
|
|
125
|
-
#
|
|
126
|
-
# load_relative 'indexers/solr'
|
|
111
|
+
load_internals 'indexers/no_source_specified_error'
|
|
112
|
+
load_internals 'indexers/serial'
|
|
127
113
|
|
|
128
|
-
#
|
|
114
|
+
# Generators.
|
|
129
115
|
#
|
|
130
|
-
|
|
116
|
+
load_internals 'generators/strategy'
|
|
131
117
|
|
|
132
118
|
# Partial index generation strategies.
|
|
133
119
|
#
|
|
134
|
-
|
|
135
|
-
|
|
136
|
-
|
|
137
|
-
|
|
120
|
+
load_internals 'generators/partial/strategy'
|
|
121
|
+
load_internals 'generators/partial/none'
|
|
122
|
+
load_internals 'generators/partial/substring'
|
|
123
|
+
load_internals 'generators/partial/default'
|
|
138
124
|
|
|
139
125
|
# Weight index generation strategies.
|
|
140
126
|
#
|
|
141
|
-
|
|
142
|
-
|
|
143
|
-
|
|
127
|
+
load_internals 'generators/weights/strategy'
|
|
128
|
+
load_internals 'generators/weights/logarithmic'
|
|
129
|
+
load_internals 'generators/weights/default'
|
|
144
130
|
|
|
145
131
|
# Similarity index generation strategies.
|
|
146
132
|
#
|
|
147
|
-
|
|
148
|
-
|
|
149
|
-
|
|
150
|
-
|
|
151
|
-
|
|
152
|
-
# Convenience accessors for generators.
|
|
153
|
-
#
|
|
154
|
-
load_relative 'cacher/convenience'
|
|
133
|
+
load_internals 'generators/similarity/strategy'
|
|
134
|
+
load_internals 'generators/similarity/none'
|
|
135
|
+
load_internals 'generators/similarity/double_levenshtone'
|
|
136
|
+
load_internals 'generators/similarity/default'
|
|
155
137
|
|
|
156
138
|
# Index generators.
|
|
157
139
|
#
|
|
158
|
-
|
|
159
|
-
|
|
160
|
-
|
|
161
|
-
|
|
140
|
+
load_internals 'generators/base'
|
|
141
|
+
load_internals 'generators/partial_generator'
|
|
142
|
+
load_internals 'generators/weights_generator'
|
|
143
|
+
load_internals 'generators/similarity_generator'
|
|
162
144
|
|
|
163
|
-
# Index
|
|
164
|
-
#
|
|
165
|
-
load_relative 'index/file/basic'
|
|
166
|
-
load_relative 'index/file/text'
|
|
167
|
-
load_relative 'index/file/marshal'
|
|
168
|
-
load_relative 'index/file/json'
|
|
169
|
-
load_relative 'index/files'
|
|
170
|
-
|
|
171
|
-
# Indexing and Indexed things.
|
|
145
|
+
# Index store handling.
|
|
172
146
|
#
|
|
173
|
-
|
|
147
|
+
load_internals 'index/backend'
|
|
174
148
|
|
|
175
|
-
|
|
176
|
-
|
|
177
|
-
|
|
178
|
-
|
|
179
|
-
load_relative 'indexing/indexes'
|
|
149
|
+
load_internals 'index/redis'
|
|
150
|
+
load_internals 'index/redis/basic'
|
|
151
|
+
load_internals 'index/redis/list_hash'
|
|
152
|
+
load_internals 'index/redis/string_hash'
|
|
180
153
|
|
|
181
|
-
|
|
182
|
-
|
|
183
|
-
|
|
184
|
-
|
|
185
|
-
load_relative 'indexed/indexes'
|
|
154
|
+
load_internals 'index/file/basic'
|
|
155
|
+
load_internals 'index/file/text'
|
|
156
|
+
load_internals 'index/file/marshal'
|
|
157
|
+
load_internals 'index/file/json'
|
|
186
158
|
|
|
187
|
-
|
|
188
|
-
load_relative 'alias_instances'
|
|
189
|
-
load_relative 'index_api'
|
|
159
|
+
load_internals 'index/files'
|
|
190
160
|
|
|
191
|
-
|
|
161
|
+
# Indexing and Indexed things.
|
|
162
|
+
#
|
|
163
|
+
load_internals 'indexing/bundle/super_base' # TODO Remove.
|
|
164
|
+
load_internals 'indexing/bundle/base'
|
|
165
|
+
load_internals 'indexing/bundle/memory'
|
|
166
|
+
load_internals 'indexing/bundle/redis'
|
|
167
|
+
load_internals 'indexing/category'
|
|
168
|
+
load_internals 'indexing/categories'
|
|
169
|
+
load_internals 'indexing/index'
|
|
170
|
+
|
|
171
|
+
load_internals 'indexed/bundle/base'
|
|
172
|
+
load_internals 'indexed/bundle/memory'
|
|
173
|
+
load_internals 'indexed/bundle/redis'
|
|
174
|
+
load_internals 'indexed/category'
|
|
175
|
+
load_internals 'indexed/categories'
|
|
176
|
+
load_internals 'indexed/index'
|
|
177
|
+
|
|
178
|
+
# TODO Ok here?
|
|
179
|
+
#
|
|
180
|
+
load_internals 'indexed/wrappers/exact_first'
|
|
192
181
|
|
|
193
182
|
# Bundle Wrapper
|
|
194
183
|
#
|
|
195
|
-
|
|
196
|
-
|
|
197
|
-
|
|
184
|
+
load_internals 'indexed/wrappers/bundle/wrapper'
|
|
185
|
+
load_internals 'indexed/wrappers/bundle/calculation'
|
|
186
|
+
load_internals 'indexed/wrappers/bundle/location'
|
|
198
187
|
|
|
199
188
|
# Tokens.
|
|
200
189
|
#
|
|
201
|
-
|
|
202
|
-
|
|
190
|
+
load_internals 'query/token'
|
|
191
|
+
load_internals 'query/tokens'
|
|
203
192
|
|
|
204
193
|
# Tokenizers types.
|
|
205
194
|
#
|
|
206
|
-
|
|
207
|
-
|
|
208
|
-
|
|
195
|
+
load_internals 'tokenizers/base'
|
|
196
|
+
load_internals 'tokenizers/index'
|
|
197
|
+
load_internals 'tokenizers/query'
|
|
209
198
|
|
|
210
199
|
# Query combinations, qualifiers, weigher.
|
|
211
200
|
#
|
|
212
|
-
|
|
213
|
-
|
|
201
|
+
load_internals 'query/combination'
|
|
202
|
+
load_internals 'query/combinations/base'
|
|
203
|
+
load_internals 'query/combinations/memory'
|
|
204
|
+
load_internals 'query/combinations/redis'
|
|
205
|
+
|
|
206
|
+
load_internals 'query/allocation'
|
|
207
|
+
load_internals 'query/allocations'
|
|
208
|
+
|
|
209
|
+
load_internals 'query/qualifiers'
|
|
210
|
+
|
|
211
|
+
load_internals 'query/weights'
|
|
212
|
+
|
|
213
|
+
load_internals 'query/indexes'
|
|
214
|
+
|
|
215
|
+
# Results.
|
|
216
|
+
#
|
|
217
|
+
load_internals 'results/base'
|
|
218
|
+
load_internals 'results/full'
|
|
219
|
+
load_internals 'results/live'
|
|
220
|
+
|
|
221
|
+
# Configuration.
|
|
222
|
+
#
|
|
223
|
+
load_internals 'configuration/index'
|
|
224
|
+
|
|
225
|
+
# Adapters.
|
|
226
|
+
#
|
|
227
|
+
load_internals 'adapters/rack/base'
|
|
228
|
+
load_internals 'adapters/rack/query'
|
|
229
|
+
load_internals 'adapters/rack/live_parameters'
|
|
230
|
+
load_internals 'adapters/rack'
|
|
231
|
+
|
|
232
|
+
# Routing.
|
|
233
|
+
#
|
|
234
|
+
load_internals 'frontend_adapters/rack'
|
|
235
|
+
end
|
|
236
|
+
# Loads the user interface parts.
|
|
237
|
+
#
|
|
238
|
+
def self.load_user_interface
|
|
239
|
+
# Load harakiri.
|
|
240
|
+
#
|
|
241
|
+
load_relative 'rack/harakiri'
|
|
242
|
+
|
|
243
|
+
# Character Substituters
|
|
244
|
+
#
|
|
245
|
+
load_relative 'character_substituters/west_european'
|
|
246
|
+
|
|
247
|
+
# Signal handling
|
|
248
|
+
#
|
|
249
|
+
load_relative 'signals'
|
|
250
|
+
|
|
251
|
+
# Logging.
|
|
252
|
+
#
|
|
253
|
+
load_relative 'loggers/search'
|
|
254
|
+
|
|
255
|
+
# Convenience accessors for generators.
|
|
256
|
+
#
|
|
257
|
+
load_relative 'generators/aliases'
|
|
214
258
|
|
|
215
|
-
|
|
216
|
-
|
|
259
|
+
# API.
|
|
260
|
+
#
|
|
261
|
+
load_relative 'index/base'
|
|
262
|
+
load_relative 'index/memory'
|
|
263
|
+
load_relative 'index/redis'
|
|
217
264
|
|
|
218
|
-
load_relative '
|
|
219
|
-
load_relative '
|
|
265
|
+
load_relative 'indexing/indexes'
|
|
266
|
+
load_relative 'indexed/indexes'
|
|
220
267
|
|
|
221
|
-
load_relative '
|
|
268
|
+
load_relative 'index_bundle'
|
|
269
|
+
load_relative 'aliases'
|
|
222
270
|
|
|
223
271
|
# Query.
|
|
224
272
|
#
|
|
@@ -228,12 +276,6 @@ module Loader # :nodoc:all
|
|
|
228
276
|
#
|
|
229
277
|
# load_relative 'query/solr'
|
|
230
278
|
|
|
231
|
-
# Results.
|
|
232
|
-
#
|
|
233
|
-
load_relative 'results/base'
|
|
234
|
-
load_relative 'results/full'
|
|
235
|
-
load_relative 'results/live'
|
|
236
|
-
|
|
237
279
|
# Sources.
|
|
238
280
|
#
|
|
239
281
|
load_relative 'sources/base'
|
|
@@ -245,30 +287,24 @@ module Loader # :nodoc:all
|
|
|
245
287
|
load_relative 'sources/wrappers/base'
|
|
246
288
|
load_relative 'sources/wrappers/location'
|
|
247
289
|
|
|
248
|
-
# Configuration.
|
|
249
|
-
#
|
|
250
|
-
load_relative 'configuration/index'
|
|
251
|
-
|
|
252
290
|
# Interfaces
|
|
253
291
|
#
|
|
254
292
|
load_relative 'interfaces/live_parameters'
|
|
255
293
|
|
|
256
|
-
#
|
|
257
|
-
#
|
|
258
|
-
load_relative 'adapters/rack/base'
|
|
259
|
-
load_relative 'adapters/rack/query'
|
|
260
|
-
load_relative 'adapters/rack/live_parameters'
|
|
261
|
-
load_relative 'adapters/rack'
|
|
262
|
-
|
|
263
|
-
# Application and routing.
|
|
294
|
+
# Application.
|
|
264
295
|
#
|
|
265
|
-
load_relative 'frontend_adapters/rack'
|
|
266
296
|
load_relative 'application'
|
|
267
297
|
|
|
268
|
-
# Load tools.
|
|
298
|
+
# Load tools. Load in specific case?
|
|
269
299
|
#
|
|
270
|
-
# load_relative 'solr/schema_generator'
|
|
271
300
|
load_relative 'cores'
|
|
272
301
|
end
|
|
302
|
+
|
|
303
|
+
# Loads the framework.
|
|
304
|
+
#
|
|
305
|
+
def self.load_framework
|
|
306
|
+
load_framework_internals
|
|
307
|
+
load_user_interface
|
|
308
|
+
end
|
|
273
309
|
|
|
274
310
|
end
|
data/lib/picky/query/base.rb
CHANGED
|
@@ -14,35 +14,66 @@
|
|
|
14
14
|
# * Query::Live (Same as the Full results without result ids. Useful for query result counters.)
|
|
15
15
|
#
|
|
16
16
|
module Query
|
|
17
|
-
|
|
17
|
+
|
|
18
18
|
# The base query class.
|
|
19
19
|
#
|
|
20
20
|
# Not directly instantiated. However, its methods are used by its subclasses, Full and Live.
|
|
21
21
|
#
|
|
22
22
|
class Base
|
|
23
|
-
|
|
23
|
+
|
|
24
24
|
include Helpers::Measuring
|
|
25
|
-
|
|
25
|
+
|
|
26
26
|
attr_writer :tokenizer, :identifiers_to_remove
|
|
27
27
|
attr_accessor :reduce_to_amount, :weights
|
|
28
|
-
|
|
28
|
+
|
|
29
29
|
# Takes:
|
|
30
30
|
# * A number of indexes
|
|
31
31
|
# * Options hash (optional) with:
|
|
32
|
-
# * weigher: A weigher. Query::Weigher by default.
|
|
33
32
|
# * tokenizer: Internals::Tokenizers::Query.default by default.
|
|
34
33
|
# * weights: A hash of weights, or a Query::Weights object.
|
|
35
34
|
#
|
|
36
|
-
def initialize *
|
|
37
|
-
options = Hash ===
|
|
38
|
-
|
|
39
|
-
|
|
40
|
-
@
|
|
41
|
-
@tokenizer = options[:tokenizer] || Tokenizers::Query.default
|
|
35
|
+
def initialize *index_definitions
|
|
36
|
+
options = Hash === index_definitions.last ? index_definitions.pop : {}
|
|
37
|
+
|
|
38
|
+
@indexes = Internals::Query::Indexes.new *index_definitions, combinations_type_for(index_definitions)
|
|
39
|
+
@tokenizer = options[:tokenizer] || Internals::Tokenizers::Query.default
|
|
42
40
|
weights = options[:weights] || Weights.new
|
|
43
41
|
@weights = Hash === weights ? Weights.new(weights) : weights
|
|
44
42
|
end
|
|
45
43
|
|
|
44
|
+
# Returns the right combinations strategy for
|
|
45
|
+
# a number of query indexes.
|
|
46
|
+
#
|
|
47
|
+
# Currently it isn't possible to use Memory and Redis etc.
|
|
48
|
+
# indexes in the same query index group.
|
|
49
|
+
#
|
|
50
|
+
# Picky will raise a Query::Base::DifferentTypesError.
|
|
51
|
+
#
|
|
52
|
+
@@mapping = {
|
|
53
|
+
Index::Memory => Internals::Query::Combinations::Memory,
|
|
54
|
+
Index::Redis => Internals::Query::Combinations::Redis
|
|
55
|
+
}
|
|
56
|
+
def combinations_type_for index_definitions_ary
|
|
57
|
+
index_types = index_definitions_ary.map(&:class)
|
|
58
|
+
index_types.uniq!
|
|
59
|
+
raise_different(index_types) if index_types.size > 1
|
|
60
|
+
!index_types.empty? && @@mapping[*index_types] || Internals::Query::Combinations::Memory
|
|
61
|
+
end
|
|
62
|
+
# Currently it isn't possible to use Memory and Redis etc.
|
|
63
|
+
# indexes in the same query index group.
|
|
64
|
+
#
|
|
65
|
+
class DifferentTypesError < StandardError
|
|
66
|
+
def initialize types
|
|
67
|
+
@types = types
|
|
68
|
+
end
|
|
69
|
+
def to_s
|
|
70
|
+
"Currently it isn't possible to mix #{@types.join(" and ")} Indexes in the same Query."
|
|
71
|
+
end
|
|
72
|
+
end
|
|
73
|
+
def raise_different index_types
|
|
74
|
+
raise DifferentTypesError.new(index_types)
|
|
75
|
+
end
|
|
76
|
+
|
|
46
77
|
# This is the main entry point for a query.
|
|
47
78
|
# Use this in specs and also for running queries.
|
|
48
79
|
#
|
|
@@ -55,22 +86,22 @@ module Query
|
|
|
55
86
|
def search_with_text text, offset = 0
|
|
56
87
|
search tokenized(text), offset
|
|
57
88
|
end
|
|
58
|
-
|
|
89
|
+
|
|
59
90
|
# Runs the actual search using Query::Tokens.
|
|
60
91
|
#
|
|
61
92
|
# Note: Internal method, use #search_with_text.
|
|
62
93
|
#
|
|
63
94
|
def search tokens, offset = 0
|
|
64
95
|
results = nil
|
|
65
|
-
|
|
96
|
+
|
|
66
97
|
duration = timed do
|
|
67
98
|
results = execute(tokens, offset) || empty_results(offset) # TODO Does not work yet
|
|
68
99
|
end
|
|
69
100
|
results.duration = duration.round 6
|
|
70
|
-
|
|
101
|
+
|
|
71
102
|
results
|
|
72
103
|
end
|
|
73
|
-
|
|
104
|
+
|
|
74
105
|
# Execute a search using Query::Tokens.
|
|
75
106
|
#
|
|
76
107
|
# Note: Internal method, use #search_with_text.
|
|
@@ -78,7 +109,7 @@ module Query
|
|
|
78
109
|
def execute tokens, offset
|
|
79
110
|
result_type.from offset, sorted_allocations(tokens)
|
|
80
111
|
end
|
|
81
|
-
|
|
112
|
+
|
|
82
113
|
# Returns an empty result with default values.
|
|
83
114
|
#
|
|
84
115
|
# Parameters:
|
|
@@ -87,7 +118,7 @@ module Query
|
|
|
87
118
|
def empty_results offset = 0
|
|
88
119
|
result_type.new offset
|
|
89
120
|
end
|
|
90
|
-
|
|
121
|
+
|
|
91
122
|
# Delegates the tokenizing to the query tokenizer.
|
|
92
123
|
#
|
|
93
124
|
# Parameters:
|
|
@@ -96,7 +127,7 @@ module Query
|
|
|
96
127
|
def tokenized text
|
|
97
128
|
@tokenizer.tokenize text
|
|
98
129
|
end
|
|
99
|
-
|
|
130
|
+
|
|
100
131
|
# Gets sorted allocations for the tokens.
|
|
101
132
|
#
|
|
102
133
|
# This generates the possible allocations, sorted.
|
|
@@ -112,28 +143,28 @@ module Query
|
|
|
112
143
|
#
|
|
113
144
|
# TODO uniq, score, sort in there
|
|
114
145
|
#
|
|
115
|
-
allocations = @
|
|
116
|
-
|
|
146
|
+
allocations = @indexes.allocations_for tokens
|
|
147
|
+
|
|
117
148
|
# Callbacks.
|
|
118
149
|
#
|
|
119
150
|
# TODO Reduce before sort?
|
|
120
151
|
#
|
|
121
152
|
reduce allocations
|
|
122
153
|
remove_from allocations
|
|
123
|
-
|
|
154
|
+
|
|
124
155
|
# Remove double allocations.
|
|
125
156
|
#
|
|
126
157
|
allocations.uniq
|
|
127
|
-
|
|
158
|
+
|
|
128
159
|
# Score the allocations using weights as bias.
|
|
129
160
|
#
|
|
130
161
|
allocations.calculate_score weights
|
|
131
|
-
|
|
162
|
+
|
|
132
163
|
# Sort the allocations.
|
|
133
164
|
# (allocations are sorted according to score, highest to lowest)
|
|
134
165
|
#
|
|
135
166
|
allocations.sort
|
|
136
|
-
|
|
167
|
+
|
|
137
168
|
# Return the allocations.
|
|
138
169
|
#
|
|
139
170
|
allocations
|
|
@@ -141,7 +172,7 @@ module Query
|
|
|
141
172
|
def reduce allocations # :nodoc:
|
|
142
173
|
allocations.reduce_to reduce_to_amount if reduce_to_amount
|
|
143
174
|
end
|
|
144
|
-
|
|
175
|
+
|
|
145
176
|
#
|
|
146
177
|
#
|
|
147
178
|
def remove_from allocations # :nodoc:
|
|
@@ -152,7 +183,7 @@ module Query
|
|
|
152
183
|
def identifiers_to_remove # :nodoc:
|
|
153
184
|
@identifiers_to_remove ||= []
|
|
154
185
|
end
|
|
155
|
-
|
|
186
|
+
|
|
156
187
|
# Display some nice information for the user.
|
|
157
188
|
#
|
|
158
189
|
def to_s
|
data/lib/picky/query/full.rb
CHANGED
data/lib/picky/query/live.rb
CHANGED
data/lib/picky/sources/db.rb
CHANGED
|
@@ -83,8 +83,15 @@ module Sources
|
|
|
83
83
|
|
|
84
84
|
on_database.execute "DROP TABLE IF EXISTS #{origin}"
|
|
85
85
|
on_database.execute "CREATE TABLE #{origin} AS #{select_statement}"
|
|
86
|
-
|
|
87
|
-
|
|
86
|
+
# TODO Use rename_column ASAP.
|
|
87
|
+
#
|
|
88
|
+
if on_database.adapter_name == "PostgreSQL"
|
|
89
|
+
on_database.execute "ALTER TABLE #{origin} RENAME COLUMN id TO indexed_id"
|
|
90
|
+
on_database.execute "ALTER TABLE #{origin} ADD COLUMN id SERIAL PRIMARY KEY"
|
|
91
|
+
else
|
|
92
|
+
on_database.execute "ALTER TABLE #{origin} CHANGE COLUMN id indexed_id INTEGER"
|
|
93
|
+
on_database.execute "ALTER TABLE #{origin} ADD COLUMN id INTEGER NOT NULL PRIMARY KEY AUTO_INCREMENT"
|
|
94
|
+
end
|
|
88
95
|
|
|
89
96
|
# Execute any special queries this type needs executed.
|
|
90
97
|
#
|
|
@@ -119,10 +126,24 @@ module Sources
|
|
|
119
126
|
#
|
|
120
127
|
def get_data type, category, offset, &block # :nodoc:
|
|
121
128
|
select_statement = harvest_statement_with_offset(type, category, offset)
|
|
122
|
-
|
|
123
|
-
|
|
124
|
-
|
|
125
|
-
|
|
129
|
+
|
|
130
|
+
# TODO Rewrite ASAP.
|
|
131
|
+
#
|
|
132
|
+
if database.connection.adapter_name == "PostgreSQL"
|
|
133
|
+
id_key = 'indexed_id'
|
|
134
|
+
text_key = category.from.to_s
|
|
135
|
+
database.connection.execute(select_statement).each do |hash|
|
|
136
|
+
indexed_id, text = hash.values_at id_key, text_key
|
|
137
|
+
next unless text
|
|
138
|
+
text.force_encoding 'utf-8' # TODO Still needed? Or move to backend?
|
|
139
|
+
yield indexed_id, text
|
|
140
|
+
end
|
|
141
|
+
else
|
|
142
|
+
database.connection.execute(select_statement).each do |indexed_id, text|
|
|
143
|
+
next unless text
|
|
144
|
+
text.force_encoding 'utf-8' # TODO Still needed? Or move to backend?
|
|
145
|
+
yield indexed_id, text
|
|
146
|
+
end
|
|
126
147
|
end
|
|
127
148
|
end
|
|
128
149
|
|