picky 1.4.1 → 1.4.2
Sign up to get free protection for your applications and to get access to all the features.
- data/lib/picky/{alias_instances.rb → aliases.rb} +1 -3
- data/lib/picky/application.rb +18 -19
- data/lib/picky/cores.rb +1 -1
- data/lib/picky/generators/aliases.rb +3 -0
- data/lib/picky/index/base.rb +179 -0
- data/lib/picky/index/memory.rb +28 -0
- data/lib/picky/index/redis.rb +28 -0
- data/lib/picky/{indexes_api.rb → index_bundle.rb} +16 -16
- data/lib/picky/indexed/indexes.rb +11 -7
- data/lib/picky/indexing/indexes.rb +14 -8
- data/lib/picky/internals/adapters/rack/base.rb +27 -0
- data/lib/picky/internals/adapters/rack/live_parameters.rb +37 -0
- data/lib/picky/internals/adapters/rack/query.rb +63 -0
- data/lib/picky/internals/adapters/rack.rb +34 -0
- data/lib/picky/{calculations → internals/calculations}/location.rb +0 -0
- data/lib/picky/{cli.rb → internals/cli.rb} +0 -0
- data/lib/picky/{configuration → internals/configuration}/index.rb +8 -2
- data/lib/picky/{ext → internals/ext}/maybe_compile.rb +0 -0
- data/lib/picky/{ext → internals/ext}/ruby19/extconf.rb +0 -0
- data/lib/picky/{ext → internals/ext}/ruby19/performant.c +0 -0
- data/lib/picky/{extensions → internals/extensions}/array.rb +0 -0
- data/lib/picky/{extensions → internals/extensions}/hash.rb +0 -0
- data/lib/picky/{extensions → internals/extensions}/module.rb +0 -0
- data/lib/picky/{extensions → internals/extensions}/object.rb +0 -0
- data/lib/picky/{extensions → internals/extensions}/symbol.rb +0 -0
- data/lib/picky/internals/frontend_adapters/rack.rb +154 -0
- data/lib/picky/internals/generators/base.rb +19 -0
- data/lib/picky/internals/generators/partial/default.rb +7 -0
- data/lib/picky/internals/generators/partial/none.rb +35 -0
- data/lib/picky/internals/generators/partial/strategy.rb +29 -0
- data/lib/picky/internals/generators/partial/substring.rb +122 -0
- data/lib/picky/internals/generators/partial_generator.rb +19 -0
- data/lib/picky/internals/generators/similarity/default.rb +9 -0
- data/lib/picky/internals/generators/similarity/double_levenshtone.rb +81 -0
- data/lib/picky/internals/generators/similarity/none.rb +35 -0
- data/lib/picky/internals/generators/similarity/strategy.rb +11 -0
- data/lib/picky/internals/generators/similarity_generator.rb +19 -0
- data/lib/picky/internals/generators/strategy.rb +18 -0
- data/lib/picky/internals/generators/weights/default.rb +9 -0
- data/lib/picky/internals/generators/weights/logarithmic.rb +43 -0
- data/lib/picky/internals/generators/weights/strategy.rb +11 -0
- data/lib/picky/internals/generators/weights_generator.rb +19 -0
- data/lib/picky/{helpers → internals/helpers}/measuring.rb +0 -0
- data/lib/picky/internals/index/backend.rb +113 -0
- data/lib/picky/internals/index/file/basic.rb +101 -0
- data/lib/picky/internals/index/file/json.rb +38 -0
- data/lib/picky/internals/index/file/marshal.rb +38 -0
- data/lib/picky/internals/index/file/text.rb +60 -0
- data/lib/picky/internals/index/files.rb +24 -0
- data/lib/picky/internals/index/redis/basic.rb +77 -0
- data/lib/picky/internals/index/redis/list_hash.rb +46 -0
- data/lib/picky/internals/index/redis/string_hash.rb +35 -0
- data/lib/picky/internals/index/redis.rb +44 -0
- data/lib/picky/internals/indexed/bundle/base.rb +72 -0
- data/lib/picky/internals/indexed/bundle/memory.rb +69 -0
- data/lib/picky/internals/indexed/bundle/redis.rb +70 -0
- data/lib/picky/internals/indexed/categories.rb +135 -0
- data/lib/picky/internals/indexed/category.rb +90 -0
- data/lib/picky/internals/indexed/index.rb +57 -0
- data/lib/picky/{indexed → internals/indexed}/wrappers/bundle/calculation.rb +0 -0
- data/lib/picky/{indexed → internals/indexed}/wrappers/bundle/location.rb +4 -2
- data/lib/picky/{indexed → internals/indexed}/wrappers/bundle/wrapper.rb +1 -1
- data/lib/picky/internals/indexed/wrappers/exact_first.rb +65 -0
- data/lib/picky/{indexers → internals/indexers}/no_source_specified_error.rb +0 -0
- data/lib/picky/{indexers → internals/indexers}/serial.rb +2 -2
- data/lib/picky/{indexers → internals/indexers}/solr.rb +0 -0
- data/lib/picky/internals/indexing/bundle/base.rb +219 -0
- data/lib/picky/internals/indexing/bundle/memory.rb +25 -0
- data/lib/picky/internals/indexing/bundle/redis.rb +28 -0
- data/lib/picky/internals/indexing/bundle/super_base.rb +65 -0
- data/lib/picky/internals/indexing/categories.rb +42 -0
- data/lib/picky/internals/indexing/category.rb +120 -0
- data/lib/picky/internals/indexing/index.rb +67 -0
- data/lib/picky/{performant.rb → internals/performant.rb} +0 -0
- data/lib/picky/internals/query/allocation.rb +88 -0
- data/lib/picky/internals/query/allocations.rb +137 -0
- data/lib/picky/internals/query/combination.rb +80 -0
- data/lib/picky/internals/query/combinations/base.rb +84 -0
- data/lib/picky/internals/query/combinations/memory.rb +58 -0
- data/lib/picky/internals/query/combinations/redis.rb +59 -0
- data/lib/picky/internals/query/indexes.rb +180 -0
- data/lib/picky/internals/query/qualifiers.rb +81 -0
- data/lib/picky/internals/query/token.rb +215 -0
- data/lib/picky/internals/query/tokens.rb +89 -0
- data/lib/picky/{query → internals/query}/weights.rb +0 -0
- data/lib/picky/internals/results/base.rb +106 -0
- data/lib/picky/internals/results/full.rb +17 -0
- data/lib/picky/internals/results/live.rb +17 -0
- data/lib/picky/{solr → internals/solr}/schema_generator.rb +0 -0
- data/lib/picky/internals/tokenizers/base.rb +166 -0
- data/lib/picky/internals/tokenizers/index.rb +63 -0
- data/lib/picky/internals/tokenizers/query.rb +79 -0
- data/lib/picky/loader.rb +148 -112
- data/lib/picky/query/base.rb +57 -26
- data/lib/picky/query/full.rb +1 -1
- data/lib/picky/query/live.rb +1 -1
- data/lib/picky/sources/db.rb +27 -6
- data/lib/tasks/index.rake +3 -3
- data/lib/tasks/try.rake +2 -2
- data/spec/lib/aliases_spec.rb +9 -0
- data/spec/lib/application_spec.rb +3 -3
- data/spec/lib/generators/aliases_spec.rb +1 -0
- data/spec/lib/{index_api_spec.rb → index/base_spec.rb} +7 -7
- data/spec/lib/index_bundle_spec.rb +71 -0
- data/spec/lib/indexed/indexes_spec.rb +61 -0
- data/spec/lib/indexing/indexes_spec.rb +94 -24
- data/spec/lib/{adapters → internals/adapters}/rack/base_spec.rb +2 -2
- data/spec/lib/{adapters → internals/adapters}/rack/live_parameters_spec.rb +2 -2
- data/spec/lib/{adapters → internals/adapters}/rack/query_spec.rb +2 -2
- data/spec/lib/{calculations → internals/calculations}/location_spec.rb +0 -0
- data/spec/lib/{cli_spec.rb → internals/cli_spec.rb} +4 -1
- data/spec/lib/{configuration → internals/configuration}/index_spec.rb +1 -1
- data/spec/lib/{cores_spec.rb → internals/cores_spec.rb} +0 -0
- data/spec/lib/{extensions → internals/extensions}/array_spec.rb +0 -0
- data/spec/lib/{extensions → internals/extensions}/hash_spec.rb +0 -0
- data/spec/lib/{extensions → internals/extensions}/module_spec.rb +0 -0
- data/spec/lib/{extensions → internals/extensions}/object_spec.rb +0 -0
- data/spec/lib/{extensions → internals/extensions}/symbol_spec.rb +0 -0
- data/spec/lib/{frontend_adapters → internals/frontend_adapters}/rack_spec.rb +11 -11
- data/spec/lib/{cacher → internals/generators}/cacher_strategy_spec.rb +2 -2
- data/spec/lib/internals/generators/partial/default_spec.rb +17 -0
- data/spec/lib/internals/generators/partial/none_spec.rb +17 -0
- data/spec/lib/{cacher → internals/generators}/partial/substring_spec.rb +26 -27
- data/spec/lib/{cacher → internals/generators}/partial_generator_spec.rb +5 -5
- data/spec/lib/{cacher → internals/generators}/similarity/double_levenshtone_spec.rb +4 -4
- data/spec/lib/{cacher → internals/generators}/similarity/none_spec.rb +2 -2
- data/spec/lib/{cacher → internals/generators}/similarity_generator_spec.rb +4 -4
- data/spec/lib/{cacher → internals/generators}/weights/logarithmic_spec.rb +2 -2
- data/spec/lib/internals/generators/weights_generator_spec.rb +21 -0
- data/spec/lib/{helpers → internals/helpers}/measuring_spec.rb +0 -0
- data/spec/lib/{index → internals/index}/file/basic_spec.rb +2 -2
- data/spec/lib/{index → internals/index}/file/json_spec.rb +2 -2
- data/spec/lib/{index → internals/index}/file/marshal_spec.rb +2 -2
- data/spec/lib/{index → internals/index}/file/text_spec.rb +2 -2
- data/spec/lib/{index → internals/index}/files_spec.rb +2 -2
- data/spec/lib/{indexed/bundle_spec.rb → internals/indexed/bundle/memory_spec.rb} +4 -5
- data/spec/lib/{indexed → internals/indexed}/categories_spec.rb +13 -13
- data/spec/lib/{indexed → internals/indexed}/category_spec.rb +59 -32
- data/spec/lib/{indexed → internals/indexed}/index_spec.rb +5 -5
- data/spec/lib/{indexed → internals/indexed}/wrappers/bundle/calculation_spec.rb +0 -0
- data/spec/lib/{indexed → internals/indexed}/wrappers/bundle/wrapper_spec.rb +0 -0
- data/spec/lib/{indexed → internals/indexed}/wrappers/exact_first_spec.rb +5 -5
- data/spec/lib/{indexers → internals/indexers}/serial_spec.rb +0 -0
- data/spec/lib/{indexing/bundle_partial_generation_speed_spec.rb → internals/indexing/bundle/memory_partial_generation_speed_spec.rb} +3 -3
- data/spec/lib/{indexing/bundle_spec.rb → internals/indexing/bundle/memory_spec.rb} +3 -3
- data/spec/lib/{index/bundle_spec.rb → internals/indexing/bundle/super_base_spec.rb} +9 -3
- data/spec/lib/{indexing → internals/indexing}/category_spec.rb +3 -3
- data/spec/lib/{indexing → internals/indexing}/index_spec.rb +3 -3
- data/spec/lib/internals/indexing/indexes_spec.rb +36 -0
- data/spec/lib/{interfaces → internals/interfaces}/live_parameters_spec.rb +0 -0
- data/spec/lib/internals/results/base_spec.rb +105 -0
- data/spec/lib/internals/results/full_spec.rb +78 -0
- data/spec/lib/internals/results/live_spec.rb +88 -0
- data/spec/lib/{solr → internals/solr}/schema_generator_spec.rb +0 -0
- data/spec/lib/{tokenizers → internals/tokenizers}/base_spec.rb +3 -3
- data/spec/lib/{tokenizers → internals/tokenizers}/index_spec.rb +9 -9
- data/spec/lib/{tokenizers → internals/tokenizers}/query_spec.rb +11 -11
- data/spec/lib/query/allocation_spec.rb +12 -12
- data/spec/lib/query/allocations_spec.rb +19 -19
- data/spec/lib/query/base_spec.rb +28 -4
- data/spec/lib/query/combination_spec.rb +8 -9
- data/spec/lib/query/combinations/base_spec.rb +116 -0
- data/spec/lib/query/{combinations_spec.rb → combinations/memory_spec.rb} +14 -14
- data/spec/lib/query/combinations/redis_spec.rb +132 -0
- data/spec/lib/query/full_spec.rb +2 -2
- data/spec/lib/query/indexes_spec.rb +81 -0
- data/spec/lib/query/live_spec.rb +3 -3
- data/spec/lib/query/qualifiers_spec.rb +6 -6
- data/spec/lib/query/token_spec.rb +38 -38
- data/spec/lib/query/tokens_spec.rb +35 -35
- data/spec/lib/sources/db_spec.rb +23 -18
- metadata +212 -181
- data/lib/picky/adapters/rack/base.rb +0 -23
- data/lib/picky/adapters/rack/live_parameters.rb +0 -33
- data/lib/picky/adapters/rack/query.rb +0 -59
- data/lib/picky/adapters/rack.rb +0 -28
- data/lib/picky/cacher/convenience.rb +0 -3
- data/lib/picky/cacher/generator.rb +0 -15
- data/lib/picky/cacher/partial/default.rb +0 -5
- data/lib/picky/cacher/partial/none.rb +0 -31
- data/lib/picky/cacher/partial/strategy.rb +0 -21
- data/lib/picky/cacher/partial/substring.rb +0 -118
- data/lib/picky/cacher/partial_generator.rb +0 -15
- data/lib/picky/cacher/similarity/default.rb +0 -7
- data/lib/picky/cacher/similarity/double_levenshtone.rb +0 -77
- data/lib/picky/cacher/similarity/none.rb +0 -31
- data/lib/picky/cacher/similarity/strategy.rb +0 -9
- data/lib/picky/cacher/similarity_generator.rb +0 -15
- data/lib/picky/cacher/strategy.rb +0 -12
- data/lib/picky/cacher/weights/default.rb +0 -7
- data/lib/picky/cacher/weights/logarithmic.rb +0 -39
- data/lib/picky/cacher/weights/strategy.rb +0 -9
- data/lib/picky/cacher/weights_generator.rb +0 -15
- data/lib/picky/frontend_adapters/rack.rb +0 -150
- data/lib/picky/index/bundle.rb +0 -54
- data/lib/picky/index/file/basic.rb +0 -97
- data/lib/picky/index/file/json.rb +0 -34
- data/lib/picky/index/file/marshal.rb +0 -34
- data/lib/picky/index/file/text.rb +0 -56
- data/lib/picky/index/files.rb +0 -118
- data/lib/picky/index_api.rb +0 -175
- data/lib/picky/indexed/bundle.rb +0 -54
- data/lib/picky/indexed/categories.rb +0 -131
- data/lib/picky/indexed/category.rb +0 -85
- data/lib/picky/indexed/index.rb +0 -39
- data/lib/picky/indexed/wrappers/exact_first.rb +0 -61
- data/lib/picky/indexing/bundle.rb +0 -213
- data/lib/picky/indexing/categories.rb +0 -38
- data/lib/picky/indexing/category.rb +0 -117
- data/lib/picky/indexing/index.rb +0 -55
- data/lib/picky/query/allocation.rb +0 -82
- data/lib/picky/query/allocations.rb +0 -130
- data/lib/picky/query/combination.rb +0 -74
- data/lib/picky/query/combinations.rb +0 -105
- data/lib/picky/query/qualifiers.rb +0 -77
- data/lib/picky/query/token.rb +0 -202
- data/lib/picky/query/tokens.rb +0 -86
- data/lib/picky/query/weigher.rb +0 -165
- data/lib/picky/results/base.rb +0 -102
- data/lib/picky/results/full.rb +0 -13
- data/lib/picky/results/live.rb +0 -13
- data/lib/picky/tokenizers/base.rb +0 -161
- data/lib/picky/tokenizers/index.rb +0 -58
- data/lib/picky/tokenizers/query.rb +0 -74
- data/spec/lib/cacher/partial/default_spec.rb +0 -15
- data/spec/lib/cacher/partial/none_spec.rb +0 -17
- data/spec/lib/cacher/weights_generator_spec.rb +0 -21
- data/spec/lib/results/base_spec.rb +0 -257
- data/spec/lib/results/live_spec.rb +0 -15
@@ -0,0 +1,79 @@
|
|
1
|
+
# encoding: utf-8
|
2
|
+
#
|
3
|
+
module Internals
|
4
|
+
|
5
|
+
module Tokenizers
|
6
|
+
|
7
|
+
# There are a few class methods that you can use to configure how a query works.
|
8
|
+
#
|
9
|
+
# removes_characters regexp
|
10
|
+
# illegal_after_normalizing regexp
|
11
|
+
# stopwords regexp
|
12
|
+
# contracts_expressions regexp, to_string
|
13
|
+
# splits_text_on regexp
|
14
|
+
# normalizes_words [[/regexp1/, 'replacement1'], [/regexp2/, 'replacement2']]
|
15
|
+
#
|
16
|
+
class Query < Base
|
17
|
+
|
18
|
+
def self.default= new_default
|
19
|
+
@default = new_default
|
20
|
+
end
|
21
|
+
def self.default
|
22
|
+
@default ||= new
|
23
|
+
end
|
24
|
+
|
25
|
+
attr_reader :maximum_tokens
|
26
|
+
|
27
|
+
def initialize options = {}
|
28
|
+
super options
|
29
|
+
@maximum_tokens = options[:maximum_tokens] || 5
|
30
|
+
end
|
31
|
+
|
32
|
+
def preprocess text
|
33
|
+
remove_illegals text # Remove illegal characters
|
34
|
+
remove_non_single_stopwords text # remove stop words
|
35
|
+
text
|
36
|
+
end
|
37
|
+
|
38
|
+
# Split the text and put some back together.
|
39
|
+
#
|
40
|
+
# TODO Make the same as in indexing?
|
41
|
+
#
|
42
|
+
def pretokenize text
|
43
|
+
split text
|
44
|
+
end
|
45
|
+
|
46
|
+
# Let each token process itself.
|
47
|
+
# Reject, limit, and partialize tokens.
|
48
|
+
#
|
49
|
+
def process tokens
|
50
|
+
tokens.tokenize_with self
|
51
|
+
tokens.reject # Reject any tokens that don't meet criteria
|
52
|
+
tokens.cap maximum_tokens # Cut off superfluous tokens
|
53
|
+
tokens.partialize_last # Set certain tokens as partial
|
54
|
+
tokens
|
55
|
+
end
|
56
|
+
|
57
|
+
# Called by the token.
|
58
|
+
#
|
59
|
+
# TODO Perhaps move to Normalizer?
|
60
|
+
#
|
61
|
+
def normalize text
|
62
|
+
text = substitute_characters text # Substitute special characters
|
63
|
+
text.downcase! # Downcase all text
|
64
|
+
normalize_with_patterns text # normalize
|
65
|
+
text.to_sym # symbolize
|
66
|
+
end
|
67
|
+
|
68
|
+
# Returns a token for a word.
|
69
|
+
# The basic query tokenizer uses new tokens.
|
70
|
+
#
|
71
|
+
def token_for word
|
72
|
+
Internals::Query::Token.processed word
|
73
|
+
end
|
74
|
+
|
75
|
+
end
|
76
|
+
|
77
|
+
end
|
78
|
+
|
79
|
+
end
|
data/lib/picky/loader.rb
CHANGED
@@ -28,6 +28,9 @@ module Loader # :nodoc:all
|
|
28
28
|
def self.load_relative filename_without_rb
|
29
29
|
load File.join(File.dirname(__FILE__), "#{filename_without_rb}.rb")
|
30
30
|
end
|
31
|
+
def self.load_internals filename_without_rb
|
32
|
+
load File.join(File.dirname(__FILE__), "internals/#{filename_without_rb}.rb")
|
33
|
+
end
|
31
34
|
|
32
35
|
def self.load_user filename
|
33
36
|
load File.join(PICKY_ROOT, "#{filename}.rb")
|
@@ -74,151 +77,196 @@ module Loader # :nodoc:all
|
|
74
77
|
|
75
78
|
# TODO Rewrite
|
76
79
|
#
|
77
|
-
Query::Qualifiers.instance.prepare
|
80
|
+
Internals::Query::Qualifiers.instance.prepare
|
78
81
|
|
79
82
|
exclaim "Application #{Application.apps.map(&:name).join(', ')} loaded."
|
80
83
|
end
|
81
84
|
|
82
|
-
# Loads the framework.
|
85
|
+
# Loads the internal parts of the framework.
|
86
|
+
# (Not for the user)
|
83
87
|
#
|
84
|
-
def self.
|
88
|
+
def self.load_framework_internals
|
85
89
|
# Load compiled C code.
|
86
90
|
#
|
87
|
-
|
91
|
+
load_internals 'ext/maybe_compile'
|
88
92
|
|
89
93
|
# Load extensions.
|
90
94
|
#
|
91
|
-
|
92
|
-
|
93
|
-
|
94
|
-
|
95
|
-
|
96
|
-
|
97
|
-
# Load harakiri.
|
98
|
-
#
|
99
|
-
load_relative 'rack/harakiri'
|
95
|
+
load_internals 'extensions/object'
|
96
|
+
load_internals 'extensions/array'
|
97
|
+
load_internals 'extensions/symbol'
|
98
|
+
load_internals 'extensions/module'
|
99
|
+
load_internals 'extensions/hash'
|
100
100
|
|
101
101
|
# Requiring Helpers
|
102
102
|
#
|
103
|
-
|
104
|
-
|
105
|
-
# Character Substituters
|
106
|
-
#
|
107
|
-
load_relative 'character_substituters/west_european'
|
103
|
+
load_internals 'helpers/measuring'
|
108
104
|
|
109
105
|
# Calculations.
|
110
106
|
#
|
111
|
-
|
107
|
+
load_internals 'calculations/location'
|
112
108
|
|
113
|
-
# Signal handling
|
114
|
-
#
|
115
|
-
load_relative 'signals'
|
116
|
-
|
117
|
-
# Various.
|
118
|
-
#
|
119
|
-
load_relative 'loggers/search'
|
120
|
-
|
121
109
|
# Index generation strategies.
|
122
110
|
#
|
123
|
-
|
124
|
-
|
125
|
-
#
|
126
|
-
# load_relative 'indexers/solr'
|
111
|
+
load_internals 'indexers/no_source_specified_error'
|
112
|
+
load_internals 'indexers/serial'
|
127
113
|
|
128
|
-
#
|
114
|
+
# Generators.
|
129
115
|
#
|
130
|
-
|
116
|
+
load_internals 'generators/strategy'
|
131
117
|
|
132
118
|
# Partial index generation strategies.
|
133
119
|
#
|
134
|
-
|
135
|
-
|
136
|
-
|
137
|
-
|
120
|
+
load_internals 'generators/partial/strategy'
|
121
|
+
load_internals 'generators/partial/none'
|
122
|
+
load_internals 'generators/partial/substring'
|
123
|
+
load_internals 'generators/partial/default'
|
138
124
|
|
139
125
|
# Weight index generation strategies.
|
140
126
|
#
|
141
|
-
|
142
|
-
|
143
|
-
|
127
|
+
load_internals 'generators/weights/strategy'
|
128
|
+
load_internals 'generators/weights/logarithmic'
|
129
|
+
load_internals 'generators/weights/default'
|
144
130
|
|
145
131
|
# Similarity index generation strategies.
|
146
132
|
#
|
147
|
-
|
148
|
-
|
149
|
-
|
150
|
-
|
151
|
-
|
152
|
-
# Convenience accessors for generators.
|
153
|
-
#
|
154
|
-
load_relative 'cacher/convenience'
|
133
|
+
load_internals 'generators/similarity/strategy'
|
134
|
+
load_internals 'generators/similarity/none'
|
135
|
+
load_internals 'generators/similarity/double_levenshtone'
|
136
|
+
load_internals 'generators/similarity/default'
|
155
137
|
|
156
138
|
# Index generators.
|
157
139
|
#
|
158
|
-
|
159
|
-
|
160
|
-
|
161
|
-
|
140
|
+
load_internals 'generators/base'
|
141
|
+
load_internals 'generators/partial_generator'
|
142
|
+
load_internals 'generators/weights_generator'
|
143
|
+
load_internals 'generators/similarity_generator'
|
162
144
|
|
163
|
-
# Index
|
164
|
-
#
|
165
|
-
load_relative 'index/file/basic'
|
166
|
-
load_relative 'index/file/text'
|
167
|
-
load_relative 'index/file/marshal'
|
168
|
-
load_relative 'index/file/json'
|
169
|
-
load_relative 'index/files'
|
170
|
-
|
171
|
-
# Indexing and Indexed things.
|
145
|
+
# Index store handling.
|
172
146
|
#
|
173
|
-
|
147
|
+
load_internals 'index/backend'
|
174
148
|
|
175
|
-
|
176
|
-
|
177
|
-
|
178
|
-
|
179
|
-
load_relative 'indexing/indexes'
|
149
|
+
load_internals 'index/redis'
|
150
|
+
load_internals 'index/redis/basic'
|
151
|
+
load_internals 'index/redis/list_hash'
|
152
|
+
load_internals 'index/redis/string_hash'
|
180
153
|
|
181
|
-
|
182
|
-
|
183
|
-
|
184
|
-
|
185
|
-
load_relative 'indexed/indexes'
|
154
|
+
load_internals 'index/file/basic'
|
155
|
+
load_internals 'index/file/text'
|
156
|
+
load_internals 'index/file/marshal'
|
157
|
+
load_internals 'index/file/json'
|
186
158
|
|
187
|
-
|
188
|
-
load_relative 'alias_instances'
|
189
|
-
load_relative 'index_api'
|
159
|
+
load_internals 'index/files'
|
190
160
|
|
191
|
-
|
161
|
+
# Indexing and Indexed things.
|
162
|
+
#
|
163
|
+
load_internals 'indexing/bundle/super_base' # TODO Remove.
|
164
|
+
load_internals 'indexing/bundle/base'
|
165
|
+
load_internals 'indexing/bundle/memory'
|
166
|
+
load_internals 'indexing/bundle/redis'
|
167
|
+
load_internals 'indexing/category'
|
168
|
+
load_internals 'indexing/categories'
|
169
|
+
load_internals 'indexing/index'
|
170
|
+
|
171
|
+
load_internals 'indexed/bundle/base'
|
172
|
+
load_internals 'indexed/bundle/memory'
|
173
|
+
load_internals 'indexed/bundle/redis'
|
174
|
+
load_internals 'indexed/category'
|
175
|
+
load_internals 'indexed/categories'
|
176
|
+
load_internals 'indexed/index'
|
177
|
+
|
178
|
+
# TODO Ok here?
|
179
|
+
#
|
180
|
+
load_internals 'indexed/wrappers/exact_first'
|
192
181
|
|
193
182
|
# Bundle Wrapper
|
194
183
|
#
|
195
|
-
|
196
|
-
|
197
|
-
|
184
|
+
load_internals 'indexed/wrappers/bundle/wrapper'
|
185
|
+
load_internals 'indexed/wrappers/bundle/calculation'
|
186
|
+
load_internals 'indexed/wrappers/bundle/location'
|
198
187
|
|
199
188
|
# Tokens.
|
200
189
|
#
|
201
|
-
|
202
|
-
|
190
|
+
load_internals 'query/token'
|
191
|
+
load_internals 'query/tokens'
|
203
192
|
|
204
193
|
# Tokenizers types.
|
205
194
|
#
|
206
|
-
|
207
|
-
|
208
|
-
|
195
|
+
load_internals 'tokenizers/base'
|
196
|
+
load_internals 'tokenizers/index'
|
197
|
+
load_internals 'tokenizers/query'
|
209
198
|
|
210
199
|
# Query combinations, qualifiers, weigher.
|
211
200
|
#
|
212
|
-
|
213
|
-
|
201
|
+
load_internals 'query/combination'
|
202
|
+
load_internals 'query/combinations/base'
|
203
|
+
load_internals 'query/combinations/memory'
|
204
|
+
load_internals 'query/combinations/redis'
|
205
|
+
|
206
|
+
load_internals 'query/allocation'
|
207
|
+
load_internals 'query/allocations'
|
208
|
+
|
209
|
+
load_internals 'query/qualifiers'
|
210
|
+
|
211
|
+
load_internals 'query/weights'
|
212
|
+
|
213
|
+
load_internals 'query/indexes'
|
214
|
+
|
215
|
+
# Results.
|
216
|
+
#
|
217
|
+
load_internals 'results/base'
|
218
|
+
load_internals 'results/full'
|
219
|
+
load_internals 'results/live'
|
220
|
+
|
221
|
+
# Configuration.
|
222
|
+
#
|
223
|
+
load_internals 'configuration/index'
|
224
|
+
|
225
|
+
# Adapters.
|
226
|
+
#
|
227
|
+
load_internals 'adapters/rack/base'
|
228
|
+
load_internals 'adapters/rack/query'
|
229
|
+
load_internals 'adapters/rack/live_parameters'
|
230
|
+
load_internals 'adapters/rack'
|
231
|
+
|
232
|
+
# Routing.
|
233
|
+
#
|
234
|
+
load_internals 'frontend_adapters/rack'
|
235
|
+
end
|
236
|
+
# Loads the user interface parts.
|
237
|
+
#
|
238
|
+
def self.load_user_interface
|
239
|
+
# Load harakiri.
|
240
|
+
#
|
241
|
+
load_relative 'rack/harakiri'
|
242
|
+
|
243
|
+
# Character Substituters
|
244
|
+
#
|
245
|
+
load_relative 'character_substituters/west_european'
|
246
|
+
|
247
|
+
# Signal handling
|
248
|
+
#
|
249
|
+
load_relative 'signals'
|
250
|
+
|
251
|
+
# Logging.
|
252
|
+
#
|
253
|
+
load_relative 'loggers/search'
|
254
|
+
|
255
|
+
# Convenience accessors for generators.
|
256
|
+
#
|
257
|
+
load_relative 'generators/aliases'
|
214
258
|
|
215
|
-
|
216
|
-
|
259
|
+
# API.
|
260
|
+
#
|
261
|
+
load_relative 'index/base'
|
262
|
+
load_relative 'index/memory'
|
263
|
+
load_relative 'index/redis'
|
217
264
|
|
218
|
-
load_relative '
|
219
|
-
load_relative '
|
265
|
+
load_relative 'indexing/indexes'
|
266
|
+
load_relative 'indexed/indexes'
|
220
267
|
|
221
|
-
load_relative '
|
268
|
+
load_relative 'index_bundle'
|
269
|
+
load_relative 'aliases'
|
222
270
|
|
223
271
|
# Query.
|
224
272
|
#
|
@@ -228,12 +276,6 @@ module Loader # :nodoc:all
|
|
228
276
|
#
|
229
277
|
# load_relative 'query/solr'
|
230
278
|
|
231
|
-
# Results.
|
232
|
-
#
|
233
|
-
load_relative 'results/base'
|
234
|
-
load_relative 'results/full'
|
235
|
-
load_relative 'results/live'
|
236
|
-
|
237
279
|
# Sources.
|
238
280
|
#
|
239
281
|
load_relative 'sources/base'
|
@@ -245,30 +287,24 @@ module Loader # :nodoc:all
|
|
245
287
|
load_relative 'sources/wrappers/base'
|
246
288
|
load_relative 'sources/wrappers/location'
|
247
289
|
|
248
|
-
# Configuration.
|
249
|
-
#
|
250
|
-
load_relative 'configuration/index'
|
251
|
-
|
252
290
|
# Interfaces
|
253
291
|
#
|
254
292
|
load_relative 'interfaces/live_parameters'
|
255
293
|
|
256
|
-
#
|
257
|
-
#
|
258
|
-
load_relative 'adapters/rack/base'
|
259
|
-
load_relative 'adapters/rack/query'
|
260
|
-
load_relative 'adapters/rack/live_parameters'
|
261
|
-
load_relative 'adapters/rack'
|
262
|
-
|
263
|
-
# Application and routing.
|
294
|
+
# Application.
|
264
295
|
#
|
265
|
-
load_relative 'frontend_adapters/rack'
|
266
296
|
load_relative 'application'
|
267
297
|
|
268
|
-
# Load tools.
|
298
|
+
# Load tools. Load in specific case?
|
269
299
|
#
|
270
|
-
# load_relative 'solr/schema_generator'
|
271
300
|
load_relative 'cores'
|
272
301
|
end
|
302
|
+
|
303
|
+
# Loads the framework.
|
304
|
+
#
|
305
|
+
def self.load_framework
|
306
|
+
load_framework_internals
|
307
|
+
load_user_interface
|
308
|
+
end
|
273
309
|
|
274
310
|
end
|
data/lib/picky/query/base.rb
CHANGED
@@ -14,35 +14,66 @@
|
|
14
14
|
# * Query::Live (Same as the Full results without result ids. Useful for query result counters.)
|
15
15
|
#
|
16
16
|
module Query
|
17
|
-
|
17
|
+
|
18
18
|
# The base query class.
|
19
19
|
#
|
20
20
|
# Not directly instantiated. However, its methods are used by its subclasses, Full and Live.
|
21
21
|
#
|
22
22
|
class Base
|
23
|
-
|
23
|
+
|
24
24
|
include Helpers::Measuring
|
25
|
-
|
25
|
+
|
26
26
|
attr_writer :tokenizer, :identifiers_to_remove
|
27
27
|
attr_accessor :reduce_to_amount, :weights
|
28
|
-
|
28
|
+
|
29
29
|
# Takes:
|
30
30
|
# * A number of indexes
|
31
31
|
# * Options hash (optional) with:
|
32
|
-
# * weigher: A weigher. Query::Weigher by default.
|
33
32
|
# * tokenizer: Tokenizers::Query.default by default.
|
34
33
|
# * weights: A hash of weights, or a Query::Weights object.
|
35
34
|
#
|
36
|
-
def initialize *
|
37
|
-
options = Hash ===
|
38
|
-
|
39
|
-
|
40
|
-
@
|
41
|
-
@tokenizer = options[:tokenizer] || Tokenizers::Query.default
|
35
|
+
def initialize *index_definitions
|
36
|
+
options = Hash === index_definitions.last ? index_definitions.pop : {}
|
37
|
+
|
38
|
+
@indexes = Internals::Query::Indexes.new *index_definitions, combinations_type_for(index_definitions)
|
39
|
+
@tokenizer = options[:tokenizer] || Internals::Tokenizers::Query.default
|
42
40
|
weights = options[:weights] || Weights.new
|
43
41
|
@weights = Hash === weights ? Weights.new(weights) : weights
|
44
42
|
end
|
45
43
|
|
44
|
+
# Returns the right combinations strategy for
|
45
|
+
# a number of query indexes.
|
46
|
+
#
|
47
|
+
# Currently it isn't possible using Memory and Redis etc.
|
48
|
+
# indexes in the same query index group.
|
49
|
+
#
|
50
|
+
# Picky will raise a Query::Indexes::DifferentTypesError.
|
51
|
+
#
|
52
|
+
@@mapping = {
|
53
|
+
Index::Memory => Internals::Query::Combinations::Memory,
|
54
|
+
Index::Redis => Internals::Query::Combinations::Redis
|
55
|
+
}
|
56
|
+
def combinations_type_for index_definitions_ary
|
57
|
+
index_types = index_definitions_ary.map(&:class)
|
58
|
+
index_types.uniq!
|
59
|
+
raise_different(index_types) if index_types.size > 1
|
60
|
+
!index_types.empty? && @@mapping[*index_types] || Internals::Query::Combinations::Memory
|
61
|
+
end
|
62
|
+
# Currently it isn't possible using Memory and Redis etc.
|
63
|
+
# indexes in the same query index group.
|
64
|
+
#
|
65
|
+
class DifferentTypesError < StandardError
|
66
|
+
def initialize types
|
67
|
+
@types = types
|
68
|
+
end
|
69
|
+
def to_s
|
70
|
+
"Currently it isn't possible to mix #{@types.join(" and ")} Indexes in the same Query."
|
71
|
+
end
|
72
|
+
end
|
73
|
+
def raise_different index_types
|
74
|
+
raise DifferentTypesError.new(index_types)
|
75
|
+
end
|
76
|
+
|
46
77
|
# This is the main entry point for a query.
|
47
78
|
# Use this in specs and also for running queries.
|
48
79
|
#
|
@@ -55,22 +86,22 @@ module Query
|
|
55
86
|
def search_with_text text, offset = 0
|
56
87
|
search tokenized(text), offset
|
57
88
|
end
|
58
|
-
|
89
|
+
|
59
90
|
# Runs the actual search using Query::Tokens.
|
60
91
|
#
|
61
92
|
# Note: Internal method, use #search_with_text.
|
62
93
|
#
|
63
94
|
def search tokens, offset = 0
|
64
95
|
results = nil
|
65
|
-
|
96
|
+
|
66
97
|
duration = timed do
|
67
98
|
results = execute(tokens, offset) || empty_results(offset) # TODO Does not work yet
|
68
99
|
end
|
69
100
|
results.duration = duration.round 6
|
70
|
-
|
101
|
+
|
71
102
|
results
|
72
103
|
end
|
73
|
-
|
104
|
+
|
74
105
|
# Execute a search using Query::Tokens.
|
75
106
|
#
|
76
107
|
# Note: Internal method, use #search_with_text.
|
@@ -78,7 +109,7 @@ module Query
|
|
78
109
|
def execute tokens, offset
|
79
110
|
result_type.from offset, sorted_allocations(tokens)
|
80
111
|
end
|
81
|
-
|
112
|
+
|
82
113
|
# Returns an empty result with default values.
|
83
114
|
#
|
84
115
|
# Parameters:
|
@@ -87,7 +118,7 @@ module Query
|
|
87
118
|
def empty_results offset = 0
|
88
119
|
result_type.new offset
|
89
120
|
end
|
90
|
-
|
121
|
+
|
91
122
|
# Delegates the tokenizing to the query tokenizer.
|
92
123
|
#
|
93
124
|
# Parameters:
|
@@ -96,7 +127,7 @@ module Query
|
|
96
127
|
def tokenized text
|
97
128
|
@tokenizer.tokenize text
|
98
129
|
end
|
99
|
-
|
130
|
+
|
100
131
|
# Gets sorted allocations for the tokens.
|
101
132
|
#
|
102
133
|
# This generates the possible allocations, sorted.
|
@@ -112,28 +143,28 @@ module Query
|
|
112
143
|
#
|
113
144
|
# TODO uniq, score, sort in there
|
114
145
|
#
|
115
|
-
allocations = @
|
116
|
-
|
146
|
+
allocations = @indexes.allocations_for tokens
|
147
|
+
|
117
148
|
# Callbacks.
|
118
149
|
#
|
119
150
|
# TODO Reduce before sort?
|
120
151
|
#
|
121
152
|
reduce allocations
|
122
153
|
remove_from allocations
|
123
|
-
|
154
|
+
|
124
155
|
# Remove double allocations.
|
125
156
|
#
|
126
157
|
allocations.uniq
|
127
|
-
|
158
|
+
|
128
159
|
# Score the allocations using weights as bias.
|
129
160
|
#
|
130
161
|
allocations.calculate_score weights
|
131
|
-
|
162
|
+
|
132
163
|
# Sort the allocations.
|
133
164
|
# (allocations are sorted according to score, highest to lowest)
|
134
165
|
#
|
135
166
|
allocations.sort
|
136
|
-
|
167
|
+
|
137
168
|
# Return the allocations.
|
138
169
|
#
|
139
170
|
allocations
|
@@ -141,7 +172,7 @@ module Query
|
|
141
172
|
def reduce allocations # :nodoc:
|
142
173
|
allocations.reduce_to reduce_to_amount if reduce_to_amount
|
143
174
|
end
|
144
|
-
|
175
|
+
|
145
176
|
#
|
146
177
|
#
|
147
178
|
def remove_from allocations # :nodoc:
|
@@ -152,7 +183,7 @@ module Query
|
|
152
183
|
def identifiers_to_remove # :nodoc:
|
153
184
|
@identifiers_to_remove ||= []
|
154
185
|
end
|
155
|
-
|
186
|
+
|
156
187
|
# Display some nice information for the user.
|
157
188
|
#
|
158
189
|
def to_s
|
data/lib/picky/query/full.rb
CHANGED
data/lib/picky/query/live.rb
CHANGED
data/lib/picky/sources/db.rb
CHANGED
@@ -83,8 +83,15 @@ module Sources
|
|
83
83
|
|
84
84
|
on_database.execute "DROP TABLE IF EXISTS #{origin}"
|
85
85
|
on_database.execute "CREATE TABLE #{origin} AS #{select_statement}"
|
86
|
-
|
87
|
-
|
86
|
+
# TODO Use rename_column ASAP.
|
87
|
+
#
|
88
|
+
if on_database.adapter_name == "PostgreSQL"
|
89
|
+
on_database.execute "ALTER TABLE #{origin} RENAME COLUMN id TO indexed_id"
|
90
|
+
on_database.execute "ALTER TABLE #{origin} ADD COLUMN id SERIAL PRIMARY KEY"
|
91
|
+
else
|
92
|
+
on_database.execute "ALTER TABLE #{origin} CHANGE COLUMN id indexed_id INTEGER"
|
93
|
+
on_database.execute "ALTER TABLE #{origin} ADD COLUMN id INTEGER NOT NULL PRIMARY KEY AUTO_INCREMENT"
|
94
|
+
end
|
88
95
|
|
89
96
|
# Execute any special queries this type needs executed.
|
90
97
|
#
|
@@ -119,10 +126,24 @@ module Sources
|
|
119
126
|
#
|
120
127
|
def get_data type, category, offset, &block # :nodoc:
|
121
128
|
select_statement = harvest_statement_with_offset(type, category, offset)
|
122
|
-
|
123
|
-
|
124
|
-
|
125
|
-
|
129
|
+
|
130
|
+
# TODO Rewrite ASAP.
|
131
|
+
#
|
132
|
+
if database.connection.adapter_name == "PostgreSQL"
|
133
|
+
id_key = 'indexed_id'
|
134
|
+
text_key = category.from.to_s
|
135
|
+
database.connection.execute(select_statement).each do |hash|
|
136
|
+
indexed_id, text = hash.values_at id_key, text_key
|
137
|
+
next unless text
|
138
|
+
text.force_encoding 'utf-8' # TODO Still needed? Or move to backend?
|
139
|
+
yield indexed_id, text
|
140
|
+
end
|
141
|
+
else
|
142
|
+
database.connection.execute(select_statement).each do |indexed_id, text|
|
143
|
+
next unless text
|
144
|
+
text.force_encoding 'utf-8' # TODO Still needed? Or move to backend?
|
145
|
+
yield indexed_id, text
|
146
|
+
end
|
126
147
|
end
|
127
148
|
end
|
128
149
|
|