picky 0.0.0 → 0.0.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/bin/picky +14 -0
- data/lib/bundling.rb +10 -0
- data/lib/constants.rb +9 -0
- data/lib/deployment.rb +212 -0
- data/lib/picky/application.rb +40 -0
- data/lib/picky/cacher/convenience.rb +3 -0
- data/lib/picky/cacher/generator.rb +17 -0
- data/lib/picky/cacher/partial/default.rb +7 -0
- data/lib/picky/cacher/partial/none.rb +19 -0
- data/lib/picky/cacher/partial/strategy.rb +7 -0
- data/lib/picky/cacher/partial/subtoken.rb +91 -0
- data/lib/picky/cacher/partial_generator.rb +15 -0
- data/lib/picky/cacher/similarity/default.rb +7 -0
- data/lib/picky/cacher/similarity/double_levenshtone.rb +73 -0
- data/lib/picky/cacher/similarity/none.rb +25 -0
- data/lib/picky/cacher/similarity/strategy.rb +7 -0
- data/lib/picky/cacher/similarity_generator.rb +15 -0
- data/lib/picky/cacher/weights/default.rb +7 -0
- data/lib/picky/cacher/weights/logarithmic.rb +39 -0
- data/lib/picky/cacher/weights/strategy.rb +7 -0
- data/lib/picky/cacher/weights_generator.rb +15 -0
- data/lib/picky/configuration/configuration.rb +13 -0
- data/lib/picky/configuration/field.rb +68 -0
- data/lib/picky/configuration/indexes.rb +60 -0
- data/lib/picky/configuration/queries.rb +32 -0
- data/lib/picky/configuration/type.rb +52 -0
- data/lib/picky/cores.rb +101 -0
- data/lib/picky/db/configuration.rb +23 -0
- data/lib/picky/ext/ruby19/extconf.rb +7 -0
- data/lib/picky/ext/ruby19/performant.c +339 -0
- data/lib/picky/extensions/array.rb +45 -0
- data/lib/picky/extensions/hash.rb +11 -0
- data/lib/picky/extensions/module.rb +15 -0
- data/lib/picky/extensions/symbol.rb +18 -0
- data/lib/picky/generator.rb +156 -0
- data/lib/picky/helpers/cache.rb +23 -0
- data/lib/picky/helpers/gc.rb +11 -0
- data/lib/picky/helpers/measuring.rb +45 -0
- data/lib/picky/helpers/search.rb +27 -0
- data/lib/picky/index/bundle.rb +328 -0
- data/lib/picky/index/category.rb +109 -0
- data/lib/picky/index/combined.rb +38 -0
- data/lib/picky/index/type.rb +30 -0
- data/lib/picky/indexers/base.rb +77 -0
- data/lib/picky/indexers/default.rb +3 -0
- data/lib/picky/indexers/field.rb +13 -0
- data/lib/picky/indexers/no_source_specified_error.rb +5 -0
- data/lib/picky/indexers/solr.rb +60 -0
- data/lib/picky/indexes.rb +180 -0
- data/lib/picky/initializers/ext.rb +6 -0
- data/lib/picky/initializers/mysql.rb +22 -0
- data/lib/picky/loader.rb +287 -0
- data/lib/picky/loggers/search.rb +19 -0
- data/lib/picky/performant/array.rb +23 -0
- data/lib/picky/query/allocation.rb +82 -0
- data/lib/picky/query/allocations.rb +131 -0
- data/lib/picky/query/base.rb +124 -0
- data/lib/picky/query/combination.rb +69 -0
- data/lib/picky/query/combinations.rb +106 -0
- data/lib/picky/query/combinator.rb +92 -0
- data/lib/picky/query/full.rb +15 -0
- data/lib/picky/query/live.rb +22 -0
- data/lib/picky/query/qualifiers.rb +73 -0
- data/lib/picky/query/solr.rb +77 -0
- data/lib/picky/query/token.rb +215 -0
- data/lib/picky/query/tokens.rb +102 -0
- data/lib/picky/query/weigher.rb +159 -0
- data/lib/picky/query/weights.rb +55 -0
- data/lib/picky/rack/harakiri.rb +37 -0
- data/lib/picky/results/base.rb +103 -0
- data/lib/picky/results/full.rb +19 -0
- data/lib/picky/results/live.rb +19 -0
- data/lib/picky/routing.rb +165 -0
- data/lib/picky/signals.rb +11 -0
- data/lib/picky/solr/schema_generator.rb +73 -0
- data/lib/picky/sources/base.rb +19 -0
- data/lib/picky/sources/csv.rb +30 -0
- data/lib/picky/sources/db.rb +77 -0
- data/lib/picky/tokenizers/base.rb +130 -0
- data/lib/picky/tokenizers/default.rb +3 -0
- data/lib/picky/tokenizers/index.rb +73 -0
- data/lib/picky/tokenizers/query.rb +70 -0
- data/lib/picky/umlaut_substituter.rb +21 -0
- data/lib/picky-tasks.rb +6 -0
- data/lib/picky.rb +18 -0
- data/lib/tasks/application.rake +5 -0
- data/lib/tasks/cache.rake +53 -0
- data/lib/tasks/framework.rake +4 -0
- data/lib/tasks/index.rake +29 -0
- data/lib/tasks/server.rake +48 -0
- data/lib/tasks/shortcuts.rake +13 -0
- data/lib/tasks/solr.rake +36 -0
- data/lib/tasks/spec.rake +11 -0
- data/lib/tasks/statistics.rake +13 -0
- data/lib/tasks/try.rake +29 -0
- data/prototype_project/Gemfile +23 -0
- data/prototype_project/Rakefile +1 -0
- data/prototype_project/app/README +6 -0
- data/prototype_project/app/application.rb +50 -0
- data/prototype_project/app/application.ru +29 -0
- data/prototype_project/app/db.yml +10 -0
- data/prototype_project/app/logging.rb +20 -0
- data/prototype_project/app/unicorn.ru +10 -0
- data/prototype_project/log/README +1 -0
- data/prototype_project/script/console +34 -0
- data/prototype_project/tmp/README +0 -0
- data/prototype_project/tmp/pids/README +0 -0
- data/spec/ext/performant_spec.rb +64 -0
- data/spec/lib/application_spec.rb +61 -0
- data/spec/lib/cacher/partial/subtoken_spec.rb +89 -0
- data/spec/lib/cacher/partial_generator_spec.rb +35 -0
- data/spec/lib/cacher/similarity/double_levenshtone_spec.rb +60 -0
- data/spec/lib/cacher/similarity/none_spec.rb +23 -0
- data/spec/lib/cacher/similarity_generator_spec.rb +22 -0
- data/spec/lib/cacher/weights/logarithmic_spec.rb +30 -0
- data/spec/lib/cacher/weights_generator_spec.rb +21 -0
- data/spec/lib/configuration/configuration_spec.rb +38 -0
- data/spec/lib/configuration/type_spec.rb +49 -0
- data/spec/lib/configuration_spec.rb +8 -0
- data/spec/lib/cores_spec.rb +65 -0
- data/spec/lib/extensions/array_spec.rb +37 -0
- data/spec/lib/extensions/hash_spec.rb +11 -0
- data/spec/lib/extensions/module_spec.rb +27 -0
- data/spec/lib/extensions/symbol_spec.rb +85 -0
- data/spec/lib/generator_spec.rb +135 -0
- data/spec/lib/helpers/cache_spec.rb +35 -0
- data/spec/lib/helpers/gc_spec.rb +71 -0
- data/spec/lib/helpers/measuring_spec.rb +18 -0
- data/spec/lib/helpers/search_spec.rb +50 -0
- data/spec/lib/index/bundle_partial_generation_speed_spec.rb +47 -0
- data/spec/lib/index/bundle_spec.rb +260 -0
- data/spec/lib/index/category_spec.rb +203 -0
- data/spec/lib/indexers/base_spec.rb +73 -0
- data/spec/lib/indexers/field_spec.rb +20 -0
- data/spec/lib/loader_spec.rb +48 -0
- data/spec/lib/loggers/search_spec.rb +19 -0
- data/spec/lib/performant/array_spec.rb +13 -0
- data/spec/lib/query/allocation_spec.rb +194 -0
- data/spec/lib/query/allocations_spec.rb +336 -0
- data/spec/lib/query/base_spec.rb +104 -0
- data/spec/lib/query/combination_spec.rb +90 -0
- data/spec/lib/query/combinations_spec.rb +83 -0
- data/spec/lib/query/combinator_spec.rb +112 -0
- data/spec/lib/query/full_spec.rb +22 -0
- data/spec/lib/query/live_spec.rb +61 -0
- data/spec/lib/query/qualifiers_spec.rb +31 -0
- data/spec/lib/query/solr_spec.rb +51 -0
- data/spec/lib/query/token_spec.rb +297 -0
- data/spec/lib/query/tokens_spec.rb +189 -0
- data/spec/lib/query/weights_spec.rb +47 -0
- data/spec/lib/results/base_spec.rb +233 -0
- data/spec/lib/routing_spec.rb +318 -0
- data/spec/lib/solr/schema_generator_spec.rb +42 -0
- data/spec/lib/sources/db_spec.rb +91 -0
- data/spec/lib/tokenizers/base_spec.rb +61 -0
- data/spec/lib/tokenizers/index_spec.rb +51 -0
- data/spec/lib/tokenizers/query_spec.rb +105 -0
- data/spec/lib/umlaut_substituter_spec.rb +84 -0
- data/spec/specific/speed_spec.rb +55 -0
- metadata +371 -15
- data/README.textile +0 -9
|
@@ -0,0 +1,69 @@
|
|
|
1
|
+
module Query
|
|
2
|
+
|
|
3
|
+
# Describes the combination of a token (the text) and
# the index (the bundle).
#
# A combination is a single part of an allocation.
# An allocation consists of a number of combinations.
#
class Combination

  attr_reader :token, :bundle

  # token    - must respond to text, to_s and to_result.
  # category - must respond to bundle_for(token) and name.
  #
  # The bundle is looked up once, here, via category.bundle_for.
  #
  def initialize token, category
    @token    = token
    @category = category
    @bundle   = category.bundle_for token
    # The text is captured at construction time; weight and ids
    # use this captured text, not a later value of token.text.
    #
    @text     = @token.text
  end

  # Note: Required for uniq! (together with eql? below).
  #
  def hash
    [@token.to_s, @bundle].hash
  end

  # Two combinations are interchangeable if they have the same
  # token string and the same bundle, i.e. exactly what goes into hash.
  #
  # Without this, overriding hash alone has no effect on uniq or
  # Hash lookups, since both also consult eql?.
  #
  def eql? other
    other.is_a?(Combination) &&
      @token.to_s == other.token.to_s &&
      @bundle.eql?(other.bundle)
  end

  # Returns the weight of this combination (memoized).
  #
  def weight
    @weight ||= @bundle.weight(@text)
  end

  # Returns the ids the bundle holds for the text (memoized).
  #
  def ids
    @ids ||= @bundle.ids(@text)
  end

  # The identifier for this combination: the category's name.
  #
  def identifier
    @category.name
  end

  # Is the identifier in the given identifiers?
  #
  def in? identifiers
    identifiers.include? identifier
  end

  # Prepends the identifier to the token's result, e.g.
  #   [:title, 'Flarbl', :flarbl]
  #
  def to_result
    [identifier, *@token.to_result]
  end

  # E.g. full/title:Flarbl:flarbl
  #
  def to_s
    "#{bundle.name}/#{to_result.join(':')}"
  end

end
|
|
68
|
+
|
|
69
|
+
end
|
|
@@ -0,0 +1,106 @@
|
|
|
1
|
+
module Query
|
|
2
|
+
|
|
3
|
+
# Combinations are a number of Combination-s.
#
# They are, in effect, the core of an allocation.
#
class Combinations

  attr_reader :type, :combinations

  def initialize type, combinations = []
    @type         = type # TODO Remove.
    @combinations = combinations
  end

  # True if there are no combinations.
  #
  def empty?
    @combinations.empty?
  end

  # The hash of the wrapped combinations array.
  #
  def hash
    @combinations.hash
  end

  # Sums the weights of all combinations, adds the score the given
  # weights object calculates for them, remembers and returns the total.
  #
  # TODO Rewrite.
  #
  def calculate_score weights
    @score = @combinations.sum(&:weight) + weights.score(@combinations)
  end

  # Gets the ids that are common to all combinations.
  #
  # Sorts the id arrays by size and & through them in the following order (sizes):
  # 0. [100_000, 400, 30, 2]
  # 1. [2, 30, 400, 100_000]
  # 2. (100_000 & (400 & (30 & 2))) # => result
  #
  # Returns [] if there are no combinations.
  #
  # Note: Uses a C-optimized intersection routine for speed and memory efficiency.
  #
  def ids
    return [] if @combinations.empty?

    # Get the ids for each combination.
    #
    id_arrays = @combinations.map(&:ids)

    # Order by smallest size first such that the intersect can be performed faster.
    #
    # TODO Move into the memory_efficient_intersect such that
    #      this precondition for a fast algorithm is always given.
    #
    id_arrays.sort! { |smaller, larger| smaller.size <=> larger.size }

    # Call the optimized C algorithm.
    #
    Performant::Array.memory_efficient_intersect id_arrays
  end

  # Wraps these combinations in an Allocation and hands the
  # type's result_type over to it.
  #
  def pack_into_allocation
    allocation = Allocation.new self
    allocation.result_type = @type.result_type # TODO Rewrite.
    allocation
  end

  # Keeps only the combinations whose identifier is in the
  # given identifiers. All others are removed in place.
  #
  # Note: As with reject!, returns nil if nothing was removed.
  #
  def keep identifiers = []
    # TODO Rewrite to use the category!!!
    #
    @combinations.reject! { |combination| !combination.in?(identifiers) }
  end

  # Removes, in place, the combinations whose identifier is in
  # the given identifiers.
  #
  # Note: As with reject!, returns nil if nothing was removed.
  #
  def remove identifiers = []
    # TODO Rewrite to use the category!!!
    #
    @combinations.reject! { |combination| combination.in?(identifiers) }
  end

  # An array with the to_result of each combination.
  #
  def to_result
    @combinations.map(&:to_result)
  end

end
|
|
105
|
+
|
|
106
|
+
end
|
|
@@ -0,0 +1,92 @@
|
|
|
1
|
+
module Query
|
|
2
|
+
|
|
3
|
+
# Combines tokens and category indexes into combinations.
#
class Combinator

  attr_reader :categories, :category_hash
  attr_reader :ignore_unassigned_tokens # TODO Should this actually be determined by the query? Probably, yes.

  # categories - each must respond to name and combination_for(token).
  # options    - :ignore_unassigned_tokens (default false): if true,
  #              possible_for returns nil instead of [] for a token
  #              that no category yields a combination for.
  #
  def initialize categories, options = {}
    @categories    = categories
    @category_hash = hashify categories

    @ignore_unassigned_tokens = options[:ignore_unassigned_tokens] || false
  end

  # Maps each category name to a one-element array holding that category.
  #
  # TODO Move somewhere else.
  #
  # TODO Or use active_support's?
  #
  def hashify category_array
    category_array.inject({}) do |hash, category|
      hash[category.name] = [category]
      hash
    end
  end

  # Dispatches on token.similar?.
  #
  def possible_combinations_for token
    token.similar? ? similar_possible_for(token) : possible_for(token)
  end

  # Collects, per category, the chain of tokens that token.next(category)
  # yields (skipping those with the same text as the given token), and
  # returns the concatenated possible combinations of all of them.
  #
  # Always returns an array.
  #
  # TODO Break apart.
  #
  def similar_possible_for token
    # Get as many similar tokens as necessary
    #
    text = token.text
    similar_tokens = categories.inject([]) do |found, category|
      next_token = token
      # TODO adjust either this or the amount of similar in index
      #
      while (next_token = next_token.next(category))
        found << next_token if next_token.text != text
      end
      found
    end
    # possible combinations
    #
    similar_tokens.inject([]) do |result, similar_token|
      possible = possible_for similar_token, possible_categories(similar_token)
      # possible_for returns nil for an ignored token (see there);
      # Array#+ would raise a TypeError on it, so treat nil as "none".
      #
      result + (possible || [])
    end
  end

  # Returns possible Combinations for the token.
  #
  # The categories param is an optimization.
  #
  # Returns nil (not []) if ignore_unassigned_tokens is set and no
  # category yields a combination for the token.
  #
  # TODO Return [RemovedCategory(token, nil)]
  #      If the search is ...
  #
  # TODO Make categories also a collection class.
  #
  # TODO Return [] if not ok, nil if needs to be removed?
  #      Somehow unnice, but…
  #
  def possible_for token, preselected_categories = nil
    selected = preselected_categories || possible_categories(token)
    possible = selected.map { |category| category.combination_for(token) }
    possible.compact!
    # This is an optimization to mark tokens that are ignored.
    #
    return if ignore_unassigned_tokens && possible.empty?
    possible # wrap in combinations
  end

  # The user defined categories for the token, or all categories if
  # there are none.
  #
  # TODO too many calls?
  #
  def possible_categories token
    user_defined_categories(token) || categories
  end

  # Returns nil if there is no user defined category, the category
  # (wrapped in an array, see hashify) else.
  #
  def user_defined_categories token
    category_hash[token.user_defined_category_name]
  end

end
|
|
91
|
+
|
|
92
|
+
end
|
|
@@ -0,0 +1,22 @@
|
|
|
1
|
+
module Query
|
|
2
|
+
|
|
3
|
+
# Query class for live queries.
#
# It does:
#   * Return a count of results.
#
# It does NOT:
#   * Sort results geographically.
#   * Do any postprocessing.
#
class Live < Base

  # The results class used by this kind of query.
  #
  def result_type
    Results::Live
  end

end
|
|
21
|
+
|
|
22
|
+
end
|
|
@@ -0,0 +1,73 @@
|
|
|
1
|
+
# coding: utf-8
|
|
2
|
+
module Query
|
|
3
|
+
|
|
4
|
+
# One qualifier: a normalized name plus the codes that map to it.
#
class Qualifier

  attr_reader :normalized_qualifier, :codes

  # normalized_qualifier - the value every code is mapped to.
  # codes                - an array of codes.
  #
  def initialize normalized_qualifier, codes
    @normalized_qualifier, @codes = normalized_qualifier, codes
  end

  # Writes code => normalized_qualifier into the given hash for
  # each code. Existing entries for a code are overwritten.
  #
  # Returns the codes.
  #
  def inject_into hash
    codes.each { |code| hash[code] = normalized_qualifier }
  end

end
|
|
28
|
+
|
|
29
|
+
# Singleton collection of Qualifier-s, plus the lookup table
# (code => normalized qualifier) built from them by prepare.
#
class Qualifiers

  include Singleton

  attr_reader :qualifiers, :normalization_mapping

  # Starts out with no qualifiers and an empty mapping.
  #
  def initialize
    @qualifiers            = []
    @normalization_mapping = {}
  end

  # Appends a qualifier to the collection.
  #
  def << qualifier
    qualifiers << qualifier
  end

  # Wraps name and codes in a Qualifier and appends it to the
  # singleton instance.
  #
  # TODO Spec.
  #
  def self.add name, qualifiers
    instance << Qualifier.new(name, qualifiers)
  end

  # Lets each qualifier write its codes into the normalization
  # mapping, so that normalize is a plain hash lookup.
  #
  def prepare
    qualifiers.each { |qualifier| qualifier.inject_into normalization_mapping }
  end

  # Normalizes the given qualifier.
  #
  # Returns nil if it is blank or not in the mapping, the
  # normalized qualifier otherwise.
  #
  # Note: The lookup key is qualifier.to_sym.
  #
  def normalize qualifier
    qualifier.blank? ? nil : normalization_mapping[qualifier.to_sym]
  end

end
|
|
73
|
+
end
|
|
@@ -0,0 +1,77 @@
|
|
|
1
|
+
require 'rsolr'
|
|
2
|
+
|
|
3
|
+
module Query
|
|
4
|
+
|
|
5
|
+
# Query that asks a Solr server (via RSolr) for highlighted
# documents and reports them as :similar entries per index type.
#
class Solr < Base

  attr_reader :server, :index_types

  def initialize *index_types
    # Best effort: if connecting raises, server is nil and execute
    # skips the Solr part.
    #
    @server = RSolr.connect rescue nil
    super(*index_types)
  end

  # # This runs the actual search.
  # #
  # # TODO Remove!
  # #
  # def search tokens, offset = 0
  #   results = nil
  #
  #   duration = timed do
  #     results = execute(tokens, offset) || empty_results # TODO Does not work yet
  #   end
  #   results.duration = duration
  #
  #   results
  # end

  # Returns a Results::Live.
  #
  # If there is a server, each index type is queried with the tokens'
  # solr query (filtered by "type:<index name>"). For every returned
  # doc that has highlighting, the values of its highlighted fields are
  # joined with a space; the stripped, unique strings are added to the
  # results as :similar => { index name => strings }.
  #
  # Returns the plain results early if the solr query is empty or a
  # request fails with RSolr::RequestError.
  #
  # Note: offset is not used.
  #
  def execute tokens, offset = 0
    results = Results::Live.new

    if server
      similar = {}

      new_query = tokens.to_solr_query

      return results if new_query.empty?

      index_types.each do |index|
        begin
          response = server.select :q => new_query, :fq => "type:#{index.name}", :hl => true, :'hl.fl' => '*', :'hl.simple.pre' => '<', :'hl.simple.post' => '>', :facet => true
        rescue RSolr::RequestError
          return results
        end

        highlighting  = response['highlighting']
        possibilities = response['response']['docs'].map do |doc|
          highlights = highlighting[doc['id'].to_s]
          next unless highlights
          selected = doc.select { |key| highlights.has_key?(key) }
          selected.values.join ' '
        end
        # Docs without highlights end up as nil in the mapped array
        # (see the next above). Drop them before stripping, or strip
        # is called on nil.
        #
        possibilities = possibilities.compact.map { |possibility| possibility.strip }.uniq
        similar[index.name] = possibilities unless possibilities.empty?
      end

      results.add :similar => similar
    end

    # Prefixes this one results object's log line with an asterisk.
    #
    # TODO
    #
    class << results
      def to_log query
        ?* + super
      end
    end

    results
  end

end
|
|
76
|
+
|
|
77
|
+
end
|
|
@@ -0,0 +1,215 @@
|
|
|
1
|
+
module Query
|
|
2
|
+
# This is a query token. Together with other tokens it makes up a query.
#
# It remembers the original form, and a normalized form.
#
# It also knows whether it needs to look for similarity (bla~), or whether it is a partial (bla*).
#
# TODO Make partial / similarity char configurable.
#
class Token

  attr_reader :text, :original
  attr_writer :similar

  # A token is blank if its text is.
  #
  def blank?
    text.blank?
  end

  # Normal initializer.
  #
  # Note: Use this if you do not want a qualified and normalized token.
  #
  def initialize text
    @text = text
  end

  # Returns a qualified and normalized token.
  #
  # Note: Use this in the search engine if you need a qualified
  #       and normalized token. I.e. one prepared for a search.
  #
  # The order matters: the original is extracted after the qualifier
  # has been split off, but before the illegal characters are removed.
  #
  def self.processed text
    token = new text
    token.qualify
    token.extract_original
    token.partialize
    token.similarize
    token.remove_illegals
    token
  end

  # This returns a predefined category name if the user has given one.
  #
  def user_defined_category_name
    @qualifier
  end

  # Splits a qualifier off the text ("qualifier:text") and normalizes
  # it using Query::Qualifiers.
  #
  # Note: Removes the qualifier if it is not allowed.
  #
  def qualify
    @qualifier, @text = split @text
    @qualifier = Query::Qualifiers.instance.normalize @qualifier
  end

  # Remembers a copy of the current text as the original.
  #
  def extract_original
    @original = @text.dup
  end

  # Partial is a conditional setter.
  #
  # It is only settable if it hasn't been set yet.
  #
  def partial= partial
    @partial = partial if @partial.nil?
  end

  # A token that is similar is never partial.
  #
  def partial?
    !@similar && @partial
  end

  # If the text ends with *, partialize it. If with ", don't.
  #
  # The quoted form is checked first and wins.
  #
  @@no_partial = /\"$/
  @@partial    = /[\*]$/
  def partialize
    if @text =~ @@no_partial
      self.partial = false
    elsif @text =~ @@partial
      self.partial = true
    end
  end

  # If the text ends with ~ similarize it. If with ", don't.
  #
  # The quoted form is checked first and wins. (Formerly written as
  # "self.similar = false and return", which never returned, as the
  # assignment evaluates to false.)
  #
  @@no_similar = /\"$/
  @@similar    = /[~]$/
  def similarize
    if @text =~ @@no_similar
      self.similar = false
    elsif @text =~ @@similar
      self.similar = true
    end
  end

  def similar?
    @similar
  end

  # Removes the characters ", * and ~ from this token's text (in place).
  #
  @@illegals = /["*~]/
  def remove_illegals
    @text.gsub! @@illegals, '' unless @text.blank?
  end

  # TODO Think about these, remove illegals and normalize...
  #

  # Visitor for tokenizer: replaces the text with the tokenizer's
  # normalized version of it.
  #
  # TODO Rewrite!!!
  #
  def tokenize_with tokenizer
    @text = tokenizer.normalize @text
  end

  # Yields each text the tokenizer tokenizes this token's text into.
  #
  # TODO spec!
  #
  # TODO Rewrite!!
  #
  def tokenized tokenizer
    tokenizer.tokenize(@text.to_s).each { |text| yield text }
  end

  # Returns an array of possible combinations.
  #
  def possible_combinations_in type
    type.possible_combinations self
  end

  # Returns a duplicate of the given token onto which this token's
  # text, partial, original and qualifier have been copied.
  #
  # Note: dup is shallow, so the duplicate shares the remaining
  #       instance variables' objects with the given token.
  #
  def from token
    new_token = token.dup
    new_token.instance_variable_set :@text,      @text
    new_token.instance_variable_set :@partial,   @partial
    new_token.instance_variable_set :@original,  @original
    new_token.instance_variable_set :@qualifier, @qualifier
    # TODO
    #
    # token.instance_variable_set :@similarity, @similarity
    new_token
  end

  # Returns a duplicate of this token that has been advanced to the
  # next similar text, or nil if there is none.
  #
  # TODO Rewrite, also next_similar.
  #
  def next category
    token = from self
    token if token.next_similar category.bundle_for(token)
  end

  # Sets and returns the next similar word.
  #
  # Returns nil if this token is not similar, or if there are no
  # more similar words (the text is left untouched then).
  #
  def next_similar bundle
    @text = similarity(bundle).next if similar?
  rescue StopIteration
    # reset_similar # TODO
    nil # TODO
  end

  # Lazy similar reader.
  #
  # Note: The bundle is only used on the first call.
  #
  def similarity bundle = nil
    @similarity ||= generate_similarity_for(bundle)
  end

  # Returns an enumerator that traverses over the similar.
  #
  def generate_similarity_for bundle
    (bundle.similar(@text) || []).each
  end

  # Generates a solr term from this token.
  #
  # E.g. "name:heroes~0.75"
  #
  # The fuzziness suffix is looked up by the size of the text; texts
  # longer than 10 characters get the default.
  #
  @@solr_fuzzy_mapping = {
    1  => :'',
    2  => :'',
    3  => :'',
    4  => :'~0.74',
    5  => :'~0.78',
    6  => :'~0.81',
    7  => :'~0.83',
    8  => :'~0.85',
    9  => :'~0.87',
    10 => :'~0.89'
  }
  @@solr_fuzzy_mapping.default = :'~0.9'
  def to_solr
    blank? ? '' : (to_s + @@solr_fuzzy_mapping[@text.size].to_s)
  end

  # Returns [original, text].
  #
  def to_result
    [@original, @text]
  end

  # Displays the qualifier text and the text, joined.
  #
  # e.g. name:meier
  #
  def to_s
    [@qualifier, @text].compact.join ':'
  end

  private

    # Splits text into a qualifier and text at the first colon.
    #
    # Returns [qualifier, text]. The qualifier is nil if there is
    # no colon, or nothing after it.
    #
    def split unqualified_text
      qualifier, text = (unqualified_text || '').split(':', 2)
      if text.blank?
        [nil, (qualifier || '')]
      else
        [qualifier, text]
      end
    end

end
|
|
215
|
+
end
|