picky 0.0.0 → 0.0.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/bin/picky +14 -0
- data/lib/bundling.rb +10 -0
- data/lib/constants.rb +9 -0
- data/lib/deployment.rb +212 -0
- data/lib/picky/application.rb +40 -0
- data/lib/picky/cacher/convenience.rb +3 -0
- data/lib/picky/cacher/generator.rb +17 -0
- data/lib/picky/cacher/partial/default.rb +7 -0
- data/lib/picky/cacher/partial/none.rb +19 -0
- data/lib/picky/cacher/partial/strategy.rb +7 -0
- data/lib/picky/cacher/partial/subtoken.rb +91 -0
- data/lib/picky/cacher/partial_generator.rb +15 -0
- data/lib/picky/cacher/similarity/default.rb +7 -0
- data/lib/picky/cacher/similarity/double_levenshtone.rb +73 -0
- data/lib/picky/cacher/similarity/none.rb +25 -0
- data/lib/picky/cacher/similarity/strategy.rb +7 -0
- data/lib/picky/cacher/similarity_generator.rb +15 -0
- data/lib/picky/cacher/weights/default.rb +7 -0
- data/lib/picky/cacher/weights/logarithmic.rb +39 -0
- data/lib/picky/cacher/weights/strategy.rb +7 -0
- data/lib/picky/cacher/weights_generator.rb +15 -0
- data/lib/picky/configuration/configuration.rb +13 -0
- data/lib/picky/configuration/field.rb +68 -0
- data/lib/picky/configuration/indexes.rb +60 -0
- data/lib/picky/configuration/queries.rb +32 -0
- data/lib/picky/configuration/type.rb +52 -0
- data/lib/picky/cores.rb +101 -0
- data/lib/picky/db/configuration.rb +23 -0
- data/lib/picky/ext/ruby19/extconf.rb +7 -0
- data/lib/picky/ext/ruby19/performant.c +339 -0
- data/lib/picky/extensions/array.rb +45 -0
- data/lib/picky/extensions/hash.rb +11 -0
- data/lib/picky/extensions/module.rb +15 -0
- data/lib/picky/extensions/symbol.rb +18 -0
- data/lib/picky/generator.rb +156 -0
- data/lib/picky/helpers/cache.rb +23 -0
- data/lib/picky/helpers/gc.rb +11 -0
- data/lib/picky/helpers/measuring.rb +45 -0
- data/lib/picky/helpers/search.rb +27 -0
- data/lib/picky/index/bundle.rb +328 -0
- data/lib/picky/index/category.rb +109 -0
- data/lib/picky/index/combined.rb +38 -0
- data/lib/picky/index/type.rb +30 -0
- data/lib/picky/indexers/base.rb +77 -0
- data/lib/picky/indexers/default.rb +3 -0
- data/lib/picky/indexers/field.rb +13 -0
- data/lib/picky/indexers/no_source_specified_error.rb +5 -0
- data/lib/picky/indexers/solr.rb +60 -0
- data/lib/picky/indexes.rb +180 -0
- data/lib/picky/initializers/ext.rb +6 -0
- data/lib/picky/initializers/mysql.rb +22 -0
- data/lib/picky/loader.rb +287 -0
- data/lib/picky/loggers/search.rb +19 -0
- data/lib/picky/performant/array.rb +23 -0
- data/lib/picky/query/allocation.rb +82 -0
- data/lib/picky/query/allocations.rb +131 -0
- data/lib/picky/query/base.rb +124 -0
- data/lib/picky/query/combination.rb +69 -0
- data/lib/picky/query/combinations.rb +106 -0
- data/lib/picky/query/combinator.rb +92 -0
- data/lib/picky/query/full.rb +15 -0
- data/lib/picky/query/live.rb +22 -0
- data/lib/picky/query/qualifiers.rb +73 -0
- data/lib/picky/query/solr.rb +77 -0
- data/lib/picky/query/token.rb +215 -0
- data/lib/picky/query/tokens.rb +102 -0
- data/lib/picky/query/weigher.rb +159 -0
- data/lib/picky/query/weights.rb +55 -0
- data/lib/picky/rack/harakiri.rb +37 -0
- data/lib/picky/results/base.rb +103 -0
- data/lib/picky/results/full.rb +19 -0
- data/lib/picky/results/live.rb +19 -0
- data/lib/picky/routing.rb +165 -0
- data/lib/picky/signals.rb +11 -0
- data/lib/picky/solr/schema_generator.rb +73 -0
- data/lib/picky/sources/base.rb +19 -0
- data/lib/picky/sources/csv.rb +30 -0
- data/lib/picky/sources/db.rb +77 -0
- data/lib/picky/tokenizers/base.rb +130 -0
- data/lib/picky/tokenizers/default.rb +3 -0
- data/lib/picky/tokenizers/index.rb +73 -0
- data/lib/picky/tokenizers/query.rb +70 -0
- data/lib/picky/umlaut_substituter.rb +21 -0
- data/lib/picky-tasks.rb +6 -0
- data/lib/picky.rb +18 -0
- data/lib/tasks/application.rake +5 -0
- data/lib/tasks/cache.rake +53 -0
- data/lib/tasks/framework.rake +4 -0
- data/lib/tasks/index.rake +29 -0
- data/lib/tasks/server.rake +48 -0
- data/lib/tasks/shortcuts.rake +13 -0
- data/lib/tasks/solr.rake +36 -0
- data/lib/tasks/spec.rake +11 -0
- data/lib/tasks/statistics.rake +13 -0
- data/lib/tasks/try.rake +29 -0
- data/prototype_project/Gemfile +23 -0
- data/prototype_project/Rakefile +1 -0
- data/prototype_project/app/README +6 -0
- data/prototype_project/app/application.rb +50 -0
- data/prototype_project/app/application.ru +29 -0
- data/prototype_project/app/db.yml +10 -0
- data/prototype_project/app/logging.rb +20 -0
- data/prototype_project/app/unicorn.ru +10 -0
- data/prototype_project/log/README +1 -0
- data/prototype_project/script/console +34 -0
- data/prototype_project/tmp/README +0 -0
- data/prototype_project/tmp/pids/README +0 -0
- data/spec/ext/performant_spec.rb +64 -0
- data/spec/lib/application_spec.rb +61 -0
- data/spec/lib/cacher/partial/subtoken_spec.rb +89 -0
- data/spec/lib/cacher/partial_generator_spec.rb +35 -0
- data/spec/lib/cacher/similarity/double_levenshtone_spec.rb +60 -0
- data/spec/lib/cacher/similarity/none_spec.rb +23 -0
- data/spec/lib/cacher/similarity_generator_spec.rb +22 -0
- data/spec/lib/cacher/weights/logarithmic_spec.rb +30 -0
- data/spec/lib/cacher/weights_generator_spec.rb +21 -0
- data/spec/lib/configuration/configuration_spec.rb +38 -0
- data/spec/lib/configuration/type_spec.rb +49 -0
- data/spec/lib/configuration_spec.rb +8 -0
- data/spec/lib/cores_spec.rb +65 -0
- data/spec/lib/extensions/array_spec.rb +37 -0
- data/spec/lib/extensions/hash_spec.rb +11 -0
- data/spec/lib/extensions/module_spec.rb +27 -0
- data/spec/lib/extensions/symbol_spec.rb +85 -0
- data/spec/lib/generator_spec.rb +135 -0
- data/spec/lib/helpers/cache_spec.rb +35 -0
- data/spec/lib/helpers/gc_spec.rb +71 -0
- data/spec/lib/helpers/measuring_spec.rb +18 -0
- data/spec/lib/helpers/search_spec.rb +50 -0
- data/spec/lib/index/bundle_partial_generation_speed_spec.rb +47 -0
- data/spec/lib/index/bundle_spec.rb +260 -0
- data/spec/lib/index/category_spec.rb +203 -0
- data/spec/lib/indexers/base_spec.rb +73 -0
- data/spec/lib/indexers/field_spec.rb +20 -0
- data/spec/lib/loader_spec.rb +48 -0
- data/spec/lib/loggers/search_spec.rb +19 -0
- data/spec/lib/performant/array_spec.rb +13 -0
- data/spec/lib/query/allocation_spec.rb +194 -0
- data/spec/lib/query/allocations_spec.rb +336 -0
- data/spec/lib/query/base_spec.rb +104 -0
- data/spec/lib/query/combination_spec.rb +90 -0
- data/spec/lib/query/combinations_spec.rb +83 -0
- data/spec/lib/query/combinator_spec.rb +112 -0
- data/spec/lib/query/full_spec.rb +22 -0
- data/spec/lib/query/live_spec.rb +61 -0
- data/spec/lib/query/qualifiers_spec.rb +31 -0
- data/spec/lib/query/solr_spec.rb +51 -0
- data/spec/lib/query/token_spec.rb +297 -0
- data/spec/lib/query/tokens_spec.rb +189 -0
- data/spec/lib/query/weights_spec.rb +47 -0
- data/spec/lib/results/base_spec.rb +233 -0
- data/spec/lib/routing_spec.rb +318 -0
- data/spec/lib/solr/schema_generator_spec.rb +42 -0
- data/spec/lib/sources/db_spec.rb +91 -0
- data/spec/lib/tokenizers/base_spec.rb +61 -0
- data/spec/lib/tokenizers/index_spec.rb +51 -0
- data/spec/lib/tokenizers/query_spec.rb +105 -0
- data/spec/lib/umlaut_substituter_spec.rb +84 -0
- data/spec/specific/speed_spec.rb +55 -0
- metadata +371 -15
- data/README.textile +0 -9
|
@@ -0,0 +1,102 @@
|
|
|
1
|
+
# encoding: utf-8
|
|
2
|
+
#
|
|
3
|
+
module Query

  # Wraps the tokens of one query and answers most collection calls by
  # forwarding them to the wrapped array. Primarily handles switching
  # through similar token constellations.
  #
  class Tokens

    # Number of tokens that survive #cap.
    #
    cattr_accessor :maximum
    self.maximum = 5

    # Collection behaviour is answered by the internal tokens array.
    #
    # NOTE(review): :exit is forwarded as well, although Array does not
    # define it — confirm whether this is intended.
    #
    delegate(*Enumerable.instance_methods,
             :slice!, :[], :uniq!, :last, :reject!, :length, :size, :empty?, :each, :exit,
             :to => :@tokens)

    # Takes the array of tokens to wrap (empty by default).
    #
    def initialize tokens = []
      @tokens = tokens
    end

    # Asks every token to tokenize itself with the given tokenizer.
    #
    def tokenize_with tokenizer
      @tokens.each do |token|
        token.tokenize_with tokenizer
      end
    end

    # Collects, per token, what that token reports as its possible
    # combinations in the given type:
    # [
    #  [combination],                           # of token 1
    #  [combination, combination, combination], # of token 2
    #  [combination, combination]               # of token 3
    # ]
    #
    # TODO If we want token behaviour defined per Query, we can
    #      compact! here
    #
    def possible_combinations_in type
      # TODO compact! if ignore_unassigned_tokens
      @tokens.map { |token| token.possible_combinations_in(type) }
    end

    # Marks the last token as partial. Does nothing without tokens.
    #
    def partialize_last
      return if empty?
      @tokens.last.partial = true
    end

    # Cuts off every token beyond the maximum.
    #
    # TODO parametrize?
    #
    def cap
      return unless cap?
      @tokens.slice!(@@maximum..-1)
    end

    # Are there more tokens than the maximum allows?
    #
    def cap?
      @@maximum < @tokens.size
    end

    # Removes blank tokens in place.
    #
    def reject
      @tokens.reject! { |token| token.blank? }
    end

    # Switches the first token to its next similar one.
    # Does nothing without tokens.
    #
    # TODO
    #
    def next_similar
      return if empty?
      @tokens.first.next_similar
    end

    # Joins the solr representation of each token with spaces.
    #
    def to_solr_query
      solr_parts = @tokens.map { |token| token.to_solr }
      solr_parts.join ' '
    end

    # The original of each token, in token order.
    #
    def originals
      @tokens.map { |token| token.original }
    end

    # The token originals, joined with spaces.
    #
    def to_s
      originals.join ' '
    end

    # def to_a
    #   @tokens
    # end

  end

end
|
|
@@ -0,0 +1,159 @@
|
|
|
1
|
+
module Query

  # Weighs the given tokens, generates Allocations -> Allocation -> Combinations.
  #
  class Weigher

    attr_reader :indexes

    # A weigher has a number of typed indexes, for which it generates allocations.
    #
    def initialize types
      @indexes = types
    end

    # Builds an Allocations object for the given tokens.
    #
    # For each index the tokens are asked for their possible combinations,
    # these are expanded into every cross-combination, each of which is
    # wrapped in a Combinations object and packed into an allocation.
    #
    def allocations_for tokens
      Allocations.new(indexes.inject([]) do |previous_allocations, index|
        # Expand the combinations.
        #
        possible_combinations = tokens.possible_combinations_in index

        # Optimization for ignoring tokens that allocate to nothing and
        # can be ignored (reported as nil).
        # For example in a context search, where "florian" is not
        # mapped to city, zip, or category.
        #
        possible_combinations.compact!
        expanded_combinations = expand_combinations_from possible_combinations

        # Nothing to add for this index.
        #
        next previous_allocations if expanded_combinations.empty?

        # The recombination part, where
        # [
        #  [a,a,b,b,c,c]
        #  [d,e,d,e,d,e]
        # ]
        # becomes
        # [
        #  [a,d],
        #  [a,e],
        #  [b,d],
        #  [b,e],
        #  [c,d],
        #  [c,e]
        # ]
        #
        expanded_combinations = expanded_combinations.shift.zip(*expanded_combinations)

        # Wrap into a real combination.
        #
        expanded_combinations.map! { |expanded_combination| Combinations.new(index, expanded_combination) }

        # Add the possible allocations to the ones we already have.
        #
        previous_allocations + expanded_combinations.map(&:pack_into_allocation)
      end)
    end

    # This is the core of the search engine.
    #
    # Gets an array of
    # [
    #  [<combinations for token1>],
    #  [<combinations for token2>],
    #  [<combinations for token3>]
    # ]
    #
    # and lines the elements up so that combining the resulting rows by
    # index (i.e. vertically) yields every possible combination:
    #   [1,2,3] x [a,b,c] x [k,l,m] => [[1,a,k], [1,a,l], [1,a,m], [1,b,k], ..., [3,c,m]]
    #
    # Works like this:
    #   [1,2,3], [a,b,c], [k,l,m] are expanded to
    #     group mult: 1, single mult: 9
    #     [1,1,1,1,1,1,1,1,1,2,2,2,2,2,2,2,2,2,3,3,3,3,3,3,3,3,3] = 27 elements
    #     group mult: 3, single mult: 3
    #     [a,a,a,b,b,b,c,c,c,a,a,a,b,b,b,c,c,c,a,a,a,b,b,b,c,c,c] = 27 elements
    #     group mult: 9, single mult: 1
    #     [k,l,m,k,l,m,k,l,m,k,l,m,k,l,m,k,l,m,k,l,m,k,l,m,k,l,m] = 27 elements
    #
    # Note: The given array is modified in place and returned.
    #
    # Note: If any element is empty (a token that cannot be allocated), the
    #       product of the sizes is 0. The empty elements are dropped and every
    #       remaining row is expanded to an empty array, so no combination results.
    #
    def expand_combinations_from possible_combinations
      # The product of all sizes is the length of every expanded row.
      # It must be taken before the empty elements are removed, so that
      # an empty element zeroes it.
      #
      single_mult = possible_combinations.inject(1) { |total, combinations| total * combinations.size }

      # Initialize a group multiplicator.
      #
      group_mult = 1

      possible_combinations.reject!(&:empty?)

      # The expanding part to line up the combinations
      # for later combination in allocations.
      #
      possible_combinations.map! do |combinations|
        # No element is empty anymore, so the division is always safe.
        #
        combinations_size = combinations.size
        single_mult /= combinations_size

        # Expand each combination by the single mult:
        #   [a,b,c]
        #   [a,a,a, b,b,b, c,c,c]
        # Appending in place keeps this linear in the size of the row.
        #
        expanded = combinations.inject([]) do |row, combination|
          row.concat [combination] * single_mult
        end

        # Then, repeat the result by the group mult:
        #   [a,a,a,b,b,b,c,c,c, a,a,a,b,b,b,c,c,c, a,a,a,b,b,b,c,c,c]
        #
        expanded *= group_mult

        # The next row's single mult is smaller, so its groups
        # need to be repeated more often to make up for that.
        #
        group_mult *= combinations_size

        expanded
      end
    end

  end

end
|
|
@@ -0,0 +1,55 @@
|
|
|
1
|
+
module Query
|
|
2
|
+
|
|
3
|
+
# Calculates weights for certain combinations.
|
|
4
|
+
#
|
|
5
|
+
class Weights
|
|
6
|
+
|
|
7
|
+
#
|
|
8
|
+
#
|
|
9
|
+
def initialize weights = {}
  # Scores already computed by #score, used as its cache store.
  @weights_cache = {}
  # The configured weights, passed through #prepare first.
  @weights       = prepare(weights)
end
|
|
13
|
+
|
|
14
|
+
# Get the category indexes for the given bonuses.
|
|
15
|
+
#
|
|
16
|
+
def prepare(weights)
  # Currently a pass-through: the given weights are returned untouched.
  weights
end
|
|
19
|
+
|
|
20
|
+
# Get the weight of an allocation.
|
|
21
|
+
#
|
|
22
|
+
# TODO Add a block to evaluate?
|
|
23
|
+
#
|
|
24
|
+
def weight_for clustered
  # Anything without a configured weight counts as 0.
  configured = @weights[clustered]
  configured ? configured : 0
end
|
|
27
|
+
|
|
28
|
+
# Returns an energy term E for allocation. this turns into a probability
|
|
29
|
+
# by P(allocation) = 1/Z * exp (-1/T * E(allocation)),
|
|
30
|
+
# where Z is the normalizing partition function
|
|
31
|
+
# sum_allocations exp(-1/T *E(allocation)), and T is a temperature constant.
|
|
32
|
+
# If T is high the distribution will be close to equally distributed.
|
|
33
|
+
# If T is low, the distribution will be the indicator function
|
|
34
|
+
# for min (E(allocation))…
|
|
35
|
+
#
|
|
36
|
+
# ...
|
|
37
|
+
#
|
|
38
|
+
# Just kidding. It's far more complicated than that. Ha ha ha ha ;)
|
|
39
|
+
#
|
|
40
|
+
include Helpers::Cache
|
|
41
|
+
def score combinations
  # Scores the given combinations: looks up the weight configured for
  # the sequence of their category names (adjacent duplicates removed
  # by clustered_uniq).
  #
  # TODO Rewrite to use the category
  #
  categories = combinations.map { |combination| combination.bundle.category }.clustered_uniq

  # TODO Caching even necessary?
  #
  # The names are derived with a non-mutating map: the categories array
  # has just been handed to cached as the key, so changing its contents
  # inside the block would change the key while it is in use.
  # NOTE(review): presumably Helpers::Cache#cached stores the block's
  # result under the given key object — confirm against that helper.
  #
  cached @weights_cache, categories do
    weight_for categories.map(&:name)
  end
end
|
|
53
|
+
|
|
54
|
+
end
|
|
55
|
+
end
|
|
@@ -0,0 +1,37 @@
|
|
|
1
|
+
# Simple Rack Middleware to kill Unicorns after X requests.
|
|
2
|
+
#
|
|
3
|
+
# Use as follows in e.g. your rackup File:
|
|
4
|
+
#
|
|
5
|
+
# Rack::Harakiri.after = 50
|
|
6
|
+
# use Rack::Harakiri
|
|
7
|
+
#
|
|
8
|
+
module Rack
  class Harakiri

    # The number of requests a process may see before it is sent QUIT.
    # When unset, 50 is used.
    #
    cattr_accessor :after

    # Wraps the given app and starts counting requests at zero.
    #
    def initialize app
      @app                 = app
      @requests            = 0
      @quit_after_requests = @@after || 50
    end

    # Counts the request (possibly signalling the process to quit),
    # then hands the request on to the wrapped app.
    #
    def call env
      harakiri
      @app.call env
    end

    # Checks to see if it is time to honorably retire.
    #
    # If yes, sends QUIT to its own process
    # (Unicorn will answer the request, honorably).
    #
    def harakiri
      @requests += 1
      return unless @requests > @quit_after_requests
      Process.kill :QUIT, Process.pid
    end

  end
end
|
|
@@ -0,0 +1,103 @@
|
|
|
1
|
+
module Results

  # This is the internal results object. Usually, to_marshal, or to_json is called on it.
  #
  class Base

    # Duration is set by the query.
    #
    attr_writer :duration
    attr_reader :allocations, :offset

    # Takes instances of Query::Allocations as param.
    # Without a param, a new Query::Allocations is used.
    #
    def initialize allocations = nil
      @allocations = allocations || Query::Allocations.new
      @offset      = 0
    end

    # Remembers additional results to be merged into the serialized hash.
    # Returns the results object itself.
    #
    def add more_results
      @added = more_results
      self
    end

    # The additional results, or an empty hash if none were added.
    #
    def added
      @added || {}
    end

    # The hash that the output formats are generated from: the added
    # results, overlaid with allocations, offset, duration and total.
    #
    def serialize
      core = {
        :allocations => allocations.to_result,
        :offset      => offset,
        :duration    => duration,
        :total       => total
      }
      added.merge core
    end

    # Convert to marshal format.
    #
    def to_marshal
      Marshal.dump serialize
    end

    # Convert to json format.
    #
    def to_json options = {}
      serialize.to_json options
    end

    # This starts the actual processing.
    #
    # Without this, the allocations are not processed,
    # and no ids are calculated.
    #
    def prepare! offset = 0
      @offset = offset
      allocations.process! max_results, offset
    end

    # Duration default is 0.
    #
    def duration
      @duration || 0
    end

    # The total results. Delegates to the allocations (0 if they report none).
    #
    # Caches.
    #
    def total
      @total ||= (allocations.total || 0)
    end

    # How many results are returned.
    #
    # Set in config using
    #   Results::Full.max_results = 20
    #
    class_inheritable_accessor :max_results
    def max_results
      self.class.max_results
    end

    # Convenience methods.
    #

    # Delegates to allocations.
    #
    def ids amount = 20
      allocations.ids amount
    end

    # Gets an amount of random ids from the allocations.
    #
    # Note: Basically delegates to the allocations.
    #
    def random_ids amount = 1
      allocations.random_ids amount
    end

    # Human readable log: timestamp, duration, query, total, offset and
    # number of allocations as pipe separated, fixed width columns.
    #
    def to_log query
      columns = [
        Time.now.to_s(:db),
        '%8f' % duration,
        '%-50s' % query,
        '%8d' % total,
        '%4d' % offset,
        '%2d' % allocations.size
      ]
      "|#{columns.join('|')}|"
    end

  end

end
|
|
@@ -0,0 +1,19 @@
|
|
|
1
|
+
module Results
  # Full results are limited to maximally 20 results (by default).
  #
  class Full < Base

    self.max_results = 20

    # The base log line, marked with a leading ">".
    #
    def to_log *args
      '>' + super
    end

    # The default response style for full results is to_marshal.
    #
    def to_response
      to_marshal
    end

  end
end
|
|
@@ -0,0 +1,19 @@
|
|
|
1
|
+
module Results
  # Live results are not returning any results.
  #
  class Live < Base

    self.max_results = 0

    # The base log line, marked with a leading ".".
    #
    def to_log *args
      '.' + super
    end

    # The default response style for live results is to_json.
    #
    def to_response
      to_json
    end

  end
end
|
|
@@ -0,0 +1,165 @@
|
|
|
1
|
+
require 'rack/mount'
|
|
2
|
+
|
|
3
|
+
#
|
|
4
|
+
#
|
|
5
|
+
class Routing

  @@defaults = {
    :query_key    => 'query'.freeze,
    :offset_key   => 'offset'.freeze,
    :content_type => 'application/octet-stream'.freeze
  }

  # Each routing starts out with its own copy of the class wide defaults.
  #
  def initialize
    @defaults = @@defaults.dup
  end

  # #
  # #
  # def define_using &block
  #   reset_routes
  #   instance_eval &block
  #   routes.freeze
  # end

  # Replaces the current routes with a fresh, empty route set.
  #
  def reset_routes
    @routes = Rack::Mount::RouteSet.new
  end

  # The route set, created on first access.
  #
  def routes
    @routes || reset_routes
  end

  # Freezes the route set (not the routing object itself).
  #
  def freeze
    routes.freeze
  end

  # Routing simply delegates to the route set to handle a request.
  #
  def call env
    routes.call env
  end

  # Set the defaults.
  #
  # Options are:
  #  * :query_key    => :query  # default
  #  * :offset_key   => :offset # default
  #
  #  * :tokenizer    => Tokenizers::Query.new # default
  #  * :content_type => the Content-Type of the responses
  #
  # Only the given options are changed. Returns the resulting defaults.
  #
  def defaults options = {}
    @defaults[:query_key]    = options[:query_key].to_s  if options[:query_key]
    @defaults[:offset_key]   = options[:offset_key].to_s if options[:offset_key]

    @defaults[:tokenizer]    = options[:tokenizer]       if options[:tokenizer]
    @defaults[:content_type] = options[:content_type]    if options[:content_type]

    @defaults
  end

  # Adds a route for the url that answers using the given query.
  #
  # If a default tokenizer is set, it is installed on the query.
  #
  def route url, query, route_options = {}
    query.tokenizer = @defaults[:tokenizer] if @defaults[:tokenizer]
    routes.add_route generate_app(query, route_options), default_options(url, route_options)
  end

  # Routes the url to a live query on the given indexes.
  # A trailing hash is taken as the route options.
  #
  def live url, *indexes_and_options
    route_options = Hash === indexes_and_options.last ? indexes_and_options.pop : {}
    route url, Query::Live.new(*indexes_and_options), route_options
  end

  # Routes the url to a full query on the given indexes.
  # A trailing hash is taken as the route options.
  #
  def full url, *indexes_and_options
    route_options = Hash === indexes_and_options.last ? indexes_and_options.pop : {}
    route url, Query::Full.new(*indexes_and_options), route_options
  end

  # Answers requests to the root path with the given status.
  #
  def root status
    answer %r{^/$}, STATUSES[status]
  end

  # Answers all requests, whatever their path, with the given status.
  #
  def default status
    answer nil, STATUSES[status]
  end

  # TODO Can Rack handle this for me?
  #
  # Note: Rack-mount already handles the 404.
  #
  STATUSES = {
    200 => lambda { |_| [200, { 'Content-Type' => 'text/html', 'Content-Length' => '0' }, ['']] },
    404 => lambda { |_| [404, { 'Content-Type' => 'text/html', 'Content-Length' => '0' }, ['']] }
  }

  # Generates the route conditions for the given url and route options.
  #
  # Requests are restricted to GET unless the route options say otherwise.
  # A String url is turned into a regexp. A :query option is turned into
  # a :query_string regexp condition.
  #
  def default_options url, route_options = {}
    url = normalized url

    options = { :request_method => 'GET' }.merge route_options

    options[:path_info] = url if url

    # The content type is used for the response, it is not a route condition.
    #
    options.delete :content_type # TODO

    query_params = options.delete :query
    options[:query_string] = %r{#{generate_query_string(query_params)}} if query_params

    options
  end

  # Generates a "key=value" string from a hash with exactly one entry.
  #
  # Raises if the hash is empty or has more than one entry.
  #
  def generate_query_string query_params
    raise "At least one query string condition is needed." if query_params.size.zero?
    raise "Too many query param conditions (only 1 allowed): #{query_params}" if query_params.size > 1
    k, v = query_params.first
    "#{k}=#{v}"
  end

  # Generates a rack app for the given query.
  #
  # The app runs the query with the text and offset taken from the
  # request params, logs the results, and responds with them.
  #
  def generate_app query, options = {}
    query_key    = options[:query_key]    || @defaults[:query_key]
    content_type = options[:content_type] || @defaults[:content_type]
    lambda do |env|
      params = Rack::Request.new(env).params

      results = query.search_with_text(*extracted(params))

      PickyLog.log results.to_log(params[query_key]) # TODO Save the original query in the results object.

      respond_with results.to_response, content_type
    end
  end

  UTF8_STRING = 'UTF-8'.freeze

  # Extracts [query text, offset] from the request params.
  #
  # The query text is tagged as UTF-8 (nil if missing),
  # the offset is an integer (0 if missing).
  #
  def extracted params
    text   = params[@defaults[:query_key]]
    offset = params[@defaults[:offset_key]]
    [
      # query is encoded in ASCII
      #
      text && text.force_encoding(UTF8_STRING),
      offset && offset.to_i || 0
    ]
  end

  # Generates a 200 rack response for the given response string.
  #
  # Content-Length is given in bytes, not in characters: the two
  # differ as soon as the response contains multibyte characters.
  #
  def respond_with response, content_type
    [200, { 'Content-Type' => content_type, 'Content-Length' => response.bytesize.to_s }, [response]]
  end

  # Setup a route that answers using the given app.
  #
  # Without an app, the empty 200 response is used.
  #
  def answer url = nil, app = nil
    routes.add_route((app || STATUSES[200]), default_options(url))
  end

  # Turns a String url into a regexp, leaves anything else as it is.
  #
  def normalized url
    String === url ? %r{#{url}} : url
  end

end
|