picky 0.3.0 → 0.9.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/lib/picky/application.rb +2 -2
- data/lib/picky/cacher/partial/default.rb +1 -1
- data/lib/picky/configuration/field.rb +8 -10
- data/lib/picky/configuration/indexes.rb +6 -6
- data/lib/picky/configuration/queries.rb +4 -3
- data/lib/picky/cores.rb +2 -2
- data/lib/picky/extensions/array.rb +2 -12
- data/lib/picky/generator.rb +27 -4
- data/lib/picky/index/bundle.rb +5 -41
- data/lib/picky/index/bundle_checker.rb +58 -0
- data/lib/picky/index/type.rb +4 -1
- data/lib/picky/index/wrappers/exact_first.rb +57 -0
- data/lib/picky/indexes.rb +12 -19
- data/lib/picky/loader.rb +7 -8
- data/lib/picky/query/allocation.rb +1 -1
- data/lib/picky/query/combinations.rb +9 -6
- data/lib/picky/query/combinator.rb +11 -5
- data/lib/picky/rack/harakiri.rb +1 -1
- data/lib/picky/results/base.rb +4 -12
- data/lib/picky/results/live.rb +0 -6
- data/lib/picky/routing.rb +17 -17
- data/lib/picky/sources/csv.rb +1 -2
- data/lib/picky/sources/db.rb +0 -1
- data/lib/picky/sources/delicious.rb +41 -0
- data/lib/picky/tokenizers/base.rb +52 -43
- data/lib/picky/tokenizers/default/index.rb +7 -0
- data/lib/picky/tokenizers/default/query.rb +7 -0
- data/lib/picky/tokenizers/index.rb +0 -9
- data/lib/picky/tokenizers/query.rb +0 -9
- data/lib/tasks/application.rake +1 -1
- data/lib/tasks/cache.rake +41 -48
- data/lib/tasks/framework.rake +1 -1
- data/lib/tasks/index.rake +22 -12
- data/lib/tasks/server.rake +3 -3
- data/lib/tasks/shortcuts.rake +9 -2
- data/lib/tasks/statistics.rake +8 -8
- data/lib/tasks/try.rake +4 -2
- data/project_prototype/Gemfile +1 -1
- data/project_prototype/app/application.rb +7 -3
- data/spec/lib/cacher/partial/default_spec.rb +1 -1
- data/spec/lib/cacher/partial/none_spec.rb +12 -0
- data/spec/lib/cacher/partial/subtoken_spec.rb +29 -1
- data/spec/lib/configuration/field_spec.rb +162 -3
- data/spec/lib/configuration/indexes_spec.rb +150 -0
- data/spec/lib/cores_spec.rb +43 -0
- data/spec/lib/extensions/module_spec.rb +27 -16
- data/spec/lib/generator_spec.rb +3 -3
- data/spec/lib/index/bundle_checker_spec.rb +67 -0
- data/spec/lib/index/bundle_spec.rb +0 -50
- data/spec/lib/index/type_spec.rb +47 -0
- data/spec/lib/index/wrappers/exact_first_spec.rb +95 -0
- data/spec/lib/indexers/base_spec.rb +18 -2
- data/spec/lib/loader_spec.rb +21 -1
- data/spec/lib/query/allocation_spec.rb +25 -0
- data/spec/lib/query/base_spec.rb +37 -0
- data/spec/lib/query/combination_spec.rb +10 -1
- data/spec/lib/query/combinations_spec.rb +82 -3
- data/spec/lib/query/combinator_spec.rb +45 -0
- data/spec/lib/query/token_spec.rb +24 -0
- data/spec/lib/rack/harakiri_spec.rb +28 -0
- data/spec/lib/results/base_spec.rb +24 -0
- data/spec/lib/results/live_spec.rb +15 -0
- data/spec/lib/routing_spec.rb +5 -0
- data/spec/lib/sources/db_spec.rb +31 -1
- data/spec/lib/sources/delicious_spec.rb +75 -0
- data/spec/lib/tokenizers/base_spec.rb +160 -49
- data/spec/lib/tokenizers/default/index_spec.rb +11 -0
- data/spec/lib/tokenizers/default/query_spec.rb +11 -0
- metadata +26 -5
- data/lib/picky/index/combined.rb +0 -45
- data/lib/picky/tokenizers/default.rb +0 -3
data/lib/picky/application.rb
CHANGED
@@ -34,7 +34,7 @@ class Application
|
|
34
34
|
#
|
35
35
|
delegate :route, :root, :to => :routing
|
36
36
|
|
37
|
-
#
|
37
|
+
# TODO Rename to default_indexing?
|
38
38
|
#
|
39
39
|
def indexing
|
40
40
|
@indexing ||= Configuration::Indexes.new
|
@@ -44,7 +44,7 @@ class Application
|
|
44
44
|
end
|
45
45
|
delegate :type, :field, :to => :indexing
|
46
46
|
|
47
|
-
#
|
47
|
+
# TODO Rename to default_querying?
|
48
48
|
#
|
49
49
|
def querying
|
50
50
|
@queries ||= Configuration::Queries.new
|
@@ -4,19 +4,20 @@ module Configuration
|
|
4
4
|
# (title is a category of a books index, for example).
|
5
5
|
#
|
6
6
|
class Field
|
7
|
-
attr_reader :name, :indexed_name, :virtual
|
7
|
+
attr_reader :name, :indexed_name, :virtual, :tokenizer
|
8
8
|
attr_accessor :type # convenience
|
9
|
-
def initialize name, options = {}
|
9
|
+
def initialize name, tokenizer, options = {}
|
10
10
|
@name = name.to_sym
|
11
|
+
@tokenizer = tokenizer
|
11
12
|
|
12
13
|
# TODO Dup the options?
|
13
|
-
|
14
|
+
|
15
|
+
@source = options.delete :source
|
14
16
|
|
15
17
|
@indexer_class = options.delete(:indexer) || Indexers::Default
|
16
|
-
@tokenizer_class = options.delete(:tokenizer) || Tokenizers::Index # Default
|
17
18
|
|
18
|
-
@indexed_name
|
19
|
-
@virtual
|
19
|
+
@indexed_name = options.delete(:indexed_field) || name # TODO Rename to indexed_as?
|
20
|
+
@virtual = options.delete(:virtual) || false
|
20
21
|
|
21
22
|
qualifiers = generate_qualifiers_from options
|
22
23
|
Query::Qualifiers.add(name, qualifiers) if qualifiers
|
@@ -48,7 +49,7 @@ module Configuration
|
|
48
49
|
File.join search_index_root, PICKY_ENVIRONMENT, type.name.to_s
|
49
50
|
end
|
50
51
|
def search_index_file_name
|
51
|
-
File.join cache_directory, "#{
|
52
|
+
File.join cache_directory, "prepared_#{name}_index.txt"
|
52
53
|
end
|
53
54
|
def index
|
54
55
|
prepare_cache_directory
|
@@ -64,9 +65,6 @@ module Configuration
|
|
64
65
|
def indexer
|
65
66
|
@indexer || @indexer = @indexer_class.new(type, self)
|
66
67
|
end
|
67
|
-
def tokenizer
|
68
|
-
@tokenizer || @tokenizer = @tokenizer_class.new
|
69
|
-
end
|
70
68
|
def virtual?
|
71
69
|
!!virtual
|
72
70
|
end
|
@@ -10,15 +10,13 @@ module Configuration
|
|
10
10
|
@types = []
|
11
11
|
end
|
12
12
|
|
13
|
-
|
14
|
-
|
15
|
-
def default_index
|
16
|
-
Tokenizers::Index
|
13
|
+
def default_tokenizer
|
14
|
+
@default_tokenizer ||= Tokenizers::Default::Index
|
17
15
|
end
|
18
16
|
|
19
17
|
# Delegates
|
20
18
|
#
|
21
|
-
delegate :removes_characters, :contracts_expressions, :stopwords, :splits_text_on, :normalizes_words, :removes_characters_after_splitting, :to => :
|
19
|
+
delegate :removes_characters, :contracts_expressions, :stopwords, :splits_text_on, :normalizes_words, :removes_characters_after_splitting, :to => :default_tokenizer
|
22
20
|
|
23
21
|
# TODO Rewrite all this configuration handling.
|
24
22
|
#
|
@@ -32,7 +30,9 @@ module Configuration
|
|
32
30
|
generated
|
33
31
|
end
|
34
32
|
def field name, options = {}
|
35
|
-
|
33
|
+
tokenizer = options[:tokenizer] || default_tokenizer
|
34
|
+
|
35
|
+
Field.new name, tokenizer, options
|
36
36
|
end
|
37
37
|
|
38
38
|
#
|
@@ -6,10 +6,11 @@ module Configuration
|
|
6
6
|
|
7
7
|
#
|
8
8
|
#
|
9
|
-
def
|
10
|
-
Tokenizers::Query
|
9
|
+
def default_tokenizer
|
10
|
+
@default_tokenizer ||= Tokenizers::Default::Query
|
11
11
|
end
|
12
|
-
|
12
|
+
|
13
|
+
delegate :removes_characters, :contracts_expressions, :stopwords, :splits_text_on, :normalizes_words, :removes_characters_after_splitting, :to => :default_tokenizer
|
13
14
|
|
14
15
|
# Delegates.
|
15
16
|
#
|
data/lib/picky/cores.rb
CHANGED
@@ -25,8 +25,7 @@ class Cores
|
|
25
25
|
|
26
26
|
#
|
27
27
|
#
|
28
|
-
|
29
|
-
|
28
|
+
loop do
|
30
29
|
# Ramp it up to num processors.
|
31
30
|
#
|
32
31
|
while currently_processing < max
|
@@ -42,6 +41,7 @@ class Cores
|
|
42
41
|
break unless element
|
43
42
|
|
44
43
|
Process.fork do
|
44
|
+
sleep 0.01*currently_processing
|
45
45
|
yield element
|
46
46
|
end
|
47
47
|
|
@@ -13,20 +13,10 @@ class Array
|
|
13
13
|
end
|
14
14
|
end
|
15
15
|
|
16
|
-
#
|
17
|
-
#
|
18
|
-
def map_with_index!
|
19
|
-
each_with_index do |element, index| self[index] = yield(element, index); end
|
20
|
-
end
|
21
|
-
|
22
|
-
#
|
23
|
-
#
|
24
|
-
def map_with_index &block
|
25
|
-
dup.map_with_index! &block
|
26
|
-
end
|
27
|
-
|
28
16
|
# Accesses a random element of this array.
|
29
17
|
#
|
18
|
+
# TODO Remove?
|
19
|
+
#
|
30
20
|
def random
|
31
21
|
self[Kernel.rand(self.length)]
|
32
22
|
end
|
data/lib/picky/generator.rb
CHANGED
@@ -8,7 +8,29 @@ module Picky
|
|
8
8
|
# picky <command> <options>
|
9
9
|
# is found.
|
10
10
|
#
|
11
|
-
class
|
11
|
+
class NoGeneratorError < StandardError
|
12
|
+
|
13
|
+
def initialize generator
|
14
|
+
super usage + possible_commands(generator.types)
|
15
|
+
end
|
16
|
+
|
17
|
+
def usage
|
18
|
+
"\nUsage:\n" +
|
19
|
+
"picky <command> <params>\n" +
|
20
|
+
?\n
|
21
|
+
end
|
22
|
+
|
23
|
+
def possible_commands types
|
24
|
+
"Possible commands:\n" +
|
25
|
+
types.map do |name, klass_params|
|
26
|
+
result = "picky #{name}"
|
27
|
+
_, params = *klass_params
|
28
|
+
result << ' ' << [*params].map { |param| "<#{param}>" }.join(' ') if params
|
29
|
+
result
|
30
|
+
end.join(?\n) + ?\n
|
31
|
+
end
|
32
|
+
|
33
|
+
end
|
12
34
|
|
13
35
|
# This is a very simple project generator.
|
14
36
|
# Not at all like Padrino's or Rails'.
|
@@ -22,7 +44,7 @@ module Picky
|
|
22
44
|
|
23
45
|
def initialize
|
24
46
|
@types = {
|
25
|
-
:project => Project
|
47
|
+
:project => [Project, :project_name]
|
26
48
|
}
|
27
49
|
end
|
28
50
|
|
@@ -38,8 +60,9 @@ module Picky
|
|
38
60
|
#
|
39
61
|
#
|
40
62
|
def generator_for identifier, *args
|
41
|
-
|
42
|
-
raise
|
63
|
+
generator_info = types[identifier.to_sym]
|
64
|
+
raise NoGeneratorError.new(self) unless generator_info
|
65
|
+
generator_class = generator_info.first
|
43
66
|
generator_for_class generator_class, identifier, *args
|
44
67
|
end
|
45
68
|
|
data/lib/picky/index/bundle.rb
CHANGED
@@ -8,11 +8,13 @@ module Index
|
|
8
8
|
#
|
9
9
|
class Bundle
|
10
10
|
|
11
|
+
attr_reader :checker
|
11
12
|
attr_reader :name, :category, :type
|
12
13
|
attr_accessor :index, :weights, :similarity
|
13
14
|
attr_accessor :partial_strategy, :weights_strategy, :similarity_strategy
|
14
15
|
|
15
16
|
delegate :[], :[]=, :clear, :to => :index
|
17
|
+
delegate :raise_unless_cache_exists, :to => :checker
|
16
18
|
|
17
19
|
# Path is in which directory the cache is located.
|
18
20
|
#
|
@@ -28,6 +30,8 @@ module Index
|
|
28
30
|
@partial_strategy = partial_strategy
|
29
31
|
@weights_strategy = weights_strategy
|
30
32
|
@similarity_strategy = similarity_strategy
|
33
|
+
|
34
|
+
@checker = BundleChecker.new self
|
31
35
|
end
|
32
36
|
|
33
37
|
# Get the ids for the text.
|
@@ -59,46 +63,6 @@ module Index
|
|
59
63
|
File.join PICKY_ROOT, 'index'
|
60
64
|
# category.search_index_root
|
61
65
|
end
|
62
|
-
|
63
|
-
def size_of path
|
64
|
-
`ls -l #{path} | awk '{print $5}'`.to_i
|
65
|
-
end
|
66
|
-
# Check if the cache files are there and do not have size 0.
|
67
|
-
#
|
68
|
-
def caches_ok?
|
69
|
-
cache_ok?(index_cache_path) &&
|
70
|
-
cache_ok?(similarity_cache_path) &&
|
71
|
-
cache_ok?(weights_cache_path)
|
72
|
-
end
|
73
|
-
# Is the cache ok? I.e. larger than four in size.
|
74
|
-
#
|
75
|
-
def cache_ok? path
|
76
|
-
size_of(path) > 0
|
77
|
-
end
|
78
|
-
# Raises an appropriate error message.
|
79
|
-
#
|
80
|
-
def raise_cache_missing what
|
81
|
-
raise "#{what} cache for #{identifier} missing."
|
82
|
-
end
|
83
|
-
# Is the cache small?
|
84
|
-
#
|
85
|
-
def cache_small? path
|
86
|
-
size_of(path) < 16
|
87
|
-
end
|
88
|
-
def warn_cache_small what
|
89
|
-
puts "#{what} cache for #{identifier} smaller than 16 bytes."
|
90
|
-
end
|
91
|
-
# Check all index files and raise if necessary.
|
92
|
-
#
|
93
|
-
def raise_unless_cache_exists
|
94
|
-
warn_cache_small :index if cache_small?(index_cache_path)
|
95
|
-
# warn_cache_small :similarity if cache_small?(similarity_cache_path)
|
96
|
-
warn_cache_small :weights if cache_small?(weights_cache_path)
|
97
|
-
|
98
|
-
raise_cache_missing :index unless cache_ok?(index_cache_path)
|
99
|
-
raise_cache_missing :similarity unless cache_ok?(similarity_cache_path)
|
100
|
-
raise_cache_missing :weights unless cache_ok?(weights_cache_path)
|
101
|
-
end
|
102
66
|
|
103
67
|
# Copies the indexes to the "backup" directory.
|
104
68
|
#
|
@@ -257,7 +221,7 @@ module Index
|
|
257
221
|
# TODO Use config object?
|
258
222
|
#
|
259
223
|
def search_index_file_name
|
260
|
-
File.join cache_directory, "#{
|
224
|
+
File.join cache_directory, "prepared_#{category.name}_index.txt"
|
261
225
|
end
|
262
226
|
|
263
227
|
# Generators.
|
@@ -0,0 +1,58 @@
|
|
1
|
+
# encoding: utf-8
|
2
|
+
#
|
3
|
+
module Index
|
4
|
+
|
5
|
+
# Checks bundle indexes.
|
6
|
+
#
|
7
|
+
class BundleChecker
|
8
|
+
|
9
|
+
attr_reader :bundle
|
10
|
+
|
11
|
+
def initialize bundle
|
12
|
+
@bundle = bundle
|
13
|
+
end
|
14
|
+
|
15
|
+
# Check all index files and raise if necessary.
|
16
|
+
#
|
17
|
+
def raise_unless_cache_exists
|
18
|
+
warn_cache_small :index if cache_small?(bundle.index_cache_path)
|
19
|
+
# warn_cache_small :similarity if cache_small?(similarity_cache_path)
|
20
|
+
warn_cache_small :weights if cache_small?(bundle.weights_cache_path)
|
21
|
+
|
22
|
+
raise_cache_missing :index unless cache_ok?(bundle.index_cache_path)
|
23
|
+
raise_cache_missing :similarity unless cache_ok?(bundle.similarity_cache_path)
|
24
|
+
raise_cache_missing :weights unless cache_ok?(bundle.weights_cache_path)
|
25
|
+
end
|
26
|
+
|
27
|
+
def size_of path
|
28
|
+
`ls -l #{path} | awk '{print $5}'`.to_i
|
29
|
+
end
|
30
|
+
# Check if the cache files are there and do not have size 0.
|
31
|
+
#
|
32
|
+
def caches_ok?
|
33
|
+
cache_ok?(bundle.index_cache_path) &&
|
34
|
+
cache_ok?(bundle.similarity_cache_path) &&
|
35
|
+
cache_ok?(bundle.weights_cache_path)
|
36
|
+
end
|
37
|
+
# Is the cache ok? I.e. larger than four in size.
|
38
|
+
#
|
39
|
+
def cache_ok? path
|
40
|
+
size_of(path) > 0
|
41
|
+
end
|
42
|
+
# Raises an appropriate error message.
|
43
|
+
#
|
44
|
+
def raise_cache_missing what
|
45
|
+
raise "#{what} cache for #{bundle.identifier} missing."
|
46
|
+
end
|
47
|
+
# Is the cache small?
|
48
|
+
#
|
49
|
+
def cache_small? path
|
50
|
+
size_of(path) < 16
|
51
|
+
end
|
52
|
+
def warn_cache_small what
|
53
|
+
puts "#{what} cache for #{bundle.identifier} smaller than 16 bytes."
|
54
|
+
end
|
55
|
+
|
56
|
+
end
|
57
|
+
|
58
|
+
end
|
data/lib/picky/index/type.rb
CHANGED
@@ -16,7 +16,10 @@ module Index
|
|
16
16
|
@name = name
|
17
17
|
@result_type = result_type # TODO Move.
|
18
18
|
@categories = categories # for each_delegate
|
19
|
-
@combinator =
|
19
|
+
@combinator = combinator_for categories, ignore_unassigned_tokens
|
20
|
+
end
|
21
|
+
def combinator_for categories, ignore_unassigned_tokens
|
22
|
+
Query::Combinator.new @categories, :ignore_unassigned_tokens => ignore_unassigned_tokens
|
20
23
|
end
|
21
24
|
|
22
25
|
#
|
@@ -0,0 +1,57 @@
|
|
1
|
+
# encoding: utf-8
|
2
|
+
#
|
3
|
+
module Index
|
4
|
+
|
5
|
+
module Wrappers
|
6
|
+
|
7
|
+
# This index combines an exact and partial index.
|
8
|
+
# It serves to order the results such that exact hits are found first.
|
9
|
+
#
|
10
|
+
# TODO Need to use the right subtokens. Bake in?
|
11
|
+
#
|
12
|
+
class ExactFirst < Bundle
|
13
|
+
|
14
|
+
delegate :similar,
|
15
|
+
:identifier,
|
16
|
+
:name,
|
17
|
+
:to => :@exact
|
18
|
+
delegate :type,
|
19
|
+
:category,
|
20
|
+
:weight,
|
21
|
+
:generate_partial_from,
|
22
|
+
:generate_caches_from_memory,
|
23
|
+
:generate_derived,
|
24
|
+
:dump,
|
25
|
+
:load,
|
26
|
+
:to => :@partial
|
27
|
+
|
28
|
+
def initialize category
|
29
|
+
@exact = category.exact
|
30
|
+
@partial = category.partial
|
31
|
+
end
|
32
|
+
|
33
|
+
def self.wrap type_or_category
|
34
|
+
if type_or_category.respond_to? :categories
|
35
|
+
wrap_each_of type_or_category.categories
|
36
|
+
type_or_category
|
37
|
+
else
|
38
|
+
new type_or_category
|
39
|
+
end
|
40
|
+
end
|
41
|
+
def self.wrap_each_of categories
|
42
|
+
categories.collect! { |category| new(category) }
|
43
|
+
end
|
44
|
+
|
45
|
+
def ids text
|
46
|
+
@exact.ids(text) + @partial.ids(text)
|
47
|
+
end
|
48
|
+
|
49
|
+
def weight text
|
50
|
+
[@exact.weight(text) || 0, @partial.weight(text) || 0].max
|
51
|
+
end
|
52
|
+
|
53
|
+
end
|
54
|
+
|
55
|
+
end
|
56
|
+
|
57
|
+
end
|
data/lib/picky/indexes.rb
CHANGED
@@ -11,16 +11,17 @@ module Indexes
|
|
11
11
|
end
|
12
12
|
# Runs the indexers in parallel (index + cache).
|
13
13
|
#
|
14
|
-
def self.index
|
14
|
+
def self.index randomly = true
|
15
15
|
Indexes.take_snapshot
|
16
16
|
|
17
17
|
# Run in parallel.
|
18
18
|
#
|
19
|
-
|
20
|
-
|
21
|
-
|
22
|
-
|
23
|
-
|
19
|
+
# TODO Make option to also use non-random.
|
20
|
+
# rake index:random (default)
|
21
|
+
# rake index:ordered
|
22
|
+
#
|
23
|
+
timed_exclaim "INDEXING USING #{Cores.max_processors} PROCESSORS, IN #{randomly ? 'RANDOM' : 'GIVEN'} ORDER."
|
24
|
+
Cores.forked self.fields, { :randomly => randomly } do |field, cores|
|
24
25
|
field.index
|
25
26
|
field.cache
|
26
27
|
end
|
@@ -30,14 +31,6 @@ module Indexes
|
|
30
31
|
configuration.index_solr
|
31
32
|
end
|
32
33
|
|
33
|
-
# TODO Push into configuration.
|
34
|
-
#
|
35
|
-
def self.connect_backends
|
36
|
-
configuration.types.each do |type|
|
37
|
-
type.connect_backend
|
38
|
-
end
|
39
|
-
end
|
40
|
-
|
41
34
|
# Returns an array of fields.
|
42
35
|
#
|
43
36
|
# TODO Rewrite.
|
@@ -111,6 +104,8 @@ module Indexes
|
|
111
104
|
end
|
112
105
|
# Loads all indexes from the caches.
|
113
106
|
#
|
107
|
+
# TODO Rename load_indexes.
|
108
|
+
#
|
114
109
|
def self.load_from_cache
|
115
110
|
each &:load_from_cache
|
116
111
|
end
|
@@ -132,11 +127,9 @@ module Indexes
|
|
132
127
|
# Removes the cache files.
|
133
128
|
#
|
134
129
|
def self.clear_caches
|
135
|
-
|
136
|
-
|
137
|
-
|
138
|
-
category.partial.delete_all
|
139
|
-
end
|
130
|
+
each_bundle do |exact, partial|
|
131
|
+
exact.delete_all
|
132
|
+
partial.delete_all
|
140
133
|
end
|
141
134
|
end
|
142
135
|
|
data/lib/picky/loader.rb
CHANGED
@@ -161,7 +161,9 @@ module Loader
|
|
161
161
|
load_relative 'index/category'
|
162
162
|
load_relative 'index/type'
|
163
163
|
|
164
|
-
load_relative 'index/
|
164
|
+
load_relative 'index/bundle_checker'
|
165
|
+
|
166
|
+
load_relative 'index/wrappers/exact_first'
|
165
167
|
|
166
168
|
# Tokens.
|
167
169
|
#
|
@@ -173,7 +175,9 @@ module Loader
|
|
173
175
|
load_relative 'tokenizers/base'
|
174
176
|
load_relative 'tokenizers/index'
|
175
177
|
load_relative 'tokenizers/query'
|
176
|
-
|
178
|
+
|
179
|
+
load_relative 'tokenizers/default/index'
|
180
|
+
load_relative 'tokenizers/default/query'
|
177
181
|
|
178
182
|
# Query combinations, qualifiers, weigher.
|
179
183
|
#
|
@@ -208,6 +212,7 @@ module Loader
|
|
208
212
|
load_relative 'sources/base'
|
209
213
|
load_relative 'sources/db'
|
210
214
|
load_relative 'sources/csv'
|
215
|
+
load_relative 'sources/delicious'
|
211
216
|
|
212
217
|
# Indexes.
|
213
218
|
#
|
@@ -237,11 +242,5 @@ module Loader
|
|
237
242
|
#
|
238
243
|
load_relative 'generator'
|
239
244
|
end
|
240
|
-
|
241
|
-
# Silenceable puts.
|
242
|
-
#
|
243
|
-
def self.exclaim text
|
244
|
-
puts text
|
245
|
-
end
|
246
245
|
|
247
246
|
end
|
@@ -61,7 +61,7 @@ module Query
|
|
61
61
|
# Transform the allocation into result form.
|
62
62
|
#
|
63
63
|
def to_result
|
64
|
-
[self.result_type, self.score, count, @combinations.to_result, self.ids] if count = self.count > 0
|
64
|
+
[self.result_type, self.score, self.count, @combinations.to_result, self.ids] if count = self.count > 0
|
65
65
|
end
|
66
66
|
|
67
67
|
# Json representation of this allocation.
|
@@ -19,14 +19,17 @@ module Query
|
|
19
19
|
@combinations.hash
|
20
20
|
end
|
21
21
|
|
22
|
-
#
|
23
|
-
#
|
24
|
-
# TODO Rewrite.
|
22
|
+
# Uses user specific weights to calculate a score for the combinations.
|
25
23
|
#
|
26
24
|
def calculate_score weights
|
27
|
-
@score
|
28
|
-
@score
|
29
|
-
|
25
|
+
@score ||= sum_score
|
26
|
+
@score + add_score(weights) # TODO Ok to just cache the weights?
|
27
|
+
end
|
28
|
+
def sum_score
|
29
|
+
@combinations.sum &:weight
|
30
|
+
end
|
31
|
+
def add_score weights
|
32
|
+
weights.score @combinations
|
30
33
|
end
|
31
34
|
|
32
35
|
# Gets all ids for the allocations.
|
@@ -31,13 +31,19 @@ module Query
|
|
31
31
|
token.similar? ? similar_possible_for(token) : possible_for(token)
|
32
32
|
end
|
33
33
|
|
34
|
-
#
|
35
|
-
#
|
34
|
+
#
|
35
|
+
#
|
36
36
|
def similar_possible_for token
|
37
37
|
# Get as many similar tokens as necessary
|
38
38
|
#
|
39
|
+
tokens = similar_tokens_for token
|
40
|
+
# possible combinations
|
41
|
+
#
|
42
|
+
inject_possible_for tokens
|
43
|
+
end
|
44
|
+
def similar_tokens_for token
|
39
45
|
text = token.text
|
40
|
-
|
46
|
+
categories.inject([]) do |result, category|
|
41
47
|
next_token = token
|
42
48
|
# TODO adjust either this or the amount of similar in index
|
43
49
|
#
|
@@ -46,8 +52,8 @@ module Query
|
|
46
52
|
end
|
47
53
|
result
|
48
54
|
end
|
49
|
-
|
50
|
-
|
55
|
+
end
|
56
|
+
def inject_possible_for tokens
|
51
57
|
tokens.inject([]) do |result, token|
|
52
58
|
possible = possible_categories token
|
53
59
|
result + possible_for(token, possible)
|
data/lib/picky/rack/harakiri.rb
CHANGED
data/lib/picky/results/base.rb
CHANGED
@@ -17,21 +17,13 @@ module Results
|
|
17
17
|
@allocations = allocations || Query::Allocations.new
|
18
18
|
end
|
19
19
|
|
20
|
-
def add more_results
|
21
|
-
@added = more_results
|
22
|
-
self
|
23
|
-
end
|
24
|
-
def added
|
25
|
-
@added || {}
|
26
|
-
end
|
27
|
-
|
28
20
|
#
|
29
21
|
#
|
30
22
|
def serialize
|
31
|
-
|
32
|
-
|
33
|
-
|
34
|
-
|
23
|
+
{ :allocations => allocations.to_result,
|
24
|
+
:offset => offset,
|
25
|
+
:duration => duration,
|
26
|
+
:total => total }
|
35
27
|
end
|
36
28
|
# The default format is json.
|
37
29
|
#
|
data/lib/picky/results/live.rb
CHANGED
data/lib/picky/routing.rb
CHANGED
@@ -32,23 +32,23 @@ class Routing
|
|
32
32
|
routes.call env
|
33
33
|
end
|
34
34
|
|
35
|
-
# Set the defaults.
|
36
|
-
#
|
37
|
-
# Options are:
|
38
|
-
# * :query_key => :query # default
|
39
|
-
# * :offset_key => :offset # default
|
40
|
-
#
|
41
|
-
# * :tokenizer => Tokenizers::Query.new # default
|
42
|
-
#
|
43
|
-
def defaults options = {}
|
44
|
-
|
45
|
-
|
46
|
-
|
47
|
-
|
48
|
-
|
49
|
-
|
50
|
-
|
51
|
-
end
|
35
|
+
# # Set the defaults.
|
36
|
+
# #
|
37
|
+
# # Options are:
|
38
|
+
# # * :query_key => :query # default
|
39
|
+
# # * :offset_key => :offset # default
|
40
|
+
# #
|
41
|
+
# # * :tokenizer => Tokenizers::Query.new # default
|
42
|
+
# #
|
43
|
+
# def defaults options = {}
|
44
|
+
# @defaults[:query_key] = options[:query_key].to_s if options[:query_key]
|
45
|
+
# @defaults[:offset_key] = options[:offset_key].to_s if options[:offset_key]
|
46
|
+
#
|
47
|
+
# @defaults[:tokenizer] = options[:tokenizer] if options[:tokenizer]
|
48
|
+
# @defaults[:content_type] = options[:content_type] if options[:content_type]
|
49
|
+
#
|
50
|
+
# @defaults
|
51
|
+
# end
|
52
52
|
|
53
53
|
#
|
54
54
|
#
|