picky 0.0.9 → 0.1.0
Sign up to get free protection for your applications and to get access to all the features.
- data/lib/picky/application.rb +38 -37
- data/lib/picky/cacher/partial/default.rb +1 -3
- data/lib/picky/cacher/partial/subtoken.rb +44 -18
- data/lib/picky/configuration/field.rb +6 -2
- data/lib/picky/configuration/indexes.rb +16 -7
- data/lib/picky/configuration/queries.rb +3 -13
- data/lib/picky/extensions/symbol.rb +19 -4
- data/lib/picky/generator.rb +9 -0
- data/lib/picky/helpers/measuring.rb +3 -3
- data/lib/picky/index/bundle.rb +5 -4
- data/lib/picky/index/category.rb +14 -7
- data/lib/picky/index/combined.rb +6 -1
- data/lib/picky/indexers/no_source_specified_error.rb +2 -0
- data/lib/picky/indexes.rb +3 -9
- data/lib/picky/query/allocation.rb +1 -1
- data/lib/picky/query/allocations.rb +2 -2
- data/lib/picky/rack/harakiri.rb +10 -8
- data/lib/picky/routing.rb +19 -21
- data/lib/picky/solr/schema_generator.rb +4 -4
- data/lib/picky/sources/base.rb +16 -4
- data/lib/picky/sources/csv.rb +3 -0
- data/lib/picky/sources/db.rb +30 -22
- data/lib/picky/tokenizers/base.rb +7 -5
- data/lib/picky/tokenizers/index.rb +5 -5
- data/lib/picky/tokenizers/query.rb +9 -9
- data/prototype_project/app/application.rb +36 -29
- data/prototype_project/app/db.yml +1 -1
- data/prototype_project/config.ru +3 -2
- data/spec/ext/performant_spec.rb +2 -2
- data/spec/lib/application_spec.rb +54 -8
- data/spec/lib/cacher/partial/default_spec.rb +15 -0
- data/spec/lib/cacher/partial/subtoken_spec.rb +54 -2
- data/spec/lib/extensions/symbol_spec.rb +124 -30
- data/spec/lib/index/bundle_partial_generation_speed_spec.rb +1 -1
- data/spec/lib/query/allocations_spec.rb +5 -5
- data/spec/lib/query/combinations_spec.rb +3 -3
- data/spec/lib/rack/harakiri_spec.rb +29 -0
- data/spec/lib/routing_spec.rb +22 -98
- data/spec/lib/tokenizers/index_spec.rb +1 -1
- data/spec/specific/speed_spec.rb +4 -5
- metadata +7 -3
data/lib/picky/application.rb
CHANGED
@@ -1,40 +1,41 @@
|
|
1
|
+
# The Picky application wherein the indexing and querying is defined.
|
2
|
+
#
|
1
3
|
class Application
|
2
|
-
|
3
|
-
|
4
|
-
|
5
|
-
|
6
|
-
|
4
|
+
class << self
|
5
|
+
|
6
|
+
# An application simply delegates to the routing to handle a request.
|
7
|
+
#
|
8
|
+
def call env
|
9
|
+
routing.call env
|
10
|
+
end
|
11
|
+
|
12
|
+
# Freezes the routes.
|
13
|
+
#
|
14
|
+
def finalize
|
15
|
+
routing.freeze
|
16
|
+
end
|
17
|
+
def routing
|
18
|
+
@routing ||= Routing.new
|
19
|
+
end
|
20
|
+
# Routes.
|
21
|
+
#
|
22
|
+
delegate :route, :root, :to => :routing
|
23
|
+
|
24
|
+
#
|
25
|
+
#
|
26
|
+
def indexing
|
27
|
+
@indexing ||= Configuration::Indexes.new
|
28
|
+
end
|
29
|
+
def index *args
|
30
|
+
self.type *args
|
31
|
+
end
|
32
|
+
delegate :type, :field, :to => :indexing
|
33
|
+
|
34
|
+
#
|
35
|
+
#
|
36
|
+
def querying
|
37
|
+
@queries ||= Configuration::Queries.new
|
38
|
+
end
|
39
|
+
|
7
40
|
end
|
8
|
-
def self.call env
|
9
|
-
routing.call env
|
10
|
-
end
|
11
|
-
|
12
|
-
#
|
13
|
-
#
|
14
|
-
def self.indexes &block
|
15
|
-
indexes_configuration.instance_eval &block
|
16
|
-
# TODO Uglyyyyyy.
|
17
|
-
::Indexes.configuration = indexes_configuration
|
18
|
-
::Indexes.setup # TODO Think about setup/reload.
|
19
|
-
end
|
20
|
-
def self.indexes_configuration
|
21
|
-
@indexes || reset_indexes
|
22
|
-
end
|
23
|
-
def self.reset_indexes
|
24
|
-
@indexes = Configuration::Indexes.new # Is instance a problem?
|
25
|
-
end
|
26
|
-
|
27
|
-
#
|
28
|
-
#
|
29
|
-
def self.queries &block
|
30
|
-
queries_configuration.instance_eval &block
|
31
|
-
routing.freeze
|
32
|
-
end
|
33
|
-
def self.queries_configuration
|
34
|
-
@queries || reset_queries
|
35
|
-
end
|
36
|
-
def self.reset_queries
|
37
|
-
@queries = Configuration::Queries.new routing # Is instance a problem?
|
38
|
-
end
|
39
|
-
|
40
41
|
end
|
@@ -2,6 +2,29 @@ module Cacher
|
|
2
2
|
|
3
3
|
module Partial
|
4
4
|
|
5
|
+
# Generates the right subtokens for use in the subtoken strategy.
|
6
|
+
#
|
7
|
+
class SubtokenGenerator
|
8
|
+
|
9
|
+
attr_reader :down_to, :starting_at
|
10
|
+
|
11
|
+
def initialize down_to, starting_at
|
12
|
+
@down_to, @starting_at = down_to, starting_at
|
13
|
+
|
14
|
+
if @starting_at.zero?
|
15
|
+
def each_subtoken token, &block
|
16
|
+
token.each_subtoken @down_to, &block
|
17
|
+
end
|
18
|
+
else
|
19
|
+
def each_subtoken token, &block
|
20
|
+
token[0..@starting_at].intern.each_subtoken @down_to, &block
|
21
|
+
end
|
22
|
+
end
|
23
|
+
|
24
|
+
end
|
25
|
+
|
26
|
+
end
|
27
|
+
|
5
28
|
# The subtoken partial strategy.
|
6
29
|
#
|
7
30
|
# If given
|
@@ -17,23 +40,27 @@ module Cacher
|
|
17
40
|
#
|
18
41
|
class Subtoken < Strategy
|
19
42
|
|
20
|
-
attr_reader :down_to, :starting_at
|
21
|
-
|
22
43
|
# Down to is how far it will go down in generating the subtokens.
|
23
44
|
#
|
24
45
|
# Examples:
|
25
|
-
# With :hello, and starting_at
|
46
|
+
# With :hello, and starting_at -1
|
26
47
|
# * down to == 1: [:hello, :hell, :hel, :he, :h]
|
27
48
|
# * down to == 4: [:hello, :hell]
|
28
49
|
#
|
29
|
-
# With :hello, and starting_at -
|
50
|
+
# With :hello, and starting_at -2
|
30
51
|
# * down to == 1: [:hell, :hel, :he, :h]
|
31
52
|
# * down to == 4: [:hell]
|
32
53
|
#
|
33
54
|
def initialize options = {}
|
34
|
-
|
35
|
-
starting_at
|
36
|
-
@
|
55
|
+
down_to = options[:down_to] || 1
|
56
|
+
starting_at = options[:starting_at] || -1
|
57
|
+
@generator = SubtokenGenerator.new down_to, starting_at
|
58
|
+
end
|
59
|
+
def down_to
|
60
|
+
@generator.down_to
|
61
|
+
end
|
62
|
+
def starting_at
|
63
|
+
@generator.starting_at
|
37
64
|
end
|
38
65
|
|
39
66
|
# Generates a partial index from the given index.
|
@@ -43,12 +70,12 @@ module Cacher
|
|
43
70
|
|
44
71
|
# Generate for each key token the subtokens.
|
45
72
|
#
|
46
|
-
i =
|
73
|
+
i = 0
|
47
74
|
index.each_key do |token|
|
48
|
-
i
|
49
|
-
if i ==
|
75
|
+
i += 1
|
76
|
+
if i == 5000
|
50
77
|
puts "#{Time.now}: Generating partial tokens for token #{token}. This appears every 5000 tokens."
|
51
|
-
i =
|
78
|
+
i = 0
|
52
79
|
end
|
53
80
|
generate_for token, index, result
|
54
81
|
end
|
@@ -58,7 +85,7 @@ module Cacher
|
|
58
85
|
# TODO If it is unique for a subtoken, it is
|
59
86
|
# unique for all derived longer tokens.
|
60
87
|
#
|
61
|
-
result.each_value &:uniq!
|
88
|
+
result.each_value &:uniq!
|
62
89
|
|
63
90
|
result
|
64
91
|
end
|
@@ -74,18 +101,17 @@ module Cacher
|
|
74
101
|
# TODO Could be improved by appending the aforegoing ids?
|
75
102
|
#
|
76
103
|
def generate_for token, index, result
|
77
|
-
|
78
|
-
clipped_token.subtokens(down_to).each do |subtoken|
|
104
|
+
@generator.each_subtoken(token) do |subtoken|
|
79
105
|
if result[subtoken]
|
80
106
|
result[subtoken] += index[token] # unique
|
81
107
|
else
|
82
|
-
result[subtoken] = index[token].dup
|
108
|
+
result[subtoken] = index[token].dup # TODO Spec this dup
|
83
109
|
end
|
84
110
|
end
|
85
111
|
end
|
86
|
-
|
112
|
+
|
87
113
|
end
|
88
|
-
|
114
|
+
|
89
115
|
end
|
90
|
-
|
116
|
+
|
91
117
|
end
|
@@ -1,5 +1,8 @@
|
|
1
1
|
module Configuration
|
2
|
-
|
2
|
+
|
3
|
+
# Describes the configuration of a "field", a category
|
4
|
+
# (title is a category of a books index, for example).
|
5
|
+
#
|
3
6
|
class Field
|
4
7
|
attr_reader :name, :indexed_name, :virtual
|
5
8
|
attr_accessor :type # convenience
|
@@ -15,7 +18,8 @@ module Configuration
|
|
15
18
|
@indexed_name = options.delete(:indexed_field) || name # TODO Rename to indexed_as?
|
16
19
|
@virtual = options.delete(:virtual) || false
|
17
20
|
|
18
|
-
|
21
|
+
qualifiers = options[:qualifiers]
|
22
|
+
Query::Qualifiers.add(name, qualifiers) if qualifiers
|
19
23
|
|
20
24
|
# @remove = options[:remove] || false
|
21
25
|
# @filter = options[:filter] || true
|
@@ -1,11 +1,13 @@
|
|
1
1
|
module Configuration
|
2
|
-
|
2
|
+
|
3
|
+
# Describes the container for all index configurations.
|
4
|
+
#
|
3
5
|
class Indexes
|
4
6
|
|
5
7
|
attr_reader :types
|
6
8
|
|
7
|
-
def initialize
|
8
|
-
@types =
|
9
|
+
def initialize
|
10
|
+
@types = []
|
9
11
|
end
|
10
12
|
|
11
13
|
#
|
@@ -16,12 +18,19 @@ module Configuration
|
|
16
18
|
|
17
19
|
# Delegates
|
18
20
|
#
|
19
|
-
delegate :
|
21
|
+
delegate :removes_characters, :contracts_expressions, :stopwords, :splits_text_on, :normalizes_words, :removes_characters_after_splitting, :to => :default_index
|
20
22
|
|
21
|
-
|
22
|
-
|
23
|
+
# TODO Rewrite all this configuration handling.
|
24
|
+
#
|
25
|
+
def type name, source, *fields
|
26
|
+
new_type = Type.new name, source, *fields
|
27
|
+
types << new_type
|
28
|
+
::Indexes.configuration ||= self
|
29
|
+
|
30
|
+
generated = new_type.generate
|
31
|
+
::Indexes.add generated
|
32
|
+
generated
|
23
33
|
end
|
24
|
-
alias add_index type
|
25
34
|
def field name, options = {}
|
26
35
|
Field.new name, options
|
27
36
|
end
|
@@ -1,31 +1,21 @@
|
|
1
1
|
module Configuration
|
2
2
|
|
3
|
+
#
|
4
|
+
#
|
3
5
|
class Queries
|
4
6
|
|
5
|
-
attr_reader :routing
|
6
|
-
|
7
|
-
#
|
8
|
-
#
|
9
|
-
def initialize routing
|
10
|
-
@routing = routing
|
11
|
-
end
|
12
|
-
|
13
7
|
#
|
14
8
|
#
|
15
9
|
def default_index
|
16
10
|
Tokenizers::Query
|
17
11
|
end
|
18
|
-
|
19
|
-
# Routes.
|
20
|
-
#
|
21
|
-
delegate :defaults, :route, :live, :full, :root, :default, :to => :routing
|
12
|
+
delegate :removes_characters, :contracts_expressions, :stopwords, :splits_text_on, :normalizes_words, :removes_characters_after_splitting, :to => :default_index
|
22
13
|
|
23
14
|
# Delegates.
|
24
15
|
#
|
25
16
|
def maximum_tokens amount
|
26
17
|
Query::Tokens.maximum = amount
|
27
18
|
end
|
28
|
-
delegate :illegal_characters, :contract_expressions, :stopwords, :split_text_on, :normalize_words, :illegal_characters_after_splitting, :to => :default_index
|
29
19
|
|
30
20
|
end
|
31
21
|
|
@@ -2,17 +2,32 @@
|
|
2
2
|
#
|
3
3
|
class Symbol
|
4
4
|
|
5
|
-
# :keys.subtokens # => [:key, :ke, :k]
|
6
|
-
# :keys.subtokens(2) # => [:key, :ke]
|
5
|
+
# :keys.subtokens # => [:keys, :key, :ke, :k]
|
6
|
+
# :keys.subtokens(2) # => [:keys, :key, :ke]
|
7
7
|
#
|
8
8
|
def subtokens down_to_length = 1
|
9
|
-
sub
|
9
|
+
sub = self.id2name
|
10
10
|
|
11
11
|
size = sub.size
|
12
|
+
down_to_length = size + down_to_length if down_to_length < 0
|
12
13
|
down_to_length = size if size < down_to_length
|
13
14
|
|
14
|
-
|
15
|
+
result = [self]
|
16
|
+
size.downto(down_to_length + 1) { result << sub.chop!.intern }
|
15
17
|
result
|
16
18
|
end
|
17
19
|
|
20
|
+
# TODO Duplicate code.
|
21
|
+
#
|
22
|
+
def each_subtoken down_to_length = 1
|
23
|
+
sub = self.id2name
|
24
|
+
|
25
|
+
size = sub.size
|
26
|
+
down_to_length = size + down_to_length if down_to_length < 0
|
27
|
+
down_to_length = size if size < down_to_length
|
28
|
+
|
29
|
+
yield self
|
30
|
+
size.downto(down_to_length + 1) { yield sub.chop!.intern }
|
31
|
+
end
|
32
|
+
|
18
33
|
end
|
data/lib/picky/generator.rb
CHANGED
@@ -2,6 +2,10 @@ require 'fileutils'
|
|
2
2
|
|
3
3
|
module Picky
|
4
4
|
|
5
|
+
# Thrown when no generator for the command
|
6
|
+
# picky <command> <options>
|
7
|
+
# is found.
|
8
|
+
#
|
5
9
|
class NoGeneratorException < Exception; end
|
6
10
|
|
7
11
|
# This is a very simple project generator.
|
@@ -43,6 +47,11 @@ module Picky
|
|
43
47
|
klass.new *args
|
44
48
|
end
|
45
49
|
|
50
|
+
# Generates a new Picky project.
|
51
|
+
#
|
52
|
+
# Example:
|
53
|
+
# > picky project my_lovely_project
|
54
|
+
#
|
46
55
|
class Project
|
47
56
|
|
48
57
|
attr_reader :name, :prototype_project_basedir
|
@@ -5,12 +5,12 @@ module Helpers
|
|
5
5
|
|
6
6
|
# Returns a duration in seconds.
|
7
7
|
#
|
8
|
-
def timed
|
9
|
-
block_to_be_measured = lambda
|
8
|
+
def timed *args, &block
|
9
|
+
block_to_be_measured = lambda &block
|
10
10
|
|
11
11
|
time_begin = Time.now.to_f
|
12
12
|
|
13
|
-
block_to_be_measured.call
|
13
|
+
block_to_be_measured.call *args
|
14
14
|
|
15
15
|
Time.now.to_f - time_begin
|
16
16
|
end
|
data/lib/picky/index/bundle.rb
CHANGED
@@ -103,10 +103,11 @@ module Index
|
|
103
103
|
# Copies the indexes to the "backup" directory.
|
104
104
|
#
|
105
105
|
def backup
|
106
|
-
|
107
|
-
FileUtils.
|
108
|
-
FileUtils.cp
|
109
|
-
FileUtils.cp
|
106
|
+
target = backup_path
|
107
|
+
FileUtils.mkdir target unless Dir.exists?(target)
|
108
|
+
FileUtils.cp index_cache_path, target, :verbose => true
|
109
|
+
FileUtils.cp similarity_cache_path, target, :verbose => true
|
110
|
+
FileUtils.cp weights_cache_path, target, :verbose => true
|
110
111
|
end
|
111
112
|
def backup_path
|
112
113
|
File.join File.dirname(index_cache_path), 'backup'
|
data/lib/picky/index/category.rb
CHANGED
@@ -22,8 +22,8 @@ module Index
|
|
22
22
|
@full = options[:full_bundle] || Bundle.new(:full, self, type, Cacher::Partial::None.new, weights, similarity)
|
23
23
|
@partial = options[:partial_bundle] || Bundle.new(:partial, self, type, partial, weights, Cacher::Similarity::None.new)
|
24
24
|
|
25
|
-
@full =
|
26
|
-
@partial =
|
25
|
+
@full = full_lambda.call(@full, @partial) if full_lambda = options[:full_lambda]
|
26
|
+
@partial = partial_lambda.call(@full, @partial) if partial_lambda = options[:partial_lambda]
|
27
27
|
end
|
28
28
|
|
29
29
|
# Loads the index from cache.
|
@@ -40,13 +40,13 @@ module Index
|
|
40
40
|
# Generates all caches for this category.
|
41
41
|
#
|
42
42
|
def generate_caches
|
43
|
-
|
43
|
+
timed_exclaim "Loading data from db for #{identifier}."
|
44
44
|
generate_caches_from_db
|
45
|
-
|
45
|
+
timed_exclaim "Generating partial for #{identifier}."
|
46
46
|
generate_partial
|
47
|
-
|
47
|
+
timed_exclaim "Generating caches from memory for #{identifier}."
|
48
48
|
generate_caches_from_memory
|
49
|
-
|
49
|
+
timed_exclaim "Dumping all caches for #{identifier}."
|
50
50
|
dump_caches
|
51
51
|
end
|
52
52
|
def generate_caches_from_db
|
@@ -62,10 +62,17 @@ module Index
|
|
62
62
|
full.dump
|
63
63
|
partial.dump
|
64
64
|
end
|
65
|
+
# TODO move to Kernel?
|
66
|
+
#
|
67
|
+
def timed_exclaim text
|
68
|
+
exclaim "#{Time.now}: #{text}"
|
69
|
+
end
|
70
|
+
# TODO move to Kernel?
|
71
|
+
#
|
65
72
|
def exclaim text
|
66
73
|
puts text
|
67
74
|
end
|
68
|
-
|
75
|
+
|
69
76
|
# Used for testing.
|
70
77
|
#
|
71
78
|
def generate_indexes_from_full_index
|
data/lib/picky/index/combined.rb
CHANGED
@@ -2,7 +2,12 @@
|
|
2
2
|
#
|
3
3
|
module Index
|
4
4
|
|
5
|
-
#
|
5
|
+
# This index combines a full and partial index.
|
6
|
+
# It serves to order the results such that exact (full) hits are found first.
|
7
|
+
#
|
8
|
+
# TODO Rename full -> exact. exact/partial?
|
9
|
+
#
|
10
|
+
# TODO Need to use the right subtokens. Bake in?
|
6
11
|
#
|
7
12
|
class Combined < Bundle
|
8
13
|
|