picky 0.0.9 → 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/lib/picky/application.rb +38 -37
- data/lib/picky/cacher/partial/default.rb +1 -3
- data/lib/picky/cacher/partial/subtoken.rb +44 -18
- data/lib/picky/configuration/field.rb +6 -2
- data/lib/picky/configuration/indexes.rb +16 -7
- data/lib/picky/configuration/queries.rb +3 -13
- data/lib/picky/extensions/symbol.rb +19 -4
- data/lib/picky/generator.rb +9 -0
- data/lib/picky/helpers/measuring.rb +3 -3
- data/lib/picky/index/bundle.rb +5 -4
- data/lib/picky/index/category.rb +14 -7
- data/lib/picky/index/combined.rb +6 -1
- data/lib/picky/indexers/no_source_specified_error.rb +2 -0
- data/lib/picky/indexes.rb +3 -9
- data/lib/picky/query/allocation.rb +1 -1
- data/lib/picky/query/allocations.rb +2 -2
- data/lib/picky/rack/harakiri.rb +10 -8
- data/lib/picky/routing.rb +19 -21
- data/lib/picky/solr/schema_generator.rb +4 -4
- data/lib/picky/sources/base.rb +16 -4
- data/lib/picky/sources/csv.rb +3 -0
- data/lib/picky/sources/db.rb +30 -22
- data/lib/picky/tokenizers/base.rb +7 -5
- data/lib/picky/tokenizers/index.rb +5 -5
- data/lib/picky/tokenizers/query.rb +9 -9
- data/prototype_project/app/application.rb +36 -29
- data/prototype_project/app/db.yml +1 -1
- data/prototype_project/config.ru +3 -2
- data/spec/ext/performant_spec.rb +2 -2
- data/spec/lib/application_spec.rb +54 -8
- data/spec/lib/cacher/partial/default_spec.rb +15 -0
- data/spec/lib/cacher/partial/subtoken_spec.rb +54 -2
- data/spec/lib/extensions/symbol_spec.rb +124 -30
- data/spec/lib/index/bundle_partial_generation_speed_spec.rb +1 -1
- data/spec/lib/query/allocations_spec.rb +5 -5
- data/spec/lib/query/combinations_spec.rb +3 -3
- data/spec/lib/rack/harakiri_spec.rb +29 -0
- data/spec/lib/routing_spec.rb +22 -98
- data/spec/lib/tokenizers/index_spec.rb +1 -1
- data/spec/specific/speed_spec.rb +4 -5
- metadata +7 -3
data/lib/picky/application.rb
CHANGED
@@ -1,40 +1,41 @@
|
|
1
|
+
# The Picky application wherein the indexing and querying is defined.
|
2
|
+
#
|
1
3
|
class Application
|
2
|
-
|
3
|
-
|
4
|
-
|
5
|
-
|
6
|
-
|
4
|
+
class << self
|
5
|
+
|
6
|
+
# An application simply delegates to the routing to handle a request.
|
7
|
+
#
|
8
|
+
def call env
|
9
|
+
routing.call env
|
10
|
+
end
|
11
|
+
|
12
|
+
# Freezes the routes.
|
13
|
+
#
|
14
|
+
def finalize
|
15
|
+
routing.freeze
|
16
|
+
end
|
17
|
+
def routing
|
18
|
+
@routing ||= Routing.new
|
19
|
+
end
|
20
|
+
# Routes.
|
21
|
+
#
|
22
|
+
delegate :route, :root, :to => :routing
|
23
|
+
|
24
|
+
#
|
25
|
+
#
|
26
|
+
def indexing
|
27
|
+
@indexing ||= Configuration::Indexes.new
|
28
|
+
end
|
29
|
+
def index *args
|
30
|
+
self.type *args
|
31
|
+
end
|
32
|
+
delegate :type, :field, :to => :indexing
|
33
|
+
|
34
|
+
#
|
35
|
+
#
|
36
|
+
def querying
|
37
|
+
@queries ||= Configuration::Queries.new
|
38
|
+
end
|
39
|
+
|
7
40
|
end
|
8
|
-
def self.call env
|
9
|
-
routing.call env
|
10
|
-
end
|
11
|
-
|
12
|
-
#
|
13
|
-
#
|
14
|
-
def self.indexes &block
|
15
|
-
indexes_configuration.instance_eval &block
|
16
|
-
# TODO Uglyyyyyy.
|
17
|
-
::Indexes.configuration = indexes_configuration
|
18
|
-
::Indexes.setup # TODO Think about setup/reload.
|
19
|
-
end
|
20
|
-
def self.indexes_configuration
|
21
|
-
@indexes || reset_indexes
|
22
|
-
end
|
23
|
-
def self.reset_indexes
|
24
|
-
@indexes = Configuration::Indexes.new # Is instance a problem?
|
25
|
-
end
|
26
|
-
|
27
|
-
#
|
28
|
-
#
|
29
|
-
def self.queries &block
|
30
|
-
queries_configuration.instance_eval &block
|
31
|
-
routing.freeze
|
32
|
-
end
|
33
|
-
def self.queries_configuration
|
34
|
-
@queries || reset_queries
|
35
|
-
end
|
36
|
-
def self.reset_queries
|
37
|
-
@queries = Configuration::Queries.new routing # Is instance a problem?
|
38
|
-
end
|
39
|
-
|
40
41
|
end
|
@@ -2,6 +2,29 @@ module Cacher
|
|
2
2
|
|
3
3
|
module Partial
|
4
4
|
|
5
|
+
# Generates the right subtokens for use in the subtoken strategy.
|
6
|
+
#
|
7
|
+
class SubtokenGenerator
|
8
|
+
|
9
|
+
attr_reader :down_to, :starting_at
|
10
|
+
|
11
|
+
def initialize down_to, starting_at
|
12
|
+
@down_to, @starting_at = down_to, starting_at
|
13
|
+
|
14
|
+
if @starting_at.zero?
|
15
|
+
def each_subtoken token, &block
|
16
|
+
token.each_subtoken @down_to, &block
|
17
|
+
end
|
18
|
+
else
|
19
|
+
def each_subtoken token, &block
|
20
|
+
token[0..@starting_at].intern.each_subtoken @down_to, &block
|
21
|
+
end
|
22
|
+
end
|
23
|
+
|
24
|
+
end
|
25
|
+
|
26
|
+
end
|
27
|
+
|
5
28
|
# The subtoken partial strategy.
|
6
29
|
#
|
7
30
|
# If given
|
@@ -17,23 +40,27 @@ module Cacher
|
|
17
40
|
#
|
18
41
|
class Subtoken < Strategy
|
19
42
|
|
20
|
-
attr_reader :down_to, :starting_at
|
21
|
-
|
22
43
|
# Down to is how far it will go down in generating the subtokens.
|
23
44
|
#
|
24
45
|
# Examples:
|
25
|
-
# With :hello, and starting_at
|
46
|
+
# With :hello, and starting_at -1
|
26
47
|
# * down to == 1: [:hello, :hell, :hel, :he, :h]
|
27
48
|
# * down to == 4: [:hello, :hell]
|
28
49
|
#
|
29
|
-
# With :hello, and starting_at -
|
50
|
+
# With :hello, and starting_at -2
|
30
51
|
# * down to == 1: [:hell, :hel, :he, :h]
|
31
52
|
# * down to == 4: [:hell]
|
32
53
|
#
|
33
54
|
def initialize options = {}
|
34
|
-
|
35
|
-
starting_at
|
36
|
-
@
|
55
|
+
down_to = options[:down_to] || 1
|
56
|
+
starting_at = options[:starting_at] || -1
|
57
|
+
@generator = SubtokenGenerator.new down_to, starting_at
|
58
|
+
end
|
59
|
+
def down_to
|
60
|
+
@generator.down_to
|
61
|
+
end
|
62
|
+
def starting_at
|
63
|
+
@generator.starting_at
|
37
64
|
end
|
38
65
|
|
39
66
|
# Generates a partial index from the given index.
|
@@ -43,12 +70,12 @@ module Cacher
|
|
43
70
|
|
44
71
|
# Generate for each key token the subtokens.
|
45
72
|
#
|
46
|
-
i =
|
73
|
+
i = 0
|
47
74
|
index.each_key do |token|
|
48
|
-
i
|
49
|
-
if i ==
|
75
|
+
i += 1
|
76
|
+
if i == 5000
|
50
77
|
puts "#{Time.now}: Generating partial tokens for token #{token}. This appears every 5000 tokens."
|
51
|
-
i =
|
78
|
+
i = 0
|
52
79
|
end
|
53
80
|
generate_for token, index, result
|
54
81
|
end
|
@@ -58,7 +85,7 @@ module Cacher
|
|
58
85
|
# TODO If it is unique for a subtoken, it is
|
59
86
|
# unique for all derived longer tokens.
|
60
87
|
#
|
61
|
-
result.each_value &:uniq!
|
88
|
+
result.each_value &:uniq!
|
62
89
|
|
63
90
|
result
|
64
91
|
end
|
@@ -74,18 +101,17 @@ module Cacher
|
|
74
101
|
# TODO Could be improved by appending the aforegoing ids?
|
75
102
|
#
|
76
103
|
def generate_for token, index, result
|
77
|
-
|
78
|
-
clipped_token.subtokens(down_to).each do |subtoken|
|
104
|
+
@generator.each_subtoken(token) do |subtoken|
|
79
105
|
if result[subtoken]
|
80
106
|
result[subtoken] += index[token] # unique
|
81
107
|
else
|
82
|
-
result[subtoken] = index[token].dup
|
108
|
+
result[subtoken] = index[token].dup # TODO Spec this dup
|
83
109
|
end
|
84
110
|
end
|
85
111
|
end
|
86
|
-
|
112
|
+
|
87
113
|
end
|
88
|
-
|
114
|
+
|
89
115
|
end
|
90
|
-
|
116
|
+
|
91
117
|
end
|
@@ -1,5 +1,8 @@
|
|
1
1
|
module Configuration
|
2
|
-
|
2
|
+
|
3
|
+
# Describes the configuration of a "field", a category
|
4
|
+
# (title is a category of a books index, for example).
|
5
|
+
#
|
3
6
|
class Field
|
4
7
|
attr_reader :name, :indexed_name, :virtual
|
5
8
|
attr_accessor :type # convenience
|
@@ -15,7 +18,8 @@ module Configuration
|
|
15
18
|
@indexed_name = options.delete(:indexed_field) || name # TODO Rename to indexed_as?
|
16
19
|
@virtual = options.delete(:virtual) || false
|
17
20
|
|
18
|
-
|
21
|
+
qualifiers = options[:qualifiers]
|
22
|
+
Query::Qualifiers.add(name, qualifiers) if qualifiers
|
19
23
|
|
20
24
|
# @remove = options[:remove] || false
|
21
25
|
# @filter = options[:filter] || true
|
@@ -1,11 +1,13 @@
|
|
1
1
|
module Configuration
|
2
|
-
|
2
|
+
|
3
|
+
# Describes the container for all index configurations.
|
4
|
+
#
|
3
5
|
class Indexes
|
4
6
|
|
5
7
|
attr_reader :types
|
6
8
|
|
7
|
-
def initialize
|
8
|
-
@types =
|
9
|
+
def initialize
|
10
|
+
@types = []
|
9
11
|
end
|
10
12
|
|
11
13
|
#
|
@@ -16,12 +18,19 @@ module Configuration
|
|
16
18
|
|
17
19
|
# Delegates
|
18
20
|
#
|
19
|
-
delegate :
|
21
|
+
delegate :removes_characters, :contracts_expressions, :stopwords, :splits_text_on, :normalizes_words, :removes_characters_after_splitting, :to => :default_index
|
20
22
|
|
21
|
-
|
22
|
-
|
23
|
+
# TODO Rewrite all this configuration handling.
|
24
|
+
#
|
25
|
+
def type name, source, *fields
|
26
|
+
new_type = Type.new name, source, *fields
|
27
|
+
types << new_type
|
28
|
+
::Indexes.configuration ||= self
|
29
|
+
|
30
|
+
generated = new_type.generate
|
31
|
+
::Indexes.add generated
|
32
|
+
generated
|
23
33
|
end
|
24
|
-
alias add_index type
|
25
34
|
def field name, options = {}
|
26
35
|
Field.new name, options
|
27
36
|
end
|
@@ -1,31 +1,21 @@
|
|
1
1
|
module Configuration
|
2
2
|
|
3
|
+
#
|
4
|
+
#
|
3
5
|
class Queries
|
4
6
|
|
5
|
-
attr_reader :routing
|
6
|
-
|
7
|
-
#
|
8
|
-
#
|
9
|
-
def initialize routing
|
10
|
-
@routing = routing
|
11
|
-
end
|
12
|
-
|
13
7
|
#
|
14
8
|
#
|
15
9
|
def default_index
|
16
10
|
Tokenizers::Query
|
17
11
|
end
|
18
|
-
|
19
|
-
# Routes.
|
20
|
-
#
|
21
|
-
delegate :defaults, :route, :live, :full, :root, :default, :to => :routing
|
12
|
+
delegate :removes_characters, :contracts_expressions, :stopwords, :splits_text_on, :normalizes_words, :removes_characters_after_splitting, :to => :default_index
|
22
13
|
|
23
14
|
# Delegates.
|
24
15
|
#
|
25
16
|
def maximum_tokens amount
|
26
17
|
Query::Tokens.maximum = amount
|
27
18
|
end
|
28
|
-
delegate :illegal_characters, :contract_expressions, :stopwords, :split_text_on, :normalize_words, :illegal_characters_after_splitting, :to => :default_index
|
29
19
|
|
30
20
|
end
|
31
21
|
|
@@ -2,17 +2,32 @@
|
|
2
2
|
#
|
3
3
|
class Symbol
|
4
4
|
|
5
|
-
# :keys.subtokens # => [:key, :ke, :k]
|
6
|
-
# :keys.subtokens(2) # => [:key, :ke]
|
5
|
+
# :keys.subtokens # => [:keys, :key, :ke, :k]
|
6
|
+
# :keys.subtokens(2) # => [:keys, :key, :ke]
|
7
7
|
#
|
8
8
|
def subtokens down_to_length = 1
|
9
|
-
sub
|
9
|
+
sub = self.id2name
|
10
10
|
|
11
11
|
size = sub.size
|
12
|
+
down_to_length = size + down_to_length if down_to_length < 0
|
12
13
|
down_to_length = size if size < down_to_length
|
13
14
|
|
14
|
-
|
15
|
+
result = [self]
|
16
|
+
size.downto(down_to_length + 1) { result << sub.chop!.intern }
|
15
17
|
result
|
16
18
|
end
|
17
19
|
|
20
|
+
# TODO Duplicate code.
|
21
|
+
#
|
22
|
+
def each_subtoken down_to_length = 1
|
23
|
+
sub = self.id2name
|
24
|
+
|
25
|
+
size = sub.size
|
26
|
+
down_to_length = size + down_to_length if down_to_length < 0
|
27
|
+
down_to_length = size if size < down_to_length
|
28
|
+
|
29
|
+
yield self
|
30
|
+
size.downto(down_to_length + 1) { yield sub.chop!.intern }
|
31
|
+
end
|
32
|
+
|
18
33
|
end
|
data/lib/picky/generator.rb
CHANGED
@@ -2,6 +2,10 @@ require 'fileutils'
|
|
2
2
|
|
3
3
|
module Picky
|
4
4
|
|
5
|
+
# Thrown when no generator for the command
|
6
|
+
# picky <command> <options>
|
7
|
+
# is found.
|
8
|
+
#
|
5
9
|
class NoGeneratorException < Exception; end
|
6
10
|
|
7
11
|
# This is a very simple project generator.
|
@@ -43,6 +47,11 @@ module Picky
|
|
43
47
|
klass.new *args
|
44
48
|
end
|
45
49
|
|
50
|
+
# Generates a new Picky project.
|
51
|
+
#
|
52
|
+
# Example:
|
53
|
+
# > picky project my_lovely_project
|
54
|
+
#
|
46
55
|
class Project
|
47
56
|
|
48
57
|
attr_reader :name, :prototype_project_basedir
|
@@ -5,12 +5,12 @@ module Helpers
|
|
5
5
|
|
6
6
|
# Returns a duration in seconds.
|
7
7
|
#
|
8
|
-
def timed
|
9
|
-
block_to_be_measured = lambda
|
8
|
+
def timed *args, &block
|
9
|
+
block_to_be_measured = lambda &block
|
10
10
|
|
11
11
|
time_begin = Time.now.to_f
|
12
12
|
|
13
|
-
block_to_be_measured.call
|
13
|
+
block_to_be_measured.call *args
|
14
14
|
|
15
15
|
Time.now.to_f - time_begin
|
16
16
|
end
|
data/lib/picky/index/bundle.rb
CHANGED
@@ -103,10 +103,11 @@ module Index
|
|
103
103
|
# Copies the indexes to the "backup" directory.
|
104
104
|
#
|
105
105
|
def backup
|
106
|
-
|
107
|
-
FileUtils.
|
108
|
-
FileUtils.cp
|
109
|
-
FileUtils.cp
|
106
|
+
target = backup_path
|
107
|
+
FileUtils.mkdir target unless Dir.exists?(target)
|
108
|
+
FileUtils.cp index_cache_path, target, :verbose => true
|
109
|
+
FileUtils.cp similarity_cache_path, target, :verbose => true
|
110
|
+
FileUtils.cp weights_cache_path, target, :verbose => true
|
110
111
|
end
|
111
112
|
def backup_path
|
112
113
|
File.join File.dirname(index_cache_path), 'backup'
|
data/lib/picky/index/category.rb
CHANGED
@@ -22,8 +22,8 @@ module Index
|
|
22
22
|
@full = options[:full_bundle] || Bundle.new(:full, self, type, Cacher::Partial::None.new, weights, similarity)
|
23
23
|
@partial = options[:partial_bundle] || Bundle.new(:partial, self, type, partial, weights, Cacher::Similarity::None.new)
|
24
24
|
|
25
|
-
@full =
|
26
|
-
@partial =
|
25
|
+
@full = full_lambda.call(@full, @partial) if full_lambda = options[:full_lambda]
|
26
|
+
@partial = partial_lambda.call(@full, @partial) if partial_lambda = options[:partial_lambda]
|
27
27
|
end
|
28
28
|
|
29
29
|
# Loads the index from cache.
|
@@ -40,13 +40,13 @@ module Index
|
|
40
40
|
# Generates all caches for this category.
|
41
41
|
#
|
42
42
|
def generate_caches
|
43
|
-
|
43
|
+
timed_exclaim "Loading data from db for #{identifier}."
|
44
44
|
generate_caches_from_db
|
45
|
-
|
45
|
+
timed_exclaim "Generating partial for #{identifier}."
|
46
46
|
generate_partial
|
47
|
-
|
47
|
+
timed_exclaim "Generating caches from memory for #{identifier}."
|
48
48
|
generate_caches_from_memory
|
49
|
-
|
49
|
+
timed_exclaim "Dumping all caches for #{identifier}."
|
50
50
|
dump_caches
|
51
51
|
end
|
52
52
|
def generate_caches_from_db
|
@@ -62,10 +62,17 @@ module Index
|
|
62
62
|
full.dump
|
63
63
|
partial.dump
|
64
64
|
end
|
65
|
+
# TODO move to Kernel?
|
66
|
+
#
|
67
|
+
def timed_exclaim text
|
68
|
+
exclaim "#{Time.now}: #{text}"
|
69
|
+
end
|
70
|
+
# TODO move to Kernel?
|
71
|
+
#
|
65
72
|
def exclaim text
|
66
73
|
puts text
|
67
74
|
end
|
68
|
-
|
75
|
+
|
69
76
|
# Used for testing.
|
70
77
|
#
|
71
78
|
def generate_indexes_from_full_index
|
data/lib/picky/index/combined.rb
CHANGED
@@ -2,7 +2,12 @@
|
|
2
2
|
#
|
3
3
|
module Index
|
4
4
|
|
5
|
-
#
|
5
|
+
# This index combines a full and partial index.
|
6
|
+
# It serves to order the results such that exact (full) hits are found first.
|
7
|
+
#
|
8
|
+
# TODO Rename full -> exact. exact/partial?
|
9
|
+
#
|
10
|
+
# TODO Need to use the right subtokens. Bake in?
|
6
11
|
#
|
7
12
|
class Combined < Bundle
|
8
13
|
|