picky 0.0.9 → 0.1.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (41)
  1. data/lib/picky/application.rb +38 -37
  2. data/lib/picky/cacher/partial/default.rb +1 -3
  3. data/lib/picky/cacher/partial/subtoken.rb +44 -18
  4. data/lib/picky/configuration/field.rb +6 -2
  5. data/lib/picky/configuration/indexes.rb +16 -7
  6. data/lib/picky/configuration/queries.rb +3 -13
  7. data/lib/picky/extensions/symbol.rb +19 -4
  8. data/lib/picky/generator.rb +9 -0
  9. data/lib/picky/helpers/measuring.rb +3 -3
  10. data/lib/picky/index/bundle.rb +5 -4
  11. data/lib/picky/index/category.rb +14 -7
  12. data/lib/picky/index/combined.rb +6 -1
  13. data/lib/picky/indexers/no_source_specified_error.rb +2 -0
  14. data/lib/picky/indexes.rb +3 -9
  15. data/lib/picky/query/allocation.rb +1 -1
  16. data/lib/picky/query/allocations.rb +2 -2
  17. data/lib/picky/rack/harakiri.rb +10 -8
  18. data/lib/picky/routing.rb +19 -21
  19. data/lib/picky/solr/schema_generator.rb +4 -4
  20. data/lib/picky/sources/base.rb +16 -4
  21. data/lib/picky/sources/csv.rb +3 -0
  22. data/lib/picky/sources/db.rb +30 -22
  23. data/lib/picky/tokenizers/base.rb +7 -5
  24. data/lib/picky/tokenizers/index.rb +5 -5
  25. data/lib/picky/tokenizers/query.rb +9 -9
  26. data/prototype_project/app/application.rb +36 -29
  27. data/prototype_project/app/db.yml +1 -1
  28. data/prototype_project/config.ru +3 -2
  29. data/spec/ext/performant_spec.rb +2 -2
  30. data/spec/lib/application_spec.rb +54 -8
  31. data/spec/lib/cacher/partial/default_spec.rb +15 -0
  32. data/spec/lib/cacher/partial/subtoken_spec.rb +54 -2
  33. data/spec/lib/extensions/symbol_spec.rb +124 -30
  34. data/spec/lib/index/bundle_partial_generation_speed_spec.rb +1 -1
  35. data/spec/lib/query/allocations_spec.rb +5 -5
  36. data/spec/lib/query/combinations_spec.rb +3 -3
  37. data/spec/lib/rack/harakiri_spec.rb +29 -0
  38. data/spec/lib/routing_spec.rb +22 -98
  39. data/spec/lib/tokenizers/index_spec.rb +1 -1
  40. data/spec/specific/speed_spec.rb +4 -5
  41. metadata +7 -3
@@ -1,40 +1,41 @@
1
+ # The Picky application wherein the indexing and querying are defined.
2
+ #
1
3
  class Application
2
-
3
- # An application simply delegates to the routing to handle a request.
4
- #
5
- def self.routing
6
- @routing ||= Routing.new
4
+ class << self
5
+
6
+ # An application simply delegates to the routing to handle a request.
7
+ #
8
+ def call env
9
+ routing.call env
10
+ end
11
+
12
+ # Freezes the routes.
13
+ #
14
+ def finalize
15
+ routing.freeze
16
+ end
17
+ def routing
18
+ @routing ||= Routing.new
19
+ end
20
+ # Routes.
21
+ #
22
+ delegate :route, :root, :to => :routing
23
+
24
+ #
25
+ #
26
+ def indexing
27
+ @indexing ||= Configuration::Indexes.new
28
+ end
29
+ def index *args
30
+ self.type *args
31
+ end
32
+ delegate :type, :field, :to => :indexing
33
+
34
+ #
35
+ #
36
+ def querying
37
+ @queries ||= Configuration::Queries.new
38
+ end
39
+
7
40
  end
8
- def self.call env
9
- routing.call env
10
- end
11
-
12
- #
13
- #
14
- def self.indexes &block
15
- indexes_configuration.instance_eval &block
16
- # TODO Uglyyyyyy.
17
- ::Indexes.configuration = indexes_configuration
18
- ::Indexes.setup # TODO Think about setup/reload.
19
- end
20
- def self.indexes_configuration
21
- @indexes || reset_indexes
22
- end
23
- def self.reset_indexes
24
- @indexes = Configuration::Indexes.new # Is instance a problem?
25
- end
26
-
27
- #
28
- #
29
- def self.queries &block
30
- queries_configuration.instance_eval &block
31
- routing.freeze
32
- end
33
- def self.queries_configuration
34
- @queries || reset_queries
35
- end
36
- def self.reset_queries
37
- @queries = Configuration::Queries.new routing # Is instance a problem?
38
- end
39
-
40
41
  end
@@ -1,7 +1,5 @@
1
1
  module Cacher
2
2
  module Partial
3
- # Default is Subtoken, down to 1.
4
- #
5
- Default = Subtoken.new :down_to => 1
3
+ Default = Subtoken.new :down_to => 1, :starting_at => -1
6
4
  end
7
5
  end
@@ -2,6 +2,29 @@ module Cacher
2
2
 
3
3
  module Partial
4
4
 
5
+ # Generates the right subtokens for use in the subtoken strategy.
6
+ #
7
+ class SubtokenGenerator
8
+
9
+ attr_reader :down_to, :starting_at
10
+
11
+ def initialize down_to, starting_at
12
+ @down_to, @starting_at = down_to, starting_at
13
+
14
+ if @starting_at.zero?
15
+ def each_subtoken token, &block
16
+ token.each_subtoken @down_to, &block
17
+ end
18
+ else
19
+ def each_subtoken token, &block
20
+ token[0..@starting_at].intern.each_subtoken @down_to, &block
21
+ end
22
+ end
23
+
24
+ end
25
+
26
+ end
27
+
5
28
  # The subtoken partial strategy.
6
29
  #
7
30
  # If given
@@ -17,23 +40,27 @@ module Cacher
17
40
  #
18
41
  class Subtoken < Strategy
19
42
 
20
- attr_reader :down_to, :starting_at
21
-
22
43
  # Down to is how far it will go down in generating the subtokens.
23
44
  #
24
45
  # Examples:
25
- # With :hello, and starting_at 0
46
+ # With :hello, and starting_at -1
26
47
  # * down to == 1: [:hello, :hell, :hel, :he, :h]
27
48
  # * down to == 4: [:hello, :hell]
28
49
  #
29
- # With :hello, and starting_at -1
50
+ # With :hello, and starting_at -2
30
51
  # * down to == 1: [:hell, :hel, :he, :h]
31
52
  # * down to == 4: [:hell]
32
53
  #
33
54
  def initialize options = {}
34
- @down_to = options[:down_to] || 1
35
- starting_at = options[:starting_at] || 0
36
- @starting_at = starting_at.zero? ? 0 : starting_at - 1
55
+ down_to = options[:down_to] || 1
56
+ starting_at = options[:starting_at] || -1
57
+ @generator = SubtokenGenerator.new down_to, starting_at
58
+ end
59
+ def down_to
60
+ @generator.down_to
61
+ end
62
+ def starting_at
63
+ @generator.starting_at
37
64
  end
38
65
 
39
66
  # Generates a partial index from the given index.
@@ -43,12 +70,12 @@ module Cacher
43
70
 
44
71
  # Generate for each key token the subtokens.
45
72
  #
46
- i = 5000
73
+ i = 0
47
74
  index.each_key do |token|
48
- i -= 1
49
- if i == 0
75
+ i += 1
76
+ if i == 5000
50
77
  puts "#{Time.now}: Generating partial tokens for token #{token}. This appears every 5000 tokens."
51
- i = 5000
78
+ i = 0
52
79
  end
53
80
  generate_for token, index, result
54
81
  end
@@ -58,7 +85,7 @@ module Cacher
58
85
  # TODO If it is unique for a subtoken, it is
59
86
  # unique for all derived longer tokens.
60
87
  #
61
- result.each_value &:uniq! # Removed because of the set combination operation below
88
+ result.each_value &:uniq!
62
89
 
63
90
  result
64
91
  end
@@ -74,18 +101,17 @@ module Cacher
74
101
  # TODO Could be improved by appending the preceding ids?
75
102
  #
76
103
  def generate_for token, index, result
77
- clipped_token = starting_at.zero? ? token : token[0..starting_at].to_sym
78
- clipped_token.subtokens(down_to).each do |subtoken|
104
+ @generator.each_subtoken(token) do |subtoken|
79
105
  if result[subtoken]
80
106
  result[subtoken] += index[token] # unique
81
107
  else
82
- result[subtoken] = index[token].dup
108
+ result[subtoken] = index[token].dup # TODO Spec this dup
83
109
  end
84
110
  end
85
111
  end
86
-
112
+
87
113
  end
88
-
114
+
89
115
  end
90
-
116
+
91
117
  end
@@ -1,5 +1,8 @@
1
1
  module Configuration
2
-
2
+
3
+ # Describes the configuration of a "field", a category
4
+ # (title is a category of a books index, for example).
5
+ #
3
6
  class Field
4
7
  attr_reader :name, :indexed_name, :virtual
5
8
  attr_accessor :type # convenience
@@ -15,7 +18,8 @@ module Configuration
15
18
  @indexed_name = options.delete(:indexed_field) || name # TODO Rename to indexed_as?
16
19
  @virtual = options.delete(:virtual) || false
17
20
 
18
- Query::Qualifiers.add(name, options[:qualifiers]) if options[:qualifiers]
21
+ qualifiers = options[:qualifiers]
22
+ Query::Qualifiers.add(name, qualifiers) if qualifiers
19
23
 
20
24
  # @remove = options[:remove] || false
21
25
  # @filter = options[:filter] || true
@@ -1,11 +1,13 @@
1
1
  module Configuration
2
-
2
+
3
+ # Describes the container for all index configurations.
4
+ #
3
5
  class Indexes
4
6
 
5
7
  attr_reader :types
6
8
 
7
- def initialize *types
8
- @types = types
9
+ def initialize
10
+ @types = []
9
11
  end
10
12
 
11
13
  #
@@ -16,12 +18,19 @@ module Configuration
16
18
 
17
19
  # Delegates
18
20
  #
19
- delegate :illegal_characters, :contract_expressions, :stopwords, :split_text_on, :normalize_words, :illegal_characters_after_splitting, :to => :default_index
21
+ delegate :removes_characters, :contracts_expressions, :stopwords, :splits_text_on, :normalizes_words, :removes_characters_after_splitting, :to => :default_index
20
22
 
21
- def type name, *fields
22
- types << Type.new(name, *fields)
23
+ # TODO Rewrite all this configuration handling.
24
+ #
25
+ def type name, source, *fields
26
+ new_type = Type.new name, source, *fields
27
+ types << new_type
28
+ ::Indexes.configuration ||= self
29
+
30
+ generated = new_type.generate
31
+ ::Indexes.add generated
32
+ generated
23
33
  end
24
- alias add_index type
25
34
  def field name, options = {}
26
35
  Field.new name, options
27
36
  end
@@ -1,31 +1,21 @@
1
1
  module Configuration
2
2
 
3
+ #
4
+ #
3
5
  class Queries
4
6
 
5
- attr_reader :routing
6
-
7
- #
8
- #
9
- def initialize routing
10
- @routing = routing
11
- end
12
-
13
7
  #
14
8
  #
15
9
  def default_index
16
10
  Tokenizers::Query
17
11
  end
18
-
19
- # Routes.
20
- #
21
- delegate :defaults, :route, :live, :full, :root, :default, :to => :routing
12
+ delegate :removes_characters, :contracts_expressions, :stopwords, :splits_text_on, :normalizes_words, :removes_characters_after_splitting, :to => :default_index
22
13
 
23
14
  # Delegates.
24
15
  #
25
16
  def maximum_tokens amount
26
17
  Query::Tokens.maximum = amount
27
18
  end
28
- delegate :illegal_characters, :contract_expressions, :stopwords, :split_text_on, :normalize_words, :illegal_characters_after_splitting, :to => :default_index
29
19
 
30
20
  end
31
21
 
@@ -2,17 +2,32 @@
2
2
  #
3
3
  class Symbol
4
4
 
5
- # :keys.subtokens # => [:key, :ke, :k]
6
- # :keys.subtokens(2) # => [:key, :ke]
5
+ # :keys.subtokens # => [:keys, :key, :ke, :k]
6
+ # :keys.subtokens(2) # => [:keys, :key, :ke]
7
7
  #
8
8
  def subtokens down_to_length = 1
9
- sub, result = self.to_s, [self]
9
+ sub = self.id2name
10
10
 
11
11
  size = sub.size
12
+ down_to_length = size + down_to_length if down_to_length < 0
12
13
  down_to_length = size if size < down_to_length
13
14
 
14
- size.downto(down_to_length + 1) { result << sub.chop!.to_sym }
15
+ result = [self]
16
+ size.downto(down_to_length + 1) { result << sub.chop!.intern }
15
17
  result
16
18
  end
17
19
 
20
+ # TODO Duplicate code.
21
+ #
22
+ def each_subtoken down_to_length = 1
23
+ sub = self.id2name
24
+
25
+ size = sub.size
26
+ down_to_length = size + down_to_length if down_to_length < 0
27
+ down_to_length = size if size < down_to_length
28
+
29
+ yield self
30
+ size.downto(down_to_length + 1) { yield sub.chop!.intern }
31
+ end
32
+
18
33
  end
@@ -2,6 +2,10 @@ require 'fileutils'
2
2
 
3
3
  module Picky
4
4
 
5
+ # Raised when no generator for the command
6
+ # picky <command> <options>
7
+ # is found.
8
+ #
5
9
  class NoGeneratorException < Exception; end
6
10
 
7
11
  # This is a very simple project generator.
@@ -43,6 +47,11 @@ module Picky
43
47
  klass.new *args
44
48
  end
45
49
 
50
+ # Generates a new Picky project.
51
+ #
52
+ # Example:
53
+ # > picky project my_lovely_project
54
+ #
46
55
  class Project
47
56
 
48
57
  attr_reader :name, :prototype_project_basedir
@@ -5,12 +5,12 @@ module Helpers
5
5
 
6
6
  # Returns a duration in seconds.
7
7
  #
8
- def timed(*args, &block)
9
- block_to_be_measured = lambda(&block)
8
+ def timed *args, &block
9
+ block_to_be_measured = lambda &block
10
10
 
11
11
  time_begin = Time.now.to_f
12
12
 
13
- block_to_be_measured.call(*args)
13
+ block_to_be_measured.call *args
14
14
 
15
15
  Time.now.to_f - time_begin
16
16
  end
@@ -103,10 +103,11 @@ module Index
103
103
  # Copies the indexes to the "backup" directory.
104
104
  #
105
105
  def backup
106
- FileUtils.mkdir backup_path unless Dir.exists?(backup_path)
107
- FileUtils.cp index_cache_path, backup_path, :verbose => true
108
- FileUtils.cp similarity_cache_path, backup_path, :verbose => true
109
- FileUtils.cp weights_cache_path, backup_path, :verbose => true
106
+ target = backup_path
107
+ FileUtils.mkdir target unless Dir.exists?(target)
108
+ FileUtils.cp index_cache_path, target, :verbose => true
109
+ FileUtils.cp similarity_cache_path, target, :verbose => true
110
+ FileUtils.cp weights_cache_path, target, :verbose => true
110
111
  end
111
112
  def backup_path
112
113
  File.join File.dirname(index_cache_path), 'backup'
@@ -22,8 +22,8 @@ module Index
22
22
  @full = options[:full_bundle] || Bundle.new(:full, self, type, Cacher::Partial::None.new, weights, similarity)
23
23
  @partial = options[:partial_bundle] || Bundle.new(:partial, self, type, partial, weights, Cacher::Similarity::None.new)
24
24
 
25
- @full = options[:full_lambda].call(@full, @partial) if options[:full_lambda]
26
- @partial = options[:partial_lambda].call(@full, @partial) if options[:partial_lambda]
25
+ @full = full_lambda.call(@full, @partial) if full_lambda = options[:full_lambda]
26
+ @partial = partial_lambda.call(@full, @partial) if partial_lambda = options[:partial_lambda]
27
27
  end
28
28
 
29
29
  # Loads the index from cache.
@@ -40,13 +40,13 @@ module Index
40
40
  # Generates all caches for this category.
41
41
  #
42
42
  def generate_caches
43
- exclaim "#{Time.now}: Loading data from db for #{identifier}."
43
+ timed_exclaim "Loading data from db for #{identifier}."
44
44
  generate_caches_from_db
45
- exclaim "#{Time.now}: Generating partial for #{identifier}."
45
+ timed_exclaim "Generating partial for #{identifier}."
46
46
  generate_partial
47
- exclaim "#{Time.now}: Generating caches from memory for #{identifier}."
47
+ timed_exclaim "Generating caches from memory for #{identifier}."
48
48
  generate_caches_from_memory
49
- exclaim "#{Time.now}: Dumping all caches for #{identifier}."
49
+ timed_exclaim "Dumping all caches for #{identifier}."
50
50
  dump_caches
51
51
  end
52
52
  def generate_caches_from_db
@@ -62,10 +62,17 @@ module Index
62
62
  full.dump
63
63
  partial.dump
64
64
  end
65
+ # TODO move to Kernel?
66
+ #
67
+ def timed_exclaim text
68
+ exclaim "#{Time.now}: #{text}"
69
+ end
70
+ # TODO move to Kernel?
71
+ #
65
72
  def exclaim text
66
73
  puts text
67
74
  end
68
-
75
+
69
76
  # Used for testing.
70
77
  #
71
78
  def generate_indexes_from_full_index
@@ -2,7 +2,12 @@
2
2
  #
3
3
  module Index
4
4
 
5
- #
5
+ # This index combines a full and partial index.
6
+ # It serves to order the results such that exact (full) hits are found first.
7
+ #
8
+ # TODO Rename full -> exact. exact/partial?
9
+ #
10
+ # TODO Need to use the right subtokens. Bake in?
6
11
  #
7
12
  class Combined < Bundle
8
13