picky 0.0.9 → 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (41)
  1. data/lib/picky/application.rb +38 -37
  2. data/lib/picky/cacher/partial/default.rb +1 -3
  3. data/lib/picky/cacher/partial/subtoken.rb +44 -18
  4. data/lib/picky/configuration/field.rb +6 -2
  5. data/lib/picky/configuration/indexes.rb +16 -7
  6. data/lib/picky/configuration/queries.rb +3 -13
  7. data/lib/picky/extensions/symbol.rb +19 -4
  8. data/lib/picky/generator.rb +9 -0
  9. data/lib/picky/helpers/measuring.rb +3 -3
  10. data/lib/picky/index/bundle.rb +5 -4
  11. data/lib/picky/index/category.rb +14 -7
  12. data/lib/picky/index/combined.rb +6 -1
  13. data/lib/picky/indexers/no_source_specified_error.rb +2 -0
  14. data/lib/picky/indexes.rb +3 -9
  15. data/lib/picky/query/allocation.rb +1 -1
  16. data/lib/picky/query/allocations.rb +2 -2
  17. data/lib/picky/rack/harakiri.rb +10 -8
  18. data/lib/picky/routing.rb +19 -21
  19. data/lib/picky/solr/schema_generator.rb +4 -4
  20. data/lib/picky/sources/base.rb +16 -4
  21. data/lib/picky/sources/csv.rb +3 -0
  22. data/lib/picky/sources/db.rb +30 -22
  23. data/lib/picky/tokenizers/base.rb +7 -5
  24. data/lib/picky/tokenizers/index.rb +5 -5
  25. data/lib/picky/tokenizers/query.rb +9 -9
  26. data/prototype_project/app/application.rb +36 -29
  27. data/prototype_project/app/db.yml +1 -1
  28. data/prototype_project/config.ru +3 -2
  29. data/spec/ext/performant_spec.rb +2 -2
  30. data/spec/lib/application_spec.rb +54 -8
  31. data/spec/lib/cacher/partial/default_spec.rb +15 -0
  32. data/spec/lib/cacher/partial/subtoken_spec.rb +54 -2
  33. data/spec/lib/extensions/symbol_spec.rb +124 -30
  34. data/spec/lib/index/bundle_partial_generation_speed_spec.rb +1 -1
  35. data/spec/lib/query/allocations_spec.rb +5 -5
  36. data/spec/lib/query/combinations_spec.rb +3 -3
  37. data/spec/lib/rack/harakiri_spec.rb +29 -0
  38. data/spec/lib/routing_spec.rb +22 -98
  39. data/spec/lib/tokenizers/index_spec.rb +1 -1
  40. data/spec/specific/speed_spec.rb +4 -5
  41. metadata +7 -3
@@ -1,40 +1,41 @@
1
+ # The Picky application wherein the indexing and querying is defined.
2
+ #
1
3
  class Application
2
-
3
- # An application simply delegates to the routing to handle a request.
4
- #
5
- def self.routing
6
- @routing ||= Routing.new
4
+ class << self
5
+
6
+ # An application simply delegates to the routing to handle a request.
7
+ #
8
+ def call env
9
+ routing.call env
10
+ end
11
+
12
+ # Freezes the routes.
13
+ #
14
+ def finalize
15
+ routing.freeze
16
+ end
17
+ def routing
18
+ @routing ||= Routing.new
19
+ end
20
+ # Routes.
21
+ #
22
+ delegate :route, :root, :to => :routing
23
+
24
+ #
25
+ #
26
+ def indexing
27
+ @indexing ||= Configuration::Indexes.new
28
+ end
29
+ def index *args
30
+ self.type *args
31
+ end
32
+ delegate :type, :field, :to => :indexing
33
+
34
+ #
35
+ #
36
+ def querying
37
+ @queries ||= Configuration::Queries.new
38
+ end
39
+
7
40
  end
8
- def self.call env
9
- routing.call env
10
- end
11
-
12
- #
13
- #
14
- def self.indexes &block
15
- indexes_configuration.instance_eval &block
16
- # TODO Uglyyyyyy.
17
- ::Indexes.configuration = indexes_configuration
18
- ::Indexes.setup # TODO Think about setup/reload.
19
- end
20
- def self.indexes_configuration
21
- @indexes || reset_indexes
22
- end
23
- def self.reset_indexes
24
- @indexes = Configuration::Indexes.new # Is instance a problem?
25
- end
26
-
27
- #
28
- #
29
- def self.queries &block
30
- queries_configuration.instance_eval &block
31
- routing.freeze
32
- end
33
- def self.queries_configuration
34
- @queries || reset_queries
35
- end
36
- def self.reset_queries
37
- @queries = Configuration::Queries.new routing # Is instance a problem?
38
- end
39
-
40
41
  end
@@ -1,7 +1,5 @@
1
1
  module Cacher
2
2
  module Partial
3
- # Default is Subtoken, down to 1.
4
- #
5
- Default = Subtoken.new :down_to => 1
3
+ Default = Subtoken.new :down_to => 1, :starting_at => -1
6
4
  end
7
5
  end
@@ -2,6 +2,29 @@ module Cacher
2
2
 
3
3
  module Partial
4
4
 
5
+ # Generates the right subtokens for use in the subtoken strategy.
6
+ #
7
+ class SubtokenGenerator
8
+
9
+ attr_reader :down_to, :starting_at
10
+
11
+ def initialize down_to, starting_at
12
+ @down_to, @starting_at = down_to, starting_at
13
+
14
+ if @starting_at.zero?
15
+ def each_subtoken token, &block
16
+ token.each_subtoken @down_to, &block
17
+ end
18
+ else
19
+ def each_subtoken token, &block
20
+ token[0..@starting_at].intern.each_subtoken @down_to, &block
21
+ end
22
+ end
23
+
24
+ end
25
+
26
+ end
27
+
5
28
  # The subtoken partial strategy.
6
29
  #
7
30
  # If given
@@ -17,23 +40,27 @@ module Cacher
17
40
  #
18
41
  class Subtoken < Strategy
19
42
 
20
- attr_reader :down_to, :starting_at
21
-
22
43
  # Down to is how far it will go down in generating the subtokens.
23
44
  #
24
45
  # Examples:
25
- # With :hello, and starting_at 0
46
+ # With :hello, and starting_at -1
26
47
  # * down to == 1: [:hello, :hell, :hel, :he, :h]
27
48
  # * down to == 4: [:hello, :hell]
28
49
  #
29
- # With :hello, and starting_at -1
50
+ # With :hello, and starting_at -2
30
51
  # * down to == 1: [:hell, :hel, :he, :h]
31
52
  # * down to == 4: [:hell]
32
53
  #
33
54
  def initialize options = {}
34
- @down_to = options[:down_to] || 1
35
- starting_at = options[:starting_at] || 0
36
- @starting_at = starting_at.zero? ? 0 : starting_at - 1
55
+ down_to = options[:down_to] || 1
56
+ starting_at = options[:starting_at] || -1
57
+ @generator = SubtokenGenerator.new down_to, starting_at
58
+ end
59
+ def down_to
60
+ @generator.down_to
61
+ end
62
+ def starting_at
63
+ @generator.starting_at
37
64
  end
38
65
 
39
66
  # Generates a partial index from the given index.
@@ -43,12 +70,12 @@ module Cacher
43
70
 
44
71
  # Generate for each key token the subtokens.
45
72
  #
46
- i = 5000
73
+ i = 0
47
74
  index.each_key do |token|
48
- i -= 1
49
- if i == 0
75
+ i += 1
76
+ if i == 5000
50
77
  puts "#{Time.now}: Generating partial tokens for token #{token}. This appears every 5000 tokens."
51
- i = 5000
78
+ i = 0
52
79
  end
53
80
  generate_for token, index, result
54
81
  end
@@ -58,7 +85,7 @@ module Cacher
58
85
  # TODO If it is unique for a subtoken, it is
59
86
  # unique for all derived longer tokens.
60
87
  #
61
- result.each_value &:uniq! # Removed because of the set combination operation below
88
+ result.each_value &:uniq!
62
89
 
63
90
  result
64
91
  end
@@ -74,18 +101,17 @@ module Cacher
74
101
  # TODO Could be improved by appending the aforegoing ids?
75
102
  #
76
103
  def generate_for token, index, result
77
- clipped_token = starting_at.zero? ? token : token[0..starting_at].to_sym
78
- clipped_token.subtokens(down_to).each do |subtoken|
104
+ @generator.each_subtoken(token) do |subtoken|
79
105
  if result[subtoken]
80
106
  result[subtoken] += index[token] # unique
81
107
  else
82
- result[subtoken] = index[token].dup
108
+ result[subtoken] = index[token].dup # TODO Spec this dup
83
109
  end
84
110
  end
85
111
  end
86
-
112
+
87
113
  end
88
-
114
+
89
115
  end
90
-
116
+
91
117
  end
@@ -1,5 +1,8 @@
1
1
  module Configuration
2
-
2
+
3
+ # Describes the configuration of a "field", a category
4
+ # (title is a category of a books index, for example).
5
+ #
3
6
  class Field
4
7
  attr_reader :name, :indexed_name, :virtual
5
8
  attr_accessor :type # convenience
@@ -15,7 +18,8 @@ module Configuration
15
18
  @indexed_name = options.delete(:indexed_field) || name # TODO Rename to indexed_as?
16
19
  @virtual = options.delete(:virtual) || false
17
20
 
18
- Query::Qualifiers.add(name, options[:qualifiers]) if options[:qualifiers]
21
+ qualifiers = options[:qualifiers]
22
+ Query::Qualifiers.add(name, qualifiers) if qualifiers
19
23
 
20
24
  # @remove = options[:remove] || false
21
25
  # @filter = options[:filter] || true
@@ -1,11 +1,13 @@
1
1
  module Configuration
2
-
2
+
3
+ # Describes the container for all index configurations.
4
+ #
3
5
  class Indexes
4
6
 
5
7
  attr_reader :types
6
8
 
7
- def initialize *types
8
- @types = types
9
+ def initialize
10
+ @types = []
9
11
  end
10
12
 
11
13
  #
@@ -16,12 +18,19 @@ module Configuration
16
18
 
17
19
  # Delegates
18
20
  #
19
- delegate :illegal_characters, :contract_expressions, :stopwords, :split_text_on, :normalize_words, :illegal_characters_after_splitting, :to => :default_index
21
+ delegate :removes_characters, :contracts_expressions, :stopwords, :splits_text_on, :normalizes_words, :removes_characters_after_splitting, :to => :default_index
20
22
 
21
- def type name, *fields
22
- types << Type.new(name, *fields)
23
+ # TODO Rewrite all this configuration handling.
24
+ #
25
+ def type name, source, *fields
26
+ new_type = Type.new name, source, *fields
27
+ types << new_type
28
+ ::Indexes.configuration ||= self
29
+
30
+ generated = new_type.generate
31
+ ::Indexes.add generated
32
+ generated
23
33
  end
24
- alias add_index type
25
34
  def field name, options = {}
26
35
  Field.new name, options
27
36
  end
@@ -1,31 +1,21 @@
1
1
  module Configuration
2
2
 
3
+ #
4
+ #
3
5
  class Queries
4
6
 
5
- attr_reader :routing
6
-
7
- #
8
- #
9
- def initialize routing
10
- @routing = routing
11
- end
12
-
13
7
  #
14
8
  #
15
9
  def default_index
16
10
  Tokenizers::Query
17
11
  end
18
-
19
- # Routes.
20
- #
21
- delegate :defaults, :route, :live, :full, :root, :default, :to => :routing
12
+ delegate :removes_characters, :contracts_expressions, :stopwords, :splits_text_on, :normalizes_words, :removes_characters_after_splitting, :to => :default_index
22
13
 
23
14
  # Delegates.
24
15
  #
25
16
  def maximum_tokens amount
26
17
  Query::Tokens.maximum = amount
27
18
  end
28
- delegate :illegal_characters, :contract_expressions, :stopwords, :split_text_on, :normalize_words, :illegal_characters_after_splitting, :to => :default_index
29
19
 
30
20
  end
31
21
 
@@ -2,17 +2,32 @@
2
2
  #
3
3
  class Symbol
4
4
 
5
- # :keys.subtokens # => [:key, :ke, :k]
6
- # :keys.subtokens(2) # => [:key, :ke]
5
+ # :keys.subtokens # => [:keys, :key, :ke, :k]
6
+ # :keys.subtokens(2) # => [:keys, :key, :ke]
7
7
  #
8
8
  def subtokens down_to_length = 1
9
- sub, result = self.to_s, [self]
9
+ sub = self.id2name
10
10
 
11
11
  size = sub.size
12
+ down_to_length = size + down_to_length if down_to_length < 0
12
13
  down_to_length = size if size < down_to_length
13
14
 
14
- size.downto(down_to_length + 1) { result << sub.chop!.to_sym }
15
+ result = [self]
16
+ size.downto(down_to_length + 1) { result << sub.chop!.intern }
15
17
  result
16
18
  end
17
19
 
20
+ # TODO Duplicate code.
21
+ #
22
+ def each_subtoken down_to_length = 1
23
+ sub = self.id2name
24
+
25
+ size = sub.size
26
+ down_to_length = size + down_to_length if down_to_length < 0
27
+ down_to_length = size if size < down_to_length
28
+
29
+ yield self
30
+ size.downto(down_to_length + 1) { yield sub.chop!.intern }
31
+ end
32
+
18
33
  end
@@ -2,6 +2,10 @@ require 'fileutils'
2
2
 
3
3
  module Picky
4
4
 
5
+ # Thrown when no generator for the command
6
+ # picky <command> <options>
7
+ # is found.
8
+ #
5
9
  class NoGeneratorException < Exception; end
6
10
 
7
11
  # This is a very simple project generator.
@@ -43,6 +47,11 @@ module Picky
43
47
  klass.new *args
44
48
  end
45
49
 
50
+ # Generates a new Picky project.
51
+ #
52
+ # Example:
53
+ # > picky project my_lovely_project
54
+ #
46
55
  class Project
47
56
 
48
57
  attr_reader :name, :prototype_project_basedir
@@ -5,12 +5,12 @@ module Helpers
5
5
 
6
6
  # Returns a duration in seconds.
7
7
  #
8
- def timed(*args, &block)
9
- block_to_be_measured = lambda(&block)
8
+ def timed *args, &block
9
+ block_to_be_measured = lambda &block
10
10
 
11
11
  time_begin = Time.now.to_f
12
12
 
13
- block_to_be_measured.call(*args)
13
+ block_to_be_measured.call *args
14
14
 
15
15
  Time.now.to_f - time_begin
16
16
  end
@@ -103,10 +103,11 @@ module Index
103
103
  # Copies the indexes to the "backup" directory.
104
104
  #
105
105
  def backup
106
- FileUtils.mkdir backup_path unless Dir.exists?(backup_path)
107
- FileUtils.cp index_cache_path, backup_path, :verbose => true
108
- FileUtils.cp similarity_cache_path, backup_path, :verbose => true
109
- FileUtils.cp weights_cache_path, backup_path, :verbose => true
106
+ target = backup_path
107
+ FileUtils.mkdir target unless Dir.exists?(target)
108
+ FileUtils.cp index_cache_path, target, :verbose => true
109
+ FileUtils.cp similarity_cache_path, target, :verbose => true
110
+ FileUtils.cp weights_cache_path, target, :verbose => true
110
111
  end
111
112
  def backup_path
112
113
  File.join File.dirname(index_cache_path), 'backup'
@@ -22,8 +22,8 @@ module Index
22
22
  @full = options[:full_bundle] || Bundle.new(:full, self, type, Cacher::Partial::None.new, weights, similarity)
23
23
  @partial = options[:partial_bundle] || Bundle.new(:partial, self, type, partial, weights, Cacher::Similarity::None.new)
24
24
 
25
- @full = options[:full_lambda].call(@full, @partial) if options[:full_lambda]
26
- @partial = options[:partial_lambda].call(@full, @partial) if options[:partial_lambda]
25
+ @full = full_lambda.call(@full, @partial) if full_lambda = options[:full_lambda]
26
+ @partial = partial_lambda.call(@full, @partial) if partial_lambda = options[:partial_lambda]
27
27
  end
28
28
 
29
29
  # Loads the index from cache.
@@ -40,13 +40,13 @@ module Index
40
40
  # Generates all caches for this category.
41
41
  #
42
42
  def generate_caches
43
- exclaim "#{Time.now}: Loading data from db for #{identifier}."
43
+ timed_exclaim "Loading data from db for #{identifier}."
44
44
  generate_caches_from_db
45
- exclaim "#{Time.now}: Generating partial for #{identifier}."
45
+ timed_exclaim "Generating partial for #{identifier}."
46
46
  generate_partial
47
- exclaim "#{Time.now}: Generating caches from memory for #{identifier}."
47
+ timed_exclaim "Generating caches from memory for #{identifier}."
48
48
  generate_caches_from_memory
49
- exclaim "#{Time.now}: Dumping all caches for #{identifier}."
49
+ timed_exclaim "Dumping all caches for #{identifier}."
50
50
  dump_caches
51
51
  end
52
52
  def generate_caches_from_db
@@ -62,10 +62,17 @@ module Index
62
62
  full.dump
63
63
  partial.dump
64
64
  end
65
+ # TODO move to Kernel?
66
+ #
67
+ def timed_exclaim text
68
+ exclaim "#{Time.now}: #{text}"
69
+ end
70
+ # TODO move to Kernel?
71
+ #
65
72
  def exclaim text
66
73
  puts text
67
74
  end
68
-
75
+
69
76
  # Used for testing.
70
77
  #
71
78
  def generate_indexes_from_full_index
@@ -2,7 +2,12 @@
2
2
  #
3
3
  module Index
4
4
 
5
- #
5
+ # This index combines a full and partial index.
6
+ # It serves to order the results such that exact (full) hits are found first.
7
+ #
8
+ # TODO Rename full -> exact. exact/partial?
9
+ #
10
+ # TODO Need to use the right subtokens. Bake in?
6
11
  #
7
12
  class Combined < Bundle
8
13