picky 0.10.5 → 0.11.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (52) hide show
  1. data/lib/picky/alias_instances.rb +1 -0
  2. data/lib/picky/application.rb +6 -7
  3. data/lib/picky/bundle.rb +31 -0
  4. data/lib/picky/configuration/indexes.rb +30 -41
  5. data/lib/picky/configuration/type.rb +6 -40
  6. data/lib/picky/ext/maybe_compile.rb +9 -0
  7. data/lib/picky/index/bundle.rb +1 -139
  8. data/lib/picky/{query/combinator.rb → index/categories.rb} +16 -18
  9. data/lib/picky/index/category.rb +20 -46
  10. data/lib/picky/index/type.rb +16 -12
  11. data/lib/picky/index/types.rb +41 -0
  12. data/lib/picky/index/wrappers/exact_first.rb +5 -1
  13. data/lib/picky/indexers/base.rb +9 -8
  14. data/lib/picky/indexing/bundle.rb +152 -0
  15. data/lib/picky/indexing/categories.rb +36 -0
  16. data/lib/picky/indexing/category.rb +145 -0
  17. data/lib/picky/indexing/type.rb +45 -0
  18. data/lib/picky/indexing/types.rb +74 -0
  19. data/lib/picky/loader.rb +17 -7
  20. data/lib/picky/query/base.rb +5 -4
  21. data/lib/picky/sources/wrappers/base.rb +23 -0
  22. data/lib/picky/sources/wrappers/location.rb +92 -0
  23. data/lib/picky/tokenizers/index.rb +4 -1
  24. data/lib/picky/type.rb +46 -0
  25. data/lib/picky/types.rb +38 -0
  26. data/lib/tasks/index.rake +4 -0
  27. data/project_prototype/Gemfile +1 -1
  28. data/project_prototype/app/application.rb +12 -12
  29. data/spec/lib/application_spec.rb +6 -9
  30. data/spec/lib/configuration/indexes_spec.rb +0 -85
  31. data/spec/lib/index/bundle_spec.rb +2 -94
  32. data/spec/lib/index/category_spec.rb +7 -86
  33. data/spec/lib/index/type_spec.rb +14 -26
  34. data/spec/lib/index/wrappers/exact_first_spec.rb +12 -12
  35. data/spec/lib/{index → indexing}/bundle_partial_generation_speed_spec.rb +2 -2
  36. data/spec/lib/indexing/bundle_spec.rb +174 -0
  37. data/spec/lib/{query/combinator_spec.rb → indexing/categories_spec.rb} +30 -34
  38. data/spec/lib/indexing/category_spec.rb +257 -0
  39. data/spec/lib/indexing/type_spec.rb +32 -0
  40. data/spec/lib/loader_spec.rb +0 -2
  41. data/spec/lib/query/base_spec.rb +8 -17
  42. data/spec/lib/query/full_spec.rb +3 -6
  43. data/spec/lib/query/live_spec.rb +4 -3
  44. data/spec/lib/sources/wrappers/base_spec.rb +35 -0
  45. data/spec/lib/sources/wrappers/location_spec.rb +68 -0
  46. data/spec/lib/tokenizers/index_spec.rb +2 -5
  47. metadata +32 -16
  48. data/lib/picky/configuration/field.rb +0 -73
  49. data/lib/picky/indexes.rb +0 -179
  50. data/lib/picky/initializers/ext.rb +0 -1
  51. data/spec/lib/configuration/field_spec.rb +0 -208
  52. data/spec/lib/configuration/type_spec.rb +0 -49
@@ -0,0 +1 @@
1
+ Indexes = Types.new
@@ -21,13 +21,6 @@ class Application
21
21
  # Routes.
22
22
  #
23
23
  delegate :route, :root, :to => :routing
24
- # Index, Field.
25
- #
26
- # TODO Rename category.
27
- #
28
- delegate :field, :to => :indexing
29
- def category *args; indexing.field *args; end
30
- def index *args; indexing.define_index *args; end
31
24
 
32
25
  # An application simply delegates to the routing to handle a request.
33
26
  #
@@ -44,6 +37,12 @@ class Application
44
37
  @queries ||= Configuration::Queries.new
45
38
  end
46
39
 
40
+ # "API".
41
+ #
42
+ def index *args
43
+ ::Type.new *args
44
+ end
45
+
47
46
  # Finalize the subclass as soon as it
48
47
  # has finished loading.
49
48
  #
@@ -0,0 +1,31 @@
1
+ class Bundle
2
+
3
+ attr_reader :identifier, :files
4
+ attr_accessor :index, :weights, :similarity, :similarity_strategy
5
+
6
+ delegate :[], :[]=, :clear, :to => :index
7
+
8
+ def initialize name, category, type, similarity_strategy
9
+ @identifier = "#{name}: #{type.name} #{category.name}"
10
+
11
+ @index = {}
12
+ @weights = {}
13
+ @similarity = {}
14
+
15
+ @similarity_strategy = similarity_strategy
16
+
17
+ # TODO inject files.
18
+ #
19
+ # TODO Move Files somewhere. Shared?
20
+ #
21
+ @files = Index::Files.new name, category.name, type.name
22
+ end
23
+
24
+ # Get a list of similar texts.
25
+ #
26
+ def similar text
27
+ code = similarity_strategy.encoded text
28
+ code && @similarity[code] || []
29
+ end
30
+
31
+ end
@@ -1,3 +1,5 @@
1
+ # TODO Remove?
2
+ #
1
3
  module Configuration
2
4
 
3
5
  # Describes the container for all index configurations.
@@ -14,48 +16,35 @@ module Configuration
14
16
  Tokenizers::Index.default = Tokenizers::Index.new(options)
15
17
  end
16
18
 
17
- # TODO Rewrite all this configuration handling.
19
+ # TODO Move this to … where?
18
20
  #
19
- def define_index name, source, *fields
20
- new_type = Type.new name, source, *fields
21
- types << new_type
22
- ::Indexes.configuration ||= self
23
-
24
- generated = new_type.generate
25
- ::Indexes.add generated
26
- generated
27
- end
28
- def field name, options = {}
29
- Field.new name, options
30
- end
31
-
32
- #
33
- #
34
- def take_snapshot *type_names
35
- only_if_included_in type_names do |type|
36
- type.take_snapshot
37
- end
38
- end
39
- def index *type_names
40
- only_if_included_in type_names do |type|
41
- type.index
42
- end
43
- end
44
- def index_solr *type_names
45
- only_if_included_in type_names do |type|
46
- type.index_solr
47
- end
48
- end
49
-
50
- #
51
- #
52
- def only_if_included_in type_names = []
53
- type_names = types.map(&:name) if type_names.empty?
54
- types.each do |type|
55
- next unless type_names.include?(type.name)
56
- yield type
57
- end
58
- end
21
+ # #
22
+ # #
23
+ # def take_snapshot *type_names
24
+ # only_if_included_in type_names do |type|
25
+ # type.take_snapshot
26
+ # end
27
+ # end
28
+ # def index *type_names
29
+ # only_if_included_in type_names do |type|
30
+ # type.index
31
+ # end
32
+ # end
33
+ # def index_solr *type_names
34
+ # only_if_included_in type_names do |type|
35
+ # type.index_solr
36
+ # end
37
+ # end
38
+ #
39
+ # #
40
+ # #
41
+ # def only_if_included_in type_names = []
42
+ # type_names = types.map(&:name) if type_names.empty?
43
+ # types.each do |type|
44
+ # next unless type_names.include?(type.name)
45
+ # yield type
46
+ # end
47
+ # end
59
48
 
60
49
  end
61
50
 
@@ -2,53 +2,19 @@ module Configuration
2
2
  class Type
3
3
  attr_reader :name,
4
4
  :source,
5
- :fields,
6
- :after_indexing,
7
5
  :result_type,
8
- :ignore_unassigned_tokens,
9
- :solr
10
- def initialize name, source, *fields, options
11
- if Configuration::Field === options
12
- fields << options
13
- options = {}
14
- end
15
-
6
+ :after_indexing,
7
+ :ignore_unassigned_tokens
8
+ def initialize name, source, options
16
9
  @name = name
17
10
  @source = source
18
- # dup, if field is reused. TODO Rewrite.
19
- @fields = fields.map { |field| field = field.dup; field.type = self; field }
20
11
 
21
- @after_indexing = options[:after_indexing]
22
12
  @result_type = options[:result_type] || name
23
- @ignore_unassigned_tokens = options[:ignore_unassigned_tokens] || false # TODO Move to query?
24
- # @solr = options[:solr] || nil
13
+ @after_indexing = options[:after_indexing] # Where do I use this?
14
+ @ignore_unassigned_tokens = options[:ignore_unassigned_tokens] || false # TODO Move to query?
25
15
  end
26
16
  def generate
27
- categories = fields.map { |field| field.generate }
28
- Index::Type.new name, result_type, ignore_unassigned_tokens, *categories
29
- end
30
- def take_snapshot
31
- source.take_snapshot self
32
- end
33
- def index
34
- fields.each do |field|
35
- field.index
36
- end
37
- end
38
- # def solr_fields
39
- # solr ? fields.select { |field| !field.virtual? } : []
40
- # end
41
- # # TODO Delegate to Solr handler.
42
- # #
43
- # def index_solr
44
- # return unless solr
45
- # @indexer = Indexers::Solr.new self
46
- # @indexer.index
47
- # end
48
- # TODO Spec!
49
- #
50
- def connect_backend
51
- @source.connect_backend
17
+ Index::Type.new name, source, result_type, ignore_unassigned_tokens
52
18
  end
53
19
  end
54
20
  end
@@ -0,0 +1,9 @@
1
+ begin
2
+ require File.expand_path '../ruby19/performant', __FILE__
3
+ rescue LoadError
4
+ require File.expand_path '../ruby19/extconf.rb', __FILE__
5
+ Dir.chdir File.expand_path('../ruby19', __FILE__) do
6
+ %x{ ruby extconf.rb && make }
7
+ end
8
+ retry
9
+ end
@@ -8,35 +8,7 @@ module Index
8
8
  #
9
9
  # Delegates file handling and checking to a Index::Files object.
10
10
  #
11
- class Bundle
12
-
13
- attr_reader :identifier, :category
14
- attr_accessor :index, :weights, :similarity
15
- attr_accessor :partial_strategy, :weights_strategy, :similarity_strategy
16
- attr_reader :files
17
-
18
- delegate :[], :[]=, :clear, :to => :index
19
- delegate :raise_unless_cache_exists, :to => :checker
20
-
21
- # Path is in which directory the cache is located.
22
- #
23
- def initialize name, category, type, partial_strategy, weights_strategy, similarity_strategy
24
- @identifier = "#{name}: #{type.name} #{category.name}"
25
-
26
- @index = {}
27
- @weights = {}
28
- @similarity = {}
29
-
30
- # TODO Used in weights, try to remove!
31
- #
32
- @category = category
33
-
34
- @partial_strategy = partial_strategy
35
- @weights_strategy = weights_strategy
36
- @similarity_strategy = similarity_strategy
37
-
38
- @files = Files.new name, category.name, type.name
39
- end
11
+ class Bundle < ::Bundle
40
12
 
41
13
  # Get the ids for the text.
42
14
  #
@@ -48,43 +20,6 @@ module Index
48
20
  def weight text
49
21
  @weights[text]
50
22
  end
51
- # Get a list of similar texts.
52
- #
53
- def similar text
54
- code = similarity_strategy.encoded text
55
- code && @similarity[code] || []
56
- end
57
-
58
- # Generation
59
- #
60
-
61
- # This method
62
- # * loads the base index from the db
63
- # * generates derived indexes
64
- # * dumps all the indexes into files
65
- #
66
- def generate_caches_from_source
67
- load_from_index_file
68
- generate_caches_from_memory
69
- end
70
- # Generates derived indexes from the index and dumps.
71
- #
72
- # Note: assumes that there is something in the index
73
- #
74
- def generate_caches_from_memory
75
- cache_from_memory_generation_message
76
- generate_derived
77
- end
78
- def cache_from_memory_generation_message
79
- timed_exclaim "CACHE FROM MEMORY #{identifier}."
80
- end
81
-
82
- # Generates the weights and similarity from the main index.
83
- #
84
- def generate_derived
85
- generate_weights
86
- generate_similarity
87
- end
88
23
 
89
24
  # Load the data from the db.
90
25
  #
@@ -108,59 +43,6 @@ module Index
108
43
  index[token] ||= []
109
44
  end
110
45
 
111
- # Generators.
112
- #
113
- # TODO Move somewhere more fitting.
114
- #
115
-
116
- # Generates a new index (writes its index) using the
117
- # given partial caching strategy.
118
- #
119
- def generate_partial
120
- generator = Cacher::PartialGenerator.new self.index
121
- self.index = generator.generate self.partial_strategy
122
- end
123
- def generate_partial_from exact_index
124
- timed_exclaim "PARTIAL GENERATE #{identifier}."
125
- self.index = exact_index
126
- self.generate_partial
127
- self
128
- end
129
- # Generates a new similarity index (writes its index) using the
130
- # given similarity caching strategy.
131
- #
132
- def generate_similarity
133
- generator = Cacher::SimilarityGenerator.new self.index
134
- self.similarity = generator.generate self.similarity_strategy
135
- end
136
- # Generates a new weights index (writes its index) using the
137
- # given weight caching strategy.
138
- #
139
- def generate_weights
140
- generator = Cacher::WeightsGenerator.new self.index
141
- self.weights = generator.generate self.weights_strategy
142
- end
143
-
144
- # Saves the index in a dump file.
145
- #
146
- def dump
147
- dump_index
148
- dump_similarity
149
- dump_weights
150
- end
151
- def dump_index
152
- timed_exclaim "DUMP INDEX #{identifier}."
153
- files.dump_index index
154
- end
155
- def dump_similarity
156
- timed_exclaim "DUMP SIMILARITY #{identifier}."
157
- files.dump_similarity similarity
158
- end
159
- def dump_weights
160
- timed_exclaim "DUMP WEIGHTS #{identifier}."
161
- files.dump_weights weights
162
- end
163
-
164
46
  # Loads all indexes into this category.
165
47
  #
166
48
  def load
@@ -178,25 +60,5 @@ module Index
178
60
  self.weights = files.load_weights
179
61
  end
180
62
 
181
- # Alerts the user if an index is missing.
182
- #
183
- def raise_unless_cache_exists
184
- warn_cache_small :index if files.index_cache_small?
185
- warn_cache_small :similarity if files.similarity_cache_small?
186
- warn_cache_small :weights if files.weights_cache_small?
187
-
188
- raise_cache_missing :index unless files.index_cache_ok?
189
- raise_cache_missing :similarity unless files.similarity_cache_ok?
190
- raise_cache_missing :weights unless files.weights_cache_ok?
191
- end
192
- def warn_cache_small what
193
- puts "#{what} cache for #{identifier} smaller than 16 bytes."
194
- end
195
- # Raises an appropriate error message.
196
- #
197
- def raise_cache_missing what
198
- raise "#{what} cache for #{identifier} missing."
199
- end
200
-
201
63
  end
202
64
  end
@@ -1,28 +1,26 @@
1
- module Query
1
+ module Index
2
2
 
3
- # Combines tokens and category indexes into combinations.
4
- #
5
- class Combinator
3
+ class Categories
6
4
 
7
- attr_reader :categories, :category_hash
8
- attr_reader :ignore_unassigned_tokens # TODO Should this actually be determined by the query? Probably, yes.
5
+ attr_reader :categories, :category_hash, :ignore_unassigned_tokens
9
6
 
10
- def initialize categories, options = {}
11
- @categories = categories
12
- @category_hash = hashify categories
7
+ each_delegate :load_from_cache,
8
+ :to => :categories
9
+
10
+ def initialize options = {}
11
+ clear
13
12
 
14
13
  @ignore_unassigned_tokens = options[:ignore_unassigned_tokens] || false
15
14
  end
16
15
 
17
- # TODO Move somewhere else.
18
- #
19
- # TODO Or use active_support's?
20
- #
21
- def hashify category_array
22
- category_array.inject({}) do |hash, category|
23
- hash[category.name] = [category]
24
- hash
25
- end
16
+ def clear
17
+ @categories = []
18
+ @category_hash = {}
19
+ end
20
+
21
+ def << category
22
+ categories << category
23
+ category_hash[category.name] = [category] # TODO Why an Array?
26
24
  end
27
25
 
28
26
  #
@@ -15,19 +15,30 @@ module Index
15
15
  @name = name
16
16
  @type = type
17
17
 
18
- partial = options[:partial] || Cacher::Partial::Default
19
- weights = options[:weights] || Cacher::Weights::Default
20
18
  similarity = options[:similarity] || Cacher::Similarity::Default
21
19
 
22
- @exact = options[:exact_bundle] || Bundle.new(:exact, self, type, Cacher::Partial::None.new, weights, similarity)
23
- @partial = options[:partial_bundle] || Bundle.new(:partial, self, type, partial, weights, Cacher::Similarity::None.new)
20
+ @exact = options[:exact_index_bundle] || Bundle.new(:exact, self, type, similarity)
21
+ @partial = options[:partial_index_bundle] || Bundle.new(:partial, self, type, similarity)
24
22
 
25
23
  @exact = exact_lambda.call(@exact, @partial) if exact_lambda = options[:exact_lambda]
26
24
  @partial = partial_lambda.call(@exact, @partial) if partial_lambda = options[:partial_lambda]
25
+
26
+ # Extract?
27
+ #
28
+ qualifiers = generate_qualifiers_from options
29
+ Query::Qualifiers.add(name, qualifiers) if qualifiers
30
+ end
31
+
32
+ # TODO Move to Index.
33
+ #
34
+ def generate_qualifiers_from options
35
+ options[:qualifiers] || options[:qualifier] && [options[:qualifier]] || [name]
27
36
  end
28
37
 
29
38
  # Loads the index from cache.
30
39
  #
40
+ # TODO Metaprogram delegation? each_delegate?
41
+ #
31
42
  def load_from_cache
32
43
  timed_exclaim "Loading index #{identifier}."
33
44
  exact.load
@@ -40,67 +51,30 @@ module Index
40
51
  @identifier ||= "#{type.name} #{name}"
41
52
  end
42
53
 
43
- # Generates all caches for this category.
44
- #
45
- def generate_caches
46
- generate_caches_from_source
47
- generate_partial
48
- generate_caches_from_memory
49
- dump_caches
50
- timed_exclaim "CACHE FINISHED #{identifier}."
51
- end
52
- def generate_caches_from_source
53
- exact.generate_caches_from_source
54
- end
55
- def generate_partial
56
- partial.generate_partial_from exact.index
57
- end
58
- def generate_caches_from_memory
59
- partial.generate_caches_from_memory
60
- end
61
- def dump_caches
62
- exact.dump
63
- partial.dump
64
- end
65
-
66
- # Used for testing.
67
- #
68
- def generate_indexes_from_exact_index
69
- generate_derived_exact
70
- generate_partial
71
- generate_derived_partial
72
- end
73
- def generate_derived_exact
74
- exact.generate_derived
75
- end
76
- def generate_derived_partial
77
- partial.generate_derived
78
- end
79
-
80
54
  # Gets the weight for this token's text.
81
55
  #
82
56
  def weight token
83
57
  bundle_for(token).weight token.text
84
58
  end
85
-
59
+
86
60
  # Gets the ids for this token's text.
87
61
  #
88
62
  def ids token
89
63
  bundle_for(token).ids token.text
90
64
  end
91
-
65
+
92
66
  # Returns the right index bundle for this token.
93
67
  #
94
68
  def bundle_for token
95
69
  token.partial? ? partial : exact
96
70
  end
97
-
71
+
98
72
  #
99
73
  #
100
74
  def combination_for token
101
75
  weight(token) && ::Query::Combination.new(token, self)
102
76
  end
103
-
77
+
104
78
  end
105
-
79
+
106
80
  end
@@ -6,26 +6,30 @@ module Index
6
6
  #
7
7
  class Type
8
8
 
9
- attr_reader :name, :result_type, :categories, :combinator
9
+ attr_reader :name, :result_type, :combinator, :categories
10
10
 
11
- each_delegate :generate_caches, :load_from_cache, :to => :categories
11
+ delegate :load_from_cache,
12
+ :to => :categories
12
13
 
13
- # TODO Use config
14
- #
15
- def initialize name, result_type, ignore_unassigned_tokens, *categories
16
- @name = name
17
- @result_type = result_type # TODO Move.
18
- @categories = categories # for each_delegate
19
- @combinator = combinator_for categories, ignore_unassigned_tokens
14
+ def initialize name, options = {}
15
+ @name = name
16
+
17
+ @result_type = options[:result_type] || name
18
+ ignore_unassigned_tokens = options[:ignore_unassigned_tokens] || false # TODO Move to query, somehow.
19
+
20
+ @categories = Categories.new ignore_unassigned_tokens: ignore_unassigned_tokens
20
21
  end
21
- def combinator_for categories, ignore_unassigned_tokens
22
- Query::Combinator.new @categories, ignore_unassigned_tokens: ignore_unassigned_tokens
22
+
23
+ # TODO Spec. Doc.
24
+ #
25
+ def add_category name, options = {}
26
+ categories << Category.new(name, self, options)
23
27
  end
24
28
 
25
29
  #
26
30
  #
27
31
  def possible_combinations token
28
- @combinator.possible_combinations_for token
32
+ categories.possible_combinations_for token
29
33
  end
30
34
 
31
35
  end
@@ -0,0 +1,41 @@
1
+ module Index
2
+
3
+ class Types
4
+
5
+ attr_reader :types, :type_mapping
6
+
7
+ each_delegate :load_from_cache,
8
+ :to => :types
9
+
10
+ def initialize
11
+ clear
12
+ end
13
+
14
+ # TODO Spec.
15
+ #
16
+ def clear
17
+ @types = []
18
+ @type_mapping = {}
19
+ end
20
+
21
+ # TODO Spec.
22
+ #
23
+ def reload
24
+ load_from_cache
25
+ end
26
+
27
+ # TODO Spec
28
+ #
29
+ def register type
30
+ self.types << type
31
+ self.type_mapping[type.name] = type
32
+ end
33
+ def [] name
34
+ name = name.to_sym
35
+
36
+ type_mapping[name]
37
+ end
38
+
39
+ end
40
+
41
+ end
@@ -2,6 +2,8 @@
2
2
  #
3
3
  module Index
4
4
 
5
+ # FIXME and spec
6
+ #
5
7
  module Wrappers
6
8
 
7
9
  # This index combines an exact and partial index.
@@ -38,8 +40,10 @@ module Index
38
40
  new type_or_category
39
41
  end
40
42
  end
43
+ # TODO Do not extract categories!
44
+ #
41
45
  def self.wrap_each_of categories
42
- categories.collect! { |category| new(category) }
46
+ categories.categories.collect! { |category| new(category) }
43
47
  end
44
48
 
45
49
  def ids text