picky 0.10.5 → 0.11.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (52)
  1. data/lib/picky/alias_instances.rb +1 -0
  2. data/lib/picky/application.rb +6 -7
  3. data/lib/picky/bundle.rb +31 -0
  4. data/lib/picky/configuration/indexes.rb +30 -41
  5. data/lib/picky/configuration/type.rb +6 -40
  6. data/lib/picky/ext/maybe_compile.rb +9 -0
  7. data/lib/picky/index/bundle.rb +1 -139
  8. data/lib/picky/{query/combinator.rb → index/categories.rb} +16 -18
  9. data/lib/picky/index/category.rb +20 -46
  10. data/lib/picky/index/type.rb +16 -12
  11. data/lib/picky/index/types.rb +41 -0
  12. data/lib/picky/index/wrappers/exact_first.rb +5 -1
  13. data/lib/picky/indexers/base.rb +9 -8
  14. data/lib/picky/indexing/bundle.rb +152 -0
  15. data/lib/picky/indexing/categories.rb +36 -0
  16. data/lib/picky/indexing/category.rb +145 -0
  17. data/lib/picky/indexing/type.rb +45 -0
  18. data/lib/picky/indexing/types.rb +74 -0
  19. data/lib/picky/loader.rb +17 -7
  20. data/lib/picky/query/base.rb +5 -4
  21. data/lib/picky/sources/wrappers/base.rb +23 -0
  22. data/lib/picky/sources/wrappers/location.rb +92 -0
  23. data/lib/picky/tokenizers/index.rb +4 -1
  24. data/lib/picky/type.rb +46 -0
  25. data/lib/picky/types.rb +38 -0
  26. data/lib/tasks/index.rake +4 -0
  27. data/project_prototype/Gemfile +1 -1
  28. data/project_prototype/app/application.rb +12 -12
  29. data/spec/lib/application_spec.rb +6 -9
  30. data/spec/lib/configuration/indexes_spec.rb +0 -85
  31. data/spec/lib/index/bundle_spec.rb +2 -94
  32. data/spec/lib/index/category_spec.rb +7 -86
  33. data/spec/lib/index/type_spec.rb +14 -26
  34. data/spec/lib/index/wrappers/exact_first_spec.rb +12 -12
  35. data/spec/lib/{index → indexing}/bundle_partial_generation_speed_spec.rb +2 -2
  36. data/spec/lib/indexing/bundle_spec.rb +174 -0
  37. data/spec/lib/{query/combinator_spec.rb → indexing/categories_spec.rb} +30 -34
  38. data/spec/lib/indexing/category_spec.rb +257 -0
  39. data/spec/lib/indexing/type_spec.rb +32 -0
  40. data/spec/lib/loader_spec.rb +0 -2
  41. data/spec/lib/query/base_spec.rb +8 -17
  42. data/spec/lib/query/full_spec.rb +3 -6
  43. data/spec/lib/query/live_spec.rb +4 -3
  44. data/spec/lib/sources/wrappers/base_spec.rb +35 -0
  45. data/spec/lib/sources/wrappers/location_spec.rb +68 -0
  46. data/spec/lib/tokenizers/index_spec.rb +2 -5
  47. metadata +32 -16
  48. data/lib/picky/configuration/field.rb +0 -73
  49. data/lib/picky/indexes.rb +0 -179
  50. data/lib/picky/initializers/ext.rb +0 -1
  51. data/spec/lib/configuration/field_spec.rb +0 -208
  52. data/spec/lib/configuration/type_spec.rb +0 -49
@@ -0,0 +1 @@
1
+ Indexes = Types.new
@@ -21,13 +21,6 @@ class Application
21
21
  # Routes.
22
22
  #
23
23
  delegate :route, :root, :to => :routing
24
- # Index, Field.
25
- #
26
- # TODO Rename category.
27
- #
28
- delegate :field, :to => :indexing
29
- def category *args; indexing.field *args; end
30
- def index *args; indexing.define_index *args; end
31
24
 
32
25
  # An application simply delegates to the routing to handle a request.
33
26
  #
@@ -44,6 +37,12 @@ class Application
44
37
  @queries ||= Configuration::Queries.new
45
38
  end
46
39
 
40
+ # "API".
41
+ #
42
+ def index *args
43
+ ::Type.new *args
44
+ end
45
+
47
46
  # Finalize the subclass as soon as it
48
47
  # has finished loading.
49
48
  #
@@ -0,0 +1,31 @@
1
+ class Bundle
2
+
3
+ attr_reader :identifier, :files
4
+ attr_accessor :index, :weights, :similarity, :similarity_strategy
5
+
6
+ delegate :[], :[]=, :clear, :to => :index
7
+
8
+ def initialize name, category, type, similarity_strategy
9
+ @identifier = "#{name}: #{type.name} #{category.name}"
10
+
11
+ @index = {}
12
+ @weights = {}
13
+ @similarity = {}
14
+
15
+ @similarity_strategy = similarity_strategy
16
+
17
+ # TODO inject files.
18
+ #
19
+ # TODO Move Files somewhere. Shared?
20
+ #
21
+ @files = Index::Files.new name, category.name, type.name
22
+ end
23
+
24
+ # Get a list of similar texts.
25
+ #
26
+ def similar text
27
+ code = similarity_strategy.encoded text
28
+ code && @similarity[code] || []
29
+ end
30
+
31
+ end
@@ -1,3 +1,5 @@
1
+ # TODO Remove?
2
+ #
1
3
  module Configuration
2
4
 
3
5
  # Describes the container for all index configurations.
@@ -14,48 +16,35 @@ module Configuration
14
16
  Tokenizers::Index.default = Tokenizers::Index.new(options)
15
17
  end
16
18
 
17
- # TODO Rewrite all this configuration handling.
19
+ # TODO Move this to … where?
18
20
  #
19
- def define_index name, source, *fields
20
- new_type = Type.new name, source, *fields
21
- types << new_type
22
- ::Indexes.configuration ||= self
23
-
24
- generated = new_type.generate
25
- ::Indexes.add generated
26
- generated
27
- end
28
- def field name, options = {}
29
- Field.new name, options
30
- end
31
-
32
- #
33
- #
34
- def take_snapshot *type_names
35
- only_if_included_in type_names do |type|
36
- type.take_snapshot
37
- end
38
- end
39
- def index *type_names
40
- only_if_included_in type_names do |type|
41
- type.index
42
- end
43
- end
44
- def index_solr *type_names
45
- only_if_included_in type_names do |type|
46
- type.index_solr
47
- end
48
- end
49
-
50
- #
51
- #
52
- def only_if_included_in type_names = []
53
- type_names = types.map(&:name) if type_names.empty?
54
- types.each do |type|
55
- next unless type_names.include?(type.name)
56
- yield type
57
- end
58
- end
21
+ # #
22
+ # #
23
+ # def take_snapshot *type_names
24
+ # only_if_included_in type_names do |type|
25
+ # type.take_snapshot
26
+ # end
27
+ # end
28
+ # def index *type_names
29
+ # only_if_included_in type_names do |type|
30
+ # type.index
31
+ # end
32
+ # end
33
+ # def index_solr *type_names
34
+ # only_if_included_in type_names do |type|
35
+ # type.index_solr
36
+ # end
37
+ # end
38
+ #
39
+ # #
40
+ # #
41
+ # def only_if_included_in type_names = []
42
+ # type_names = types.map(&:name) if type_names.empty?
43
+ # types.each do |type|
44
+ # next unless type_names.include?(type.name)
45
+ # yield type
46
+ # end
47
+ # end
59
48
 
60
49
  end
61
50
 
@@ -2,53 +2,19 @@ module Configuration
2
2
  class Type
3
3
  attr_reader :name,
4
4
  :source,
5
- :fields,
6
- :after_indexing,
7
5
  :result_type,
8
- :ignore_unassigned_tokens,
9
- :solr
10
- def initialize name, source, *fields, options
11
- if Configuration::Field === options
12
- fields << options
13
- options = {}
14
- end
15
-
6
+ :after_indexing,
7
+ :ignore_unassigned_tokens
8
+ def initialize name, source, options
16
9
  @name = name
17
10
  @source = source
18
- # dup, if field is reused. TODO Rewrite.
19
- @fields = fields.map { |field| field = field.dup; field.type = self; field }
20
11
 
21
- @after_indexing = options[:after_indexing]
22
12
  @result_type = options[:result_type] || name
23
- @ignore_unassigned_tokens = options[:ignore_unassigned_tokens] || false # TODO Move to query?
24
- # @solr = options[:solr] || nil
13
+ @after_indexing = options[:after_indexing] # Where do I use this?
14
+ @ignore_unassigned_tokens = options[:ignore_unassigned_tokens] || false # TODO Move to query?
25
15
  end
26
16
  def generate
27
- categories = fields.map { |field| field.generate }
28
- Index::Type.new name, result_type, ignore_unassigned_tokens, *categories
29
- end
30
- def take_snapshot
31
- source.take_snapshot self
32
- end
33
- def index
34
- fields.each do |field|
35
- field.index
36
- end
37
- end
38
- # def solr_fields
39
- # solr ? fields.select { |field| !field.virtual? } : []
40
- # end
41
- # # TODO Delegate to Solr handler.
42
- # #
43
- # def index_solr
44
- # return unless solr
45
- # @indexer = Indexers::Solr.new self
46
- # @indexer.index
47
- # end
48
- # TODO Spec!
49
- #
50
- def connect_backend
51
- @source.connect_backend
17
+ Index::Type.new name, source, result_type, ignore_unassigned_tokens
52
18
  end
53
19
  end
54
20
  end
@@ -0,0 +1,9 @@
1
+ begin
2
+ require File.expand_path '../ruby19/performant', __FILE__
3
+ rescue LoadError
4
+ require File.expand_path '../ruby19/extconf.rb', __FILE__
5
+ Dir.chdir File.expand_path('../ruby19', __FILE__) do
6
+ %x{ ruby extconf.rb && make }
7
+ end
8
+ retry
9
+ end
@@ -8,35 +8,7 @@ module Index
8
8
  #
9
9
  # Delegates file handling and checking to a Index::Files object.
10
10
  #
11
- class Bundle
12
-
13
- attr_reader :identifier, :category
14
- attr_accessor :index, :weights, :similarity
15
- attr_accessor :partial_strategy, :weights_strategy, :similarity_strategy
16
- attr_reader :files
17
-
18
- delegate :[], :[]=, :clear, :to => :index
19
- delegate :raise_unless_cache_exists, :to => :checker
20
-
21
- # Path is in which directory the cache is located.
22
- #
23
- def initialize name, category, type, partial_strategy, weights_strategy, similarity_strategy
24
- @identifier = "#{name}: #{type.name} #{category.name}"
25
-
26
- @index = {}
27
- @weights = {}
28
- @similarity = {}
29
-
30
- # TODO Used in weights, try to remove!
31
- #
32
- @category = category
33
-
34
- @partial_strategy = partial_strategy
35
- @weights_strategy = weights_strategy
36
- @similarity_strategy = similarity_strategy
37
-
38
- @files = Files.new name, category.name, type.name
39
- end
11
+ class Bundle < ::Bundle
40
12
 
41
13
  # Get the ids for the text.
42
14
  #
@@ -48,43 +20,6 @@ module Index
48
20
  def weight text
49
21
  @weights[text]
50
22
  end
51
- # Get a list of similar texts.
52
- #
53
- def similar text
54
- code = similarity_strategy.encoded text
55
- code && @similarity[code] || []
56
- end
57
-
58
- # Generation
59
- #
60
-
61
- # This method
62
- # * loads the base index from the db
63
- # * generates derived indexes
64
- # * dumps all the indexes into files
65
- #
66
- def generate_caches_from_source
67
- load_from_index_file
68
- generate_caches_from_memory
69
- end
70
- # Generates derived indexes from the index and dumps.
71
- #
72
- # Note: assumes that there is something in the index
73
- #
74
- def generate_caches_from_memory
75
- cache_from_memory_generation_message
76
- generate_derived
77
- end
78
- def cache_from_memory_generation_message
79
- timed_exclaim "CACHE FROM MEMORY #{identifier}."
80
- end
81
-
82
- # Generates the weights and similarity from the main index.
83
- #
84
- def generate_derived
85
- generate_weights
86
- generate_similarity
87
- end
88
23
 
89
24
  # Load the data from the db.
90
25
  #
@@ -108,59 +43,6 @@ module Index
108
43
  index[token] ||= []
109
44
  end
110
45
 
111
- # Generators.
112
- #
113
- # TODO Move somewhere more fitting.
114
- #
115
-
116
- # Generates a new index (writes its index) using the
117
- # given partial caching strategy.
118
- #
119
- def generate_partial
120
- generator = Cacher::PartialGenerator.new self.index
121
- self.index = generator.generate self.partial_strategy
122
- end
123
- def generate_partial_from exact_index
124
- timed_exclaim "PARTIAL GENERATE #{identifier}."
125
- self.index = exact_index
126
- self.generate_partial
127
- self
128
- end
129
- # Generates a new similarity index (writes its index) using the
130
- # given similarity caching strategy.
131
- #
132
- def generate_similarity
133
- generator = Cacher::SimilarityGenerator.new self.index
134
- self.similarity = generator.generate self.similarity_strategy
135
- end
136
- # Generates a new weights index (writes its index) using the
137
- # given weight caching strategy.
138
- #
139
- def generate_weights
140
- generator = Cacher::WeightsGenerator.new self.index
141
- self.weights = generator.generate self.weights_strategy
142
- end
143
-
144
- # Saves the index in a dump file.
145
- #
146
- def dump
147
- dump_index
148
- dump_similarity
149
- dump_weights
150
- end
151
- def dump_index
152
- timed_exclaim "DUMP INDEX #{identifier}."
153
- files.dump_index index
154
- end
155
- def dump_similarity
156
- timed_exclaim "DUMP SIMILARITY #{identifier}."
157
- files.dump_similarity similarity
158
- end
159
- def dump_weights
160
- timed_exclaim "DUMP WEIGHTS #{identifier}."
161
- files.dump_weights weights
162
- end
163
-
164
46
  # Loads all indexes into this category.
165
47
  #
166
48
  def load
@@ -178,25 +60,5 @@ module Index
178
60
  self.weights = files.load_weights
179
61
  end
180
62
 
181
- # Alerts the user if an index is missing.
182
- #
183
- def raise_unless_cache_exists
184
- warn_cache_small :index if files.index_cache_small?
185
- warn_cache_small :similarity if files.similarity_cache_small?
186
- warn_cache_small :weights if files.weights_cache_small?
187
-
188
- raise_cache_missing :index unless files.index_cache_ok?
189
- raise_cache_missing :similarity unless files.similarity_cache_ok?
190
- raise_cache_missing :weights unless files.weights_cache_ok?
191
- end
192
- def warn_cache_small what
193
- puts "#{what} cache for #{identifier} smaller than 16 bytes."
194
- end
195
- # Raises an appropriate error message.
196
- #
197
- def raise_cache_missing what
198
- raise "#{what} cache for #{identifier} missing."
199
- end
200
-
201
63
  end
202
64
  end
@@ -1,28 +1,26 @@
1
- module Query
1
+ module Index
2
2
 
3
- # Combines tokens and category indexes into combinations.
4
- #
5
- class Combinator
3
+ class Categories
6
4
 
7
- attr_reader :categories, :category_hash
8
- attr_reader :ignore_unassigned_tokens # TODO Should this actually be determined by the query? Probably, yes.
5
+ attr_reader :categories, :category_hash, :ignore_unassigned_tokens
9
6
 
10
- def initialize categories, options = {}
11
- @categories = categories
12
- @category_hash = hashify categories
7
+ each_delegate :load_from_cache,
8
+ :to => :categories
9
+
10
+ def initialize options = {}
11
+ clear
13
12
 
14
13
  @ignore_unassigned_tokens = options[:ignore_unassigned_tokens] || false
15
14
  end
16
15
 
17
- # TODO Move somewhere else.
18
- #
19
- # TODO Or use active_support's?
20
- #
21
- def hashify category_array
22
- category_array.inject({}) do |hash, category|
23
- hash[category.name] = [category]
24
- hash
25
- end
16
+ def clear
17
+ @categories = []
18
+ @category_hash = {}
19
+ end
20
+
21
+ def << category
22
+ categories << category
23
+ category_hash[category.name] = [category] # TODO Why an Array?
26
24
  end
27
25
 
28
26
  #
@@ -15,19 +15,30 @@ module Index
15
15
  @name = name
16
16
  @type = type
17
17
 
18
- partial = options[:partial] || Cacher::Partial::Default
19
- weights = options[:weights] || Cacher::Weights::Default
20
18
  similarity = options[:similarity] || Cacher::Similarity::Default
21
19
 
22
- @exact = options[:exact_bundle] || Bundle.new(:exact, self, type, Cacher::Partial::None.new, weights, similarity)
23
- @partial = options[:partial_bundle] || Bundle.new(:partial, self, type, partial, weights, Cacher::Similarity::None.new)
20
+ @exact = options[:exact_index_bundle] || Bundle.new(:exact, self, type, similarity)
21
+ @partial = options[:partial_index_bundle] || Bundle.new(:partial, self, type, similarity)
24
22
 
25
23
  @exact = exact_lambda.call(@exact, @partial) if exact_lambda = options[:exact_lambda]
26
24
  @partial = partial_lambda.call(@exact, @partial) if partial_lambda = options[:partial_lambda]
25
+
26
+ # Extract?
27
+ #
28
+ qualifiers = generate_qualifiers_from options
29
+ Query::Qualifiers.add(name, qualifiers) if qualifiers
30
+ end
31
+
32
+ # TODO Move to Index.
33
+ #
34
+ def generate_qualifiers_from options
35
+ options[:qualifiers] || options[:qualifier] && [options[:qualifier]] || [name]
27
36
  end
28
37
 
29
38
  # Loads the index from cache.
30
39
  #
40
+ # TODO Metaprogram delegation? each_delegate?
41
+ #
31
42
  def load_from_cache
32
43
  timed_exclaim "Loading index #{identifier}."
33
44
  exact.load
@@ -40,67 +51,30 @@ module Index
40
51
  @identifier ||= "#{type.name} #{name}"
41
52
  end
42
53
 
43
- # Generates all caches for this category.
44
- #
45
- def generate_caches
46
- generate_caches_from_source
47
- generate_partial
48
- generate_caches_from_memory
49
- dump_caches
50
- timed_exclaim "CACHE FINISHED #{identifier}."
51
- end
52
- def generate_caches_from_source
53
- exact.generate_caches_from_source
54
- end
55
- def generate_partial
56
- partial.generate_partial_from exact.index
57
- end
58
- def generate_caches_from_memory
59
- partial.generate_caches_from_memory
60
- end
61
- def dump_caches
62
- exact.dump
63
- partial.dump
64
- end
65
-
66
- # Used for testing.
67
- #
68
- def generate_indexes_from_exact_index
69
- generate_derived_exact
70
- generate_partial
71
- generate_derived_partial
72
- end
73
- def generate_derived_exact
74
- exact.generate_derived
75
- end
76
- def generate_derived_partial
77
- partial.generate_derived
78
- end
79
-
80
54
  # Gets the weight for this token's text.
81
55
  #
82
56
  def weight token
83
57
  bundle_for(token).weight token.text
84
58
  end
85
-
59
+
86
60
  # Gets the ids for this token's text.
87
61
  #
88
62
  def ids token
89
63
  bundle_for(token).ids token.text
90
64
  end
91
-
65
+
92
66
  # Returns the right index bundle for this token.
93
67
  #
94
68
  def bundle_for token
95
69
  token.partial? ? partial : exact
96
70
  end
97
-
71
+
98
72
  #
99
73
  #
100
74
  def combination_for token
101
75
  weight(token) && ::Query::Combination.new(token, self)
102
76
  end
103
-
77
+
104
78
  end
105
-
79
+
106
80
  end
@@ -6,26 +6,30 @@ module Index
6
6
  #
7
7
  class Type
8
8
 
9
- attr_reader :name, :result_type, :categories, :combinator
9
+ attr_reader :name, :result_type, :combinator, :categories
10
10
 
11
- each_delegate :generate_caches, :load_from_cache, :to => :categories
11
+ delegate :load_from_cache,
12
+ :to => :categories
12
13
 
13
- # TODO Use config
14
- #
15
- def initialize name, result_type, ignore_unassigned_tokens, *categories
16
- @name = name
17
- @result_type = result_type # TODO Move.
18
- @categories = categories # for each_delegate
19
- @combinator = combinator_for categories, ignore_unassigned_tokens
14
+ def initialize name, options = {}
15
+ @name = name
16
+
17
+ @result_type = options[:result_type] || name
18
+ ignore_unassigned_tokens = options[:ignore_unassigned_tokens] || false # TODO Move to query, somehow.
19
+
20
+ @categories = Categories.new ignore_unassigned_tokens: ignore_unassigned_tokens
20
21
  end
21
- def combinator_for categories, ignore_unassigned_tokens
22
- Query::Combinator.new @categories, ignore_unassigned_tokens: ignore_unassigned_tokens
22
+
23
+ # TODO Spec. Doc.
24
+ #
25
+ def add_category name, options = {}
26
+ categories << Category.new(name, self, options)
23
27
  end
24
28
 
25
29
  #
26
30
  #
27
31
  def possible_combinations token
28
- @combinator.possible_combinations_for token
32
+ categories.possible_combinations_for token
29
33
  end
30
34
 
31
35
  end
@@ -0,0 +1,41 @@
1
+ module Index
2
+
3
+ class Types
4
+
5
+ attr_reader :types, :type_mapping
6
+
7
+ each_delegate :load_from_cache,
8
+ :to => :types
9
+
10
+ def initialize
11
+ clear
12
+ end
13
+
14
+ # TODO Spec.
15
+ #
16
+ def clear
17
+ @types = []
18
+ @type_mapping = {}
19
+ end
20
+
21
+ # TODO Spec.
22
+ #
23
+ def reload
24
+ load_from_cache
25
+ end
26
+
27
+ # TODO Spec
28
+ #
29
+ def register type
30
+ self.types << type
31
+ self.type_mapping[type.name] = type
32
+ end
33
+ def [] name
34
+ name = name.to_sym
35
+
36
+ type_mapping[name]
37
+ end
38
+
39
+ end
40
+
41
+ end
@@ -2,6 +2,8 @@
2
2
  #
3
3
  module Index
4
4
 
5
+ # FIXME and spec
6
+ #
5
7
  module Wrappers
6
8
 
7
9
  # This index combines an exact and partial index.
@@ -38,8 +40,10 @@ module Index
38
40
  new type_or_category
39
41
  end
40
42
  end
43
+ # TODO Do not extract categories!
44
+ #
41
45
  def self.wrap_each_of categories
42
- categories.collect! { |category| new(category) }
46
+ categories.categories.collect! { |category| new(category) }
43
47
  end
44
48
 
45
49
  def ids text