picky 1.5.2 → 1.5.3

Sign up to get free protection for your applications and to get access to all the features.
Files changed (69) hide show
  1. data/lib/picky/analyzer.rb +154 -0
  2. data/lib/picky/application.rb +53 -33
  3. data/lib/picky/character_substituters/west_european.rb +10 -6
  4. data/lib/picky/cli.rb +18 -18
  5. data/lib/picky/index/base.rb +44 -13
  6. data/lib/picky/index_bundle.rb +13 -4
  7. data/lib/picky/indexed/indexes.rb +26 -10
  8. data/lib/picky/indexing/indexes.rb +26 -24
  9. data/lib/picky/interfaces/live_parameters.rb +23 -16
  10. data/lib/picky/internals/extensions/object.rb +13 -6
  11. data/lib/picky/internals/frontend_adapters/rack.rb +30 -34
  12. data/lib/picky/internals/index/backend.rb +1 -2
  13. data/lib/picky/internals/index/file/basic.rb +18 -14
  14. data/lib/picky/internals/index/files.rb +16 -6
  15. data/lib/picky/internals/index/redis/basic.rb +12 -5
  16. data/lib/picky/internals/index/redis.rb +2 -2
  17. data/lib/picky/internals/indexed/bundle/base.rb +58 -14
  18. data/lib/picky/internals/indexed/bundle/memory.rb +40 -14
  19. data/lib/picky/internals/indexed/bundle/redis.rb +9 -30
  20. data/lib/picky/internals/indexed/categories.rb +19 -14
  21. data/lib/picky/internals/indexed/category.rb +44 -20
  22. data/lib/picky/internals/indexed/index.rb +23 -13
  23. data/lib/picky/internals/indexed/wrappers/bundle/wrapper.rb +27 -9
  24. data/lib/picky/internals/indexers/serial.rb +1 -1
  25. data/lib/picky/internals/indexing/bundle/base.rb +28 -28
  26. data/lib/picky/internals/indexing/bundle/memory.rb +14 -7
  27. data/lib/picky/internals/indexing/categories.rb +15 -11
  28. data/lib/picky/internals/indexing/category.rb +30 -20
  29. data/lib/picky/internals/indexing/index.rb +22 -14
  30. data/lib/picky/internals/query/allocations.rb +0 -15
  31. data/lib/picky/internals/query/combinations/base.rb +0 -4
  32. data/lib/picky/internals/query/combinations/redis.rb +19 -8
  33. data/lib/picky/internals/query/indexes.rb +3 -6
  34. data/lib/picky/internals/query/token.rb +0 -4
  35. data/lib/picky/internals/query/weights.rb +2 -11
  36. data/lib/picky/internals/results/base.rb +3 -10
  37. data/lib/picky/internals/tokenizers/base.rb +64 -28
  38. data/lib/picky/internals/tokenizers/index.rb +8 -8
  39. data/lib/picky/loader.rb +59 -53
  40. data/lib/picky/query/base.rb +23 -29
  41. data/lib/picky/sources/base.rb +10 -10
  42. data/lib/picky/sources/couch.rb +14 -10
  43. data/lib/picky/sources/csv.rb +21 -14
  44. data/lib/picky/sources/db.rb +37 -31
  45. data/lib/picky/sources/delicious.rb +11 -8
  46. data/lib/picky/sources/wrappers/base.rb +3 -1
  47. data/lib/picky/statistics.rb +66 -0
  48. data/lib/tasks/application.rake +3 -0
  49. data/lib/tasks/checks.rake +11 -0
  50. data/lib/tasks/framework.rake +3 -0
  51. data/lib/tasks/index.rake +9 -11
  52. data/lib/tasks/routes.rake +3 -2
  53. data/lib/tasks/shortcuts.rake +17 -5
  54. data/lib/tasks/statistics.rake +20 -12
  55. data/lib/tasks/try.rake +14 -14
  56. data/spec/lib/application_spec.rb +3 -3
  57. data/spec/lib/index/base_spec.rb +25 -3
  58. data/spec/lib/internals/extensions/object_spec.rb +46 -20
  59. data/spec/lib/internals/frontend_adapters/rack_spec.rb +3 -3
  60. data/spec/lib/internals/index/redis/basic_spec.rb +67 -0
  61. data/spec/lib/internals/indexers/serial_spec.rb +1 -1
  62. data/spec/lib/internals/results/base_spec.rb +0 -12
  63. data/spec/lib/internals/tokenizers/base_spec.rb +49 -1
  64. data/spec/lib/query/allocations_spec.rb +0 -56
  65. data/spec/lib/query/base_spec.rb +25 -21
  66. data/spec/lib/query/combinations/redis_spec.rb +6 -1
  67. data/spec/lib/sources/delicious_spec.rb +2 -2
  68. data/spec/lib/statistics_spec.rb +31 -0
  69. metadata +9 -2
@@ -1,7 +1,7 @@
1
1
  module Internals
2
2
 
3
3
  module Indexed # :nodoc:all
4
-
4
+
5
5
  # TODO Rewrite.
6
6
  #
7
7
  # A Bundle is a number of indexes
@@ -11,7 +11,7 @@ module Internals
11
11
  # * *core* index (always used)
12
12
  # * *weights* index (always used)
13
13
  # * *similarity* index (used with similarity)
14
- #
14
+ #
15
15
  # In Picky, indexing is separated from the index
16
16
  # handling itself through a parallel structure.
17
17
  #
@@ -25,24 +25,26 @@ module Internals
25
25
  # memory and looking up search data as fast as possible.
26
26
  #
27
27
  module Bundle
28
-
28
+
29
29
  class Base
30
-
30
+
31
31
  attr_reader :identifier, :configuration
32
32
  attr_accessor :similarity_strategy
33
-
33
+ attr_accessor :index, :weights, :similarity, :configuration
34
+
34
35
  delegate :[], :to => :configuration
35
-
36
+ delegate :size, :to => :index
37
+
36
38
  def initialize name, configuration, similarity_strategy
37
39
  @identifier = "#{configuration.identifier}:#{name}"
38
-
40
+
39
41
  @index = {}
40
42
  @weights = {}
41
43
  @similarity = {}
42
-
44
+
43
45
  @similarity_strategy = similarity_strategy
44
46
  end
45
-
47
+
46
48
  # Get a list of similar texts.
47
49
  #
48
50
  # Note: Does not return itself.
@@ -53,7 +55,7 @@ module Internals
53
55
  similar_codes.delete text if similar_codes
54
56
  similar_codes || []
55
57
  end
56
-
58
+
57
59
  # Loads all indexes.
58
60
  #
59
61
  def load
@@ -62,11 +64,53 @@ module Internals
62
64
  load_similarity
63
65
  load_configuration
64
66
  end
65
-
67
+
68
+ # Loads the core index.
69
+ #
70
+ def load_index
71
+ # No loading needed.
72
+ end
73
+ # Loads the weights index.
74
+ #
75
+ def load_weights
76
+ # No loading needed.
77
+ end
78
+ # Loads the similarity index.
79
+ #
80
+ def load_similarity
81
+ # No loading needed.
82
+ end
83
+ # Loads the configuration.
84
+ #
85
+ def load_configuration
86
+ # No loading needed.
87
+ end
88
+
89
+ # Clears the core index.
90
+ #
91
+ def clear_index
92
+ # No clearing needed.
93
+ end
94
+ # Clears the weights index.
95
+ #
96
+ def clear_weights
97
+ # No clearing needed.
98
+ end
99
+ # Clears the similarity index.
100
+ #
101
+ def clear_similarity
102
+ # No clearing needed.
103
+ end
104
+ # Clears the configuration.
105
+ #
106
+ def clear_configuration
107
+ # No clearing needed.
108
+ end
109
+
66
110
  end
67
-
111
+
68
112
  end
69
-
113
+
70
114
  end
71
-
115
+
72
116
  end
@@ -3,11 +3,11 @@ module Internals
3
3
  # encoding: utf-8
4
4
  #
5
5
  module Indexed # :nodoc:all
6
-
6
+
7
7
  #
8
8
  #
9
9
  module Bundle
10
-
10
+
11
11
  # This is the _actual_ index (based on memory).
12
12
  #
13
13
  # Handles exact/partial index, weights index, and similarity index.
@@ -15,19 +15,24 @@ module Internals
15
15
  # Delegates file handling and checking to an *Indexed*::*Files* object.
16
16
  #
17
17
  class Memory < Base
18
-
19
- attr_accessor :index, :weights, :similarity, :configuration
20
-
18
+
21
19
  delegate :[], :to => :configuration
22
-
20
+
23
21
  def initialize name, configuration, *args
24
22
  super name, configuration, *args
25
-
23
+
26
24
  @configuration = {} # A hash with config options.
27
-
25
+
28
26
  @backend = Internals::Index::Files.new name, configuration
29
27
  end
30
-
28
+
29
+ def to_s
30
+ <<-MEMORY
31
+ Memory
32
+ #{@backend.indented_to_s}
33
+ MEMORY
34
+ end
35
+
31
36
  # Get the ids for the given symbol.
32
37
  #
33
38
  def ids sym
@@ -38,7 +43,7 @@ module Internals
38
43
  def weight sym
39
44
  @weights[sym]
40
45
  end
41
-
46
+
42
47
  # Loads the core index.
43
48
  #
44
49
  def load_index
@@ -59,11 +64,32 @@ module Internals
59
64
  def load_configuration
60
65
  self.configuration = @backend.load_configuration
61
66
  end
62
-
67
+
68
+ # Clears the core index.
69
+ #
70
+ def clear_index
71
+ self.index = {}
72
+ end
73
+ # Clears the weights index.
74
+ #
75
+ def clear_weights
76
+ self.weights = {}
77
+ end
78
+ # Clears the similarity index.
79
+ #
80
+ def clear_similarity
81
+ self.similarity = {}
82
+ end
83
+ # Clears the configuration.
84
+ #
85
+ def clear_configuration
86
+ self.configuration = {}
87
+ end
88
+
63
89
  end
64
-
90
+
65
91
  end
66
-
92
+
67
93
  end
68
-
94
+
69
95
  end
@@ -3,23 +3,23 @@ module Internals
3
3
  # encoding: utf-8
4
4
  #
5
5
  module Indexed # :nodoc:all
6
-
6
+
7
7
  #
8
8
  #
9
9
  module Bundle
10
-
10
+
11
11
  # This is the _actual_ index (based on Redis).
12
12
  #
13
13
  # Handles exact/partial index, weights index, and similarity index.
14
14
  #
15
15
  class Redis < Base
16
-
16
+
17
17
  def initialize name, configuration, *args
18
18
  super name, configuration, *args
19
-
19
+
20
20
  @backend = Internals::Index::Redis.new name, configuration
21
21
  end
22
-
22
+
23
23
  # Get the ids for the given symbol.
24
24
  #
25
25
  # Ids are an array of string values in Redis.
@@ -39,32 +39,11 @@ module Internals
39
39
  def [] sym
40
40
  @backend.setting sym
41
41
  end
42
-
43
- # Loads the core index.
44
- #
45
- def load_index
46
- # TODO check if it is there.
47
- end
48
- # Loads the weights index.
49
- #
50
- def load_weights
51
- # TODO check if it is there.
52
- end
53
- # Loads the similarity index.
54
- #
55
- def load_similarity
56
- # TODO check if it is there.
57
- end
58
- # Loads the configuration.
59
- #
60
- def load_configuration
61
- # TODO check if it is there.
62
- end
63
-
42
+
64
43
  end
65
-
44
+
66
45
  end
67
-
46
+
68
47
  end
69
-
48
+
70
49
  end
@@ -1,14 +1,15 @@
1
1
  module Internals
2
2
 
3
3
  module Indexed
4
-
4
+
5
5
  class Categories
6
-
6
+
7
7
  attr_reader :categories, :category_hash, :ignore_unassigned_tokens
8
-
8
+
9
9
  each_delegate :load_from_cache,
10
+ :analyze,
10
11
  :to => :categories
11
-
12
+
12
13
  # A list of indexed categories.
13
14
  #
14
15
  # Options:
@@ -26,17 +27,21 @@ module Internals
26
27
  #
27
28
  def initialize options = {}
28
29
  clear
29
-
30
+
30
31
  @ignore_unassigned_tokens = options[:ignore_unassigned_tokens] || false
31
32
  end
32
-
33
+
34
+ def to_s
35
+ categories.indented_to_s
36
+ end
37
+
33
38
  # Clears both the array of categories and the hash of categories.
34
39
  #
35
40
  def clear
36
41
  @categories = []
37
42
  @category_hash = {}
38
43
  end
39
-
44
+
40
45
  # Add the given category to the list of categories.
41
46
  #
42
47
  def << category
@@ -48,7 +53,7 @@ module Internals
48
53
  #
49
54
  category_hash[category.name] = [category]
50
55
  end
51
-
56
+
52
57
  # Return all possible combinations for the given token.
53
58
  #
54
59
  # This checks if it needs to also search through similar
@@ -60,7 +65,7 @@ module Internals
60
65
  end
61
66
  # Gets all similar tokens and puts together the possible combinations
62
67
  # for each found similar token.
63
- #
68
+ #
64
69
  def similar_possible_for token
65
70
  # Get as many similar tokens as necessary
66
71
  #
@@ -90,7 +95,7 @@ module Internals
90
95
  result + possible_for(token, possible)
91
96
  end
92
97
  end
93
-
98
+
94
99
  # Returns possible Combinations for the token.
95
100
  #
96
101
  # Note: The preselected_categories param is an optimization.
@@ -122,14 +127,14 @@ module Internals
122
127
  # an existing category.
123
128
  #
124
129
  # Note: Returns nil if the user did not define one
125
- # or if he/she has defined a non-existing one.
130
+ # or if he/she has defined a non-existing one.
126
131
  #
127
132
  def user_defined_categories token
128
133
  category_hash[token.user_defined_category_name]
129
134
  end
130
-
135
+
131
136
  end
132
-
137
+
133
138
  end
134
-
139
+
135
140
  end
@@ -1,90 +1,114 @@
1
1
  module Internals
2
2
 
3
3
  module Indexed
4
-
4
+
5
5
  # An index category holds an exact and a partial index for a given category.
6
6
  #
7
7
  # For example an index category for names holds an exact and
8
8
  # a partial index bundle for names.
9
9
  #
10
10
  class Category
11
-
11
+
12
12
  attr_accessor :exact
13
13
  attr_reader :identifier, :name
14
14
  attr_writer :partial
15
-
15
+
16
16
  #
17
17
  #
18
18
  def initialize name, index, options = {}
19
19
  @name = name
20
-
20
+
21
21
  configuration = Configuration::Index.new index, self
22
-
22
+
23
23
  @identifier = configuration.identifier
24
-
24
+
25
25
  # TODO Push the defaults out into the index.
26
26
  #
27
27
  @partial_strategy = options[:partial] || Internals::Generators::Partial::Default
28
28
  similarity = options[:similarity] || Internals::Generators::Similarity::Default
29
-
29
+
30
30
  bundle_class = options[:indexed_bundle_class] || Bundle::Memory
31
31
  @exact = bundle_class.new :exact, configuration, similarity
32
32
  @partial = bundle_class.new :partial, configuration, similarity
33
-
33
+
34
34
  # @exact = exact_lambda.call(@exact, @partial) if exact_lambda = options[:exact_lambda]
35
35
  # @partial = partial_lambda.call(@exact, @partial) if partial_lambda = options[:partial_lambda]
36
-
36
+
37
37
  # TODO Extract?
38
38
  #
39
39
  Query::Qualifiers.add(configuration.category_name, generate_qualifiers_from(options) || [name])
40
40
  end
41
-
41
+
42
+ def to_s
43
+ <<-CATEGORY
44
+ Category(#{name}):
45
+ Exact:
46
+ #{exact.indented_to_s(4)}
47
+ Partial:
48
+ #{partial.indented_to_s(4)}
49
+ CATEGORY
50
+ end
51
+
42
52
  # TODO Move to Index.
43
53
  #
44
54
  def generate_qualifiers_from options
45
55
  options[:qualifiers] || options[:qualifier] && [options[:qualifier]]
46
56
  end
47
-
57
+
48
58
  # Loads the index from cache.
49
59
  #
50
60
  def load_from_cache
51
- timed_exclaim "Loading index #{identifier}."
61
+ timed_exclaim %Q{"#{identifier}": Loading index.}
52
62
  exact.load
53
63
  partial.load
54
64
  end
55
-
65
+
66
+ # Loads, analyzes, and clears the index.
67
+ #
68
+ # Note: The idea is not to run this while the search engine is running.
69
+ #
70
+ # TODO Spec. Identifier is ok?
71
+ #
72
+ def analyze collector
73
+ collector[identifier] = {
74
+ :exact => Analyzer.new.analyze(exact),
75
+ :partial => Analyzer.new.analyze(partial)
76
+ }
77
+ collector
78
+ end
79
+
56
80
  # Gets the weight for this token's text.
57
81
  #
58
82
  def weight token
59
83
  bundle_for(token).weight token.text
60
84
  end
61
-
85
+
62
86
  # Gets the ids for this token's text.
63
87
  #
64
88
  def ids token
65
89
  bundle_for(token).ids token.text
66
90
  end
67
-
91
+
68
92
  # Returns the right index bundle for this token.
69
93
  #
70
94
  def bundle_for token
71
95
  token.partial?? partial : exact
72
96
  end
73
-
97
+
74
98
  # The partial strategy defines whether to really use the partial index.
75
99
  #
76
100
  def partial
77
101
  @partial_strategy.use_exact_for_partial?? @exact : @partial
78
102
  end
79
-
103
+
80
104
  #
81
105
  #
82
106
  def combination_for token
83
107
  weight(token) && Internals::Query::Combination.new(token, self)
84
108
  end
85
-
109
+
86
110
  end
87
-
111
+
88
112
  end
89
-
113
+
90
114
  end
@@ -1,38 +1,39 @@
1
1
  module Internals
2
2
 
3
3
  module Indexed
4
-
4
+
5
5
  #
6
6
  #
7
7
  class Index
8
-
8
+
9
9
  attr_reader :name, :result_identifier, :combinator, :categories
10
-
10
+
11
11
  delegate :load_from_cache,
12
+ :analyze,
12
13
  :to => :categories
13
-
14
+
14
15
  # TODO Externalize?
15
16
  #
16
17
  def initialize name, options = {}
17
18
  @name = name
18
-
19
+
19
20
  @result_identifier = options[:result_identifier] || name
20
21
  @bundle_class = options[:indexed_bundle_class] # TODO This should actually be a fixed parameter.
21
22
  ignore_unassigned_tokens = options[:ignore_unassigned_tokens] || false # TODO Move to query, somehow.
22
-
23
+
23
24
  @categories = Categories.new ignore_unassigned_tokens: ignore_unassigned_tokens
24
25
  end
25
-
26
+
26
27
  # TODO Doc. Externalize?
27
28
  #
28
29
  def define_category category_name, options = {}
29
30
  options = default_category_options.merge options
30
-
31
+
31
32
  new_category = Category.new category_name, self, options
32
33
  categories << new_category
33
34
  new_category
34
35
  end
35
-
36
+
36
37
  # By default, the category uses
37
38
  # * the index's bundle type.
38
39
  #
@@ -41,7 +42,7 @@ module Internals
41
42
  :indexed_bundle_class => @bundle_class
42
43
  }
43
44
  end
44
-
45
+
45
46
  # Return the possible combinations for this token.
46
47
  #
47
48
  # A combination is a tuple <token, index_bundle>.
@@ -49,9 +50,18 @@ module Internals
49
50
  def possible_combinations token
50
51
  categories.possible_combinations_for token
51
52
  end
52
-
53
+
54
+ def to_s
55
+ <<-INDEX
56
+ Indexed(#{name}):
57
+ Result identifier: "#{result_identifier}"
58
+ Categories:
59
+ #{categories.indented_to_s}
60
+ INDEX
61
+ end
62
+
53
63
  end
54
-
64
+
55
65
  end
56
-
66
+
57
67
  end
@@ -1,25 +1,43 @@
1
1
  module Indexed
2
2
  module Wrappers
3
-
3
+
4
4
  # Per Bundle wrappers.
5
5
  #
6
6
  module Bundle
7
-
7
+
8
8
  # Base wrapper. Just delegates all methods to the bundle.
9
9
  #
10
10
  class Wrapper
11
-
11
+
12
12
  attr_reader :bundle
13
-
13
+
14
14
  def initialize bundle
15
15
  @bundle = bundle
16
16
  end
17
-
18
- delegate :load, :ids, :weight, :identifier, :to => :@bundle
19
-
17
+
18
+ delegate :load,
19
+ :load_index,
20
+ :load_weights,
21
+ :load_similarity,
22
+ :load_configuration,
23
+ :clear_index,
24
+ :clear_weights,
25
+ :clear_similarity,
26
+ :clear_configuration,
27
+ :ids,
28
+ :weight,
29
+ :identifier,
30
+ :analyze,
31
+ :size,
32
+ :index,
33
+ :weights,
34
+ :similarity,
35
+ :configuration,
36
+ :to => :@bundle
37
+
20
38
  end
21
-
39
+
22
40
  end
23
-
41
+
24
42
  end
25
43
  end
@@ -65,7 +65,7 @@ module Indexers
65
65
  end
66
66
  end
67
67
  def indexing_message
68
- timed_exclaim "INDEX #{@configuration}" # TODO from ...
68
+ timed_exclaim %Q{"#{@configuration.identifier}": Starting indexing.}
69
69
  end
70
70
 
71
71
  end