picky 1.5.2 → 1.5.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (69) hide show
  1. data/lib/picky/analyzer.rb +154 -0
  2. data/lib/picky/application.rb +53 -33
  3. data/lib/picky/character_substituters/west_european.rb +10 -6
  4. data/lib/picky/cli.rb +18 -18
  5. data/lib/picky/index/base.rb +44 -13
  6. data/lib/picky/index_bundle.rb +13 -4
  7. data/lib/picky/indexed/indexes.rb +26 -10
  8. data/lib/picky/indexing/indexes.rb +26 -24
  9. data/lib/picky/interfaces/live_parameters.rb +23 -16
  10. data/lib/picky/internals/extensions/object.rb +13 -6
  11. data/lib/picky/internals/frontend_adapters/rack.rb +30 -34
  12. data/lib/picky/internals/index/backend.rb +1 -2
  13. data/lib/picky/internals/index/file/basic.rb +18 -14
  14. data/lib/picky/internals/index/files.rb +16 -6
  15. data/lib/picky/internals/index/redis/basic.rb +12 -5
  16. data/lib/picky/internals/index/redis.rb +2 -2
  17. data/lib/picky/internals/indexed/bundle/base.rb +58 -14
  18. data/lib/picky/internals/indexed/bundle/memory.rb +40 -14
  19. data/lib/picky/internals/indexed/bundle/redis.rb +9 -30
  20. data/lib/picky/internals/indexed/categories.rb +19 -14
  21. data/lib/picky/internals/indexed/category.rb +44 -20
  22. data/lib/picky/internals/indexed/index.rb +23 -13
  23. data/lib/picky/internals/indexed/wrappers/bundle/wrapper.rb +27 -9
  24. data/lib/picky/internals/indexers/serial.rb +1 -1
  25. data/lib/picky/internals/indexing/bundle/base.rb +28 -28
  26. data/lib/picky/internals/indexing/bundle/memory.rb +14 -7
  27. data/lib/picky/internals/indexing/categories.rb +15 -11
  28. data/lib/picky/internals/indexing/category.rb +30 -20
  29. data/lib/picky/internals/indexing/index.rb +22 -14
  30. data/lib/picky/internals/query/allocations.rb +0 -15
  31. data/lib/picky/internals/query/combinations/base.rb +0 -4
  32. data/lib/picky/internals/query/combinations/redis.rb +19 -8
  33. data/lib/picky/internals/query/indexes.rb +3 -6
  34. data/lib/picky/internals/query/token.rb +0 -4
  35. data/lib/picky/internals/query/weights.rb +2 -11
  36. data/lib/picky/internals/results/base.rb +3 -10
  37. data/lib/picky/internals/tokenizers/base.rb +64 -28
  38. data/lib/picky/internals/tokenizers/index.rb +8 -8
  39. data/lib/picky/loader.rb +59 -53
  40. data/lib/picky/query/base.rb +23 -29
  41. data/lib/picky/sources/base.rb +10 -10
  42. data/lib/picky/sources/couch.rb +14 -10
  43. data/lib/picky/sources/csv.rb +21 -14
  44. data/lib/picky/sources/db.rb +37 -31
  45. data/lib/picky/sources/delicious.rb +11 -8
  46. data/lib/picky/sources/wrappers/base.rb +3 -1
  47. data/lib/picky/statistics.rb +66 -0
  48. data/lib/tasks/application.rake +3 -0
  49. data/lib/tasks/checks.rake +11 -0
  50. data/lib/tasks/framework.rake +3 -0
  51. data/lib/tasks/index.rake +9 -11
  52. data/lib/tasks/routes.rake +3 -2
  53. data/lib/tasks/shortcuts.rake +17 -5
  54. data/lib/tasks/statistics.rake +20 -12
  55. data/lib/tasks/try.rake +14 -14
  56. data/spec/lib/application_spec.rb +3 -3
  57. data/spec/lib/index/base_spec.rb +25 -3
  58. data/spec/lib/internals/extensions/object_spec.rb +46 -20
  59. data/spec/lib/internals/frontend_adapters/rack_spec.rb +3 -3
  60. data/spec/lib/internals/index/redis/basic_spec.rb +67 -0
  61. data/spec/lib/internals/indexers/serial_spec.rb +1 -1
  62. data/spec/lib/internals/results/base_spec.rb +0 -12
  63. data/spec/lib/internals/tokenizers/base_spec.rb +49 -1
  64. data/spec/lib/query/allocations_spec.rb +0 -56
  65. data/spec/lib/query/base_spec.rb +25 -21
  66. data/spec/lib/query/combinations/redis_spec.rb +6 -1
  67. data/spec/lib/sources/delicious_spec.rb +2 -2
  68. data/spec/lib/statistics_spec.rb +31 -0
  69. metadata +9 -2
@@ -1,7 +1,7 @@
1
1
  module Internals
2
2
 
3
3
  module Indexed # :nodoc:all
4
-
4
+
5
5
  # TODO Rewrite.
6
6
  #
7
7
  # A Bundle is a number of indexes
@@ -11,7 +11,7 @@ module Internals
11
11
  # * *core* index (always used)
12
12
  # * *weights* index (always used)
13
13
  # * *similarity* index (used with similarity)
14
- #
14
+ #
15
15
  # In Picky, indexing is separated from the index
16
16
  # handling itself through a parallel structure.
17
17
  #
@@ -25,24 +25,26 @@ module Internals
25
25
  # memory and looking up search data as fast as possible.
26
26
  #
27
27
  module Bundle
28
-
28
+
29
29
  class Base
30
-
30
+
31
31
  attr_reader :identifier, :configuration
32
32
  attr_accessor :similarity_strategy
33
-
33
+ attr_accessor :index, :weights, :similarity, :configuration
34
+
34
35
  delegate :[], :to => :configuration
35
-
36
+ delegate :size, :to => :index
37
+
36
38
  def initialize name, configuration, similarity_strategy
37
39
  @identifier = "#{configuration.identifier}:#{name}"
38
-
40
+
39
41
  @index = {}
40
42
  @weights = {}
41
43
  @similarity = {}
42
-
44
+
43
45
  @similarity_strategy = similarity_strategy
44
46
  end
45
-
47
+
46
48
  # Get a list of similar texts.
47
49
  #
48
50
  # Note: Does not return itself.
@@ -53,7 +55,7 @@ module Internals
53
55
  similar_codes.delete text if similar_codes
54
56
  similar_codes || []
55
57
  end
56
-
58
+
57
59
  # Loads all indexes.
58
60
  #
59
61
  def load
@@ -62,11 +64,53 @@ module Internals
62
64
  load_similarity
63
65
  load_configuration
64
66
  end
65
-
67
+
68
+ # Loads the core index.
69
+ #
70
+ def load_index
71
+ # No loading needed.
72
+ end
73
+ # Loads the weights index.
74
+ #
75
+ def load_weights
76
+ # No loading needed.
77
+ end
78
+ # Loads the similarity index.
79
+ #
80
+ def load_similarity
81
+ # No loading needed.
82
+ end
83
+ # Loads the configuration.
84
+ #
85
+ def load_configuration
86
+ # No loading needed.
87
+ end
88
+
89
+ # Clears the core index.
90
+ #
91
+ def clear_index
92
+ # No clearing needed.
93
+ end
94
+ # Clears the weights index.
95
+ #
96
+ def clear_weights
97
+ # No clearing needed.
98
+ end
99
+ # Clears the similarity index.
100
+ #
101
+ def clear_similarity
102
+ # No clearing needed.
103
+ end
104
+ # Clears the configuration.
105
+ #
106
+ def clear_configuration
107
+ # No clearing needed.
108
+ end
109
+
66
110
  end
67
-
111
+
68
112
  end
69
-
113
+
70
114
  end
71
-
115
+
72
116
  end
@@ -3,11 +3,11 @@ module Internals
3
3
  # encoding: utf-8
4
4
  #
5
5
  module Indexed # :nodoc:all
6
-
6
+
7
7
  #
8
8
  #
9
9
  module Bundle
10
-
10
+
11
11
  # This is the _actual_ index (based on memory).
12
12
  #
13
13
  # Handles exact/partial index, weights index, and similarity index.
@@ -15,19 +15,24 @@ module Internals
15
15
  # Delegates file handling and checking to an *Indexed*::*Files* object.
16
16
  #
17
17
  class Memory < Base
18
-
19
- attr_accessor :index, :weights, :similarity, :configuration
20
-
18
+
21
19
  delegate :[], :to => :configuration
22
-
20
+
23
21
  def initialize name, configuration, *args
24
22
  super name, configuration, *args
25
-
23
+
26
24
  @configuration = {} # A hash with config options.
27
-
25
+
28
26
  @backend = Internals::Index::Files.new name, configuration
29
27
  end
30
-
28
+
29
+ def to_s
30
+ <<-MEMORY
31
+ Memory
32
+ #{@backend.indented_to_s}
33
+ MEMORY
34
+ end
35
+
31
36
  # Get the ids for the given symbol.
32
37
  #
33
38
  def ids sym
@@ -38,7 +43,7 @@ module Internals
38
43
  def weight sym
39
44
  @weights[sym]
40
45
  end
41
-
46
+
42
47
  # Loads the core index.
43
48
  #
44
49
  def load_index
@@ -59,11 +64,32 @@ module Internals
59
64
  def load_configuration
60
65
  self.configuration = @backend.load_configuration
61
66
  end
62
-
67
+
68
+ # Clears the core index.
69
+ #
70
+ def clear_index
71
+ self.index = {}
72
+ end
73
+ # Clears the weights index.
74
+ #
75
+ def clear_weights
76
+ self.weights = {}
77
+ end
78
+ # Clears the similarity index.
79
+ #
80
+ def clear_similarity
81
+ self.similarity = {}
82
+ end
83
+ # Clears the configuration.
84
+ #
85
+ def clear_configuration
86
+ self.configuration = {}
87
+ end
88
+
63
89
  end
64
-
90
+
65
91
  end
66
-
92
+
67
93
  end
68
-
94
+
69
95
  end
@@ -3,23 +3,23 @@ module Internals
3
3
  # encoding: utf-8
4
4
  #
5
5
  module Indexed # :nodoc:all
6
-
6
+
7
7
  #
8
8
  #
9
9
  module Bundle
10
-
10
+
11
11
  # This is the _actual_ index (based on Redis).
12
12
  #
13
13
  # Handles exact/partial index, weights index, and similarity index.
14
14
  #
15
15
  class Redis < Base
16
-
16
+
17
17
  def initialize name, configuration, *args
18
18
  super name, configuration, *args
19
-
19
+
20
20
  @backend = Internals::Index::Redis.new name, configuration
21
21
  end
22
-
22
+
23
23
  # Get the ids for the given symbol.
24
24
  #
25
25
  # Ids are an array of string values in Redis.
@@ -39,32 +39,11 @@ module Internals
39
39
  def [] sym
40
40
  @backend.setting sym
41
41
  end
42
-
43
- # Loads the core index.
44
- #
45
- def load_index
46
- # TODO check if it is there.
47
- end
48
- # Loads the weights index.
49
- #
50
- def load_weights
51
- # TODO check if it is there.
52
- end
53
- # Loads the similarity index.
54
- #
55
- def load_similarity
56
- # TODO check if it is there.
57
- end
58
- # Loads the configuration.
59
- #
60
- def load_configuration
61
- # TODO check if it is there.
62
- end
63
-
42
+
64
43
  end
65
-
44
+
66
45
  end
67
-
46
+
68
47
  end
69
-
48
+
70
49
  end
@@ -1,14 +1,15 @@
1
1
  module Internals
2
2
 
3
3
  module Indexed
4
-
4
+
5
5
  class Categories
6
-
6
+
7
7
  attr_reader :categories, :category_hash, :ignore_unassigned_tokens
8
-
8
+
9
9
  each_delegate :load_from_cache,
10
+ :analyze,
10
11
  :to => :categories
11
-
12
+
12
13
  # A list of indexed categories.
13
14
  #
14
15
  # Options:
@@ -26,17 +27,21 @@ module Internals
26
27
  #
27
28
  def initialize options = {}
28
29
  clear
29
-
30
+
30
31
  @ignore_unassigned_tokens = options[:ignore_unassigned_tokens] || false
31
32
  end
32
-
33
+
34
+ def to_s
35
+ categories.indented_to_s
36
+ end
37
+
33
38
  # Clears both the array of categories and the hash of categories.
34
39
  #
35
40
  def clear
36
41
  @categories = []
37
42
  @category_hash = {}
38
43
  end
39
-
44
+
40
45
  # Add the given category to the list of categories.
41
46
  #
42
47
  def << category
@@ -48,7 +53,7 @@ module Internals
48
53
  #
49
54
  category_hash[category.name] = [category]
50
55
  end
51
-
56
+
52
57
  # Return all possible combinations for the given token.
53
58
  #
54
59
  # This checks if it needs to also search through similar
@@ -60,7 +65,7 @@ module Internals
60
65
  end
61
66
  # Gets all similar tokens and puts together the possible combinations
62
67
  # for each found similar token.
63
- #
68
+ #
64
69
  def similar_possible_for token
65
70
  # Get as many similar tokens as necessary
66
71
  #
@@ -90,7 +95,7 @@ module Internals
90
95
  result + possible_for(token, possible)
91
96
  end
92
97
  end
93
-
98
+
94
99
  # Returns possible Combinations for the token.
95
100
  #
96
101
  # Note: The preselected_categories param is an optimization.
@@ -122,14 +127,14 @@ module Internals
122
127
  # an existing category.
123
128
  #
124
129
  # Note: Returns nil if the user did not define one
125
- # or if he/she has defined a non-existing one.
130
+ # or if he/she has defined a non-existing one.
126
131
  #
127
132
  def user_defined_categories token
128
133
  category_hash[token.user_defined_category_name]
129
134
  end
130
-
135
+
131
136
  end
132
-
137
+
133
138
  end
134
-
139
+
135
140
  end
@@ -1,90 +1,114 @@
1
1
  module Internals
2
2
 
3
3
  module Indexed
4
-
4
+
5
5
  # An index category holds an exact and a partial index for a given category.
6
6
  #
7
7
  # For example an index category for names holds an exact and
8
8
  # a partial index bundle for names.
9
9
  #
10
10
  class Category
11
-
11
+
12
12
  attr_accessor :exact
13
13
  attr_reader :identifier, :name
14
14
  attr_writer :partial
15
-
15
+
16
16
  #
17
17
  #
18
18
  def initialize name, index, options = {}
19
19
  @name = name
20
-
20
+
21
21
  configuration = Configuration::Index.new index, self
22
-
22
+
23
23
  @identifier = configuration.identifier
24
-
24
+
25
25
  # TODO Push the defaults out into the index.
26
26
  #
27
27
  @partial_strategy = options[:partial] || Internals::Generators::Partial::Default
28
28
  similarity = options[:similarity] || Internals::Generators::Similarity::Default
29
-
29
+
30
30
  bundle_class = options[:indexed_bundle_class] || Bundle::Memory
31
31
  @exact = bundle_class.new :exact, configuration, similarity
32
32
  @partial = bundle_class.new :partial, configuration, similarity
33
-
33
+
34
34
  # @exact = exact_lambda.call(@exact, @partial) if exact_lambda = options[:exact_lambda]
35
35
  # @partial = partial_lambda.call(@exact, @partial) if partial_lambda = options[:partial_lambda]
36
-
36
+
37
37
  # TODO Extract?
38
38
  #
39
39
  Query::Qualifiers.add(configuration.category_name, generate_qualifiers_from(options) || [name])
40
40
  end
41
-
41
+
42
+ def to_s
43
+ <<-CATEGORY
44
+ Category(#{name}):
45
+ Exact:
46
+ #{exact.indented_to_s(4)}
47
+ Partial:
48
+ #{partial.indented_to_s(4)}
49
+ CATEGORY
50
+ end
51
+
42
52
  # TODO Move to Index.
43
53
  #
44
54
  def generate_qualifiers_from options
45
55
  options[:qualifiers] || options[:qualifier] && [options[:qualifier]]
46
56
  end
47
-
57
+
48
58
  # Loads the index from cache.
49
59
  #
50
60
  def load_from_cache
51
- timed_exclaim "Loading index #{identifier}."
61
+ timed_exclaim %Q{"#{identifier}": Loading index.}
52
62
  exact.load
53
63
  partial.load
54
64
  end
55
-
65
+
66
+ # Loads, analyzes, and clears the index.
67
+ #
68
+ # Note: The idea is not to run this while the search engine is running.
69
+ #
70
+ # TODO Spec. Identifier is ok?
71
+ #
72
+ def analyze collector
73
+ collector[identifier] = {
74
+ :exact => Analyzer.new.analyze(exact),
75
+ :partial => Analyzer.new.analyze(partial)
76
+ }
77
+ collector
78
+ end
79
+
56
80
  # Gets the weight for this token's text.
57
81
  #
58
82
  def weight token
59
83
  bundle_for(token).weight token.text
60
84
  end
61
-
85
+
62
86
  # Gets the ids for this token's text.
63
87
  #
64
88
  def ids token
65
89
  bundle_for(token).ids token.text
66
90
  end
67
-
91
+
68
92
  # Returns the right index bundle for this token.
69
93
  #
70
94
  def bundle_for token
71
95
  token.partial?? partial : exact
72
96
  end
73
-
97
+
74
98
  # The partial strategy defines whether to really use the partial index.
75
99
  #
76
100
  def partial
77
101
  @partial_strategy.use_exact_for_partial?? @exact : @partial
78
102
  end
79
-
103
+
80
104
  #
81
105
  #
82
106
  def combination_for token
83
107
  weight(token) && Internals::Query::Combination.new(token, self)
84
108
  end
85
-
109
+
86
110
  end
87
-
111
+
88
112
  end
89
-
113
+
90
114
  end
@@ -1,38 +1,39 @@
1
1
  module Internals
2
2
 
3
3
  module Indexed
4
-
4
+
5
5
  #
6
6
  #
7
7
  class Index
8
-
8
+
9
9
  attr_reader :name, :result_identifier, :combinator, :categories
10
-
10
+
11
11
  delegate :load_from_cache,
12
+ :analyze,
12
13
  :to => :categories
13
-
14
+
14
15
  # TODO Externalize?
15
16
  #
16
17
  def initialize name, options = {}
17
18
  @name = name
18
-
19
+
19
20
  @result_identifier = options[:result_identifier] || name
20
21
  @bundle_class = options[:indexed_bundle_class] # TODO This should actually be a fixed parameter.
21
22
  ignore_unassigned_tokens = options[:ignore_unassigned_tokens] || false # TODO Move to query, somehow.
22
-
23
+
23
24
  @categories = Categories.new ignore_unassigned_tokens: ignore_unassigned_tokens
24
25
  end
25
-
26
+
26
27
  # TODO Doc. Externalize?
27
28
  #
28
29
  def define_category category_name, options = {}
29
30
  options = default_category_options.merge options
30
-
31
+
31
32
  new_category = Category.new category_name, self, options
32
33
  categories << new_category
33
34
  new_category
34
35
  end
35
-
36
+
36
37
  # By default, the category uses
37
38
  # * the index's bundle type.
38
39
  #
@@ -41,7 +42,7 @@ module Internals
41
42
  :indexed_bundle_class => @bundle_class
42
43
  }
43
44
  end
44
-
45
+
45
46
  # Return the possible combinations for this token.
46
47
  #
47
48
  # A combination is a tuple <token, index_bundle>.
@@ -49,9 +50,18 @@ module Internals
49
50
  def possible_combinations token
50
51
  categories.possible_combinations_for token
51
52
  end
52
-
53
+
54
+ def to_s
55
+ <<-INDEX
56
+ Indexed(#{name}):
57
+ Result identifier: "#{result_identifier}"
58
+ Categories:
59
+ #{categories.indented_to_s}
60
+ INDEX
61
+ end
62
+
53
63
  end
54
-
64
+
55
65
  end
56
-
66
+
57
67
  end
@@ -1,25 +1,43 @@
1
1
  module Indexed
2
2
  module Wrappers
3
-
3
+
4
4
  # Per Bundle wrappers.
5
5
  #
6
6
  module Bundle
7
-
7
+
8
8
  # Base wrapper. Just delegates all methods to the bundle.
9
9
  #
10
10
  class Wrapper
11
-
11
+
12
12
  attr_reader :bundle
13
-
13
+
14
14
  def initialize bundle
15
15
  @bundle = bundle
16
16
  end
17
-
18
- delegate :load, :ids, :weight, :identifier, :to => :@bundle
19
-
17
+
18
+ delegate :load,
19
+ :load_index,
20
+ :load_weights,
21
+ :load_similarity,
22
+ :load_configuration,
23
+ :clear_index,
24
+ :clear_weights,
25
+ :clear_similarity,
26
+ :clear_configuration,
27
+ :ids,
28
+ :weight,
29
+ :identifier,
30
+ :analyze,
31
+ :size,
32
+ :index,
33
+ :weights,
34
+ :similarity,
35
+ :configuration,
36
+ :to => :@bundle
37
+
20
38
  end
21
-
39
+
22
40
  end
23
-
41
+
24
42
  end
25
43
  end
@@ -65,7 +65,7 @@ module Indexers
65
65
  end
66
66
  end
67
67
  def indexing_message
68
- timed_exclaim "INDEX #{@configuration}" # TODO from ...
68
+ timed_exclaim %Q{"#{@configuration.identifier}": Starting indexing.}
69
69
  end
70
70
 
71
71
  end