picky 0.0.0 → 0.0.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (161) hide show
  1. data/bin/picky +14 -0
  2. data/lib/bundling.rb +10 -0
  3. data/lib/constants.rb +9 -0
  4. data/lib/deployment.rb +212 -0
  5. data/lib/picky/application.rb +40 -0
  6. data/lib/picky/cacher/convenience.rb +3 -0
  7. data/lib/picky/cacher/generator.rb +17 -0
  8. data/lib/picky/cacher/partial/default.rb +7 -0
  9. data/lib/picky/cacher/partial/none.rb +19 -0
  10. data/lib/picky/cacher/partial/strategy.rb +7 -0
  11. data/lib/picky/cacher/partial/subtoken.rb +91 -0
  12. data/lib/picky/cacher/partial_generator.rb +15 -0
  13. data/lib/picky/cacher/similarity/default.rb +7 -0
  14. data/lib/picky/cacher/similarity/double_levenshtone.rb +73 -0
  15. data/lib/picky/cacher/similarity/none.rb +25 -0
  16. data/lib/picky/cacher/similarity/strategy.rb +7 -0
  17. data/lib/picky/cacher/similarity_generator.rb +15 -0
  18. data/lib/picky/cacher/weights/default.rb +7 -0
  19. data/lib/picky/cacher/weights/logarithmic.rb +39 -0
  20. data/lib/picky/cacher/weights/strategy.rb +7 -0
  21. data/lib/picky/cacher/weights_generator.rb +15 -0
  22. data/lib/picky/configuration/configuration.rb +13 -0
  23. data/lib/picky/configuration/field.rb +68 -0
  24. data/lib/picky/configuration/indexes.rb +60 -0
  25. data/lib/picky/configuration/queries.rb +32 -0
  26. data/lib/picky/configuration/type.rb +52 -0
  27. data/lib/picky/cores.rb +101 -0
  28. data/lib/picky/db/configuration.rb +23 -0
  29. data/lib/picky/ext/ruby19/extconf.rb +7 -0
  30. data/lib/picky/ext/ruby19/performant.c +339 -0
  31. data/lib/picky/extensions/array.rb +45 -0
  32. data/lib/picky/extensions/hash.rb +11 -0
  33. data/lib/picky/extensions/module.rb +15 -0
  34. data/lib/picky/extensions/symbol.rb +18 -0
  35. data/lib/picky/generator.rb +156 -0
  36. data/lib/picky/helpers/cache.rb +23 -0
  37. data/lib/picky/helpers/gc.rb +11 -0
  38. data/lib/picky/helpers/measuring.rb +45 -0
  39. data/lib/picky/helpers/search.rb +27 -0
  40. data/lib/picky/index/bundle.rb +328 -0
  41. data/lib/picky/index/category.rb +109 -0
  42. data/lib/picky/index/combined.rb +38 -0
  43. data/lib/picky/index/type.rb +30 -0
  44. data/lib/picky/indexers/base.rb +77 -0
  45. data/lib/picky/indexers/default.rb +3 -0
  46. data/lib/picky/indexers/field.rb +13 -0
  47. data/lib/picky/indexers/no_source_specified_error.rb +5 -0
  48. data/lib/picky/indexers/solr.rb +60 -0
  49. data/lib/picky/indexes.rb +180 -0
  50. data/lib/picky/initializers/ext.rb +6 -0
  51. data/lib/picky/initializers/mysql.rb +22 -0
  52. data/lib/picky/loader.rb +287 -0
  53. data/lib/picky/loggers/search.rb +19 -0
  54. data/lib/picky/performant/array.rb +23 -0
  55. data/lib/picky/query/allocation.rb +82 -0
  56. data/lib/picky/query/allocations.rb +131 -0
  57. data/lib/picky/query/base.rb +124 -0
  58. data/lib/picky/query/combination.rb +69 -0
  59. data/lib/picky/query/combinations.rb +106 -0
  60. data/lib/picky/query/combinator.rb +92 -0
  61. data/lib/picky/query/full.rb +15 -0
  62. data/lib/picky/query/live.rb +22 -0
  63. data/lib/picky/query/qualifiers.rb +73 -0
  64. data/lib/picky/query/solr.rb +77 -0
  65. data/lib/picky/query/token.rb +215 -0
  66. data/lib/picky/query/tokens.rb +102 -0
  67. data/lib/picky/query/weigher.rb +159 -0
  68. data/lib/picky/query/weights.rb +55 -0
  69. data/lib/picky/rack/harakiri.rb +37 -0
  70. data/lib/picky/results/base.rb +103 -0
  71. data/lib/picky/results/full.rb +19 -0
  72. data/lib/picky/results/live.rb +19 -0
  73. data/lib/picky/routing.rb +165 -0
  74. data/lib/picky/signals.rb +11 -0
  75. data/lib/picky/solr/schema_generator.rb +73 -0
  76. data/lib/picky/sources/base.rb +19 -0
  77. data/lib/picky/sources/csv.rb +30 -0
  78. data/lib/picky/sources/db.rb +77 -0
  79. data/lib/picky/tokenizers/base.rb +130 -0
  80. data/lib/picky/tokenizers/default.rb +3 -0
  81. data/lib/picky/tokenizers/index.rb +73 -0
  82. data/lib/picky/tokenizers/query.rb +70 -0
  83. data/lib/picky/umlaut_substituter.rb +21 -0
  84. data/lib/picky-tasks.rb +6 -0
  85. data/lib/picky.rb +18 -0
  86. data/lib/tasks/application.rake +5 -0
  87. data/lib/tasks/cache.rake +53 -0
  88. data/lib/tasks/framework.rake +4 -0
  89. data/lib/tasks/index.rake +29 -0
  90. data/lib/tasks/server.rake +48 -0
  91. data/lib/tasks/shortcuts.rake +13 -0
  92. data/lib/tasks/solr.rake +36 -0
  93. data/lib/tasks/spec.rake +11 -0
  94. data/lib/tasks/statistics.rake +13 -0
  95. data/lib/tasks/try.rake +29 -0
  96. data/prototype_project/Gemfile +23 -0
  97. data/prototype_project/Rakefile +1 -0
  98. data/prototype_project/app/README +6 -0
  99. data/prototype_project/app/application.rb +50 -0
  100. data/prototype_project/app/application.ru +29 -0
  101. data/prototype_project/app/db.yml +10 -0
  102. data/prototype_project/app/logging.rb +20 -0
  103. data/prototype_project/app/unicorn.ru +10 -0
  104. data/prototype_project/log/README +1 -0
  105. data/prototype_project/script/console +34 -0
  106. data/prototype_project/tmp/README +0 -0
  107. data/prototype_project/tmp/pids/README +0 -0
  108. data/spec/ext/performant_spec.rb +64 -0
  109. data/spec/lib/application_spec.rb +61 -0
  110. data/spec/lib/cacher/partial/subtoken_spec.rb +89 -0
  111. data/spec/lib/cacher/partial_generator_spec.rb +35 -0
  112. data/spec/lib/cacher/similarity/double_levenshtone_spec.rb +60 -0
  113. data/spec/lib/cacher/similarity/none_spec.rb +23 -0
  114. data/spec/lib/cacher/similarity_generator_spec.rb +22 -0
  115. data/spec/lib/cacher/weights/logarithmic_spec.rb +30 -0
  116. data/spec/lib/cacher/weights_generator_spec.rb +21 -0
  117. data/spec/lib/configuration/configuration_spec.rb +38 -0
  118. data/spec/lib/configuration/type_spec.rb +49 -0
  119. data/spec/lib/configuration_spec.rb +8 -0
  120. data/spec/lib/cores_spec.rb +65 -0
  121. data/spec/lib/extensions/array_spec.rb +37 -0
  122. data/spec/lib/extensions/hash_spec.rb +11 -0
  123. data/spec/lib/extensions/module_spec.rb +27 -0
  124. data/spec/lib/extensions/symbol_spec.rb +85 -0
  125. data/spec/lib/generator_spec.rb +135 -0
  126. data/spec/lib/helpers/cache_spec.rb +35 -0
  127. data/spec/lib/helpers/gc_spec.rb +71 -0
  128. data/spec/lib/helpers/measuring_spec.rb +18 -0
  129. data/spec/lib/helpers/search_spec.rb +50 -0
  130. data/spec/lib/index/bundle_partial_generation_speed_spec.rb +47 -0
  131. data/spec/lib/index/bundle_spec.rb +260 -0
  132. data/spec/lib/index/category_spec.rb +203 -0
  133. data/spec/lib/indexers/base_spec.rb +73 -0
  134. data/spec/lib/indexers/field_spec.rb +20 -0
  135. data/spec/lib/loader_spec.rb +48 -0
  136. data/spec/lib/loggers/search_spec.rb +19 -0
  137. data/spec/lib/performant/array_spec.rb +13 -0
  138. data/spec/lib/query/allocation_spec.rb +194 -0
  139. data/spec/lib/query/allocations_spec.rb +336 -0
  140. data/spec/lib/query/base_spec.rb +104 -0
  141. data/spec/lib/query/combination_spec.rb +90 -0
  142. data/spec/lib/query/combinations_spec.rb +83 -0
  143. data/spec/lib/query/combinator_spec.rb +112 -0
  144. data/spec/lib/query/full_spec.rb +22 -0
  145. data/spec/lib/query/live_spec.rb +61 -0
  146. data/spec/lib/query/qualifiers_spec.rb +31 -0
  147. data/spec/lib/query/solr_spec.rb +51 -0
  148. data/spec/lib/query/token_spec.rb +297 -0
  149. data/spec/lib/query/tokens_spec.rb +189 -0
  150. data/spec/lib/query/weights_spec.rb +47 -0
  151. data/spec/lib/results/base_spec.rb +233 -0
  152. data/spec/lib/routing_spec.rb +318 -0
  153. data/spec/lib/solr/schema_generator_spec.rb +42 -0
  154. data/spec/lib/sources/db_spec.rb +91 -0
  155. data/spec/lib/tokenizers/base_spec.rb +61 -0
  156. data/spec/lib/tokenizers/index_spec.rb +51 -0
  157. data/spec/lib/tokenizers/query_spec.rb +105 -0
  158. data/spec/lib/umlaut_substituter_spec.rb +84 -0
  159. data/spec/specific/speed_spec.rb +55 -0
  160. metadata +371 -15
  161. data/README.textile +0 -9
@@ -0,0 +1,69 @@
1
module Query

  # Describes the combination of a token (the text) and
  # the index (the bundle).
  #
  # A combination is a single part of an allocation.
  #
  # An allocation consists of a number of combinations.
  #
  class Combination

    attr_reader :token, :bundle

    # token    - must respond to #text, #to_s and #to_result.
    # category - must respond to #bundle_for(token) and #name.
    #
    # The bundle and the token's text are looked up once, here.
    #
    def initialize token, category
      @token    = token
      @category = category
      @bundle   = category.bundle_for token
      @text     = @token.text # don't want to use reset_similar already
    end

    # Note: Required for uniq!
    #
    # Two combinations hash alike if their token's string form
    # and their bundle hash alike.
    #
    def hash
      [@token.to_s, @bundle].hash
    end

    # Note: Also required for uniq!
    #
    # Array#uniq (and Hash) only treat two objects as duplicates if
    # their hashes match AND eql? returns true. Overriding #hash alone
    # leaves eql? at object identity, so nothing would ever be removed.
    #
    # Compares exactly the values that #hash is built from.
    #
    def eql? other
      other.is_a?(Combination) &&
        [@token.to_s, @bundle].eql?([other.token.to_s, other.bundle])
    end

    # Returns the weight of this combination.
    #
    # Asks the bundle on first use, then returns the remembered value.
    #
    def weight
      @weight ||= @bundle.weight(@text)
    end

    # Returns an array of ids for the given text.
    #
    # Asks the bundle on first use, then returns the remembered value.
    #
    def ids
      @ids ||= @bundle.ids(@text)
    end

    # The identifier for this combination (the category's name).
    #
    def identifier
      @category.name
    end

    # Is the identifier in the given identifiers?
    #
    def in? identifiers
      identifiers.include? identifier
    end

    # Combines the category name with the token's result, e.g.:
    #   [:title, 'Flarbl', :flarbl]
    #
    def to_result
      [identifier, *@token.to_result]
    end

    # Bundle name, a slash, and the result parts joined by colons, e.g.:
    #   full/title:Flarbl:flarbl
    #
    def to_s
      "#{bundle.name}/#{to_result.join(':')}"
    end

  end

end
@@ -0,0 +1,106 @@
1
module Query

  # Combinations are a number of Combination-s.
  #
  # They are, in effect, the core of an allocation.
  #
  class Combinations

    attr_reader :type, :combinations

    delegate :empty?, :to => :@combinations

    # type         - only used for its #result_type when packing into an allocation.
    # combinations - the wrapped array; note that keep/remove mutate it in place.
    #
    def initialize type, combinations = []
      @type         = type # TODO Remove.
      @combinations = combinations
    end

    # Hashes like the wrapped array of combinations.
    #
    def hash
      @combinations.hash
    end

    # Sums up the weights of all combinations, adds whatever the given
    # weights object scores for them, remembers and returns the total.
    #
    # TODO Rewrite.
    #
    def calculate_score weights
      @score  = @combinations.sum(&:weight)
      @score += weights.score(@combinations)
    end

    # Gets all ids for the allocations.
    #
    # Orders the id arrays by size, smallest first, and hands them to
    # the intersection routine, e.g. (sizes):
    #   0. [100_000, 400, 30, 2]
    #   1. [2, 30, 400, 100_000]
    #
    # Returns [] right away if there are no combinations.
    #
    # Note: The original comment describes the intersection as a
    #       C-optimized routine; its implementation is not part of this class.
    #
    def ids
      return [] if @combinations.empty?

      # One array of ids per combination.
      #
      ids_per_combination = @combinations.map { |combination| combination.ids }

      # Smallest first, such that the intersect can be performed faster.
      #
      # TODO Move into the memory_efficient_intersect such that
      #      this precondition for a fast algorithm is always given.
      #
      smallest_first = ids_per_combination.sort { |one, other| one.size <=> other.size }

      Performant::Array.memory_efficient_intersect smallest_first
    end

    # Wraps self in a new Allocation and passes the type's result type on to it.
    #
    def pack_into_allocation
      Allocation.new(self).tap do |allocation|
        allocation.result_type = @type.result_type # TODO Rewrite.
      end
    end

    # Keeps only the combinations whose identifier is in the given
    # identifiers. All others are removed from the wrapped array, in place.
    #
    # Note (carried over from the original): This method is not totally
    #       independent of the calculate_ids one.
    #
    def keep identifiers = []
      # TODO Rewrite to use the category!!!
      #
      @combinations.reject! { |combination| not combination.in?(identifiers) }
    end

    # Removes the combinations whose identifier is in the given
    # identifiers from the wrapped array, in place.
    #
    # Note (carried over from the original): This method is not totally
    #       independent of the calculate_ids one.
    #
    def remove identifiers = []
      # TODO Rewrite to use the category!!!
      #
      @combinations.reject! { |combination| combination.in? identifiers }
    end

    # An array with each combination's result.
    #
    def to_result
      @combinations.map { |combination| combination.to_result }
    end

  end

end
@@ -0,0 +1,92 @@
1
module Query

  # Combines tokens and category indexes into combinations.
  #
  class Combinator

    attr_reader :categories, :category_hash
    attr_reader :ignore_unassigned_tokens # TODO Should this actually be determined by the query? Probably, yes.

    # categories - an array of categories, each responding to #name
    #              and #combination_for(token).
    # options    - :ignore_unassigned_tokens (default false): if set, a token
    #              without any possible combination is marked with nil
    #              instead of an empty array (see #possible_for).
    #
    def initialize categories, options = {}
      @categories    = categories
      @category_hash = hashify categories

      @ignore_unassigned_tokens = options[:ignore_unassigned_tokens] || false
    end

    # Maps each category's name to a one-element array holding that category.
    #
    # If two categories share a name, the later one wins.
    #
    # TODO Move somewhere else.
    #
    # TODO Or use active_support's?
    #
    def hashify category_array
      category_array.each_with_object({}) do |category, hash|
        hash[category.name] = [category]
      end
    end

    # Returns the possible combinations for the token, going through
    # its similar tokens if the token asks for similarity.
    #
    def possible_combinations_for token
      token.similar? ? similar_possible_for(token) : possible_for(token)
    end

    # Collects the similar tokens of the given token from every category,
    # then returns all possible combinations of those similar tokens.
    #
    # Always returns an array, possibly an empty one.
    #
    # TODO Break apart.
    #
    def similar_possible_for token
      # Get as many similar tokens as necessary, skipping
      # the ones that have the same text as the given token.
      #
      text = token.text
      similar_tokens = categories.inject([]) do |result, category|
        next_token = token
        # TODO adjust either this or the amount of similar in index
        #
        while next_token = next_token.next(category)
          result << next_token if next_token.text != text
        end
        result
      end

      # possible combinations
      #
      # Note: possible_for returns nil for an ignored token. That means
      #       "no combinations" here – adding nil to an array would raise.
      #
      similar_tokens.inject([]) do |result, similar_token|
        possible = possible_categories similar_token
        result + (possible_for(similar_token, possible) || [])
      end
    end

    # Returns possible Combinations for the token.
    #
    # The categories param is an optimization: if given, only these
    # categories are asked for a combination.
    #
    # Returns nil (not []) if ignore_unassigned_tokens is set and
    # no category yields a combination.
    #
    # TODO Return [RemovedCategory(token, nil)]
    #      If the search is ...
    #
    # TODO Make categories also a collection class.
    #
    # TODO Return [] if not ok, nil if needs to be removed?
    #      Somehow unnice, but…
    #
    def possible_for token, preselected_categories = nil
      candidates = preselected_categories || possible_categories(token)
      possible   = candidates.map { |category| category.combination_for(token) }
      possible.compact!
      # This is an optimization to mark tokens that are ignored.
      #
      return if ignore_unassigned_tokens && possible.empty?
      possible # wrap in combinations
    end

    # The categories the user asked for via the token, or all categories.
    #
    # TODO too many calls?
    #
    def possible_categories token
      user_defined_categories(token) || categories
    end

    # Returns nil if there is no user defined category,
    # else the category, wrapped in an array.
    #
    def user_defined_categories token
      category_hash[token.user_defined_category_name]
    end

  end

end
@@ -0,0 +1,15 @@
1
module Query

  # The query class for performing full fledged queries.
  #
  # Everything except the kind of results is inherited from Base.
  #
  class Full < Base

    # Full queries generate Results::Full.
    #
    def result_type
      Results::Full
    end

  end

end
@@ -0,0 +1,22 @@
1
module Query

  # The query class for live queries.
  #
  # It does:
  #  * Return a count of results.
  #
  # It does NOT:
  #  * Sort results geographically.
  #  * Do any postprocessing.
  #
  # Everything except the kind of results is inherited from Base.
  #
  class Live < Base

    # Live queries generate Results::Live.
    #
    def result_type
      Results::Live
    end

  end

end
@@ -0,0 +1,73 @@
1
+ # coding: utf-8
2
module Query

  # A single qualifier: A normalized name together
  # with the codes that stand for it.
  #
  class Qualifier

    attr_reader :normalized_qualifier, :codes

    # normalized_qualifier - the value every code is mapped to.
    # codes                - an array of codes.
    #
    def initialize normalized_qualifier, codes
      @normalized_qualifier, @codes = normalized_qualifier, codes
    end

    # Adds a code => normalized qualifier entry to the hash for each code.
    #
    # Will overwrite if the key is present in the hash.
    #
    def inject_into hash
      codes.each { |code| hash[code] = normalized_qualifier }
    end

  end

  # Collection class for qualifiers.
  #
  # There is exactly one instance of it (Singleton).
  #
  class Qualifiers

    include Singleton

    attr_reader :qualifiers, :normalization_mapping

    delegate :<<, :to => :qualifiers

    # Starts without qualifiers and with an empty mapping.
    #
    def initialize
      @qualifiers, @normalization_mapping = [], {}
    end

    class << self

      # Adds a new Qualifier with the given name and codes to the instance.
      #
      # Note: The mapping is not updated here, see #prepare.
      #
      # TODO Spec.
      #
      def add name, qualifiers
        instance << Qualifier.new(name, qualifiers)
      end

    end

    # Uses the qualifiers to prepare (optimize) the qualifier handling:
    # Each qualifier enters its codes into the normalization mapping.
    #
    def prepare
      qualifiers.each { |qualifier| qualifier.inject_into normalization_mapping }
    end

    # Normalizes the given qualifier.
    #
    # Returns nil if it is blank or not in the mapping,
    # the normalized qualifier if it is.
    #
    # Note: The lookup is done with the qualifier converted to a symbol.
    #
    def normalize qualifier
      normalization_mapping[qualifier.to_sym] unless qualifier.blank?
    end

  end
end
@@ -0,0 +1,77 @@
1
+ require 'rsolr'
2
+
3
module Query

  # A query that asks a Solr server for similar possibilities.
  #
  class Solr < Base

    attr_reader :server, :index_types

    # Tries to connect to Solr, then hands the index types to Base.
    #
    # Note: Best effort on purpose – if connecting fails, the server
    #       is nil and #execute returns results without similar entries.
    #
    def initialize *index_types
      @server = begin
        RSolr.connect
      rescue StandardError
        nil
      end
      super *index_types
    end

    # # This runs the actual search.
    # #
    # # TODO Remove!
    # #
    # def search tokens, offset = 0
    #   results = nil
    #
    #   duration = timed do
    #     results = execute(tokens, offset) || empty_results # TODO Does not work yet
    #   end
    #   results.duration = duration
    #
    #   results
    # end

    # Asks Solr once per index type, using the tokens' solr query, and
    # collects per index name the texts of the highlighted fields.
    #
    # tokens - must respond to #to_solr_query.
    # offset - not used here.
    #
    # Returns live results. The results are returned without similar
    # entries if there is no server, the solr query is empty, or a
    # request to Solr fails.
    #
    def execute tokens, offset = 0
      results = Results::Live.new

      if server
        similar = {}

        new_query = tokens.to_solr_query

        return results if new_query.empty?

        index_types.each do |index|
          begin
            response = server.select :q => new_query, :fq => "type:#{index.name}", :hl => true, :'hl.fl' => '*', :'hl.simple.pre' => '<', :'hl.simple.post' => '>', :facet => true
          rescue RSolr::RequestError
            return results
          end

          highlighting  = response['highlighting']
          possibilities = response['response']['docs'].map do |doc|
            highlights = highlighting[doc['id'].to_s]
            next unless highlights
            # Only the fields of the doc that have been highlighted.
            #
            selected = doc.select { |key, _value| highlights.has_key?(key) }
            selected.values.join ' '
          end
          # Docs without highlights have yielded nil above.
          # Remove these, since nil cannot be stripped.
          #
          possibilities.compact!
          possibilities.collect! { |possibility| possibility.strip }.uniq!
          similar[index.name] = possibilities unless possibilities.empty?
        end

        results.add :similar => similar
      end

      # Prefixes the log line of just these results with a star.
      #
      # TODO
      #
      class << results
        def to_log query
          ?* + super
        end
      end

      results
    end

  end

end
@@ -0,0 +1,215 @@
1
module Query
  # This is a query token. Together with other tokens it makes up a query.
  #
  # It remembers the original form, and a normalized form.
  #
  # It also knows whether it needs to look for similarity (bla~), or whether it is a partial (bla*).
  #
  # TODO Make partial / similarity char configurable.
  #
  class Token

    attr_reader :text, :original
    attr_writer :similar

    delegate :blank?, :to => :text

    # Normal initializer.
    #
    # Note: Use this if you do not want a qualified and normalized token.
    #       Only the text is set, neither original nor qualifier.
    #
    def initialize text
      @text = text
    end

    # Returns a qualified and normalized token.
    #
    # Note: Use this in the search engine if you need a qualified
    #       and normalized token. I.e. one prepared for a search.
    #
    # The order matters: The qualifier is split off first, then the
    # original is remembered, then the trailing *, ~ and " are
    # evaluated, and only then removed from the text.
    #
    def self.processed text
      token = new text
      token.qualify
      token.extract_original
      token.partialize
      token.similarize
      token.remove_illegals
      token
    end

    # This returns a predefined category name if the user has given one.
    #
    def user_defined_category_name
      @qualifier
    end

    # Extracts a qualifier for this token and pre-assigns an allocation.
    #
    # Note: Removes the qualifier if it is not allowed.
    #
    def qualify
      @qualifier, @text = split @text
      @qualifier = Query::Qualifiers.instance.normalize @qualifier
    end

    # Remembers a copy of the current text as the original.
    #
    def extract_original
      @original = @text.dup
    end

    # Partial is a conditional setter.
    #
    # It is only settable if it hasn't been set yet.
    #
    def partial= partial
      @partial = partial if @partial.nil?
    end

    # A token that is similar is never partial.
    #
    def partial?
      !@similar && @partial
    end

    # If the text ends with *, partialize it. If with ", don't.
    #
    # Note: This used to read "self.partial = false and return if …".
    #       Since "and" binds weaker than "=", and the assigned false is
    #       falsy, that return was never executed. The intent is now
    #       written explicitly.
    #
    @@no_partial = /\"$/
    @@partial    = /[\*]$/
    def partialize
      if @text =~ @@no_partial
        self.partial = false
      elsif @text =~ @@partial
        self.partial = true
      end
    end

    # If the text ends with ~ similarize it. If with ", don't.
    #
    # Note: See partialize regarding the former "and return". Here the
    #       writer is unconditional, so a text matching both patterns
    #       (possible only with multiple lines, as $ matches at line
    #       ends) was wrongly marked as similar. " now takes precedence.
    #
    @@no_similar = /\"$/
    @@similar    = /[~]$/
    def similarize
      if @text =~ @@no_similar
        self.similar = false
      elsif @text =~ @@similar
        self.similar = true
      end
    end

    # Is nil if similarity has never been set.
    #
    def similar?
      @similar
    end

    # Normalizes this token's text: Removes all ", * and ~ in place.
    #
    @@illegals = /["*~]/
    def remove_illegals
      @text.gsub! @@illegals, '' unless @text.blank?
    end

    # TODO Think about these, remove illegals and normalize...
    #

    # Visitor for tokenizer: Replaces the text with the normalized text.
    #
    # TODO Rewrite!!!
    #
    def tokenize_with tokenizer
      @text = tokenizer.normalize @text
    end

    # Yields each text the tokenizer tokenizes this token's text into.
    #
    # TODO spec!
    #
    # TODO Rewrite!!
    #
    def tokenized tokenizer
      tokenizer.tokenize(@text.to_s).each do |text|
        yield text
      end
    end

    # Returns an array of possible combinations.
    #
    def possible_combinations_in type
      type.possible_combinations self
    end

    # Returns a duplicate of the given token that carries
    # this token's text, partial flag, original and qualifier.
    #
    def from token
      new_token = token.dup
      new_token.instance_variable_set :@text, @text
      new_token.instance_variable_set :@partial, @partial
      new_token.instance_variable_set :@original, @original
      new_token.instance_variable_set :@qualifier, @qualifier
      # TODO
      #
      # token.instance_variable_set :@similarity, @similarity
      new_token
    end

    # Returns a copy of this token that has been moved on to its next
    # similar word in the category's bundle, or nil if there is none.
    #
    # TODO Rewrite, also next_similar.
    #
    def next category
      token = from self
      token if token.next_similar category.bundle_for(token)
    end

    # Sets and returns the next similar word.
    #
    # Returns nil if this token is not similar,
    # or if there are no more similar words.
    #
    def next_similar bundle
      @text = similarity(bundle).next if similar?
    rescue StopIteration
      # reset_similar # TODO
      nil # TODO
    end

    # Lazy similar reader.
    #
    # Note: The enumerator is generated once, for the first bundle given.
    #
    def similarity bundle = nil
      @similarity ||= generate_similarity_for(bundle)
    end

    # Returns an enumerator that traverses over the similar.
    #
    # If the bundle has no similar words for the text, the enumerator is empty.
    #
    def generate_similarity_for bundle
      (bundle.similar(@text) || []).each
    end

    # Generates a solr term from this token.
    #
    # The fuzziness suffix depends on the size of the text:
    # None up to size 3, the listed values up to size 10, ~0.9 above.
    #
    # E.g. "name:heroes~0.81"
    #
    @@solr_fuzzy_mapping = {
      1 => :'',
      2 => :'',
      3 => :'',
      4 => :'~0.74',
      5 => :'~0.78',
      6 => :'~0.81',
      7 => :'~0.83',
      8 => :'~0.85',
      9 => :'~0.87',
      10 => :'~0.89'
    }
    @@solr_fuzzy_mapping.default = :'~0.9'
    def to_solr
      blank? ? '' : (to_s + @@solr_fuzzy_mapping[@text.size].to_s)
    end

    # Returns the original and the current text.
    #
    def to_result
      [@original, @text]
    end

    # Displays the qualifier text and the text, joined.
    #
    # e.g. name:meier
    #
    # Without a qualifier, just the text.
    #
    def to_s
      [@qualifier, @text].compact.join ':'
    end

    private

      # Splits text into a qualifier and text, at the first colon.
      #
      # Returns [qualifier, text].
      #
      # If there is nothing (or only blanks) after the colon, or no
      # colon at all, the part before it is the text, and the
      # qualifier is nil.
      #
      def split unqualified_text
        qualifier, text = (unqualified_text || '').split(':', 2)
        if text.blank?
          [nil, (qualifier || '')]
        else
          [qualifier, text]
        end
      end

  end
end