picky 0.11.2 → 0.12.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (81) hide show
  1. data/lib/picky/Index_api.rb +49 -0
  2. data/lib/picky/alias_instances.rb +4 -1
  3. data/lib/picky/application.rb +16 -15
  4. data/lib/picky/cacher/partial/{subtoken.rb → substring.rb} +19 -18
  5. data/lib/picky/{character_substitution/european.rb → character_substituters/west_european.rb} +2 -2
  6. data/lib/picky/configuration/index.rb +67 -0
  7. data/lib/picky/cores.rb +3 -0
  8. data/lib/picky/index/bundle.rb +35 -51
  9. data/lib/picky/index/file/basic.rb +39 -5
  10. data/lib/picky/index/file/json.rb +10 -0
  11. data/lib/picky/index/file/marshal.rb +10 -0
  12. data/lib/picky/index/file/text.rb +22 -0
  13. data/lib/picky/index/files.rb +11 -36
  14. data/lib/picky/indexed/bundle.rb +61 -0
  15. data/lib/picky/{index → indexed}/categories.rb +1 -1
  16. data/lib/picky/{index → indexed}/category.rb +13 -16
  17. data/lib/picky/{index/type.rb → indexed/index.rb} +6 -6
  18. data/lib/picky/{index/types.rb → indexed/indexes.rb} +10 -10
  19. data/lib/picky/{index → indexed}/wrappers/exact_first.rb +8 -8
  20. data/lib/picky/indexers/no_source_specified_error.rb +1 -1
  21. data/lib/picky/indexers/serial.rb +64 -0
  22. data/lib/picky/indexers/solr.rb +1 -3
  23. data/lib/picky/indexes_api.rb +41 -0
  24. data/lib/picky/indexing/bundle.rb +43 -13
  25. data/lib/picky/indexing/category.rb +17 -64
  26. data/lib/picky/indexing/{type.rb → index.rb} +13 -3
  27. data/lib/picky/indexing/{types.rb → indexes.rb} +22 -22
  28. data/lib/picky/loader.rb +17 -22
  29. data/lib/picky/query/base.rb +1 -1
  30. data/lib/picky/rack/harakiri.rb +9 -2
  31. data/lib/picky/signals.rb +1 -1
  32. data/lib/picky/sources/base.rb +14 -14
  33. data/lib/picky/sources/couch.rb +8 -7
  34. data/lib/picky/sources/csv.rb +10 -10
  35. data/lib/picky/sources/db.rb +8 -8
  36. data/lib/picky/sources/delicious.rb +2 -2
  37. data/lib/picky/sources/wrappers/location.rb +3 -3
  38. data/lib/picky/tokenizers/base.rb +1 -11
  39. data/lib/picky/tokenizers/index.rb +0 -1
  40. data/lib/picky/tokenizers/query.rb +0 -1
  41. data/lib/tasks/index.rake +4 -4
  42. data/lib/tasks/shortcuts.rake +4 -4
  43. data/lib/tasks/try.rake +8 -8
  44. data/project_prototype/Gemfile +1 -1
  45. data/project_prototype/app/application.rb +13 -12
  46. data/spec/lib/application_spec.rb +10 -38
  47. data/spec/lib/cacher/partial/{subtoken_spec.rb → substring_spec.rb} +0 -0
  48. data/spec/lib/{character_substitution/european_spec.rb → character_substituters/west_european_spec.rb} +6 -2
  49. data/spec/lib/configuration/index_spec.rb +80 -0
  50. data/spec/lib/cores_spec.rb +1 -1
  51. data/spec/lib/index/file/text_spec.rb +1 -1
  52. data/spec/lib/index/files_spec.rb +12 -32
  53. data/spec/lib/indexed/bundle_spec.rb +119 -0
  54. data/spec/lib/{indexing → indexed}/categories_spec.rb +13 -14
  55. data/spec/lib/{index → indexed}/category_spec.rb +6 -6
  56. data/spec/lib/{index/type_spec.rb → indexed/index_spec.rb} +3 -3
  57. data/spec/lib/{index → indexed}/wrappers/exact_first_spec.rb +5 -5
  58. data/spec/lib/indexers/serial_spec.rb +62 -0
  59. data/spec/lib/indexing/bundle_partial_generation_speed_spec.rb +7 -5
  60. data/spec/lib/indexing/bundle_spec.rb +9 -14
  61. data/spec/lib/indexing/category_spec.rb +9 -125
  62. data/spec/lib/indexing/{type_spec.rb → index_spec.rb} +3 -3
  63. data/spec/lib/query/base_spec.rb +1 -1
  64. data/spec/lib/query/full_spec.rb +1 -1
  65. data/spec/lib/query/live_spec.rb +2 -4
  66. data/spec/lib/sources/couch_spec.rb +5 -5
  67. data/spec/lib/sources/db_spec.rb +6 -7
  68. data/spec/lib/tokenizers/base_spec.rb +1 -24
  69. data/spec/lib/tokenizers/query_spec.rb +0 -1
  70. metadata +38 -41
  71. data/lib/picky/bundle.rb +0 -33
  72. data/lib/picky/configuration/indexes.rb +0 -51
  73. data/lib/picky/configuration/queries.rb +0 -15
  74. data/lib/picky/indexers/base.rb +0 -85
  75. data/lib/picky/indexers/default.rb +0 -3
  76. data/lib/picky/type.rb +0 -46
  77. data/lib/picky/types.rb +0 -41
  78. data/lib/tasks/cache.rake +0 -46
  79. data/spec/lib/configuration/indexes_spec.rb +0 -28
  80. data/spec/lib/index/bundle_spec.rb +0 -151
  81. data/spec/lib/indexers/base_spec.rb +0 -89
@@ -1,8 +1,8 @@
1
1
  module Indexing
2
2
 
3
- class Types
3
+ class Indexes
4
4
 
5
- attr_reader :types
5
+ attr_reader :indexes
6
6
 
7
7
  each_delegate :take_snapshot,
8
8
  :generate_caches,
@@ -11,7 +11,7 @@ module Indexing
11
11
  :check_caches,
12
12
  :clear_caches,
13
13
  :create_directory_structure,
14
- :to => :types
14
+ :to => :indexes
15
15
 
16
16
  def initialize
17
17
  clear
@@ -20,13 +20,13 @@ module Indexing
20
20
  # TODO Spec.
21
21
  #
22
22
  def clear
23
- @types = []
23
+ @indexes = []
24
24
  end
25
25
 
26
26
  # TODO Spec. Superclass?
27
27
  #
28
- def register type
29
- self.types << type
28
+ def register index
29
+ self.indexes << index
30
30
  end
31
31
 
32
32
  # Runs the indexers in parallel (index + cache).
@@ -39,9 +39,9 @@ module Indexing
39
39
  # Run in parallel.
40
40
  #
41
41
  timed_exclaim "INDEXING USING #{Cores.max_processors} PROCESSORS, IN #{randomly ? 'RANDOM' : 'GIVEN'} ORDER."
42
- Cores.forked self.types, { randomly: randomly } do |type|
43
- type.index
44
- type.cache
42
+ Cores.forked self.indexes, { randomly: randomly } do |an_index|
43
+ an_index.index
44
+ an_index.cache
45
45
  end
46
46
  timed_exclaim "INDEXING FINISHED."
47
47
  end
@@ -51,36 +51,36 @@ module Indexing
51
51
  def index_for_tests
52
52
  take_snapshot
53
53
 
54
- self.types.each do |type|
55
- type.index
56
- type.cache
54
+ self.indexes.each do |an_index|
55
+ an_index.index
56
+ an_index.cache
57
57
  end
58
58
  end
59
59
 
60
60
  # TODO Spec
61
61
  #
62
- def generate_index_only type_name, category_name
63
- found = find type_name, category_name
62
+ def generate_index_only index_name, category_name
63
+ found = find index_name, category_name
64
64
  found.index if found
65
65
  end
66
- def generate_cache_only type_name, category_name
67
- found = find type_name, category_name
66
+ def generate_cache_only index_name, category_name
67
+ found = find index_name, category_name
68
68
  found.generate_caches if found
69
69
  end
70
70
 
71
71
  # TODO Spec
72
72
  #
73
- def find type_name, category_name
74
- type_name = type_name.to_sym
73
+ def find index_name, category_name
74
+ index_name = index_name.to_sym
75
75
 
76
- types.each do |type|
77
- next unless type.name == type_name
76
+ indexes.each do |index|
77
+ next unless index.name == index_name
78
78
 
79
- found = type.categories.find category_name
79
+ found = index.categories.find category_name
80
80
  return found if found
81
81
  end
82
82
 
83
- raise %Q{Index "#{type_name}" not found. Possible indexes: "#{types.map(&:name).join('", "')}".}
83
+ raise %Q{Index "#{index_name}" not found. Possible indexes: "#{indexes.map(&:name).join('", "')}".}
84
84
  end
85
85
 
86
86
  end
data/lib/picky/loader.rb CHANGED
@@ -104,9 +104,9 @@ module Loader
104
104
  load_relative 'helpers/cache'
105
105
  load_relative 'helpers/measuring'
106
106
 
107
- # Character Substitution
107
+ # Character Substituters
108
108
  #
109
- load_relative 'character_substitution/european'
109
+ load_relative 'character_substituters/west_european'
110
110
 
111
111
  # Signal handling
112
112
  #
@@ -119,8 +119,7 @@ module Loader
119
119
  # Index generation strategies.
120
120
  #
121
121
  load_relative 'indexers/no_source_specified_error'
122
- load_relative 'indexers/base'
123
- load_relative 'indexers/default'
122
+ load_relative 'indexers/serial'
124
123
  #
125
124
  # load_relative 'indexers/solr'
126
125
 
@@ -132,7 +131,7 @@ module Loader
132
131
  #
133
132
  load_relative 'cacher/partial/strategy'
134
133
  load_relative 'cacher/partial/none'
135
- load_relative 'cacher/partial/subtoken'
134
+ load_relative 'cacher/partial/substring'
136
135
  load_relative 'cacher/partial/default'
137
136
 
138
137
  # Weight index generation strategies.
@@ -167,27 +166,27 @@ module Loader
167
166
  load_relative 'index/file/json'
168
167
  load_relative 'index/files'
169
168
 
170
- # Index types.
169
+ # Indexing and Indexed things.
171
170
  #
172
- load_relative 'bundle'
171
+ load_relative 'index/bundle'
173
172
 
174
173
  load_relative 'indexing/bundle'
175
174
  load_relative 'indexing/category'
176
175
  load_relative 'indexing/categories'
177
- load_relative 'indexing/type'
178
- load_relative 'indexing/types'
176
+ load_relative 'indexing/index'
177
+ load_relative 'indexing/indexes'
179
178
 
180
- load_relative 'index/bundle'
181
- load_relative 'index/category'
182
- load_relative 'index/categories'
183
- load_relative 'index/type'
184
- load_relative 'index/types'
179
+ load_relative 'indexed/bundle'
180
+ load_relative 'indexed/category'
181
+ load_relative 'indexed/categories'
182
+ load_relative 'indexed/index'
183
+ load_relative 'indexed/indexes'
185
184
 
186
- load_relative 'types'
185
+ load_relative 'indexes_api'
187
186
  load_relative 'alias_instances'
188
- load_relative 'type'
187
+ load_relative 'index_api'
189
188
 
190
- load_relative 'index/wrappers/exact_first'
189
+ load_relative 'indexed/wrappers/exact_first'
191
190
 
192
191
  # Tokens.
193
192
  #
@@ -240,11 +239,7 @@ module Loader
240
239
 
241
240
  # Configuration.
242
241
  #
243
- load_relative 'configuration/indexes'
244
-
245
- # ... in Application.
246
- #
247
- load_relative 'configuration/queries'
242
+ load_relative 'configuration/index'
248
243
 
249
244
  # Application and routing.
250
245
  #
@@ -19,7 +19,7 @@ module Query
19
19
  #
20
20
  def initialize *index_type_definitions
21
21
  options = Hash === index_type_definitions.last ? index_type_definitions.pop : {}
22
- indexes = index_type_definitions.map &:index
22
+ indexes = index_type_definitions.map &:indexed
23
23
 
24
24
  @weigher = options[:weigher] || Weigher.new(indexes)
25
25
  @tokenizer = options[:tokenizer] || Tokenizers::Query.default
@@ -4,8 +4,12 @@ module Rack
4
4
  #
5
5
  # Use as follows in e.g. your rackup File:
6
6
  #
7
- # Rack::Harakiri.after = 50
8
- # use Rack::Harakiri
7
+ # Rack::Harakiri.after = 100
8
+ # use Rack::Harakiri
9
+ #
10
+ # Then the Unicorn will commit suicide after 100 requests (50 is the default).
11
+ #
12
+ # The Master Unicorn process forks a new child Unicorn to replace the old one.
9
13
  #
10
14
  class Harakiri
11
15
 
@@ -21,6 +25,9 @@ module Rack
21
25
  @quit_after_requests = self.class.after || 50
22
26
  end
23
27
 
28
+ # Harakiri is a middleware, so it passes the call on after checking if it
29
+ # is time to honorably retire.
30
+ #
24
31
  def call env
25
32
  harakiri
26
33
  @app.call env
data/lib/picky/signals.rb CHANGED
@@ -1,7 +1,7 @@
1
1
  # TODO Cleanup and move to project_prototype.
2
2
  #
3
3
  # Signal.trap 'USR1' do
4
- # Indexes.reload
4
+ # Indexed.reload
5
5
  # end
6
6
  # Signal.trap 'USR2' do
7
7
  # Loader.reload
@@ -10,22 +10,9 @@ module Sources
10
10
  # * take_snapshot: Optional, called once for each type.
11
11
  class Base
12
12
 
13
- # Note: Methods listed for illustrative purposes.
13
+ # Note: Default methods do nothing.
14
14
  #
15
15
 
16
- # Called by the indexer when gathering data.
17
- #
18
- # Yields the data (id, text for id) for the given type and field.
19
- #
20
- # When implementing or overriding your own,
21
- # be sure to <tt>yield</tt> (or <tt>block.call</tt>) an id (as string or integer)
22
- # and a corresponding text for the given type symbol and
23
- # category symbol.
24
- #
25
- def harvest type, category
26
- # yields nothing
27
- end
28
-
29
16
  # Connect to the backend.
30
17
  #
31
18
  # Note: Called once per index/category combination
@@ -47,6 +34,19 @@ module Sources
47
34
 
48
35
  end
49
36
 
37
+ # Called by the indexer when gathering data.
38
+ #
39
+ # Yields the data (id, text for id) for the given type and category.
40
+ #
41
+ # When implementing or overriding your own,
42
+ # be sure to <tt>yield</tt> (or <tt>block.call</tt>) an id (as string or integer)
43
+ # and a corresponding text for the given type symbol and
44
+ # category symbol.
45
+ #
46
+ def harvest type, category
47
+ # yields nothing
48
+ end
49
+
50
50
  end
51
51
 
52
52
  end
@@ -8,9 +8,9 @@ module Sources
8
8
 
9
9
  class Couch < Base
10
10
 
11
- def initialize *field_names, options
11
+ def initialize *category_names, options
12
12
  check_gem
13
- Hash === options && options[:url] || raise_no_db_given(field_names)
13
+ Hash === options && options[:url] || raise_no_db_given(category_names)
14
14
  @db = RestClient::Resource.new options.delete(:url), options
15
15
  end
16
16
 
@@ -23,9 +23,10 @@ module Sources
23
23
 
24
24
  # Harvests the data to index.
25
25
  #
26
- def harvest type, field
26
+ def harvest type, category
27
+ category_name = category.name.to_s
27
28
  get_data do |doc|
28
- yield doc['_id'].to_i, doc[field.name.to_s] || next
29
+ yield doc['_id'].to_i, doc[category_name] || next
29
30
  end
30
31
  end
31
32
 
@@ -35,9 +36,9 @@ module Sources
35
36
  map{|row| row['doc']}.
36
37
  each &block
37
38
  end
38
-
39
- def raise_no_db_given field_names
40
- raise NoCouchDBGiven.new(field_names.join(', '))
39
+
40
+ def raise_no_db_given category_names
41
+ raise NoCouchDBGiven.new(category_names.join(', '))
41
42
  end
42
43
  end
43
44
  end
@@ -1,32 +1,32 @@
1
1
  module Sources
2
2
 
3
3
  # Describes a CSV source, a file with csv in it.
4
- # Give it a sequence of field names and a file option with the filename.
4
+ # Give it a sequence of category names and a file option with the filename.
5
5
  #
6
6
  class NoCSVFileGiven < StandardError; end
7
7
 
8
8
  class CSV < Base
9
9
 
10
- attr_reader :file_name, :field_names
10
+ attr_reader :file_name, :category_names
11
11
 
12
- def initialize *field_names, options
12
+ def initialize *category_names, options
13
13
  require 'csv'
14
- @field_names = field_names
15
- @file_name = Hash === options && options[:file] || raise_no_file_given(field_names)
14
+ @category_names = category_names
15
+ @file_name = Hash === options && options[:file] || raise_no_file_given(category_names)
16
16
  end
17
17
 
18
18
  #
19
19
  #
20
- def raise_no_file_given field_names
21
- raise NoCSVFileGiven.new(field_names.join(', '))
20
+ def raise_no_file_given category_names
21
+ raise NoCSVFileGiven.new(category_names.join(', '))
22
22
  end
23
23
 
24
24
  # Harvests the data to index.
25
25
  #
26
- def harvest _, field
27
- index = field_names.index field.name
26
+ def harvest _, category
27
+ index = category_names.index category.name
28
28
  get_data do |ary|
29
- indexed_id = ary.shift.to_i
29
+ indexed_id = ary.shift.to_i # TODO is to_i necessary?
30
30
  text = ary[index]
31
31
  next unless text
32
32
  text.force_encoding 'utf-8' # TODO Still needed?
@@ -93,11 +93,11 @@ module Sources
93
93
  # Example:
94
94
  # "SELECT indexed_id, value FROM bla_table st WHERE kind = 'bla'"
95
95
  #
96
- def harvest type, field
96
+ def harvest type, category
97
97
  connect_backend
98
98
 
99
99
  (0..count(type)).step(chunksize) do |offset|
100
- get_data(type, field, offset).each do |indexed_id, text|
100
+ get_data(type, category, offset).each do |indexed_id, text|
101
101
  next unless text
102
102
  text.force_encoding 'utf-8' # TODO Still needed?
103
103
  yield indexed_id, text
@@ -107,16 +107,16 @@ module Sources
107
107
 
108
108
  # Gets data from the backend.
109
109
  #
110
- def get_data type, field, offset
111
- database.connection.execute harvest_statement_with_offset(type, field, offset)
110
+ def get_data type, category, offset
111
+ database.connection.execute harvest_statement_with_offset(type, category, offset)
112
112
  end
113
113
 
114
114
  # Builds a harvest statement for getting data to index.
115
115
  #
116
116
  # TODO Use the adapter for this.
117
117
  #
118
- def harvest_statement_with_offset type, field, offset
119
- statement = harvest_statement type, field
118
+ def harvest_statement_with_offset type, category, offset
119
+ statement = harvest_statement type, category
120
120
 
121
121
  statement += statement.include?('WHERE') ? ' AND' : ' WHERE'
122
122
 
@@ -125,8 +125,8 @@ module Sources
125
125
 
126
126
  # Base harvest statement for dbs.
127
127
  #
128
- def harvest_statement type, field
129
- "SELECT indexed_id, #{field.name} FROM #{snapshot_table_name(type)} st"
128
+ def harvest_statement type, category
129
+ "SELECT indexed_id, #{category.name} FROM #{snapshot_table_name(type)} st"
130
130
  end
131
131
 
132
132
  # Override in subclasses.
@@ -16,10 +16,10 @@ module Sources
16
16
 
17
17
  # Harvests the data to index.
18
18
  #
19
- def harvest _, field
19
+ def harvest _, category
20
20
  get_data do |uid, data|
21
21
  indexed_id = uid
22
- text = data[field.name]
22
+ text = data[category.name]
23
23
  next unless text
24
24
  text.force_encoding 'utf-8' # TODO Still needed?
25
25
  yield indexed_id, text
@@ -38,9 +38,9 @@ module Sources
38
38
  @min = 1.0/0
39
39
  end
40
40
 
41
- # Yield the data (id, text for id) for the given type and field.
41
+ # Yield the data (id, text for id) for the given type and category.
42
42
  #
43
- def harvest type, field
43
+ def harvest type, category
44
44
  reset
45
45
 
46
46
  # Cache. TODO Make option?
@@ -49,7 +49,7 @@ module Sources
49
49
 
50
50
  # Gather min/max.
51
51
  #
52
- backend.harvest type, field do |indexed_id, location|
52
+ backend.harvest type, category do |indexed_id, location|
53
53
  location = location.to_f
54
54
  @min = location if location < @min
55
55
  locations << [indexed_id, location]
@@ -22,16 +22,6 @@ module Tokenizers
22
22
  remove_stopwords text
23
23
  end
24
24
 
25
- # Contraction.
26
- #
27
- def contracts_expressions what, to_what
28
- @contract_what = what
29
- @contract_to_what = to_what
30
- end
31
- def contract text
32
- text.gsub! @contract_what, @contract_to_what if @contract_what
33
- end
34
-
35
25
  # Illegals.
36
26
  #
37
27
  # TODO Should there be a legal?
@@ -83,7 +73,7 @@ module Tokenizers
83
73
  #
84
74
  # Default is West European character substitution.
85
75
  #
86
- def substitutes_characters_with substituter = CharacterSubstitution::European.new
76
+ def substitutes_characters_with substituter = CharacterSubstituters::WestEuropean.new
87
77
  # TODO Raise if it doesn't quack substitute?
88
78
  @substituter = substituter
89
79
  end
@@ -25,7 +25,6 @@ module Tokenizers
25
25
  text = substitute_characters text
26
26
  text.downcase!
27
27
  remove_illegals text
28
- contract text
29
28
  # we do not remove single stopwords for an entirely different
30
29
  # reason than in the query tokenizer.
31
30
  # An indexed thing with just name "UND" (a stopword) should not lose its name.
@@ -30,7 +30,6 @@ module Tokenizers
30
30
  def preprocess text
31
31
  remove_illegals text # Remove illegal characters
32
32
  remove_non_single_stopwords text # remove stop words
33
- contract text # contract st sankt etc
34
33
  text
35
34
  end
36
35
 
data/lib/tasks/index.rake CHANGED
@@ -17,10 +17,10 @@ namespace :index do
17
17
  end
18
18
 
19
19
  desc "Generates a specific index from index snapshots."
20
- task :specific, [:type, :field] => :application do |_, options|
21
- type, field = options.type, options.field
22
- Indexes.generate_index_only type.to_sym, field.to_sym
23
- Indexes.generate_cache_only type.to_sym, field.to_sym
20
+ task :specific, [:index, :category] => :application do |_, options|
21
+ index, category = options.index, options.category
22
+ Indexes.generate_index_only index.to_sym, category.to_sym
23
+ Indexes.generate_cache_only index.to_sym, category.to_sym
24
24
  end
25
25
 
26
26
  desc 'Checks the index files for files that are suspiciously small or missing.'
@@ -3,11 +3,11 @@ task :index => :application do
3
3
  Rake::Task[:'index:randomly'].invoke
4
4
  end
5
5
 
6
- desc "Try the given text in the indexer/query (type:field optional)."
7
- task :try, [:text, :type_and_field] => :application do |_, options|
8
- text, type_and_field = options.text, options.type_and_field
6
+ desc "Try the given text in the indexer/query (index:category optional)."
7
+ task :try, [:text, :index_and_category] => :application do |_, options|
8
+ text, index_and_category = options.text, options.index_and_category
9
9
 
10
- Rake::Task[:'try:both'].invoke text, type_and_field
10
+ Rake::Task[:'try:both'].invoke text, index_and_category
11
11
  end
12
12
 
13
13
  desc "Start the server."
data/lib/tasks/try.rake CHANGED
@@ -2,11 +2,11 @@
2
2
  #
3
3
  namespace :try do
4
4
 
5
- # desc "Try how a given word would be tokenized when indexing (type:field optional)."
6
- task :index, [:text, :type_and_field] => :application do |_, options|
7
- text, type_and_field = options.text, options.type_and_field
5
+ # desc "Try how a given word would be tokenized when indexing (index:category optional)."
6
+ task :index, [:text, :index_and_category] => :application do |_, options|
7
+ text, index_and_category = options.text, options.index_and_category
8
8
 
9
- tokenizer = type_and_field ? Indexes.find(*type_and_field.split(':')).tokenizer : Tokenizers::Index.default
9
+ tokenizer = index_and_category ? Indexes.find(*index_and_category.split(':')).tokenizer : Tokenizers::Index.default
10
10
 
11
11
  puts "\"#{text}\" is index tokenized as #{tokenizer.tokenize(text.dup).to_a}"
12
12
  end
@@ -18,11 +18,11 @@ namespace :try do
18
18
  puts "\"#{text}\" is query tokenized as #{Tokenizers::Query.default.tokenize(text.dup).to_a.map(&:to_s).map(&:to_sym)}"
19
19
  end
20
20
 
21
- # desc "Try the given text with both the index and the query (type:field optional)."
22
- task :both, [:text, :type_and_field] => :application do |_, options|
23
- text, type_and_field = options.text, options.type_and_field
21
+ # desc "Try the given text with both the index and the query (index:category optional)."
22
+ task :both, [:text, :index_and_category] => :application do |_, options|
23
+ text, index_and_category = options.text, options.index_and_category
24
24
 
25
- Rake::Task[:"try:index"].invoke text, type_and_field
25
+ Rake::Task[:"try:index"].invoke text, index_and_category
26
26
  Rake::Task[:"try:query"].invoke text
27
27
  end
28
28
 
@@ -2,7 +2,7 @@ source :gemcutter
2
2
 
3
3
  # Gems required by Picky.
4
4
  #
5
- gem 'picky', '~> 0.11.0'
5
+ gem 'picky', '~> 0.12.0'
6
6
  gem 'rake'
7
7
  gem 'bundler'
8
8
  gem 'rack', '~> 1.2.1'
@@ -9,32 +9,33 @@
9
9
  class PickySearch < Application
10
10
 
11
11
  # Indexing: How text is indexed.
12
- # Querying: How query text is handled.
13
12
  #
14
13
  default_indexing removes_characters: /[^a-zA-Z0-9\s\/\-\"\&\.]/,
15
14
  stopwords: /\b(and|the|of|it|in|for)\b/,
16
15
  splits_text_on: /[\s\/\-\"\&\.]/
17
16
 
17
+ # Querying: How query text is handled.
18
+ #
18
19
  default_querying removes_characters: /[^a-zA-Z0-9\s\/\-\,\&\"\~\*\:]/, # Picky needs control chars *"~: to pass through.
19
20
  stopwords: /\b(and|the|of|it|in|for)\b/,
20
21
  splits_text_on: /[\s\/\-\,\&]+/,
21
22
 
22
- maximum_tokens: 5, # Max amount of tokens passing into a query. 5 is the default.
23
- substitutes_characters_with: CharacterSubstitution::European.new # Normalizes special user input, Ä -> Ae, ñ -> n etc.
23
+ maximum_tokens: 5, # Amount of tokens passing into a query (5 = default).
24
+ substitutes_characters_with: CharacterSubstituters::WestEuropean.new # Normalizes special user input, Ä -> Ae, ñ -> n etc.
24
25
 
25
26
  # Define an index. Use a database etc. source?
26
27
  # See http://github.com/floere/picky/wiki/Sources-Configuration#sources
27
28
  #
28
29
  books_index = index :books, Sources::CSV.new(:title, :author, :isbn, file: 'app/library.csv')
29
- books_index.category :title,
30
- similarity: Similarity::Phonetic.new(3), # Up to three similar title word indexed (default: No similarity).
31
- partial: Partial::Substring.new(from: 1) # Indexes substrings upwards from character 1 (default: -3),
32
- # You'll find "picky" even when entering just a "p".
33
- books_index.category :author,
34
- partial: Partial::Substring.new(from: 1)
35
- books_index.category :isbn,
36
- partial: Partial::None.new # Partial substring searching on an ISBN does not make
37
- # much sense, neither does similarity.
30
+ books_index.define_category :title,
31
+ similarity: Similarity::Phonetic.new(3), # Up to three similar title word indexed (default: No similarity).
32
+ partial: Partial::Substring.new(from: 1) # Indexes substrings upwards from character 1 (default: -3),
33
+ # You'll find "picky" even when entering just a "p".
34
+ books_index.define_category :author,
35
+ partial: Partial::Substring.new(from: 1)
36
+ books_index.define_category :isbn,
37
+ partial: Partial::None.new # Partial substring searching on an ISBN does not make
38
+ # much sense, neither does similarity.
38
39
 
39
40
  query_options = { :weights => { [:title, :author] => +3, [:title] => +1 } } # +/- points for ordered combinations.
40
41