picky 0.11.2 → 0.12.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (81)
  1. data/lib/picky/Index_api.rb +49 -0
  2. data/lib/picky/alias_instances.rb +4 -1
  3. data/lib/picky/application.rb +16 -15
  4. data/lib/picky/cacher/partial/{subtoken.rb → substring.rb} +19 -18
  5. data/lib/picky/{character_substitution/european.rb → character_substituters/west_european.rb} +2 -2
  6. data/lib/picky/configuration/index.rb +67 -0
  7. data/lib/picky/cores.rb +3 -0
  8. data/lib/picky/index/bundle.rb +35 -51
  9. data/lib/picky/index/file/basic.rb +39 -5
  10. data/lib/picky/index/file/json.rb +10 -0
  11. data/lib/picky/index/file/marshal.rb +10 -0
  12. data/lib/picky/index/file/text.rb +22 -0
  13. data/lib/picky/index/files.rb +11 -36
  14. data/lib/picky/indexed/bundle.rb +61 -0
  15. data/lib/picky/{index → indexed}/categories.rb +1 -1
  16. data/lib/picky/{index → indexed}/category.rb +13 -16
  17. data/lib/picky/{index/type.rb → indexed/index.rb} +6 -6
  18. data/lib/picky/{index/types.rb → indexed/indexes.rb} +10 -10
  19. data/lib/picky/{index → indexed}/wrappers/exact_first.rb +8 -8
  20. data/lib/picky/indexers/no_source_specified_error.rb +1 -1
  21. data/lib/picky/indexers/serial.rb +64 -0
  22. data/lib/picky/indexers/solr.rb +1 -3
  23. data/lib/picky/indexes_api.rb +41 -0
  24. data/lib/picky/indexing/bundle.rb +43 -13
  25. data/lib/picky/indexing/category.rb +17 -64
  26. data/lib/picky/indexing/{type.rb → index.rb} +13 -3
  27. data/lib/picky/indexing/{types.rb → indexes.rb} +22 -22
  28. data/lib/picky/loader.rb +17 -22
  29. data/lib/picky/query/base.rb +1 -1
  30. data/lib/picky/rack/harakiri.rb +9 -2
  31. data/lib/picky/signals.rb +1 -1
  32. data/lib/picky/sources/base.rb +14 -14
  33. data/lib/picky/sources/couch.rb +8 -7
  34. data/lib/picky/sources/csv.rb +10 -10
  35. data/lib/picky/sources/db.rb +8 -8
  36. data/lib/picky/sources/delicious.rb +2 -2
  37. data/lib/picky/sources/wrappers/location.rb +3 -3
  38. data/lib/picky/tokenizers/base.rb +1 -11
  39. data/lib/picky/tokenizers/index.rb +0 -1
  40. data/lib/picky/tokenizers/query.rb +0 -1
  41. data/lib/tasks/index.rake +4 -4
  42. data/lib/tasks/shortcuts.rake +4 -4
  43. data/lib/tasks/try.rake +8 -8
  44. data/project_prototype/Gemfile +1 -1
  45. data/project_prototype/app/application.rb +13 -12
  46. data/spec/lib/application_spec.rb +10 -38
  47. data/spec/lib/cacher/partial/{subtoken_spec.rb → substring_spec.rb} +0 -0
  48. data/spec/lib/{character_substitution/european_spec.rb → character_substituters/west_european_spec.rb} +6 -2
  49. data/spec/lib/configuration/index_spec.rb +80 -0
  50. data/spec/lib/cores_spec.rb +1 -1
  51. data/spec/lib/index/file/text_spec.rb +1 -1
  52. data/spec/lib/index/files_spec.rb +12 -32
  53. data/spec/lib/indexed/bundle_spec.rb +119 -0
  54. data/spec/lib/{indexing → indexed}/categories_spec.rb +13 -14
  55. data/spec/lib/{index → indexed}/category_spec.rb +6 -6
  56. data/spec/lib/{index/type_spec.rb → indexed/index_spec.rb} +3 -3
  57. data/spec/lib/{index → indexed}/wrappers/exact_first_spec.rb +5 -5
  58. data/spec/lib/indexers/serial_spec.rb +62 -0
  59. data/spec/lib/indexing/bundle_partial_generation_speed_spec.rb +7 -5
  60. data/spec/lib/indexing/bundle_spec.rb +9 -14
  61. data/spec/lib/indexing/category_spec.rb +9 -125
  62. data/spec/lib/indexing/{type_spec.rb → index_spec.rb} +3 -3
  63. data/spec/lib/query/base_spec.rb +1 -1
  64. data/spec/lib/query/full_spec.rb +1 -1
  65. data/spec/lib/query/live_spec.rb +2 -4
  66. data/spec/lib/sources/couch_spec.rb +5 -5
  67. data/spec/lib/sources/db_spec.rb +6 -7
  68. data/spec/lib/tokenizers/base_spec.rb +1 -24
  69. data/spec/lib/tokenizers/query_spec.rb +0 -1
  70. metadata +38 -41
  71. data/lib/picky/bundle.rb +0 -33
  72. data/lib/picky/configuration/indexes.rb +0 -51
  73. data/lib/picky/configuration/queries.rb +0 -15
  74. data/lib/picky/indexers/base.rb +0 -85
  75. data/lib/picky/indexers/default.rb +0 -3
  76. data/lib/picky/type.rb +0 -46
  77. data/lib/picky/types.rb +0 -41
  78. data/lib/tasks/cache.rake +0 -46
  79. data/spec/lib/configuration/indexes_spec.rb +0 -28
  80. data/spec/lib/index/bundle_spec.rb +0 -151
  81. data/spec/lib/indexers/base_spec.rb +0 -89
@@ -1,8 +1,8 @@
1
1
  module Indexing
2
2
 
3
- class Types
3
+ class Indexes
4
4
 
5
- attr_reader :types
5
+ attr_reader :indexes
6
6
 
7
7
  each_delegate :take_snapshot,
8
8
  :generate_caches,
@@ -11,7 +11,7 @@ module Indexing
11
11
  :check_caches,
12
12
  :clear_caches,
13
13
  :create_directory_structure,
14
- :to => :types
14
+ :to => :indexes
15
15
 
16
16
  def initialize
17
17
  clear
@@ -20,13 +20,13 @@ module Indexing
20
20
  # TODO Spec.
21
21
  #
22
22
  def clear
23
- @types = []
23
+ @indexes = []
24
24
  end
25
25
 
26
26
  # TODO Spec. Superclass?
27
27
  #
28
- def register type
29
- self.types << type
28
+ def register index
29
+ self.indexes << index
30
30
  end
31
31
 
32
32
  # Runs the indexers in parallel (index + cache).
@@ -39,9 +39,9 @@ module Indexing
39
39
  # Run in parallel.
40
40
  #
41
41
  timed_exclaim "INDEXING USING #{Cores.max_processors} PROCESSORS, IN #{randomly ? 'RANDOM' : 'GIVEN'} ORDER."
42
- Cores.forked self.types, { randomly: randomly } do |type|
43
- type.index
44
- type.cache
42
+ Cores.forked self.indexes, { randomly: randomly } do |an_index|
43
+ an_index.index
44
+ an_index.cache
45
45
  end
46
46
  timed_exclaim "INDEXING FINISHED."
47
47
  end
@@ -51,36 +51,36 @@ module Indexing
51
51
  def index_for_tests
52
52
  take_snapshot
53
53
 
54
- self.types.each do |type|
55
- type.index
56
- type.cache
54
+ self.indexes.each do |an_index|
55
+ an_index.index
56
+ an_index.cache
57
57
  end
58
58
  end
59
59
 
60
60
  # TODO Spec
61
61
  #
62
- def generate_index_only type_name, category_name
63
- found = find type_name, category_name
62
+ def generate_index_only index_name, category_name
63
+ found = find index_name, category_name
64
64
  found.index if found
65
65
  end
66
- def generate_cache_only type_name, category_name
67
- found = find type_name, category_name
66
+ def generate_cache_only index_name, category_name
67
+ found = find index_name, category_name
68
68
  found.generate_caches if found
69
69
  end
70
70
 
71
71
  # TODO Spec
72
72
  #
73
- def find type_name, category_name
74
- type_name = type_name.to_sym
73
+ def find index_name, category_name
74
+ index_name = index_name.to_sym
75
75
 
76
- types.each do |type|
77
- next unless type.name == type_name
76
+ indexes.each do |index|
77
+ next unless index.name == index_name
78
78
 
79
- found = type.categories.find category_name
79
+ found = index.categories.find category_name
80
80
  return found if found
81
81
  end
82
82
 
83
- raise %Q{Index "#{type_name}" not found. Possible indexes: "#{types.map(&:name).join('", "')}".}
83
+ raise %Q{Index "#{index_name}" not found. Possible indexes: "#{indexes.map(&:name).join('", "')}".}
84
84
  end
85
85
 
86
86
  end
data/lib/picky/loader.rb CHANGED
@@ -104,9 +104,9 @@ module Loader
104
104
  load_relative 'helpers/cache'
105
105
  load_relative 'helpers/measuring'
106
106
 
107
- # Character Substitution
107
+ # Character Substituters
108
108
  #
109
- load_relative 'character_substitution/european'
109
+ load_relative 'character_substituters/west_european'
110
110
 
111
111
  # Signal handling
112
112
  #
@@ -119,8 +119,7 @@ module Loader
119
119
  # Index generation strategies.
120
120
  #
121
121
  load_relative 'indexers/no_source_specified_error'
122
- load_relative 'indexers/base'
123
- load_relative 'indexers/default'
122
+ load_relative 'indexers/serial'
124
123
  #
125
124
  # load_relative 'indexers/solr'
126
125
 
@@ -132,7 +131,7 @@ module Loader
132
131
  #
133
132
  load_relative 'cacher/partial/strategy'
134
133
  load_relative 'cacher/partial/none'
135
- load_relative 'cacher/partial/subtoken'
134
+ load_relative 'cacher/partial/substring'
136
135
  load_relative 'cacher/partial/default'
137
136
 
138
137
  # Weight index generation strategies.
@@ -167,27 +166,27 @@ module Loader
167
166
  load_relative 'index/file/json'
168
167
  load_relative 'index/files'
169
168
 
170
- # Index types.
169
+ # Indexing and Indexed things.
171
170
  #
172
- load_relative 'bundle'
171
+ load_relative 'index/bundle'
173
172
 
174
173
  load_relative 'indexing/bundle'
175
174
  load_relative 'indexing/category'
176
175
  load_relative 'indexing/categories'
177
- load_relative 'indexing/type'
178
- load_relative 'indexing/types'
176
+ load_relative 'indexing/index'
177
+ load_relative 'indexing/indexes'
179
178
 
180
- load_relative 'index/bundle'
181
- load_relative 'index/category'
182
- load_relative 'index/categories'
183
- load_relative 'index/type'
184
- load_relative 'index/types'
179
+ load_relative 'indexed/bundle'
180
+ load_relative 'indexed/category'
181
+ load_relative 'indexed/categories'
182
+ load_relative 'indexed/index'
183
+ load_relative 'indexed/indexes'
185
184
 
186
- load_relative 'types'
185
+ load_relative 'indexes_api'
187
186
  load_relative 'alias_instances'
188
- load_relative 'type'
187
+ load_relative 'index_api'
189
188
 
190
- load_relative 'index/wrappers/exact_first'
189
+ load_relative 'indexed/wrappers/exact_first'
191
190
 
192
191
  # Tokens.
193
192
  #
@@ -240,11 +239,7 @@ module Loader
240
239
 
241
240
  # Configuration.
242
241
  #
243
- load_relative 'configuration/indexes'
244
-
245
- # ... in Application.
246
- #
247
- load_relative 'configuration/queries'
242
+ load_relative 'configuration/index'
248
243
 
249
244
  # Application and routing.
250
245
  #
@@ -19,7 +19,7 @@ module Query
19
19
  #
20
20
  def initialize *index_type_definitions
21
21
  options = Hash === index_type_definitions.last ? index_type_definitions.pop : {}
22
- indexes = index_type_definitions.map &:index
22
+ indexes = index_type_definitions.map &:indexed
23
23
 
24
24
  @weigher = options[:weigher] || Weigher.new(indexes)
25
25
  @tokenizer = options[:tokenizer] || Tokenizers::Query.default
@@ -4,8 +4,12 @@ module Rack
4
4
  #
5
5
  # Use as follows in e.g. your rackup File:
6
6
  #
7
- # Rack::Harakiri.after = 50
8
- # use Rack::Harakiri
7
+ # Rack::Harakiri.after = 100
8
+ # use Rack::Harakiri
9
+ #
10
+ # Then the Unicorn will commit suicide after 100 requests (50 is the default).
11
+ #
12
+ # The Master Unicorn process forks a new child Unicorn to replace the old one.
9
13
  #
10
14
  class Harakiri
11
15
 
@@ -21,6 +25,9 @@ module Rack
21
25
  @quit_after_requests = self.class.after || 50
22
26
  end
23
27
 
28
+ # Harakiri is a middleware, so it passes the call on after checking if it
29
+ # is time to honorably retire.
30
+ #
24
31
  def call env
25
32
  harakiri
26
33
  @app.call env
data/lib/picky/signals.rb CHANGED
@@ -1,7 +1,7 @@
1
1
  # TODO Cleanup and move to project_prototype.
2
2
  #
3
3
  # Signal.trap 'USR1' do
4
- # Indexes.reload
4
+ # Indexed.reload
5
5
  # end
6
6
  # Signal.trap 'USR2' do
7
7
  # Loader.reload
@@ -10,22 +10,9 @@ module Sources
10
10
  # * take_snapshot: Optional, called once for each type.
11
11
  class Base
12
12
 
13
- # Note: Methods listed for illustrative purposes.
13
+ # Note: Default methods do nothing.
14
14
  #
15
15
 
16
- # Called by the indexer when gathering data.
17
- #
18
- # Yields the data (id, text for id) for the given type and field.
19
- #
20
- # When implementing or overriding your own,
21
- # be sure to <tt>yield</tt> (or <tt>block.call</tt>) an id (as string or integer)
22
- # and a corresponding text for the given type symbol and
23
- # category symbol.
24
- #
25
- def harvest type, category
26
- # yields nothing
27
- end
28
-
29
16
  # Connect to the backend.
30
17
  #
31
18
  # Note: Called once per index/category combination
@@ -47,6 +34,19 @@ module Sources
47
34
 
48
35
  end
49
36
 
37
+ # Called by the indexer when gathering data.
38
+ #
39
+ # Yields the data (id, text for id) for the given type and category.
40
+ #
41
+ # When implementing or overriding your own,
42
+ # be sure to <tt>yield</tt> (or <tt>block.call</tt>) an id (as string or integer)
43
+ # and a corresponding text for the given type symbol and
44
+ # category symbol.
45
+ #
46
+ def harvest type, category
47
+ # yields nothing
48
+ end
49
+
50
50
  end
51
51
 
52
52
  end
@@ -8,9 +8,9 @@ module Sources
8
8
 
9
9
  class Couch < Base
10
10
 
11
- def initialize *field_names, options
11
+ def initialize *category_names, options
12
12
  check_gem
13
- Hash === options && options[:url] || raise_no_db_given(field_names)
13
+ Hash === options && options[:url] || raise_no_db_given(category_names)
14
14
  @db = RestClient::Resource.new options.delete(:url), options
15
15
  end
16
16
 
@@ -23,9 +23,10 @@ module Sources
23
23
 
24
24
  # Harvests the data to index.
25
25
  #
26
- def harvest type, field
26
+ def harvest type, category
27
+ category_name = category.name.to_s
27
28
  get_data do |doc|
28
- yield doc['_id'].to_i, doc[field.name.to_s] || next
29
+ yield doc['_id'].to_i, doc[category_name] || next
29
30
  end
30
31
  end
31
32
 
@@ -35,9 +36,9 @@ module Sources
35
36
  map{|row| row['doc']}.
36
37
  each &block
37
38
  end
38
-
39
- def raise_no_db_given field_names
40
- raise NoCouchDBGiven.new(field_names.join(', '))
39
+
40
+ def raise_no_db_given category_names
41
+ raise NoCouchDBGiven.new(category_names.join(', '))
41
42
  end
42
43
  end
43
44
  end
@@ -1,32 +1,32 @@
1
1
  module Sources
2
2
 
3
3
  # Describes a CSV source, a file with csv in it.
4
- # Give it a sequence of field names and a file option with the filename.
4
+ # Give it a sequence of category names and a file option with the filename.
5
5
  #
6
6
  class NoCSVFileGiven < StandardError; end
7
7
 
8
8
  class CSV < Base
9
9
 
10
- attr_reader :file_name, :field_names
10
+ attr_reader :file_name, :category_names
11
11
 
12
- def initialize *field_names, options
12
+ def initialize *category_names, options
13
13
  require 'csv'
14
- @field_names = field_names
15
- @file_name = Hash === options && options[:file] || raise_no_file_given(field_names)
14
+ @category_names = category_names
15
+ @file_name = Hash === options && options[:file] || raise_no_file_given(category_names)
16
16
  end
17
17
 
18
18
  #
19
19
  #
20
- def raise_no_file_given field_names
21
- raise NoCSVFileGiven.new(field_names.join(', '))
20
+ def raise_no_file_given category_names
21
+ raise NoCSVFileGiven.new(category_names.join(', '))
22
22
  end
23
23
 
24
24
  # Harvests the data to index.
25
25
  #
26
- def harvest _, field
27
- index = field_names.index field.name
26
+ def harvest _, category
27
+ index = category_names.index category.name
28
28
  get_data do |ary|
29
- indexed_id = ary.shift.to_i
29
+ indexed_id = ary.shift.to_i # TODO is to_i necessary?
30
30
  text = ary[index]
31
31
  next unless text
32
32
  text.force_encoding 'utf-8' # TODO Still needed?
@@ -93,11 +93,11 @@ module Sources
93
93
  # Example:
94
94
  # "SELECT indexed_id, value FROM bla_table st WHERE kind = 'bla'"
95
95
  #
96
- def harvest type, field
96
+ def harvest type, category
97
97
  connect_backend
98
98
 
99
99
  (0..count(type)).step(chunksize) do |offset|
100
- get_data(type, field, offset).each do |indexed_id, text|
100
+ get_data(type, category, offset).each do |indexed_id, text|
101
101
  next unless text
102
102
  text.force_encoding 'utf-8' # TODO Still needed?
103
103
  yield indexed_id, text
@@ -107,16 +107,16 @@ module Sources
107
107
 
108
108
  # Gets database from the backend.
109
109
  #
110
- def get_data type, field, offset
111
- database.connection.execute harvest_statement_with_offset(type, field, offset)
110
+ def get_data type, category, offset
111
+ database.connection.execute harvest_statement_with_offset(type, category, offset)
112
112
  end
113
113
 
114
114
  # Builds a harvest statement for getting data to index.
115
115
  #
116
116
  # TODO Use the adapter for this.
117
117
  #
118
- def harvest_statement_with_offset type, field, offset
119
- statement = harvest_statement type, field
118
+ def harvest_statement_with_offset type, category, offset
119
+ statement = harvest_statement type, category
120
120
 
121
121
  statement += statement.include?('WHERE') ? ' AND' : ' WHERE'
122
122
 
@@ -125,8 +125,8 @@ module Sources
125
125
 
126
126
  # Base harvest statement for dbs.
127
127
  #
128
- def harvest_statement type, field
129
- "SELECT indexed_id, #{field.name} FROM #{snapshot_table_name(type)} st"
128
+ def harvest_statement type, category
129
+ "SELECT indexed_id, #{category.name} FROM #{snapshot_table_name(type)} st"
130
130
  end
131
131
 
132
132
  # Override in subclasses.
@@ -16,10 +16,10 @@ module Sources
16
16
 
17
17
  # Harvests the data to index.
18
18
  #
19
- def harvest _, field
19
+ def harvest _, category
20
20
  get_data do |uid, data|
21
21
  indexed_id = uid
22
- text = data[field.name]
22
+ text = data[category.name]
23
23
  next unless text
24
24
  text.force_encoding 'utf-8' # TODO Still needed?
25
25
  yield indexed_id, text
@@ -38,9 +38,9 @@ module Sources
38
38
  @min = 1.0/0
39
39
  end
40
40
 
41
- # Yield the data (id, text for id) for the given type and field.
41
+ # Yield the data (id, text for id) for the given type and category.
42
42
  #
43
- def harvest type, field
43
+ def harvest type, category
44
44
  reset
45
45
 
46
46
  # Cache. TODO Make option?
@@ -49,7 +49,7 @@ module Sources
49
49
 
50
50
  # Gather min/max.
51
51
  #
52
- backend.harvest type, field do |indexed_id, location|
52
+ backend.harvest type, category do |indexed_id, location|
53
53
  location = location.to_f
54
54
  @min = location if location < @min
55
55
  locations << [indexed_id, location]
@@ -22,16 +22,6 @@ module Tokenizers
22
22
  remove_stopwords text
23
23
  end
24
24
 
25
- # Contraction.
26
- #
27
- def contracts_expressions what, to_what
28
- @contract_what = what
29
- @contract_to_what = to_what
30
- end
31
- def contract text
32
- text.gsub! @contract_what, @contract_to_what if @contract_what
33
- end
34
-
35
25
  # Illegals.
36
26
  #
37
27
  # TODO Should there be a legal?
@@ -83,7 +73,7 @@ module Tokenizers
83
73
  #
84
74
  # Default is European Character substitution.
85
75
  #
86
- def substitutes_characters_with substituter = CharacterSubstitution::European.new
76
+ def substitutes_characters_with substituter = CharacterSubstituters::WestEuropean.new
87
77
  # TODO Raise if it doesn't quack substitute?
88
78
  @substituter = substituter
89
79
  end
@@ -25,7 +25,6 @@ module Tokenizers
25
25
  text = substitute_characters text
26
26
  text.downcase!
27
27
  remove_illegals text
28
- contract text
29
28
  # we do not remove single stopwords for an entirely different
30
29
  # reason than in the query tokenizer.
31
30
  # An indexed thing with just name "UND" (a stopword) should not lose its name.
@@ -30,7 +30,6 @@ module Tokenizers
30
30
  def preprocess text
31
31
  remove_illegals text # Remove illegal characters
32
32
  remove_non_single_stopwords text # remove stop words
33
- contract text # contract st sankt etc
34
33
  text
35
34
  end
36
35
 
data/lib/tasks/index.rake CHANGED
@@ -17,10 +17,10 @@ namespace :index do
17
17
  end
18
18
 
19
19
  desc "Generates a specific index from index snapshots."
20
- task :specific, [:type, :field] => :application do |_, options|
21
- type, field = options.type, options.field
22
- Indexes.generate_index_only type.to_sym, field.to_sym
23
- Indexes.generate_cache_only type.to_sym, field.to_sym
20
+ task :specific, [:index, :category] => :application do |_, options|
21
+ index, category = options.index, options.category
22
+ Indexes.generate_index_only index.to_sym, category.to_sym
23
+ Indexes.generate_cache_only index.to_sym, category.to_sym
24
24
  end
25
25
 
26
26
  desc 'Checks the index files for files that are suspiciously small or missing.'
@@ -3,11 +3,11 @@ task :index => :application do
3
3
  Rake::Task[:'index:randomly'].invoke
4
4
  end
5
5
 
6
- desc "Try the given text in the indexer/query (type:field optional)."
7
- task :try, [:text, :type_and_field] => :application do |_, options|
8
- text, type_and_field = options.text, options.type_and_field
6
+ desc "Try the given text in the indexer/query (index:category optional)."
7
+ task :try, [:text, :index_and_category] => :application do |_, options|
8
+ text, index_and_category = options.text, options.index_and_category
9
9
 
10
- Rake::Task[:'try:both'].invoke text, type_and_field
10
+ Rake::Task[:'try:both'].invoke text, index_and_category
11
11
  end
12
12
 
13
13
  desc "Start the server."
data/lib/tasks/try.rake CHANGED
@@ -2,11 +2,11 @@
2
2
  #
3
3
  namespace :try do
4
4
 
5
- # desc "Try how a given word would be tokenized when indexing (type:field optional)."
6
- task :index, [:text, :type_and_field] => :application do |_, options|
7
- text, type_and_field = options.text, options.type_and_field
5
+ # desc "Try how a given word would be tokenized when indexing (type:category optional)."
6
+ task :index, [:text, :index_and_category] => :application do |_, options|
7
+ text, index_and_category = options.text, options.index_and_category
8
8
 
9
- tokenizer = type_and_field ? Indexes.find(*type_and_field.split(':')).tokenizer : Tokenizers::Index.default
9
+ tokenizer = index_and_category ? Indexes.find(*index_and_category.split(':')).tokenizer : Tokenizers::Index.default
10
10
 
11
11
  puts "\"#{text}\" is index tokenized as #{tokenizer.tokenize(text.dup).to_a}"
12
12
  end
@@ -18,11 +18,11 @@ namespace :try do
18
18
  puts "\"#{text}\" is query tokenized as #{Tokenizers::Query.default.tokenize(text.dup).to_a.map(&:to_s).map(&:to_sym)}"
19
19
  end
20
20
 
21
- # desc "Try the given text with both the index and the query (type:field optional)."
22
- task :both, [:text, :type_and_field] => :application do |_, options|
23
- text, type_and_field = options.text, options.type_and_field
21
+ # desc "Try the given text with both the index and the query (type:category optional)."
22
+ task :both, [:text, :index_and_category] => :application do |_, options|
23
+ text, index_and_category = options.text, options.index_and_category
24
24
 
25
- Rake::Task[:"try:index"].invoke text, type_and_field
25
+ Rake::Task[:"try:index"].invoke text, index_and_category
26
26
  Rake::Task[:"try:query"].invoke text
27
27
  end
28
28
 
@@ -2,7 +2,7 @@ source :gemcutter
2
2
 
3
3
  # Gems required by Picky.
4
4
  #
5
- gem 'picky', '~> 0.11.0'
5
+ gem 'picky', '~> 0.12.0'
6
6
  gem 'rake'
7
7
  gem 'bundler'
8
8
  gem 'rack', '~> 1.2.1'
@@ -9,32 +9,33 @@
9
9
  class PickySearch < Application
10
10
 
11
11
  # Indexing: How text is indexed.
12
- # Querying: How query text is handled.
13
12
  #
14
13
  default_indexing removes_characters: /[^a-zA-Z0-9\s\/\-\"\&\.]/,
15
14
  stopwords: /\b(and|the|of|it|in|for)\b/,
16
15
  splits_text_on: /[\s\/\-\"\&\.]/
17
16
 
17
+ # Querying: How query text is handled.
18
+ #
18
19
  default_querying removes_characters: /[^a-zA-Z0-9\s\/\-\,\&\"\~\*\:]/, # Picky needs control chars *"~: to pass through.
19
20
  stopwords: /\b(and|the|of|it|in|for)\b/,
20
21
  splits_text_on: /[\s\/\-\,\&]+/,
21
22
 
22
- maximum_tokens: 5, # Max amount of tokens passing into a query. 5 is the default.
23
- substitutes_characters_with: CharacterSubstitution::European.new # Normalizes special user input, Ä -> Ae, ñ -> n etc.
23
+ maximum_tokens: 5, # Amount of tokens passing into a query (5 = default).
24
+ substitutes_characters_with: CharacterSubstituters::WestEuropean.new # Normalizes special user input, Ä -> Ae, ñ -> n etc.
24
25
 
25
26
  # Define an index. Use a database etc. source?
26
27
  # See http://github.com/floere/picky/wiki/Sources-Configuration#sources
27
28
  #
28
29
  books_index = index :books, Sources::CSV.new(:title, :author, :isbn, file: 'app/library.csv')
29
- books_index.category :title,
30
- similarity: Similarity::Phonetic.new(3), # Up to three similar title word indexed (default: No similarity).
31
- partial: Partial::Substring.new(from: 1) # Indexes substrings upwards from character 1 (default: -3),
32
- # You'll find "picky" even when entering just a "p".
33
- books_index.category :author,
34
- partial: Partial::Substring.new(from: 1)
35
- books_index.category :isbn,
36
- partial: Partial::None.new # Partial substring searching on an ISBN does not make
37
- # much sense, neither does similarity.
30
+ books_index.define_category :title,
31
+ similarity: Similarity::Phonetic.new(3), # Up to three similar title word indexed (default: No similarity).
32
+ partial: Partial::Substring.new(from: 1) # Indexes substrings upwards from character 1 (default: -3),
33
+ # You'll find "picky" even when entering just a "p".
34
+ books_index.define_category :author,
35
+ partial: Partial::Substring.new(from: 1)
36
+ books_index.define_category :isbn,
37
+ partial: Partial::None.new # Partial substring searching on an ISBN does not make
38
+ # much sense, neither does similarity.
38
39
 
39
40
  query_options = { :weights => { [:title, :author] => +3, [:title] => +1 } } # +/- points for ordered combinations.
40
41