picky 0.11.2 → 0.12.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (81) hide show
  1. data/lib/picky/Index_api.rb +49 -0
  2. data/lib/picky/alias_instances.rb +4 -1
  3. data/lib/picky/application.rb +16 -15
  4. data/lib/picky/cacher/partial/{subtoken.rb → substring.rb} +19 -18
  5. data/lib/picky/{character_substitution/european.rb → character_substituters/west_european.rb} +2 -2
  6. data/lib/picky/configuration/index.rb +67 -0
  7. data/lib/picky/cores.rb +3 -0
  8. data/lib/picky/index/bundle.rb +35 -51
  9. data/lib/picky/index/file/basic.rb +39 -5
  10. data/lib/picky/index/file/json.rb +10 -0
  11. data/lib/picky/index/file/marshal.rb +10 -0
  12. data/lib/picky/index/file/text.rb +22 -0
  13. data/lib/picky/index/files.rb +11 -36
  14. data/lib/picky/indexed/bundle.rb +61 -0
  15. data/lib/picky/{index → indexed}/categories.rb +1 -1
  16. data/lib/picky/{index → indexed}/category.rb +13 -16
  17. data/lib/picky/{index/type.rb → indexed/index.rb} +6 -6
  18. data/lib/picky/{index/types.rb → indexed/indexes.rb} +10 -10
  19. data/lib/picky/{index → indexed}/wrappers/exact_first.rb +8 -8
  20. data/lib/picky/indexers/no_source_specified_error.rb +1 -1
  21. data/lib/picky/indexers/serial.rb +64 -0
  22. data/lib/picky/indexers/solr.rb +1 -3
  23. data/lib/picky/indexes_api.rb +41 -0
  24. data/lib/picky/indexing/bundle.rb +43 -13
  25. data/lib/picky/indexing/category.rb +17 -64
  26. data/lib/picky/indexing/{type.rb → index.rb} +13 -3
  27. data/lib/picky/indexing/{types.rb → indexes.rb} +22 -22
  28. data/lib/picky/loader.rb +17 -22
  29. data/lib/picky/query/base.rb +1 -1
  30. data/lib/picky/rack/harakiri.rb +9 -2
  31. data/lib/picky/signals.rb +1 -1
  32. data/lib/picky/sources/base.rb +14 -14
  33. data/lib/picky/sources/couch.rb +8 -7
  34. data/lib/picky/sources/csv.rb +10 -10
  35. data/lib/picky/sources/db.rb +8 -8
  36. data/lib/picky/sources/delicious.rb +2 -2
  37. data/lib/picky/sources/wrappers/location.rb +3 -3
  38. data/lib/picky/tokenizers/base.rb +1 -11
  39. data/lib/picky/tokenizers/index.rb +0 -1
  40. data/lib/picky/tokenizers/query.rb +0 -1
  41. data/lib/tasks/index.rake +4 -4
  42. data/lib/tasks/shortcuts.rake +4 -4
  43. data/lib/tasks/try.rake +8 -8
  44. data/project_prototype/Gemfile +1 -1
  45. data/project_prototype/app/application.rb +13 -12
  46. data/spec/lib/application_spec.rb +10 -38
  47. data/spec/lib/cacher/partial/{subtoken_spec.rb → substring_spec.rb} +0 -0
  48. data/spec/lib/{character_substitution/european_spec.rb → character_substituters/west_european_spec.rb} +6 -2
  49. data/spec/lib/configuration/index_spec.rb +80 -0
  50. data/spec/lib/cores_spec.rb +1 -1
  51. data/spec/lib/index/file/text_spec.rb +1 -1
  52. data/spec/lib/index/files_spec.rb +12 -32
  53. data/spec/lib/indexed/bundle_spec.rb +119 -0
  54. data/spec/lib/{indexing → indexed}/categories_spec.rb +13 -14
  55. data/spec/lib/{index → indexed}/category_spec.rb +6 -6
  56. data/spec/lib/{index/type_spec.rb → indexed/index_spec.rb} +3 -3
  57. data/spec/lib/{index → indexed}/wrappers/exact_first_spec.rb +5 -5
  58. data/spec/lib/indexers/serial_spec.rb +62 -0
  59. data/spec/lib/indexing/bundle_partial_generation_speed_spec.rb +7 -5
  60. data/spec/lib/indexing/bundle_spec.rb +9 -14
  61. data/spec/lib/indexing/category_spec.rb +9 -125
  62. data/spec/lib/indexing/{type_spec.rb → index_spec.rb} +3 -3
  63. data/spec/lib/query/base_spec.rb +1 -1
  64. data/spec/lib/query/full_spec.rb +1 -1
  65. data/spec/lib/query/live_spec.rb +2 -4
  66. data/spec/lib/sources/couch_spec.rb +5 -5
  67. data/spec/lib/sources/db_spec.rb +6 -7
  68. data/spec/lib/tokenizers/base_spec.rb +1 -24
  69. data/spec/lib/tokenizers/query_spec.rb +0 -1
  70. metadata +38 -41
  71. data/lib/picky/bundle.rb +0 -33
  72. data/lib/picky/configuration/indexes.rb +0 -51
  73. data/lib/picky/configuration/queries.rb +0 -15
  74. data/lib/picky/indexers/base.rb +0 -85
  75. data/lib/picky/indexers/default.rb +0 -3
  76. data/lib/picky/type.rb +0 -46
  77. data/lib/picky/types.rb +0 -41
  78. data/lib/tasks/cache.rake +0 -46
  79. data/spec/lib/configuration/indexes_spec.rb +0 -28
  80. data/spec/lib/index/bundle_spec.rb +0 -151
  81. data/spec/lib/indexers/base_spec.rb +0 -89
data/lib/picky/configuration/queries.rb DELETED
@@ -1,15 +0,0 @@
1
- module Configuration
2
-
3
- #
4
- #
5
- class Queries
6
-
7
- #
8
- #
9
- def default_tokenizer options = {}
10
- Tokenizers::Query.default = Tokenizers::Query.new(options)
11
- end
12
-
13
- end
14
-
15
- end
data/lib/picky/indexers/base.rb DELETED
@@ -1,85 +0,0 @@
1
- # encoding: utf-8
2
- module Indexers
3
- # Indexer.
4
- #
5
- # 1. Gets data from the original table and copies it into a "snapshot table".
6
- # 3. Processes the data. I.e. takes the snapshot table data words and tokenizes etc. them. Writes the result into a txt file.
7
- #
8
- class Base
9
-
10
- def initialize type, category
11
- @type = type
12
- @category = category
13
- end
14
-
15
- # Convenience method for getting the right Tokenizer.
16
- #
17
- def tokenizer
18
- @category.tokenizer
19
- end
20
- # Convenience methods for user subclasses.
21
- #
22
- # TODO Duplicate code in Index::Files.
23
- #
24
- # TODO Rename to prepared_index_file_name.
25
- #
26
- def search_index_file_name
27
- @category.search_index_file_name
28
- end
29
-
30
- # Executes the specific strategy.
31
- #
32
- def index
33
- process
34
- end
35
-
36
- # Get the source where the data is taken from.
37
- #
38
- def source
39
- @category.source || raise_no_source
40
- end
41
- def raise_no_source
42
- raise NoSourceSpecifiedException.new("No source given for index:#{@type.name}, category:#{@category.name}.") # TODO field.identifier
43
- end
44
-
45
- # Selects the original id (indexed id) and a column to process. The column data is called "token".
46
- #
47
- # Note: Puts together the parts first in an array, then releasing the array from time to time by joining.
48
- #
49
- def process
50
- comma = ?,
51
- newline = ?\n
52
-
53
- indexing_message
54
-
55
- # TODO Move open to Index::File.
56
- #
57
- # @category.prepared_index do |file|
58
- # source.harvest(@type, @category) do |indexed_id, text|
59
- # tokenizer.tokenize(text).each do |token_text|
60
- # next unless token_text
61
- # file.buffer indexed_id << comma << token_text << newline
62
- # end
63
- # file.write_maybe
64
- # end
65
- # end
66
- #
67
- File.open(search_index_file_name, 'w:binary') do |file|
68
- result = []
69
- source.harvest(@type, @category) do |indexed_id, text|
70
- tokenizer.tokenize(text).each do |token_text|
71
- next unless token_text
72
- result << indexed_id << comma << token_text << newline
73
- end
74
- file.write(result.join) && result.clear if result.size > 100_000
75
- end
76
- file.write result.join
77
- end
78
- end
79
-
80
- def indexing_message
81
- timed_exclaim "INDEX #{@type.name} #{@category.name}" #:#{@category.indexed_as}." # TODO field.identifier
82
- end
83
-
84
- end
85
- end
data/lib/picky/indexers/default.rb DELETED
@@ -1,3 +0,0 @@
1
- module Indexers
2
- Default = Base
3
- end
data/lib/picky/type.rb DELETED
@@ -1,46 +0,0 @@
1
- # This class defines the indexing and index API.
2
- #
3
- # Note: A Type holds both an Index::Type and an Indexing::Type.
4
- #
5
- class Type
6
-
7
- # TODO Delegation.
8
- #
9
-
10
- attr_reader :name, :indexing, :index
11
-
12
- def initialize name, source, options = {}
13
- @name = name
14
- @indexing = Indexing::Type.new name, source, options
15
- @index = Index::Type.new name, options
16
-
17
- # Centralized registry.
18
- #
19
- ::Indexes.register self
20
- end
21
-
22
- # API.
23
- #
24
- # TODO Spec! Doc!
25
- #
26
- def category name, options = {}
27
- name = name.to_sym
28
-
29
- indexing.add_category name, options
30
- index.add_category name, options
31
-
32
- self
33
- end
34
- # def location name, options = {}
35
- # grid = options.delete :grid
36
- # precision = options.delete :precision
37
- #
38
- # options[:index_tokenizer] ||= Tokenizers::Index.new # TODO Or a specific location tokenizer.
39
- # options[:query_tokenizer] ||= Tokenizers::Query.new # TODO Or a specific location tokenizer.
40
- # options[:source_wrapper] ||= Sources::Wrappers::Location.new(options)
41
- #
42
- # new_category = category name, options
43
- # :source => Sources::Wrappers::Location.new(source, grid:2), :tokenizer => Tokenizers::Index.new
44
- # end
45
-
46
- end
data/lib/picky/types.rb DELETED
@@ -1,41 +0,0 @@
1
- # Comfortable API convenience class, splits methods to indexes.
2
- #
3
- class Types
4
-
5
- attr_reader :types, :type_mapping
6
-
7
- delegate :reload,
8
- :load_from_cache,
9
- :to => :@indexes
10
-
11
- delegate :check_caches,
12
- :find,
13
- :generate_cache_only,
14
- :generate_index_only,
15
- :index,
16
- :index_for_tests,
17
- :to => :@indexings
18
-
19
- def initialize
20
- @types = []
21
- @type_mapping = {}
22
-
23
- @indexes = Index::Types.new
24
- @indexings = Indexing::Types.new
25
- end
26
-
27
- def register type
28
- self.types << type
29
- self.type_mapping[type.name] = type
30
-
31
- @indexings.register type.indexing
32
- @indexes.register type.index # TODO Even necessary?
33
- end
34
-
35
- def [] name
36
- name = name.to_sym
37
-
38
- self.type_mapping[name]
39
- end
40
-
41
- end
data/lib/tasks/cache.rake DELETED
@@ -1,46 +0,0 @@
1
- namespace :cache do
2
-
3
- # Move to index namespace.
4
- #
5
-
6
- # desc "Generates the index cache files."
7
- # task :generate => :application do
8
- # Indexes.generate_caches
9
- # puts "Caches generated."
10
- # end
11
-
12
- # desc "Generates a specific index cache file like field=books:title. Note: Index tables need to be there. Will generate just the cache."
13
- # task :only => :application do
14
- # type_and_field = ENV['FIELD'] || ENV['field']
15
- # type, field = type_and_field.split ':'
16
- # Indexes.generate_cache_only type.to_sym, field.to_sym
17
- # end
18
-
19
-
20
- # desc 'Checks the index cache files'
21
- # task :check => :application do
22
- # Indexes.check_caches
23
- # puts "All caches look ok."
24
- # end
25
-
26
-
27
- # desc "Removes the index cache files."
28
- # task :clear => :application do
29
- # Indexes.clear_caches
30
- # puts "All index cache files removed."
31
- # end
32
-
33
-
34
- # desc 'Backup the index cache files'
35
- # task :backup => :application do
36
- # Indexes.backup_caches
37
- # puts "Index cache files moved to the backup directory"
38
- # end
39
-
40
- # desc 'Restore the index cache files'
41
- # task :restore => :application do
42
- # Indexes.restore_caches
43
- # puts "Index cache files restored from the backup directory"
44
- # end
45
-
46
- end
data/spec/lib/configuration/indexes_spec.rb DELETED
@@ -1,28 +0,0 @@
1
- # encoding: utf-8
2
- require 'spec_helper'
3
-
4
- describe Configuration::Indexes do
5
-
6
- before(:each) do
7
- @config = Configuration::Indexes.new
8
- end
9
-
10
- describe "types" do
11
- it "exists" do
12
- lambda { @config.types }.should_not raise_error
13
- end
14
- it "is initially empty" do
15
- @config.types.should be_empty
16
- end
17
- end
18
-
19
- describe "default_tokenizer" do
20
- it "is a default tokenizer" do
21
- @config.default_tokenizer.should be_kind_of(Tokenizers::Index)
22
- end
23
- it "does not cache" do
24
- @config.default_tokenizer.should_not == @config.default_tokenizer
25
- end
26
- end
27
-
28
- end
data/spec/lib/index/bundle_spec.rb DELETED
@@ -1,151 +0,0 @@
1
- require 'spec_helper'
2
-
3
- describe Index::Bundle do
4
-
5
- before(:each) do
6
- @category = stub :category, :name => :some_category
7
- @type = stub :type, :name => :some_type
8
- @similarity = stub :similarity
9
- @index_class = Index::Bundle
10
- @index = @index_class.new :some_name, @category, @type, @similarity
11
- end
12
-
13
- describe 'identifier' do
14
- it 'should return a specific identifier' do
15
- @index.identifier.should == 'some_type: some_name some_category'
16
- end
17
- end
18
-
19
- describe 'initialize_index_for' do
20
- context 'token not yet assigned' do
21
- before(:each) do
22
- @index.stub! :index => {}
23
- end
24
- it 'should assign it an empty array' do
25
- @index.initialize_index_for :some_token
26
-
27
- @index.index[:some_token].should == []
28
- end
29
- end
30
- context 'token already assigned' do
31
- before(:each) do
32
- @index.stub! :index => { :some_token => :already_assigned }
33
- end
34
- it 'should not assign it anymore' do
35
- @index.initialize_index_for :some_token
36
-
37
- @index.index[:some_token].should == :already_assigned
38
- end
39
- end
40
- end
41
-
42
- # TODO
43
- #
44
- # describe 'retrieve' do
45
- # it 'should call the other methods correctly' do
46
- # results = stub :results
47
- # @index.stub! :execute_query => results
48
- # @index.should_receive(:extract).once.with results
49
- #
50
- # @index.retrieve
51
- # end
52
- # end
53
-
54
- describe 'load_from_index_file' do
55
- it 'should call two methods in order' do
56
- @index.should_receive(:load_from_index_generation_message).once.ordered
57
- @index.should_receive(:clear).once.ordered
58
- @index.should_receive(:retrieve).once.ordered
59
-
60
- @index.load_from_index_file
61
- end
62
- end
63
-
64
- describe 'ids' do
65
- before(:each) do
66
- @index.instance_variable_set :@index, { :existing => :some_ids }
67
- end
68
- it 'should return an empty array if not found' do
69
- @index.ids(:non_existing).should == []
70
- end
71
- it 'should return the ids if found' do
72
- @index.ids(:existing).should == :some_ids
73
- end
74
- end
75
-
76
- describe 'weight' do
77
- before(:each) do
78
- @index.instance_variable_set :@weights, { :existing => :specific }
79
- end
80
- it 'should return nil' do
81
- @index.weight(:non_existing).should == nil
82
- end
83
- it 'should return the weight for the text' do
84
- @index.weight(:existing).should == :specific
85
- end
86
- end
87
-
88
- describe 'load' do
89
- it 'should trigger loads' do
90
- @index.should_receive(:load_index).once.with
91
- @index.should_receive(:load_similarity).once.with
92
- @index.should_receive(:load_weights).once.with
93
-
94
- @index.load
95
- end
96
- end
97
- describe "loading indexes" do
98
- before(:each) do
99
- @index.stub! :timed_exclaim
100
- end
101
- describe "load_index" do
102
- it "uses the right file" do
103
- Yajl::Parser.stub! :parse
104
-
105
- File.should_receive(:open).once.with 'some/search/root/index/test/some_type/some_name_some_category_index.json', 'r'
106
-
107
- @index.load_index
108
- end
109
- end
110
- describe "load_similarity" do
111
- it "uses the right file" do
112
- Marshal.stub! :load
113
-
114
- File.should_receive(:open).once.with 'some/search/root/index/test/some_type/some_name_some_category_similarity.dump', 'r:binary'
115
-
116
- @index.load_similarity
117
- end
118
- end
119
- describe "load_weights" do
120
- it "uses the right file" do
121
- Yajl::Parser.stub! :parse
122
-
123
- File.should_receive(:open).once.with 'some/search/root/index/test/some_type/some_name_some_category_weights.json', 'r'
124
-
125
- @index.load_weights
126
- end
127
- end
128
- end
129
-
130
- describe 'initialization' do
131
- before(:each) do
132
- @category = stub :category, :name => :some_category
133
- @type = stub :type, :name => :some_type
134
-
135
- @index = @index_class.new :some_name, @category, @type, :similarity
136
- end
137
- it 'should initialize the index correctly' do
138
- @index.index.should == {}
139
- end
140
- it 'should initialize the weights index correctly' do
141
- @index.weights.should == {}
142
- end
143
- it 'should initialize the similarity index correctly' do
144
- @index.similarity.should == {}
145
- end
146
- it 'should initialize the similarity strategy correctly' do
147
- @index.similarity_strategy.should == :similarity
148
- end
149
- end
150
-
151
- end
data/spec/lib/indexers/base_spec.rb DELETED
@@ -1,89 +0,0 @@
1
- require 'spec_helper'
2
-
3
- describe Indexers::Base do
4
-
5
- before(:each) do
6
- @type = stub :type,
7
- :name => :some_type,
8
- :snapshot_table_name => :some_prepared_table_name
9
- @field = stub :field,
10
- :name => :some_field_name,
11
- :search_index_file_name => :some_search_index_name,
12
- :indexed_name => :some_indexed_field_name
13
- @indexer = Indexers::Base.new @type, @field
14
- @indexer.stub! :timed_exclaim
15
- end
16
-
17
- describe "tokenizer" do
18
- it "delegates to the field" do
19
- @field.should_receive(:tokenizer).once.with
20
-
21
- @indexer.tokenizer
22
- end
23
- end
24
-
25
- describe "indexing_message" do
26
- it "informs the user about what it is going to index" do
27
- @indexer.should_receive(:timed_exclaim).once.with 'INDEX some_type some_field_name'
28
-
29
- @indexer.indexing_message
30
- end
31
- end
32
-
33
- describe "tokenizer" do
34
- it "should delegate to field" do
35
- @indexer.should_receive(:tokenizer).once.with
36
-
37
- @indexer.tokenizer
38
- end
39
- end
40
-
41
- describe 'search_index_file_name' do
42
- it 'should return a specific name' do
43
- @indexer.search_index_file_name.should == :some_search_index_name
44
- end
45
- end
46
-
47
- describe "index" do
48
- it "should execute! the indexer" do
49
- @indexer.should_receive(:process).once.with
50
-
51
- @indexer.index
52
- end
53
- end
54
-
55
- describe "source" do
56
- before(:each) do
57
- @source = stub :source
58
- end
59
- context "field has one" do
60
- before(:each) do
61
- @field.stub! :source => @source
62
- end
63
- it "should return that one" do
64
- @indexer.source.should == @source
65
- end
66
- end
67
- context "field doesn't have one" do
68
- before(:each) do
69
- @field.stub! :source => nil
70
- end
71
- it "should call raise_no_source" do
72
- @indexer.should_receive(:raise_no_source).once.with
73
-
74
- @indexer.source
75
- end
76
- end
77
- end
78
-
79
- describe "raise_no_source" do
80
- it "should raise" do
81
- lambda { @indexer.raise_no_source }.should raise_error(Indexers::NoSourceSpecifiedException)
82
- end
83
- end
84
-
85
- describe "chunked" do
86
-
87
- end
88
-
89
- end