picky 1.5.2 → 1.5.3
This diff shows the content of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
- data/lib/picky/analyzer.rb +154 -0
- data/lib/picky/application.rb +53 -33
- data/lib/picky/character_substituters/west_european.rb +10 -6
- data/lib/picky/cli.rb +18 -18
- data/lib/picky/index/base.rb +44 -13
- data/lib/picky/index_bundle.rb +13 -4
- data/lib/picky/indexed/indexes.rb +26 -10
- data/lib/picky/indexing/indexes.rb +26 -24
- data/lib/picky/interfaces/live_parameters.rb +23 -16
- data/lib/picky/internals/extensions/object.rb +13 -6
- data/lib/picky/internals/frontend_adapters/rack.rb +30 -34
- data/lib/picky/internals/index/backend.rb +1 -2
- data/lib/picky/internals/index/file/basic.rb +18 -14
- data/lib/picky/internals/index/files.rb +16 -6
- data/lib/picky/internals/index/redis/basic.rb +12 -5
- data/lib/picky/internals/index/redis.rb +2 -2
- data/lib/picky/internals/indexed/bundle/base.rb +58 -14
- data/lib/picky/internals/indexed/bundle/memory.rb +40 -14
- data/lib/picky/internals/indexed/bundle/redis.rb +9 -30
- data/lib/picky/internals/indexed/categories.rb +19 -14
- data/lib/picky/internals/indexed/category.rb +44 -20
- data/lib/picky/internals/indexed/index.rb +23 -13
- data/lib/picky/internals/indexed/wrappers/bundle/wrapper.rb +27 -9
- data/lib/picky/internals/indexers/serial.rb +1 -1
- data/lib/picky/internals/indexing/bundle/base.rb +28 -28
- data/lib/picky/internals/indexing/bundle/memory.rb +14 -7
- data/lib/picky/internals/indexing/categories.rb +15 -11
- data/lib/picky/internals/indexing/category.rb +30 -20
- data/lib/picky/internals/indexing/index.rb +22 -14
- data/lib/picky/internals/query/allocations.rb +0 -15
- data/lib/picky/internals/query/combinations/base.rb +0 -4
- data/lib/picky/internals/query/combinations/redis.rb +19 -8
- data/lib/picky/internals/query/indexes.rb +3 -6
- data/lib/picky/internals/query/token.rb +0 -4
- data/lib/picky/internals/query/weights.rb +2 -11
- data/lib/picky/internals/results/base.rb +3 -10
- data/lib/picky/internals/tokenizers/base.rb +64 -28
- data/lib/picky/internals/tokenizers/index.rb +8 -8
- data/lib/picky/loader.rb +59 -53
- data/lib/picky/query/base.rb +23 -29
- data/lib/picky/sources/base.rb +10 -10
- data/lib/picky/sources/couch.rb +14 -10
- data/lib/picky/sources/csv.rb +21 -14
- data/lib/picky/sources/db.rb +37 -31
- data/lib/picky/sources/delicious.rb +11 -8
- data/lib/picky/sources/wrappers/base.rb +3 -1
- data/lib/picky/statistics.rb +66 -0
- data/lib/tasks/application.rake +3 -0
- data/lib/tasks/checks.rake +11 -0
- data/lib/tasks/framework.rake +3 -0
- data/lib/tasks/index.rake +9 -11
- data/lib/tasks/routes.rake +3 -2
- data/lib/tasks/shortcuts.rake +17 -5
- data/lib/tasks/statistics.rake +20 -12
- data/lib/tasks/try.rake +14 -14
- data/spec/lib/application_spec.rb +3 -3
- data/spec/lib/index/base_spec.rb +25 -3
- data/spec/lib/internals/extensions/object_spec.rb +46 -20
- data/spec/lib/internals/frontend_adapters/rack_spec.rb +3 -3
- data/spec/lib/internals/index/redis/basic_spec.rb +67 -0
- data/spec/lib/internals/indexers/serial_spec.rb +1 -1
- data/spec/lib/internals/results/base_spec.rb +0 -12
- data/spec/lib/internals/tokenizers/base_spec.rb +49 -1
- data/spec/lib/query/allocations_spec.rb +0 -56
- data/spec/lib/query/base_spec.rb +25 -21
- data/spec/lib/query/combinations/redis_spec.rb +6 -1
- data/spec/lib/sources/delicious_spec.rb +2 -2
- data/spec/lib/statistics_spec.rb +31 -0
- metadata +9 -2
data/lib/tasks/index.rake
CHANGED
@@ -1,7 +1,7 @@
|
|
1
|
-
#
|
1
|
+
# Indexing tasks.
|
2
2
|
#
|
3
3
|
namespace :index do
|
4
|
-
|
4
|
+
|
5
5
|
desc "Takes a snapshot, indexes, and caches in random order."
|
6
6
|
task :randomly => :application do
|
7
7
|
Indexes.index true
|
@@ -10,23 +10,21 @@ namespace :index do
|
|
10
10
|
task :ordered => :application do
|
11
11
|
Indexes.index false
|
12
12
|
end
|
13
|
-
|
13
|
+
|
14
14
|
# desc "Generates the index snapshots."
|
15
|
+
#
|
16
|
+
# Note: Hidden since it is only needed by pro users.
|
17
|
+
#
|
18
|
+
# desc "Generate the data snapshots (intermediate table on a DB source)"
|
15
19
|
task :generate_snapshots => :application do
|
16
20
|
Indexes.take_snapshot
|
17
21
|
end
|
18
|
-
|
22
|
+
|
19
23
|
desc "Generates a specific index from index snapshots (category optional)."
|
20
24
|
task :specific, [:index, :category] => :application do |_, options|
|
21
25
|
index, category = options.index, options.category
|
22
26
|
Indexes.generate_index_only index.to_sym, category && category.to_sym
|
23
27
|
Indexes.generate_cache_only index.to_sym, category && category.to_sym
|
24
28
|
end
|
25
|
-
|
26
|
-
desc 'Checks the index files for files that are suspiciously small or missing.'
|
27
|
-
task :check => :application do
|
28
|
-
Indexes.check_caches
|
29
|
-
puts "All indexes look ok."
|
30
|
-
end
|
31
|
-
|
29
|
+
|
32
30
|
end
|
data/lib/tasks/routes.rake
CHANGED
data/lib/tasks/shortcuts.rake
CHANGED
@@ -1,13 +1,25 @@
|
|
1
|
+
# Shortcut tasks.
|
2
|
+
#
|
3
|
+
|
1
4
|
desc "Generate the index (random order)."
|
2
5
|
task :index => :application do
|
3
6
|
Rake::Task[:'index:randomly'].invoke
|
4
7
|
end
|
5
8
|
|
6
|
-
desc "Try the given text in the indexer/query (index
|
7
|
-
task :try, [:text, :
|
8
|
-
text,
|
9
|
-
|
10
|
-
Rake::Task[:'try:both'].invoke text,
|
9
|
+
desc "Try the given text in the indexer/query (index and category optional)."
|
10
|
+
task :try, [:text, :index, :category] => :application do |_, options|
|
11
|
+
text, index, category = options.text, options.index, options.category
|
12
|
+
|
13
|
+
Rake::Task[:'try:both'].invoke text, index, category
|
14
|
+
end
|
15
|
+
|
16
|
+
desc "Application summary."
|
17
|
+
task :stats do
|
18
|
+
Rake::Task[:'stats:app'].invoke
|
19
|
+
end
|
20
|
+
desc "Analyze your indexes (needs rake index)."
|
21
|
+
task :analyze do
|
22
|
+
Rake::Task[:'stats:analyze'].invoke
|
11
23
|
end
|
12
24
|
|
13
25
|
desc "Start the server."
|
data/lib/tasks/statistics.rake
CHANGED
@@ -1,13 +1,21 @@
|
|
1
|
-
|
2
|
-
|
3
|
-
|
4
|
-
|
5
|
-
|
6
|
-
|
7
|
-
|
8
|
-
|
9
|
-
|
10
|
-
|
11
|
-
|
12
|
-
|
1
|
+
# Statistics tasks.
|
2
|
+
#
|
3
|
+
namespace :stats do
|
4
|
+
task :prepare => :application do
|
5
|
+
require File.expand_path('../../picky/statistics', __FILE__)
|
6
|
+
statistics = Statistics.instance
|
7
|
+
end
|
8
|
+
task :app => :prepare do
|
9
|
+
Statistics.instance.application
|
10
|
+
puts Statistics.instance
|
11
|
+
end
|
12
|
+
task :analyze => :prepare do
|
13
|
+
begin
|
14
|
+
Statistics.instance.analyze
|
15
|
+
rescue StandardError
|
16
|
+
puts "\n\033[31mNote: rake analyze needs prepared indexes. Run rake index first.\033[m\n\n"
|
17
|
+
raise
|
18
|
+
end
|
19
|
+
puts Statistics.instance
|
20
|
+
end
|
13
21
|
end
|
data/lib/tasks/try.rake
CHANGED
@@ -1,29 +1,29 @@
|
|
1
1
|
# Tasks for manually testing your engine configuration.
|
2
2
|
#
|
3
3
|
namespace :try do
|
4
|
-
|
4
|
+
|
5
5
|
# desc "Try how a given word would be tokenized when indexing (type:category optional)."
|
6
|
-
task :index, [:text, :
|
7
|
-
text,
|
8
|
-
|
9
|
-
tokenizer =
|
10
|
-
|
6
|
+
task :index, [:text, :index, :category] => :application do |_, options|
|
7
|
+
text, index, category = options.text, options.index, options.category
|
8
|
+
|
9
|
+
tokenizer = index && category ? Indexes.find(index, category).tokenizer : Internals::Tokenizers::Index.default
|
10
|
+
|
11
11
|
puts "\"#{text}\" is saved in the index as #{tokenizer.tokenize(text.dup).to_a}"
|
12
12
|
end
|
13
|
-
|
13
|
+
|
14
14
|
# desc "Try how a given word would be tokenized when querying."
|
15
15
|
task :query, [:text] => :application do |_, options|
|
16
16
|
text = options.text
|
17
|
-
|
17
|
+
|
18
18
|
puts "\"#{text}\" as a query will be preprocessed into #{Internals::Tokenizers::Query.default.tokenize(text.dup).to_a.map(&:to_s).map(&:to_sym)}"
|
19
19
|
end
|
20
|
-
|
20
|
+
|
21
21
|
# desc "Try the given text with both the index and the query (type:category optional)."
|
22
|
-
task :both, [:text, :
|
23
|
-
text,
|
24
|
-
|
25
|
-
Rake::Task[:"try:index"].invoke text,
|
22
|
+
task :both, [:text, :index, :category] => :application do |_, options|
|
23
|
+
text, index, category = options.text, options.index, options.category
|
24
|
+
|
25
|
+
Rake::Task[:"try:index"].invoke text, index, category
|
26
26
|
Rake::Task[:"try:query"].invoke text
|
27
27
|
end
|
28
|
-
|
28
|
+
|
29
29
|
end
|
@@ -8,7 +8,7 @@ describe Application do
|
|
8
8
|
it "should run ok" do
|
9
9
|
lambda {
|
10
10
|
class MinimalTestApplication < Application
|
11
|
-
books =
|
11
|
+
books = Index::Memory.new :books, Sources::DB.new('SELECT id, title FROM books', :file => 'app/db.yml')
|
12
12
|
books.define_category :title
|
13
13
|
|
14
14
|
full = Query::Full.new books
|
@@ -43,7 +43,7 @@ describe Application do
|
|
43
43
|
substitutes_characters_with: CharacterSubstituters::WestEuropean.new,
|
44
44
|
maximum_tokens: 5
|
45
45
|
|
46
|
-
books_index =
|
46
|
+
books_index = Index::Memory.new :books,
|
47
47
|
Sources::DB.new('SELECT id, title, author, isbn13 as isbn FROM books', :file => 'app/db.yml')
|
48
48
|
books_index.define_category :title,
|
49
49
|
similarity: Similarity::DoubleLevenshtone.new(3) # Up to three similar title word indexed.
|
@@ -51,7 +51,7 @@ describe Application do
|
|
51
51
|
books_index.define_category :isbn,
|
52
52
|
partial: Partial::None.new # Partially searching on an ISBN makes not much sense.
|
53
53
|
|
54
|
-
geo_index =
|
54
|
+
geo_index = Index::Memory.new :geo, Sources::CSV.new(:location, :north, :east, file: 'data/ch.csv', col_sep: ',')
|
55
55
|
geo_index.define_category :location
|
56
56
|
geo_index.define_ranged_category(:north1, 1, precision: 3, from: :north)
|
57
57
|
.define_ranged_category(:east1, 1, precision: 3, from: :east)
|
data/spec/lib/index/base_spec.rb
CHANGED
@@ -4,22 +4,44 @@ require 'spec_helper'
|
|
4
4
|
|
5
5
|
describe Index::Base do
|
6
6
|
|
7
|
+
let(:some_source) { stub :source, :harvest => nil }
|
8
|
+
|
7
9
|
context 'initializer' do
|
8
10
|
it 'works' do
|
9
|
-
|
11
|
+
expect { described_class.new :some_index_name, some_source }.to_not raise_error
|
12
|
+
end
|
13
|
+
it 'fails correctly' do
|
14
|
+
expect { described_class.new 0, some_source }.to raise_error(<<-ERROR
|
15
|
+
The index identifier (you gave "0") for Index::Memory/Index::Redis should be a String/Symbol,
|
16
|
+
Examples:
|
17
|
+
Index::Memory.new(:my_cool_index, ...) # Recommended
|
18
|
+
Index::Redis.new("a-redis-index", ...)
|
19
|
+
ERROR
|
20
|
+
)
|
21
|
+
end
|
22
|
+
it 'fails correctly' do
|
23
|
+
expect { described_class.new :some_index_name, :some_source }.to raise_error(<<-ERROR
|
24
|
+
The index "some_index_name" should use a data source that responds to the method #harvest, which yields(id, text).
|
25
|
+
Or it could use one of the built-in sources:
|
26
|
+
Sources::DB,
|
27
|
+
Sources::CSV,
|
28
|
+
Sources::Delicious,
|
29
|
+
Sources::Couch
|
30
|
+
ERROR
|
31
|
+
)
|
10
32
|
end
|
11
33
|
it 'registers with the indexes' do
|
12
34
|
@api = described_class.allocate
|
13
35
|
|
14
36
|
::Indexes.should_receive(:register).once.with @api
|
15
37
|
|
16
|
-
@api.send :initialize, :some_index_name,
|
38
|
+
@api.send :initialize, :some_index_name, some_source
|
17
39
|
end
|
18
40
|
end
|
19
41
|
|
20
42
|
context 'unit' do
|
21
43
|
before(:each) do
|
22
|
-
@api = described_class.new :some_index_name,
|
44
|
+
@api = described_class.new :some_index_name, some_source
|
23
45
|
end
|
24
46
|
|
25
47
|
describe 'define_category' do
|
@@ -2,32 +2,58 @@ require 'spec_helper'
|
|
2
2
|
|
3
3
|
describe Object do
|
4
4
|
|
5
|
-
|
6
|
-
|
7
|
-
|
8
|
-
|
9
|
-
|
10
|
-
|
11
|
-
|
12
|
-
|
13
|
-
|
5
|
+
context 'basic object' do
|
6
|
+
let(:object) { described_class.new }
|
7
|
+
|
8
|
+
describe "exclaim" do
|
9
|
+
it "delegates to puts" do
|
10
|
+
object.should_receive(:puts).once.with :bla
|
11
|
+
|
12
|
+
object.exclaim :bla
|
13
|
+
end
|
14
|
+
end
|
15
|
+
|
16
|
+
describe "timed_exclaim" do
|
17
|
+
it "should exclaim right" do
|
18
|
+
Time.stub! :now => Time.parse('07-03-1977 12:34:56')
|
19
|
+
object.should_receive(:exclaim).once.with "12:34:56: bla"
|
20
|
+
|
21
|
+
object.timed_exclaim 'bla'
|
22
|
+
end
|
23
|
+
end
|
24
|
+
|
25
|
+
describe 'warn_gem_missing' do
|
26
|
+
it 'should warn right' do
|
27
|
+
object.should_receive(:warn).once.with "gnorf gem missing!\nTo use gnarble gnarf, you need to:\n 1. Add the following line to Gemfile:\n gem 'gnorf'\n 2. Then, run:\n bundle update\n"
|
28
|
+
|
29
|
+
object.warn_gem_missing 'gnorf', 'gnarble gnarf'
|
30
|
+
end
|
14
31
|
end
|
15
32
|
end
|
16
33
|
|
17
|
-
describe
|
18
|
-
|
19
|
-
|
20
|
-
@object.should_receive(:exclaim).once.with "12:34:56: bla"
|
34
|
+
describe 'indented_to_s' do
|
35
|
+
describe String do
|
36
|
+
let(:object) { described_class.new("Hello\nTest") }
|
21
37
|
|
22
|
-
|
38
|
+
it 'indents a default amount' do
|
39
|
+
object.indented_to_s.should == " Hello\n Test"
|
40
|
+
end
|
41
|
+
it 'indents twice' do
|
42
|
+
object.indented_to_s.indented_to_s.should == " Hello\n Test"
|
43
|
+
end
|
44
|
+
it 'indents correctly' do
|
45
|
+
object.indented_to_s(3).should == " Hello\n Test"
|
46
|
+
end
|
23
47
|
end
|
24
|
-
|
25
|
-
|
26
|
-
describe 'puts_gem_missing' do
|
27
|
-
it 'should puts right' do
|
28
|
-
@object.should_receive(:puts).once.with "gnorf gem missing!\nTo use gnarble gnarf, you need to:\n 1. Add the following line to Gemfile:\n gem 'gnorf'\n 2. Then, run:\n bundle update\n"
|
48
|
+
describe Array do
|
49
|
+
let(:object) { described_class.new(["Hello", "Test"]) }
|
29
50
|
|
30
|
-
|
51
|
+
it 'indents a default amount' do
|
52
|
+
object.indented_to_s.should == " Hello\n Test"
|
53
|
+
end
|
54
|
+
it 'indents twice' do
|
55
|
+
object.indented_to_s.indented_to_s.should == " Hello\n Test"
|
56
|
+
end
|
31
57
|
end
|
32
58
|
end
|
33
59
|
|
@@ -192,17 +192,17 @@ describe Internals::FrontendAdapters::Rack do
|
|
192
192
|
Internals::Adapters::Rack.stub! :app_for => :some_query_app
|
193
193
|
end
|
194
194
|
it 'should add the right route' do
|
195
|
-
@routes.should_receive(:add_route).once.with :some_query_app, { :request_method => "GET", :path_info => /some_url/ }
|
195
|
+
@routes.should_receive(:add_route).once.with :some_query_app, { :request_method => "GET", :path_info => /some_url/ }, {}, "some_query"
|
196
196
|
|
197
197
|
@rack_adapter.route_one %r{some_url}, :some_query, {}
|
198
198
|
end
|
199
199
|
it 'should add the right route' do
|
200
|
-
@routes.should_receive(:add_route).once.with :some_query_app, { :request_method => "GET", :path_info => /some_url/ }
|
200
|
+
@routes.should_receive(:add_route).once.with :some_query_app, { :request_method => "GET", :path_info => /some_url/ }, {}, "some_query"
|
201
201
|
|
202
202
|
@rack_adapter.route_one 'some_url', :some_query, {}
|
203
203
|
end
|
204
204
|
it 'should add the right route' do
|
205
|
-
@routes.should_receive(:add_route).once.with :some_query_app, { :request_method => "GET", :glarf => :blarf, :path_info => /some_url/ }
|
205
|
+
@routes.should_receive(:add_route).once.with :some_query_app, { :request_method => "GET", :glarf => :blarf, :path_info => /some_url/ }, {}, "some_query"
|
206
206
|
|
207
207
|
@rack_adapter.route_one 'some_url', :some_query, { :glarf => :blarf }
|
208
208
|
end
|
@@ -0,0 +1,67 @@
|
|
1
|
+
require 'spec_helper'
|
2
|
+
|
3
|
+
describe Internals::Index::Redis::Basic do
|
4
|
+
|
5
|
+
let(:redis) { described_class.new "some_namespace" }
|
6
|
+
|
7
|
+
describe 'load, retrieve, backup, delete' do
|
8
|
+
it 'is nothing they do (at least on the backend)' do
|
9
|
+
redis.should_receive(:backend).never
|
10
|
+
|
11
|
+
redis.load
|
12
|
+
redis.retrieve
|
13
|
+
redis.backup
|
14
|
+
redis.delete
|
15
|
+
end
|
16
|
+
end
|
17
|
+
|
18
|
+
describe 'cache_small?' do
|
19
|
+
context 'size 0' do
|
20
|
+
before(:each) do
|
21
|
+
redis.stub! :size => 0
|
22
|
+
end
|
23
|
+
it 'is small' do
|
24
|
+
redis.cache_small?.should == true
|
25
|
+
end
|
26
|
+
end
|
27
|
+
context 'size 1' do
|
28
|
+
before(:each) do
|
29
|
+
redis.stub! :size => 1
|
30
|
+
end
|
31
|
+
it 'is not small' do
|
32
|
+
redis.cache_small?.should == false
|
33
|
+
end
|
34
|
+
end
|
35
|
+
end
|
36
|
+
|
37
|
+
describe 'cache_ok?' do
|
38
|
+
context 'size 0' do
|
39
|
+
before(:each) do
|
40
|
+
redis.stub! :size => 0
|
41
|
+
end
|
42
|
+
it 'is not ok' do
|
43
|
+
redis.cache_ok?.should == false
|
44
|
+
end
|
45
|
+
end
|
46
|
+
context 'size 1' do
|
47
|
+
before(:each) do
|
48
|
+
redis.stub! :size => 1
|
49
|
+
end
|
50
|
+
it 'is ok' do
|
51
|
+
redis.cache_ok?.should == true
|
52
|
+
end
|
53
|
+
end
|
54
|
+
end
|
55
|
+
|
56
|
+
describe "size" do
|
57
|
+
it 'delegates to the backend' do
|
58
|
+
backend = stub :backend
|
59
|
+
redis.stub! :backend => backend
|
60
|
+
|
61
|
+
backend.should_receive(:dbsize).once.with
|
62
|
+
|
63
|
+
redis.size
|
64
|
+
end
|
65
|
+
end
|
66
|
+
|
67
|
+
end
|
@@ -40,7 +40,7 @@ describe Indexers::Serial do
|
|
40
40
|
|
41
41
|
describe "indexing_message" do
|
42
42
|
it "informs the user about what it is going to index" do
|
43
|
-
@indexer.should_receive(:timed_exclaim).once.with '
|
43
|
+
@indexer.should_receive(:timed_exclaim).once.with '"some_index:some_category": Starting indexing.'
|
44
44
|
|
45
45
|
@indexer.indexing_message
|
46
46
|
end
|
@@ -26,18 +26,6 @@ describe Internals::Results::Base do
|
|
26
26
|
end
|
27
27
|
end
|
28
28
|
|
29
|
-
describe "random_ids" do
|
30
|
-
before(:each) do
|
31
|
-
@allocations = stub :allocations
|
32
|
-
@results = described_class.new :unimportant, @allocations
|
33
|
-
end
|
34
|
-
it "delegates" do
|
35
|
-
@allocations.should_receive(:random_ids).once.with :anything
|
36
|
-
|
37
|
-
@results.random_ids :anything
|
38
|
-
end
|
39
|
-
end
|
40
|
-
|
41
29
|
describe 'to_log' do
|
42
30
|
before(:each) do
|
43
31
|
time = stub :time, :to_s => '0-08-16 10:07:33'
|
@@ -12,11 +12,38 @@ describe Internals::Tokenizers::Base do
|
|
12
12
|
it 'rejects tokens that are called :hello' do
|
13
13
|
tokenizer.reject([:hel, :hell, :hello]).should == [:hel, :hell]
|
14
14
|
end
|
15
|
+
describe 'to_s' do
|
16
|
+
it 'does something' do
|
17
|
+
tokenizer.to_s.should == <<-EXPECTED
|
18
|
+
Removes characters: -
|
19
|
+
Stopwords: -
|
20
|
+
Splits text on: /\\s/
|
21
|
+
Removes chars after split: -
|
22
|
+
Normalizes words: -
|
23
|
+
Rejects tokens? Yes, see line 8 in app/application.rb
|
24
|
+
Substitutes chars? -
|
25
|
+
EXPECTED
|
26
|
+
end
|
27
|
+
end
|
15
28
|
end
|
16
29
|
|
17
30
|
context 'with normal instance' do
|
18
31
|
let(:tokenizer) { described_class.new }
|
19
|
-
|
32
|
+
|
33
|
+
describe 'to_s' do
|
34
|
+
it 'does something' do
|
35
|
+
tokenizer.to_s.should == <<-EXPECTED
|
36
|
+
Removes characters: -
|
37
|
+
Stopwords: -
|
38
|
+
Splits text on: /\\s/
|
39
|
+
Removes chars after split: -
|
40
|
+
Normalizes words: -
|
41
|
+
Rejects tokens? -
|
42
|
+
Substitutes chars? -
|
43
|
+
EXPECTED
|
44
|
+
end
|
45
|
+
end
|
46
|
+
|
20
47
|
describe 'reject_token_if' do
|
21
48
|
it 'rejects empty tokens by default' do
|
22
49
|
tokenizer.reject(['a', nil, '', 'b']).should == ['a', 'b']
|
@@ -32,6 +59,9 @@ describe Internals::Tokenizers::Base do
|
|
32
59
|
it "doesn't substitute if there is no substituter" do
|
33
60
|
tokenizer.substitute_characters('abcdefghijklmnopqrstuvwxyzäöü').should == 'abcdefghijklmnopqrstuvwxyzäöü'
|
34
61
|
end
|
62
|
+
it 'raises if nothing with #substitute is given' do
|
63
|
+
expect { tokenizer.substitutes_characters_with Object.new }.to raise_error("The substitutes_characters_with option needs a character substituter, which responds to #substitute.")
|
64
|
+
end
|
35
65
|
it "uses the substituter to replace characters" do
|
36
66
|
tokenizer.substitutes_characters_with CharacterSubstituters::WestEuropean.new
|
37
67
|
|
@@ -45,6 +75,9 @@ describe Internals::Tokenizers::Base do
|
|
45
75
|
end
|
46
76
|
|
47
77
|
describe "removes_characters_after_splitting" do
|
78
|
+
it 'handles broken arguments' do
|
79
|
+
expect { tokenizer.removes_characters_after_splitting("gnorf") }.to raise_error(ArgumentError)
|
80
|
+
end
|
48
81
|
context "without removes_characters_after_splitting called" do
|
49
82
|
it "has remove_after_normalizing_illegals" do
|
50
83
|
expect { tokenizer.remove_after_normalizing_illegals('any') }.to_not raise_error
|
@@ -69,6 +102,9 @@ describe Internals::Tokenizers::Base do
|
|
69
102
|
end
|
70
103
|
|
71
104
|
describe "normalizes_words" do
|
105
|
+
it 'handles broken arguments' do
|
106
|
+
expect { tokenizer.normalizes_words(:not_an_array) }.to raise_error(ArgumentError)
|
107
|
+
end
|
72
108
|
context "without normalizes_words called" do
|
73
109
|
it "has normalize_with_patterns" do
|
74
110
|
expect { tokenizer.normalize_with_patterns('any') }.to_not raise_error
|
@@ -96,6 +132,12 @@ describe Internals::Tokenizers::Base do
|
|
96
132
|
end
|
97
133
|
|
98
134
|
describe "splits_text_on" do
|
135
|
+
it 'handles nonbroken arguments' do
|
136
|
+
expect { tokenizer.splits_text_on("hello") }.to_not raise_error(ArgumentError)
|
137
|
+
end
|
138
|
+
it 'handles broken arguments' do
|
139
|
+
expect { tokenizer.splits_text_on(:gnorf) }.to raise_error(ArgumentError)
|
140
|
+
end
|
99
141
|
context "without splits_text_on called" do
|
100
142
|
it "has split" do
|
101
143
|
lambda { tokenizer.split('any') }.should_not raise_error
|
@@ -121,6 +163,9 @@ describe Internals::Tokenizers::Base do
|
|
121
163
|
end
|
122
164
|
|
123
165
|
describe "removes_characters" do
|
166
|
+
it 'handles broken arguments' do
|
167
|
+
expect { tokenizer.removes_characters("hello") }.to raise_error(ArgumentError)
|
168
|
+
end
|
124
169
|
context "without removes_characters called" do
|
125
170
|
it "has remove_illegals" do
|
126
171
|
expect { tokenizer.remove_illegals('any') }.to_not raise_error
|
@@ -145,6 +190,9 @@ describe Internals::Tokenizers::Base do
|
|
145
190
|
end
|
146
191
|
|
147
192
|
describe 'stopwords' do
|
193
|
+
it 'handles broken arguments' do
|
194
|
+
expect { tokenizer.stopwords("hello") }.to raise_error(ArgumentError)
|
195
|
+
end
|
148
196
|
context 'without stopwords given' do
|
149
197
|
it 'should define a method remove_stopwords' do
|
150
198
|
lambda { tokenizer.remove_stopwords('from this text') }.should_not raise_error
|
@@ -179,62 +179,6 @@ describe Internals::Query::Allocations do
|
|
179
179
|
end
|
180
180
|
end
|
181
181
|
|
182
|
-
describe 'random_ids' do
|
183
|
-
context 'enough ids' do
|
184
|
-
before(:each) do
|
185
|
-
@allocation1 = stub :allocation1, :ids => [1, 2, 3]
|
186
|
-
@allocation2 = stub :allocation2, :ids => [4, 5, 6, 7]
|
187
|
-
@allocations = described_class.new [@allocation1, @allocation2]
|
188
|
-
end
|
189
|
-
it 'should return one random id from the first allocations by default' do
|
190
|
-
@allocations.random_ids.size.should == 1
|
191
|
-
end
|
192
|
-
it 'should return multiple random ids from the first allocation' do
|
193
|
-
@allocations.random_ids(5).size.should == 3
|
194
|
-
end
|
195
|
-
it "should return one random id from the first allocation's ids" do
|
196
|
-
(1..7).to_a.should include(@allocations.random_ids.first)
|
197
|
-
end
|
198
|
-
it 'should not contain the same id twice' do
|
199
|
-
20.times do
|
200
|
-
@allocations.random_ids(2).uniq.size.should_not == 1
|
201
|
-
end
|
202
|
-
end
|
203
|
-
end
|
204
|
-
context 'just one id' do
|
205
|
-
before(:each) do
|
206
|
-
@allocation1 = stub :allocation1, :ids => [1]
|
207
|
-
@allocation2 = stub :allocation2, :ids => []
|
208
|
-
@allocations = described_class.new [@allocation1, @allocation2]
|
209
|
-
end
|
210
|
-
it 'should return one random id from its allocations by default' do
|
211
|
-
@allocations.random_ids.size.should == 1
|
212
|
-
end
|
213
|
-
it 'should return multiple random ids from its allocations' do
|
214
|
-
@allocations.random_ids(5).size.should == 1
|
215
|
-
end
|
216
|
-
it 'should return one random id from the allocations ids' do
|
217
|
-
@allocations.random_ids.should == [1]
|
218
|
-
end
|
219
|
-
end
|
220
|
-
context 'no id' do
|
221
|
-
before(:each) do
|
222
|
-
@allocation1 = stub :allocation1, :ids => []
|
223
|
-
@allocation2 = stub :allocation2, :ids => []
|
224
|
-
@allocations = described_class.new [@allocation1, @allocation2]
|
225
|
-
end
|
226
|
-
it 'should return one random id from its allocations by default' do
|
227
|
-
@allocations.random_ids.size.should == 0
|
228
|
-
end
|
229
|
-
it 'should return no random id from its allocations' do
|
230
|
-
@allocations.random_ids(5).size.should == 0
|
231
|
-
end
|
232
|
-
it 'should return no random id from the allocations ids' do
|
233
|
-
@allocations.random_ids.should == []
|
234
|
-
end
|
235
|
-
end
|
236
|
-
end
|
237
|
-
|
238
182
|
describe 'to_result' do
|
239
183
|
context 'all allocations have results' do
|
240
184
|
before(:each) do
|