picky 1.5.2 → 1.5.3
Sign up to get free protection for your applications and to get access to all the features.
- data/lib/picky/analyzer.rb +154 -0
- data/lib/picky/application.rb +53 -33
- data/lib/picky/character_substituters/west_european.rb +10 -6
- data/lib/picky/cli.rb +18 -18
- data/lib/picky/index/base.rb +44 -13
- data/lib/picky/index_bundle.rb +13 -4
- data/lib/picky/indexed/indexes.rb +26 -10
- data/lib/picky/indexing/indexes.rb +26 -24
- data/lib/picky/interfaces/live_parameters.rb +23 -16
- data/lib/picky/internals/extensions/object.rb +13 -6
- data/lib/picky/internals/frontend_adapters/rack.rb +30 -34
- data/lib/picky/internals/index/backend.rb +1 -2
- data/lib/picky/internals/index/file/basic.rb +18 -14
- data/lib/picky/internals/index/files.rb +16 -6
- data/lib/picky/internals/index/redis/basic.rb +12 -5
- data/lib/picky/internals/index/redis.rb +2 -2
- data/lib/picky/internals/indexed/bundle/base.rb +58 -14
- data/lib/picky/internals/indexed/bundle/memory.rb +40 -14
- data/lib/picky/internals/indexed/bundle/redis.rb +9 -30
- data/lib/picky/internals/indexed/categories.rb +19 -14
- data/lib/picky/internals/indexed/category.rb +44 -20
- data/lib/picky/internals/indexed/index.rb +23 -13
- data/lib/picky/internals/indexed/wrappers/bundle/wrapper.rb +27 -9
- data/lib/picky/internals/indexers/serial.rb +1 -1
- data/lib/picky/internals/indexing/bundle/base.rb +28 -28
- data/lib/picky/internals/indexing/bundle/memory.rb +14 -7
- data/lib/picky/internals/indexing/categories.rb +15 -11
- data/lib/picky/internals/indexing/category.rb +30 -20
- data/lib/picky/internals/indexing/index.rb +22 -14
- data/lib/picky/internals/query/allocations.rb +0 -15
- data/lib/picky/internals/query/combinations/base.rb +0 -4
- data/lib/picky/internals/query/combinations/redis.rb +19 -8
- data/lib/picky/internals/query/indexes.rb +3 -6
- data/lib/picky/internals/query/token.rb +0 -4
- data/lib/picky/internals/query/weights.rb +2 -11
- data/lib/picky/internals/results/base.rb +3 -10
- data/lib/picky/internals/tokenizers/base.rb +64 -28
- data/lib/picky/internals/tokenizers/index.rb +8 -8
- data/lib/picky/loader.rb +59 -53
- data/lib/picky/query/base.rb +23 -29
- data/lib/picky/sources/base.rb +10 -10
- data/lib/picky/sources/couch.rb +14 -10
- data/lib/picky/sources/csv.rb +21 -14
- data/lib/picky/sources/db.rb +37 -31
- data/lib/picky/sources/delicious.rb +11 -8
- data/lib/picky/sources/wrappers/base.rb +3 -1
- data/lib/picky/statistics.rb +66 -0
- data/lib/tasks/application.rake +3 -0
- data/lib/tasks/checks.rake +11 -0
- data/lib/tasks/framework.rake +3 -0
- data/lib/tasks/index.rake +9 -11
- data/lib/tasks/routes.rake +3 -2
- data/lib/tasks/shortcuts.rake +17 -5
- data/lib/tasks/statistics.rake +20 -12
- data/lib/tasks/try.rake +14 -14
- data/spec/lib/application_spec.rb +3 -3
- data/spec/lib/index/base_spec.rb +25 -3
- data/spec/lib/internals/extensions/object_spec.rb +46 -20
- data/spec/lib/internals/frontend_adapters/rack_spec.rb +3 -3
- data/spec/lib/internals/index/redis/basic_spec.rb +67 -0
- data/spec/lib/internals/indexers/serial_spec.rb +1 -1
- data/spec/lib/internals/results/base_spec.rb +0 -12
- data/spec/lib/internals/tokenizers/base_spec.rb +49 -1
- data/spec/lib/query/allocations_spec.rb +0 -56
- data/spec/lib/query/base_spec.rb +25 -21
- data/spec/lib/query/combinations/redis_spec.rb +6 -1
- data/spec/lib/sources/delicious_spec.rb +2 -2
- data/spec/lib/statistics_spec.rb +31 -0
- metadata +9 -2
data/lib/tasks/index.rake
CHANGED
@@ -1,7 +1,7 @@
|
|
1
|
-
#
|
1
|
+
# Indexing tasks.
|
2
2
|
#
|
3
3
|
namespace :index do
|
4
|
-
|
4
|
+
|
5
5
|
desc "Takes a snapshot, indexes, and caches in random order."
|
6
6
|
task :randomly => :application do
|
7
7
|
Indexes.index true
|
@@ -10,23 +10,21 @@ namespace :index do
|
|
10
10
|
task :ordered => :application do
|
11
11
|
Indexes.index false
|
12
12
|
end
|
13
|
-
|
13
|
+
|
14
14
|
# desc "Generates the index snapshots."
|
15
|
+
#
|
16
|
+
# Note: Hidden since it is only needed by pro users.
|
17
|
+
#
|
18
|
+
# desc "Generate the data snapshots (intermediate table on a DB source)"
|
15
19
|
task :generate_snapshots => :application do
|
16
20
|
Indexes.take_snapshot
|
17
21
|
end
|
18
|
-
|
22
|
+
|
19
23
|
desc "Generates a specific index from index snapshots (category optional)."
|
20
24
|
task :specific, [:index, :category] => :application do |_, options|
|
21
25
|
index, category = options.index, options.category
|
22
26
|
Indexes.generate_index_only index.to_sym, category && category.to_sym
|
23
27
|
Indexes.generate_cache_only index.to_sym, category && category.to_sym
|
24
28
|
end
|
25
|
-
|
26
|
-
desc 'Checks the index files for files that are suspiciously small or missing.'
|
27
|
-
task :check => :application do
|
28
|
-
Indexes.check_caches
|
29
|
-
puts "All indexes look ok."
|
30
|
-
end
|
31
|
-
|
29
|
+
|
32
30
|
end
|
data/lib/tasks/routes.rake
CHANGED
data/lib/tasks/shortcuts.rake
CHANGED
@@ -1,13 +1,25 @@
|
|
1
|
+
# Shortcut tasks.
|
2
|
+
#
|
3
|
+
|
1
4
|
desc "Generate the index (random order)."
|
2
5
|
task :index => :application do
|
3
6
|
Rake::Task[:'index:randomly'].invoke
|
4
7
|
end
|
5
8
|
|
6
|
-
desc "Try the given text in the indexer/query (index
|
7
|
-
task :try, [:text, :
|
8
|
-
text,
|
9
|
-
|
10
|
-
Rake::Task[:'try:both'].invoke text,
|
9
|
+
desc "Try the given text in the indexer/query (index and category optional)."
|
10
|
+
task :try, [:text, :index, :category] => :application do |_, options|
|
11
|
+
text, index, category = options.text, options.index, options.category
|
12
|
+
|
13
|
+
Rake::Task[:'try:both'].invoke text, index, category
|
14
|
+
end
|
15
|
+
|
16
|
+
desc "Application summary."
|
17
|
+
task :stats do
|
18
|
+
Rake::Task[:'stats:app'].invoke
|
19
|
+
end
|
20
|
+
desc "Analyze your indexes (needs rake index)."
|
21
|
+
task :analyze do
|
22
|
+
Rake::Task[:'stats:analyze'].invoke
|
11
23
|
end
|
12
24
|
|
13
25
|
desc "Start the server."
|
data/lib/tasks/statistics.rake
CHANGED
@@ -1,13 +1,21 @@
|
|
1
|
-
|
2
|
-
|
3
|
-
|
4
|
-
|
5
|
-
|
6
|
-
|
7
|
-
|
8
|
-
|
9
|
-
|
10
|
-
|
11
|
-
|
12
|
-
|
1
|
+
# Statistics tasks.
|
2
|
+
#
|
3
|
+
namespace :stats do
|
4
|
+
task :prepare => :application do
|
5
|
+
require File.expand_path('../../picky/statistics', __FILE__)
|
6
|
+
statistics = Statistics.instance
|
7
|
+
end
|
8
|
+
task :app => :prepare do
|
9
|
+
Statistics.instance.application
|
10
|
+
puts Statistics.instance
|
11
|
+
end
|
12
|
+
task :analyze => :prepare do
|
13
|
+
begin
|
14
|
+
Statistics.instance.analyze
|
15
|
+
rescue StandardError
|
16
|
+
puts "\n\033[31mNote: rake analyze needs prepared indexes. Run rake index first.\033[m\n\n"
|
17
|
+
raise
|
18
|
+
end
|
19
|
+
puts Statistics.instance
|
20
|
+
end
|
13
21
|
end
|
data/lib/tasks/try.rake
CHANGED
@@ -1,29 +1,29 @@
|
|
1
1
|
# Tasks for manually testing your engine configuration.
|
2
2
|
#
|
3
3
|
namespace :try do
|
4
|
-
|
4
|
+
|
5
5
|
# desc "Try how a given word would be tokenized when indexing (type:category optional)."
|
6
|
-
task :index, [:text, :
|
7
|
-
text,
|
8
|
-
|
9
|
-
tokenizer =
|
10
|
-
|
6
|
+
task :index, [:text, :index, :category] => :application do |_, options|
|
7
|
+
text, index, category = options.text, options.index, options.category
|
8
|
+
|
9
|
+
tokenizer = index && category ? Indexes.find(index, category).tokenizer : Internals::Tokenizers::Index.default
|
10
|
+
|
11
11
|
puts "\"#{text}\" is saved in the index as #{tokenizer.tokenize(text.dup).to_a}"
|
12
12
|
end
|
13
|
-
|
13
|
+
|
14
14
|
# desc "Try how a given word would be tokenized when querying."
|
15
15
|
task :query, [:text] => :application do |_, options|
|
16
16
|
text = options.text
|
17
|
-
|
17
|
+
|
18
18
|
puts "\"#{text}\" as a query will be preprocessed into #{Internals::Tokenizers::Query.default.tokenize(text.dup).to_a.map(&:to_s).map(&:to_sym)}"
|
19
19
|
end
|
20
|
-
|
20
|
+
|
21
21
|
# desc "Try the given text with both the index and the query (type:category optional)."
|
22
|
-
task :both, [:text, :
|
23
|
-
text,
|
24
|
-
|
25
|
-
Rake::Task[:"try:index"].invoke text,
|
22
|
+
task :both, [:text, :index, :category] => :application do |_, options|
|
23
|
+
text, index, category = options.text, options.index, options.category
|
24
|
+
|
25
|
+
Rake::Task[:"try:index"].invoke text, index, category
|
26
26
|
Rake::Task[:"try:query"].invoke text
|
27
27
|
end
|
28
|
-
|
28
|
+
|
29
29
|
end
|
@@ -8,7 +8,7 @@ describe Application do
|
|
8
8
|
it "should run ok" do
|
9
9
|
lambda {
|
10
10
|
class MinimalTestApplication < Application
|
11
|
-
books =
|
11
|
+
books = Index::Memory.new :books, Sources::DB.new('SELECT id, title FROM books', :file => 'app/db.yml')
|
12
12
|
books.define_category :title
|
13
13
|
|
14
14
|
full = Query::Full.new books
|
@@ -43,7 +43,7 @@ describe Application do
|
|
43
43
|
substitutes_characters_with: CharacterSubstituters::WestEuropean.new,
|
44
44
|
maximum_tokens: 5
|
45
45
|
|
46
|
-
books_index =
|
46
|
+
books_index = Index::Memory.new :books,
|
47
47
|
Sources::DB.new('SELECT id, title, author, isbn13 as isbn FROM books', :file => 'app/db.yml')
|
48
48
|
books_index.define_category :title,
|
49
49
|
similarity: Similarity::DoubleLevenshtone.new(3) # Up to three similar title word indexed.
|
@@ -51,7 +51,7 @@ describe Application do
|
|
51
51
|
books_index.define_category :isbn,
|
52
52
|
partial: Partial::None.new # Partially searching on an ISBN makes not much sense.
|
53
53
|
|
54
|
-
geo_index =
|
54
|
+
geo_index = Index::Memory.new :geo, Sources::CSV.new(:location, :north, :east, file: 'data/ch.csv', col_sep: ',')
|
55
55
|
geo_index.define_category :location
|
56
56
|
geo_index.define_ranged_category(:north1, 1, precision: 3, from: :north)
|
57
57
|
.define_ranged_category(:east1, 1, precision: 3, from: :east)
|
data/spec/lib/index/base_spec.rb
CHANGED
@@ -4,22 +4,44 @@ require 'spec_helper'
|
|
4
4
|
|
5
5
|
describe Index::Base do
|
6
6
|
|
7
|
+
let(:some_source) { stub :source, :harvest => nil }
|
8
|
+
|
7
9
|
context 'initializer' do
|
8
10
|
it 'works' do
|
9
|
-
|
11
|
+
expect { described_class.new :some_index_name, some_source }.to_not raise_error
|
12
|
+
end
|
13
|
+
it 'fails correctly' do
|
14
|
+
expect { described_class.new 0, some_source }.to raise_error(<<-ERROR
|
15
|
+
The index identifier (you gave "0") for Index::Memory/Index::Redis should be a String/Symbol,
|
16
|
+
Examples:
|
17
|
+
Index::Memory.new(:my_cool_index, ...) # Recommended
|
18
|
+
Index::Redis.new("a-redis-index", ...)
|
19
|
+
ERROR
|
20
|
+
)
|
21
|
+
end
|
22
|
+
it 'fails correctly' do
|
23
|
+
expect { described_class.new :some_index_name, :some_source }.to raise_error(<<-ERROR
|
24
|
+
The index "some_index_name" should use a data source that responds to the method #harvest, which yields(id, text).
|
25
|
+
Or it could use one of the built-in sources:
|
26
|
+
Sources::DB,
|
27
|
+
Sources::CSV,
|
28
|
+
Sources::Delicious,
|
29
|
+
Sources::Couch
|
30
|
+
ERROR
|
31
|
+
)
|
10
32
|
end
|
11
33
|
it 'registers with the indexes' do
|
12
34
|
@api = described_class.allocate
|
13
35
|
|
14
36
|
::Indexes.should_receive(:register).once.with @api
|
15
37
|
|
16
|
-
@api.send :initialize, :some_index_name,
|
38
|
+
@api.send :initialize, :some_index_name, some_source
|
17
39
|
end
|
18
40
|
end
|
19
41
|
|
20
42
|
context 'unit' do
|
21
43
|
before(:each) do
|
22
|
-
@api = described_class.new :some_index_name,
|
44
|
+
@api = described_class.new :some_index_name, some_source
|
23
45
|
end
|
24
46
|
|
25
47
|
describe 'define_category' do
|
@@ -2,32 +2,58 @@ require 'spec_helper'
|
|
2
2
|
|
3
3
|
describe Object do
|
4
4
|
|
5
|
-
|
6
|
-
|
7
|
-
|
8
|
-
|
9
|
-
|
10
|
-
|
11
|
-
|
12
|
-
|
13
|
-
|
5
|
+
context 'basic object' do
|
6
|
+
let(:object) { described_class.new }
|
7
|
+
|
8
|
+
describe "exclaim" do
|
9
|
+
it "delegates to puts" do
|
10
|
+
object.should_receive(:puts).once.with :bla
|
11
|
+
|
12
|
+
object.exclaim :bla
|
13
|
+
end
|
14
|
+
end
|
15
|
+
|
16
|
+
describe "timed_exclaim" do
|
17
|
+
it "should exclaim right" do
|
18
|
+
Time.stub! :now => Time.parse('07-03-1977 12:34:56')
|
19
|
+
object.should_receive(:exclaim).once.with "12:34:56: bla"
|
20
|
+
|
21
|
+
object.timed_exclaim 'bla'
|
22
|
+
end
|
23
|
+
end
|
24
|
+
|
25
|
+
describe 'warn_gem_missing' do
|
26
|
+
it 'should warn right' do
|
27
|
+
object.should_receive(:warn).once.with "gnorf gem missing!\nTo use gnarble gnarf, you need to:\n 1. Add the following line to Gemfile:\n gem 'gnorf'\n 2. Then, run:\n bundle update\n"
|
28
|
+
|
29
|
+
object.warn_gem_missing 'gnorf', 'gnarble gnarf'
|
30
|
+
end
|
14
31
|
end
|
15
32
|
end
|
16
33
|
|
17
|
-
describe
|
18
|
-
|
19
|
-
|
20
|
-
@object.should_receive(:exclaim).once.with "12:34:56: bla"
|
34
|
+
describe 'indented_to_s' do
|
35
|
+
describe String do
|
36
|
+
let(:object) { described_class.new("Hello\nTest") }
|
21
37
|
|
22
|
-
|
38
|
+
it 'indents a default amount' do
|
39
|
+
object.indented_to_s.should == " Hello\n Test"
|
40
|
+
end
|
41
|
+
it 'indents twice' do
|
42
|
+
object.indented_to_s.indented_to_s.should == " Hello\n Test"
|
43
|
+
end
|
44
|
+
it 'indents correctly' do
|
45
|
+
object.indented_to_s(3).should == " Hello\n Test"
|
46
|
+
end
|
23
47
|
end
|
24
|
-
|
25
|
-
|
26
|
-
describe 'puts_gem_missing' do
|
27
|
-
it 'should puts right' do
|
28
|
-
@object.should_receive(:puts).once.with "gnorf gem missing!\nTo use gnarble gnarf, you need to:\n 1. Add the following line to Gemfile:\n gem 'gnorf'\n 2. Then, run:\n bundle update\n"
|
48
|
+
describe Array do
|
49
|
+
let(:object) { described_class.new(["Hello", "Test"]) }
|
29
50
|
|
30
|
-
|
51
|
+
it 'indents a default amount' do
|
52
|
+
object.indented_to_s.should == " Hello\n Test"
|
53
|
+
end
|
54
|
+
it 'indents twice' do
|
55
|
+
object.indented_to_s.indented_to_s.should == " Hello\n Test"
|
56
|
+
end
|
31
57
|
end
|
32
58
|
end
|
33
59
|
|
@@ -192,17 +192,17 @@ describe Internals::FrontendAdapters::Rack do
|
|
192
192
|
Internals::Adapters::Rack.stub! :app_for => :some_query_app
|
193
193
|
end
|
194
194
|
it 'should add the right route' do
|
195
|
-
@routes.should_receive(:add_route).once.with :some_query_app, { :request_method => "GET", :path_info => /some_url/ }
|
195
|
+
@routes.should_receive(:add_route).once.with :some_query_app, { :request_method => "GET", :path_info => /some_url/ }, {}, "some_query"
|
196
196
|
|
197
197
|
@rack_adapter.route_one %r{some_url}, :some_query, {}
|
198
198
|
end
|
199
199
|
it 'should add the right route' do
|
200
|
-
@routes.should_receive(:add_route).once.with :some_query_app, { :request_method => "GET", :path_info => /some_url/ }
|
200
|
+
@routes.should_receive(:add_route).once.with :some_query_app, { :request_method => "GET", :path_info => /some_url/ }, {}, "some_query"
|
201
201
|
|
202
202
|
@rack_adapter.route_one 'some_url', :some_query, {}
|
203
203
|
end
|
204
204
|
it 'should add the right route' do
|
205
|
-
@routes.should_receive(:add_route).once.with :some_query_app, { :request_method => "GET", :glarf => :blarf, :path_info => /some_url/ }
|
205
|
+
@routes.should_receive(:add_route).once.with :some_query_app, { :request_method => "GET", :glarf => :blarf, :path_info => /some_url/ }, {}, "some_query"
|
206
206
|
|
207
207
|
@rack_adapter.route_one 'some_url', :some_query, { :glarf => :blarf }
|
208
208
|
end
|
@@ -0,0 +1,67 @@
|
|
1
|
+
require 'spec_helper'
|
2
|
+
|
3
|
+
describe Internals::Index::Redis::Basic do
|
4
|
+
|
5
|
+
let(:redis) { described_class.new "some_namespace" }
|
6
|
+
|
7
|
+
describe 'load, retrieve, backup, delete' do
|
8
|
+
it 'is nothing they do (at least on the backend)' do
|
9
|
+
redis.should_receive(:backend).never
|
10
|
+
|
11
|
+
redis.load
|
12
|
+
redis.retrieve
|
13
|
+
redis.backup
|
14
|
+
redis.delete
|
15
|
+
end
|
16
|
+
end
|
17
|
+
|
18
|
+
describe 'cache_small?' do
|
19
|
+
context 'size 0' do
|
20
|
+
before(:each) do
|
21
|
+
redis.stub! :size => 0
|
22
|
+
end
|
23
|
+
it 'is small' do
|
24
|
+
redis.cache_small?.should == true
|
25
|
+
end
|
26
|
+
end
|
27
|
+
context 'size 1' do
|
28
|
+
before(:each) do
|
29
|
+
redis.stub! :size => 1
|
30
|
+
end
|
31
|
+
it 'is not small' do
|
32
|
+
redis.cache_small?.should == false
|
33
|
+
end
|
34
|
+
end
|
35
|
+
end
|
36
|
+
|
37
|
+
describe 'cache_ok?' do
|
38
|
+
context 'size 0' do
|
39
|
+
before(:each) do
|
40
|
+
redis.stub! :size => 0
|
41
|
+
end
|
42
|
+
it 'is not ok' do
|
43
|
+
redis.cache_ok?.should == false
|
44
|
+
end
|
45
|
+
end
|
46
|
+
context 'size 1' do
|
47
|
+
before(:each) do
|
48
|
+
redis.stub! :size => 1
|
49
|
+
end
|
50
|
+
it 'is ok' do
|
51
|
+
redis.cache_ok?.should == true
|
52
|
+
end
|
53
|
+
end
|
54
|
+
end
|
55
|
+
|
56
|
+
describe "size" do
|
57
|
+
it 'delegates to the backend' do
|
58
|
+
backend = stub :backend
|
59
|
+
redis.stub! :backend => backend
|
60
|
+
|
61
|
+
backend.should_receive(:dbsize).once.with
|
62
|
+
|
63
|
+
redis.size
|
64
|
+
end
|
65
|
+
end
|
66
|
+
|
67
|
+
end
|
@@ -40,7 +40,7 @@ describe Indexers::Serial do
|
|
40
40
|
|
41
41
|
describe "indexing_message" do
|
42
42
|
it "informs the user about what it is going to index" do
|
43
|
-
@indexer.should_receive(:timed_exclaim).once.with '
|
43
|
+
@indexer.should_receive(:timed_exclaim).once.with '"some_index:some_category": Starting indexing.'
|
44
44
|
|
45
45
|
@indexer.indexing_message
|
46
46
|
end
|
@@ -26,18 +26,6 @@ describe Internals::Results::Base do
|
|
26
26
|
end
|
27
27
|
end
|
28
28
|
|
29
|
-
describe "random_ids" do
|
30
|
-
before(:each) do
|
31
|
-
@allocations = stub :allocations
|
32
|
-
@results = described_class.new :unimportant, @allocations
|
33
|
-
end
|
34
|
-
it "delegates" do
|
35
|
-
@allocations.should_receive(:random_ids).once.with :anything
|
36
|
-
|
37
|
-
@results.random_ids :anything
|
38
|
-
end
|
39
|
-
end
|
40
|
-
|
41
29
|
describe 'to_log' do
|
42
30
|
before(:each) do
|
43
31
|
time = stub :time, :to_s => '0-08-16 10:07:33'
|
@@ -12,11 +12,38 @@ describe Internals::Tokenizers::Base do
|
|
12
12
|
it 'rejects tokens that are called :hello' do
|
13
13
|
tokenizer.reject([:hel, :hell, :hello]).should == [:hel, :hell]
|
14
14
|
end
|
15
|
+
describe 'to_s' do
|
16
|
+
it 'does something' do
|
17
|
+
tokenizer.to_s.should == <<-EXPECTED
|
18
|
+
Removes characters: -
|
19
|
+
Stopwords: -
|
20
|
+
Splits text on: /\\s/
|
21
|
+
Removes chars after split: -
|
22
|
+
Normalizes words: -
|
23
|
+
Rejects tokens? Yes, see line 8 in app/application.rb
|
24
|
+
Substitutes chars? -
|
25
|
+
EXPECTED
|
26
|
+
end
|
27
|
+
end
|
15
28
|
end
|
16
29
|
|
17
30
|
context 'with normal instance' do
|
18
31
|
let(:tokenizer) { described_class.new }
|
19
|
-
|
32
|
+
|
33
|
+
describe 'to_s' do
|
34
|
+
it 'does something' do
|
35
|
+
tokenizer.to_s.should == <<-EXPECTED
|
36
|
+
Removes characters: -
|
37
|
+
Stopwords: -
|
38
|
+
Splits text on: /\\s/
|
39
|
+
Removes chars after split: -
|
40
|
+
Normalizes words: -
|
41
|
+
Rejects tokens? -
|
42
|
+
Substitutes chars? -
|
43
|
+
EXPECTED
|
44
|
+
end
|
45
|
+
end
|
46
|
+
|
20
47
|
describe 'reject_token_if' do
|
21
48
|
it 'rejects empty tokens by default' do
|
22
49
|
tokenizer.reject(['a', nil, '', 'b']).should == ['a', 'b']
|
@@ -32,6 +59,9 @@ describe Internals::Tokenizers::Base do
|
|
32
59
|
it "doesn't substitute if there is no substituter" do
|
33
60
|
tokenizer.substitute_characters('abcdefghijklmnopqrstuvwxyzäöü').should == 'abcdefghijklmnopqrstuvwxyzäöü'
|
34
61
|
end
|
62
|
+
it 'raises if nothing with #substitute is given' do
|
63
|
+
expect { tokenizer.substitutes_characters_with Object.new }.to raise_error("The substitutes_characters_with option needs a character substituter, which responds to #substitute.")
|
64
|
+
end
|
35
65
|
it "uses the substituter to replace characters" do
|
36
66
|
tokenizer.substitutes_characters_with CharacterSubstituters::WestEuropean.new
|
37
67
|
|
@@ -45,6 +75,9 @@ describe Internals::Tokenizers::Base do
|
|
45
75
|
end
|
46
76
|
|
47
77
|
describe "removes_characters_after_splitting" do
|
78
|
+
it 'handles broken arguments' do
|
79
|
+
expect { tokenizer.removes_characters_after_splitting("gnorf") }.to raise_error(ArgumentError)
|
80
|
+
end
|
48
81
|
context "without removes_characters_after_splitting called" do
|
49
82
|
it "has remove_after_normalizing_illegals" do
|
50
83
|
expect { tokenizer.remove_after_normalizing_illegals('any') }.to_not raise_error
|
@@ -69,6 +102,9 @@ describe Internals::Tokenizers::Base do
|
|
69
102
|
end
|
70
103
|
|
71
104
|
describe "normalizes_words" do
|
105
|
+
it 'handles broken arguments' do
|
106
|
+
expect { tokenizer.normalizes_words(:not_an_array) }.to raise_error(ArgumentError)
|
107
|
+
end
|
72
108
|
context "without normalizes_words called" do
|
73
109
|
it "has normalize_with_patterns" do
|
74
110
|
expect { tokenizer.normalize_with_patterns('any') }.to_not raise_error
|
@@ -96,6 +132,12 @@ describe Internals::Tokenizers::Base do
|
|
96
132
|
end
|
97
133
|
|
98
134
|
describe "splits_text_on" do
|
135
|
+
it 'handles nonbroken arguments' do
|
136
|
+
expect { tokenizer.splits_text_on("hello") }.to_not raise_error(ArgumentError)
|
137
|
+
end
|
138
|
+
it 'handles broken arguments' do
|
139
|
+
expect { tokenizer.splits_text_on(:gnorf) }.to raise_error(ArgumentError)
|
140
|
+
end
|
99
141
|
context "without splits_text_on called" do
|
100
142
|
it "has split" do
|
101
143
|
lambda { tokenizer.split('any') }.should_not raise_error
|
@@ -121,6 +163,9 @@ describe Internals::Tokenizers::Base do
|
|
121
163
|
end
|
122
164
|
|
123
165
|
describe "removes_characters" do
|
166
|
+
it 'handles broken arguments' do
|
167
|
+
expect { tokenizer.removes_characters("hello") }.to raise_error(ArgumentError)
|
168
|
+
end
|
124
169
|
context "without removes_characters called" do
|
125
170
|
it "has remove_illegals" do
|
126
171
|
expect { tokenizer.remove_illegals('any') }.to_not raise_error
|
@@ -145,6 +190,9 @@ describe Internals::Tokenizers::Base do
|
|
145
190
|
end
|
146
191
|
|
147
192
|
describe 'stopwords' do
|
193
|
+
it 'handles broken arguments' do
|
194
|
+
expect { tokenizer.stopwords("hello") }.to raise_error(ArgumentError)
|
195
|
+
end
|
148
196
|
context 'without stopwords given' do
|
149
197
|
it 'should define a method remove_stopwords' do
|
150
198
|
lambda { tokenizer.remove_stopwords('from this text') }.should_not raise_error
|
@@ -179,62 +179,6 @@ describe Internals::Query::Allocations do
|
|
179
179
|
end
|
180
180
|
end
|
181
181
|
|
182
|
-
describe 'random_ids' do
|
183
|
-
context 'enough ids' do
|
184
|
-
before(:each) do
|
185
|
-
@allocation1 = stub :allocation1, :ids => [1, 2, 3]
|
186
|
-
@allocation2 = stub :allocation2, :ids => [4, 5, 6, 7]
|
187
|
-
@allocations = described_class.new [@allocation1, @allocation2]
|
188
|
-
end
|
189
|
-
it 'should return one random id from the first allocations by default' do
|
190
|
-
@allocations.random_ids.size.should == 1
|
191
|
-
end
|
192
|
-
it 'should return multiple random ids from the first allocation' do
|
193
|
-
@allocations.random_ids(5).size.should == 3
|
194
|
-
end
|
195
|
-
it "should return one random id from the first allocation's ids" do
|
196
|
-
(1..7).to_a.should include(@allocations.random_ids.first)
|
197
|
-
end
|
198
|
-
it 'should not contain the same id twice' do
|
199
|
-
20.times do
|
200
|
-
@allocations.random_ids(2).uniq.size.should_not == 1
|
201
|
-
end
|
202
|
-
end
|
203
|
-
end
|
204
|
-
context 'just one id' do
|
205
|
-
before(:each) do
|
206
|
-
@allocation1 = stub :allocation1, :ids => [1]
|
207
|
-
@allocation2 = stub :allocation2, :ids => []
|
208
|
-
@allocations = described_class.new [@allocation1, @allocation2]
|
209
|
-
end
|
210
|
-
it 'should return one random id from its allocations by default' do
|
211
|
-
@allocations.random_ids.size.should == 1
|
212
|
-
end
|
213
|
-
it 'should return multiple random ids from its allocations' do
|
214
|
-
@allocations.random_ids(5).size.should == 1
|
215
|
-
end
|
216
|
-
it 'should return one random id from the allocations ids' do
|
217
|
-
@allocations.random_ids.should == [1]
|
218
|
-
end
|
219
|
-
end
|
220
|
-
context 'no id' do
|
221
|
-
before(:each) do
|
222
|
-
@allocation1 = stub :allocation1, :ids => []
|
223
|
-
@allocation2 = stub :allocation2, :ids => []
|
224
|
-
@allocations = described_class.new [@allocation1, @allocation2]
|
225
|
-
end
|
226
|
-
it 'should return one random id from its allocations by default' do
|
227
|
-
@allocations.random_ids.size.should == 0
|
228
|
-
end
|
229
|
-
it 'should return no random id from its allocations' do
|
230
|
-
@allocations.random_ids(5).size.should == 0
|
231
|
-
end
|
232
|
-
it 'should return no random id from the allocations ids' do
|
233
|
-
@allocations.random_ids.should == []
|
234
|
-
end
|
235
|
-
end
|
236
|
-
end
|
237
|
-
|
238
182
|
describe 'to_result' do
|
239
183
|
context 'all allocations have results' do
|
240
184
|
before(:each) do
|