picky 1.2.3 → 1.2.4
- data/lib/picky/application.rb +1 -1
- data/lib/picky/cli.rb +6 -5
- data/lib/picky/extensions/array.rb +0 -2
- data/lib/picky/indexed/categories.rb +1 -3
- data/lib/picky/indexed/category.rb +1 -1
- data/lib/picky/indexed/index.rb +3 -1
- data/lib/picky/indexed/wrappers/exact_first.rb +1 -1
- data/lib/picky/indexes_api.rb +1 -1
- data/lib/picky/indexing/indexes.rb +1 -1
- data/lib/picky/loader.rb +0 -2
- data/lib/picky/query/allocations.rb +0 -1
- data/lib/picky/signals.rb +1 -4
- data/lib/picky/tokenizers/base.rb +13 -6
- data/lib/picky/tokenizers/index.rb +0 -13
- data/lib/picky/tokenizers/query.rb +1 -1
- data/spec/lib/application_spec.rb +9 -2
- data/spec/lib/cli_spec.rb +20 -0
- data/spec/lib/configuration/index_spec.rb +5 -5
- data/spec/lib/index/files_spec.rb +4 -4
- data/spec/lib/indexed/bundle_spec.rb +4 -4
- data/spec/lib/indexed/index_spec.rb +19 -2
- data/spec/lib/indexing/indexes_spec.rb +36 -0
- data/spec/lib/sources/db_spec.rb +1 -1
- data/spec/lib/tokenizers/base_spec.rb +164 -138
- metadata +5 -9
- data/lib/picky/helpers/cache.rb +0 -25
- data/lib/picky/helpers/gc.rb +0 -13
- data/spec/lib/helpers/cache_spec.rb +0 -35
- data/spec/lib/helpers/gc_spec.rb +0 -71
data/lib/picky/application.rb
CHANGED
@@ -179,7 +179,7 @@ class Application
   # * source: The source the data comes from. See Sources::Base. # TODO Sources (all).
   #
   # Options:
-  # * result_identifier:
+  # * result_identifier: Use if you'd like a different identifier/name in the results JSON than the name of the index.
   #
   def index name, source, options = {}
     IndexAPI.new name, source, options
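For context, a minimal sketch of how the newly documented result_identifier option might look in an app's index definition; the :books name, the CSV source file, and the 'media' identifier are illustrative, not taken from the gem:

```ruby
# Hypothetical index definition: results for this index would appear in the
# results JSON under "media" instead of under the index name "books".
books = index :books,
              Sources::CSV.new(:title, :author, file: 'app/library.csv'),
              result_identifier: 'media'
```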
data/lib/picky/cli.rb
CHANGED
@@ -9,10 +9,13 @@ module Picky
   # Note: By default, help is displayed. I.e. when no command is given.
   #
   def execute selector = nil, *args
-    executor_class, *params =
+    executor_class, *params = executor_class_for selector
     executor = executor_class.new
     executor.execute selector, args, params
   end
+  def executor_class_for selector = nil
+    selector && @@mapping[selector.to_sym] || [Help]
+  end
 
   class Base
     def usage name, params
@@ -63,12 +66,10 @@ module Picky
 
   # Maps commands to the other gem's command.
   #
-  # TODO Add optional params.
-  #
   @@mapping = {
-    :generate => [Generate, '
+    :generate => [Generate, 'sinatra_client | unicorn_server | empty_unicorn_server', 'app_directory_name (optional)'],
     :help     => [Help],
-    :stats    => [Statistics, '
+    :stats    => [Statistics, 'logfile, e.g. log/search.log', 'port (optional)']
   }
   def self.mapping
     @@mapping
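The extracted executor_class_for helper simply looks the selector up in @@mapping and falls back to Help. Based on the mapping above and the spec added in this release, it behaves roughly like this (a sketch, not part of the diff itself):

```ruby
cli = Picky::CLI.new

cli.executor_class_for             # => [Picky::CLI::Help]
cli.executor_class_for(:help)      # => [Picky::CLI::Help]
cli.executor_class_for(:generate)  # => [Picky::CLI::Generate,
                                   #     'sinatra_client | unicorn_server | empty_unicorn_server',
                                   #     'app_directory_name (optional)']
cli.executor_class_for(:stats)     # => [Picky::CLI::Statistics,
                                   #     'logfile, e.g. log/search.log', 'port (optional)']
```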
data/lib/picky/indexed/categories.rb
CHANGED
@@ -60,13 +60,11 @@ module Indexed
 
   # Returns possible Combinations for the token.
   #
-  # The
+  # The preselected_categories param is an optimization.
   #
   # TODO Return [RemovedCategory(token, nil)]
   # If the search is ...
   #
-  # TODO Make categories also a collection class.
-  #
   # TODO Return [] if not ok, nil if needs to be removed?
   # Somehow unnice, but…
   #
data/lib/picky/indexed/index.rb
CHANGED
@@ -18,7 +18,7 @@ module Indexed
     @categories = Categories.new ignore_unassigned_tokens: ignore_unassigned_tokens
   end
 
-  # TODO
+  # TODO Doc.
   #
   def define_category category_name, options = {}
     new_category = Category.new category_name, self, options
@@ -26,7 +26,9 @@ module Indexed
     new_category
   end
 
+  # Return the possible combinations for this token.
   #
+  # A combination is a tuple <token, index_bundle>.
   #
   def possible_combinations token
     categories.possible_combinations_for token
data/lib/picky/indexes_api.rb
CHANGED
data/lib/picky/loader.rb
CHANGED
data/lib/picky/signals.rb
CHANGED
data/lib/picky/tokenizers/base.rb
CHANGED
@@ -82,6 +82,17 @@ module Tokenizers # :nodoc:all
     substituter?? substituter.substitute(text) : text
   end
 
+  # Reject tokens after tokenizing based on the given criteria.
+  #
+  # Note: Currently only for indexing. TODO Redesign and write for both!
+  #
+  def reject_token_if &condition
+    @reject_condition = condition
+  end
+  def reject tokens
+    tokens.reject! &@reject_condition
+  end
+
 
   # Returns a number of tokens, generated from the given text.
   #
@@ -111,6 +122,7 @@ module Tokenizers # :nodoc:all
     # Defaults.
     #
     splits_text_on options[:splits_text_on] || /\s/
+    reject_token_if &(options[:reject_token_if] || :blank?)
   end
 
   # Hooks.
@@ -125,15 +137,10 @@ module Tokenizers # :nodoc:all
   # Postprocessing.
   #
   def process tokens
-    reject tokens
+    reject tokens # Reject any tokens that don't meet criteria
     tokens
   end
 
-  # Rejects blank tokens.
-  #
-  def reject tokens
-    tokens.reject! &:blank?
-  end
   # Converts words into real tokens.
   #
   def tokens_for words
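With reject_token_if now wired into the tokenizer options (defaulting to :blank?), an application can pass its own rejection criterion. A hedged sketch of such a configuration; the split regex and the two-character rule are chosen for illustration only:

```ruby
# Hypothetical default_indexing call: blank tokens and tokens shorter than
# two characters are dropped after tokenizing.
default_indexing splits_text_on:  /[\s\/\-]/,
                 reject_token_if: lambda { |token| token.blank? || token.to_s.size < 2 }
```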
data/lib/picky/tokenizers/index.rb
CHANGED
@@ -39,8 +39,6 @@ module Tokenizers
     # 1. Split the text into words.
     # 2. Normalize each word.
     #
-    # TODO Rename into wordize? Or somesuch?
-    #
     def pretokenize text
       words = split text
       words.collect! do |word|
@@ -56,16 +54,5 @@ module Tokenizers
       symbolize text
     end
 
-    # Rejects tokens if they are too short (or blank).
-    #
-    # Override in subclasses to redefine behaviour.
-    #
-    # TODO TODO TODO Make parametrizable! reject { |token| }
-    #
-    def reject tokens
-      tokens.reject! &:blank?
-      # tokens.reject! { |token| token.to_s.size < 2 }
-    end
-
   end
 end
data/lib/picky/tokenizers/query.rb
CHANGED
@@ -57,7 +57,7 @@ module Tokenizers
   # TODO Perhaps move to Normalizer?
   #
   def normalize text
-    text = substitute_characters text # Substitute special characters
+    text = substitute_characters text     # Substitute special characters
     text.downcase!                        # Downcase all text
     normalize_with_patterns text          # normalize
     text.to_sym                           # symbolize
data/spec/lib/application_spec.rb
CHANGED
@@ -24,13 +24,15 @@ describe Application do
   end
   it "should run ok" do
     lambda {
-      #
+      # Here we just test if the API can be called ok.
       #
       class TestApplication < Application
        default_indexing removes_characters: /[^a-zA-Z0-9\s\/\-\"\&\.]/,
                         stopwords: /\b(and|the|of|it|in|for)\b/,
                         splits_text_on: /[\s\/\-\"\&\.]/,
-                        removes_characters_after_splitting: /[\.]
+                        removes_characters_after_splitting: /[\.]/,
+                        normalizes_words: [[/\$(\w+)/i, '\1 dollars']],
+                        reject_token_if: lambda { |token| token.blank? || token == :amistad }
 
        default_querying removes_characters: /[^a-zA-Z0-9äöü\s\/\-\,\&\"\~\*\:]/,
                         stopwords: /\b(and|the|of|it|in|for)\b/,
@@ -48,6 +50,11 @@ describe Application do
     books_index.define_category :isbn,
                                 partial: Partial::None.new # Partially searching on an ISBN makes not much sense.
 
+    geo_index = index :geo, Sources::CSV.new(:location, :north, :east, file: 'data/ch.csv', col_sep: ',')
+    geo_index.define_category :location
+    geo_index.define_ranged_category(:north1, 1, precision: 3, from: :north)
+             .define_ranged_category(:east1, 1, precision: 3, from: :east)
+
     full = Query::Full.new books_index
     live = Query::Live.new books_index
data/spec/lib/cli_spec.rb
CHANGED
@@ -10,6 +10,26 @@ require File.expand_path '../../../lib/picky/cli', __FILE__
 #
 describe Picky::CLI do
 
+  describe 'instance' do
+    before(:each) do
+      @cli = Picky::CLI.new
+    end
+    describe 'executor_class_for' do
+      it 'returns Help by default' do
+        @cli.executor_class_for.should == [Picky::CLI::Help]
+      end
+      it 'returns Generator for generate' do
+        @cli.executor_class_for(:generate).should == [Picky::CLI::Generate, "sinatra_client | unicorn_server | empty_unicorn_server", "app_directory_name (optional)"]
+      end
+      it 'returns Help for help' do
+        @cli.executor_class_for(:help).should == [Picky::CLI::Help]
+      end
+      it 'returns Statistics for stats' do
+        @cli.executor_class_for(:stats).should == [Picky::CLI::Statistics, "logfile, e.g. log/search.log", "port (optional)"]
+      end
+    end
+  end
+
   describe Picky::CLI::Base do
     before(:each) do
       @executor = Picky::CLI::Base.new
data/spec/lib/configuration/index_spec.rb
CHANGED
@@ -24,7 +24,7 @@ describe 'Configuration::Index' do
       @config.index_path(:some_bundle, :some_name).should_not equal(@config.index_path(:some_bundle, :some_name))
     end
     it "returns the right thing" do
-      @config.index_path(:some_bundle, :some_name).should == '
+      @config.index_path(:some_bundle, :some_name).should == 'spec/test_directory/index/test/some_index/some_category_some_bundle_some_name'
     end
   end
 
@@ -50,7 +50,7 @@ describe 'Configuration::Index' do
       @config.index_root.should equal(@config.index_root)
     end
     it "returns the right thing" do
-      @config.index_root.should == '
+      @config.index_root.should == 'spec/test_directory/index'
     end
   end
   describe "index_directory" do
@@ -58,7 +58,7 @@ describe 'Configuration::Index' do
       @config.index_directory.should equal(@config.index_directory)
     end
     it "returns the right thing" do
-      @config.index_directory.should == '
+      @config.index_directory.should == 'spec/test_directory/index/test/some_index'
     end
   end
   describe "prepared_index_path" do
@@ -66,12 +66,12 @@ describe 'Configuration::Index' do
       @config.prepared_index_path.should equal(@config.prepared_index_path)
     end
     it "returns the right thing" do
-      @config.prepared_index_path.should == '
+      @config.prepared_index_path.should == 'spec/test_directory/index/test/some_index/prepared_some_category_index'
     end
   end
   describe "prepare_index_directory" do
     it "calls the right thing" do
-      FileUtils.should_receive(:mkdir_p).once.with '
+      FileUtils.should_receive(:mkdir_p).once.with 'spec/test_directory/index/test/some_index'
 
       @config.prepare_index_directory
     end
|
|
64
64
|
it "uses the right file" do
|
65
65
|
Yajl::Parser.stub! :parse
|
66
66
|
|
67
|
-
File.should_receive(:open).once.with '
|
67
|
+
File.should_receive(:open).once.with 'spec/test_directory/index/test/some_index/some_category_some_name_index.json', 'r'
|
68
68
|
|
69
69
|
@files.load_index
|
70
70
|
end
|
@@ -73,7 +73,7 @@ describe Index::Files do
|
|
73
73
|
it "uses the right file" do
|
74
74
|
Yajl::Parser.stub! :parse
|
75
75
|
|
76
|
-
File.should_receive(:open).once.with '
|
76
|
+
File.should_receive(:open).once.with 'spec/test_directory/index/test/some_index/some_category_some_name_weights.json', 'r'
|
77
77
|
|
78
78
|
@files.load_weights
|
79
79
|
end
|
@@ -82,7 +82,7 @@ describe Index::Files do
|
|
82
82
|
it "uses the right file" do
|
83
83
|
Marshal.stub! :load
|
84
84
|
|
85
|
-
File.should_receive(:open).once.with '
|
85
|
+
File.should_receive(:open).once.with 'spec/test_directory/index/test/some_index/some_category_some_name_similarity.dump', 'r:binary'
|
86
86
|
|
87
87
|
@files.load_similarity
|
88
88
|
end
|
@@ -91,7 +91,7 @@ describe Index::Files do
|
|
91
91
|
it "uses the right file" do
|
92
92
|
Yajl::Parser.stub! :parse
|
93
93
|
|
94
|
-
File.should_receive(:open).once.with '
|
94
|
+
File.should_receive(:open).once.with 'spec/test_directory/index/test/some_index/some_category_some_name_configuration.json', 'r'
|
95
95
|
|
96
96
|
@files.load_configuration
|
97
97
|
end
|
data/spec/lib/indexed/bundle_spec.rb
CHANGED
@@ -60,7 +60,7 @@ describe Indexed::Bundle do
     it "uses the right file" do
       Yajl::Parser.stub! :parse
 
-      File.should_receive(:open).once.with '
+      File.should_receive(:open).once.with 'spec/test_directory/index/test/some_index/some_category_some_name_index.json', 'r'
 
       @bundle.load_index
     end
@@ -69,7 +69,7 @@ describe Indexed::Bundle do
     it "uses the right file" do
       Yajl::Parser.stub! :parse
 
-      File.should_receive(:open).once.with '
+      File.should_receive(:open).once.with 'spec/test_directory/index/test/some_index/some_category_some_name_weights.json', 'r'
 
       @bundle.load_weights
     end
@@ -78,7 +78,7 @@ describe Indexed::Bundle do
     it "uses the right file" do
       Marshal.stub! :load
 
-      File.should_receive(:open).once.with '
+      File.should_receive(:open).once.with 'spec/test_directory/index/test/some_index/some_category_some_name_similarity.dump', 'r:binary'
 
       @bundle.load_similarity
     end
@@ -87,7 +87,7 @@ describe Indexed::Bundle do
     it "uses the right file" do
       Yajl::Parser.stub! :parse
 
-      File.should_receive(:open).once.with '
+      File.should_receive(:open).once.with 'spec/test_directory/index/test/some_index/some_category_some_name_configuration.json', 'r'
 
       @bundle.load_configuration
     end
data/spec/lib/indexed/index_spec.rb
CHANGED
@@ -2,7 +2,24 @@ require 'spec_helper'
 
 describe Indexed::Index do
 
-  context
+  context 'without stubbed categories' do
+    before(:each) do
+      @index = Indexed::Index.new :some_index_name
+    end
+
+    describe 'define_category' do
+      it 'adds a new category to the categories' do
+        @index.define_category :some_category_name
+
+        @index.categories.categories.size.should == 1
+      end
+      it 'returns the new category' do
+        @index.define_category(:some_category_name).should be_kind_of(Indexed::Category)
+      end
+    end
+  end
+
+  context "with stubbed categories" do
     before(:each) do
       @categories = stub :categories
 
@@ -12,7 +29,7 @@ describe Indexed::Index do
 
       @index.stub! :categories => @categories
     end
-
+
     describe "load_from_cache" do
       it "delegates to each category" do
         @categories.should_receive(:load_from_cache).once.with
data/spec/lib/indexing/indexes_spec.rb
ADDED
@@ -0,0 +1,36 @@
+require 'spec_helper'
+
+describe Indexing::Indexes do
+
+  before(:each) do
+    @indexes = Indexing::Indexes.new
+  end
+
+  describe 'indexes' do
+    it 'exists' do
+      lambda { @indexes.indexes }.should_not raise_error
+    end
+    it 'is empty by default' do
+      @indexes.indexes.should be_empty
+    end
+  end
+
+  describe 'clear' do
+    it 'clears the indexes' do
+      @indexes.register :some_index
+
+      @indexes.clear
+
+      @indexes.indexes.should == []
+    end
+  end
+
+  describe 'register' do
+    it 'adds the given index to the indexes' do
+      @indexes.register :some_index
+
+      @indexes.indexes.should == [:some_index]
+    end
+  end
+
+end
data/spec/lib/sources/db_spec.rb
CHANGED
@@ -53,7 +53,7 @@ describe Sources::DB do
     end
     context "with file" do
       it "opens the config file relative to root" do
-        File.should_receive(:open).once.with '
+        File.should_receive(:open).once.with 'spec/test_directory/app/bla.yml'
 
         @source.configure :file => 'app/bla.yml'
       end
data/spec/lib/tokenizers/base_spec.rb
CHANGED
@@ -3,167 +3,193 @@
 require 'spec_helper'
 
 describe Tokenizers::Base do
-
-  before(:each) do
-    @tokenizer = Tokenizers::Base.new
-  end
 
-
-
-      @tokenizer.
-    end
-    it "uses the substituter to replace characters" do
-      @tokenizer.substitutes_characters_with CharacterSubstituters::WestEuropean.new
-
-      @tokenizer.substitute_characters('abcdefghijklmnopqrstuvwxyzäöü').should == 'abcdefghijklmnopqrstuvwxyzaeoeue'
+  context 'with special instance' do
+    before(:each) do
+      @tokenizer = Tokenizers::Base.new reject_token_if: lambda { |token| token.to_s.length < 2 || token == :hello }
     end
-    it
-      @tokenizer.
-
-      @tokenizer.substitute_characters('abcdefghijklmnopqrstuvwxyzäöü').should == 'abcdefghijklmnopqrstuvwxyzaeoeue'
-    end
-  end
-
-  describe "removes_characters_after_splitting" do
-    context "without removes_characters_after_splitting called" do
-      it "has remove_after_normalizing_illegals" do
-        lambda { @tokenizer.remove_after_normalizing_illegals('any') }.should_not raise_error
-      end
-      it 'should define a remove_after_normalizing_illegals normalize_with_patterns does nothing' do
-        unchanging = stub :unchanging
-        @tokenizer.remove_after_normalizing_illegals unchanging
-      end
+    it 'rejects tokens with length < 2' do
+      @tokenizer.reject([:'', :a, :ab, :abc]).should == [:ab, :abc]
     end
-
-
-        @tokenizer.removes_characters_after_splitting(/[afo]/)
-      end
-      it "has remove_after_normalizing_illegals" do
-        lambda { @tokenizer.remove_after_normalizing_illegals('abcdefghijklmnop') }.should_not raise_error
-      end
-      it "removes illegal characters" do
-        @tokenizer.remove_after_normalizing_illegals('abcdefghijklmnop').should == 'bcdeghijklmnp'
-      end
+    it 'rejects tokens that are called :hello' do
+      @tokenizer.reject([:hel, :hell, :hello]).should == [:hel, :hell]
     end
   end
 
-
-
-
-        lambda { @tokenizer.normalize_with_patterns('any') }.should_not raise_error
-      end
-      it 'should define a method normalize_with_patterns does nothing' do
-        unchanging = stub :unchanging
-        @tokenizer.normalize_with_patterns(unchanging).should == unchanging
-      end
-    end
-    context "with normalizes_words called" do
-      before(:each) do
-        @tokenizer.normalizes_words([
-          [/st\./, 'sankt'],
-          [/stras?s?e?/, 'str']
-        ])
-      end
-      it "has normalize_with_patterns" do
-        lambda { @tokenizer.normalize_with_patterns('a b/c.d') }.should_not raise_error
-      end
-      it "normalizes, but just the first one" do
-        @tokenizer.normalize_with_patterns('st. wegstrasse').should == 'sankt wegstrasse'
-      end
+  context 'with normal instance' do
+    before(:each) do
+      @tokenizer = Tokenizers::Base.new
     end
-
-
-
-
-    it "has split" do
-      lambda { @tokenizer.split('any') }.should_not raise_error
-    end
-    it 'should define a method split that splits by default on \s' do
-      @tokenizer.split('a b/c.d').should == ['a', 'b/c.d']
+
+    describe 'reject_token_if' do
+      it 'rejects empty tokens by default' do
+        @tokenizer.reject(['a', nil, '', 'b']).should == ['a', 'b']
       end
-    it '
-      @tokenizer.
+      it 'rejects tokens based on the given rejection criteria if set' do
+        @tokenizer.reject_token_if &:nil?
+
+        @tokenizer.reject(['a', nil, '', 'b']).should == ['a', '', 'b']
       end
     end
-
-
-
+
+    describe "substitute(s)_characters*" do
+      it "doesn't substitute if there is no substituter" do
+        @tokenizer.substitute_characters('abcdefghijklmnopqrstuvwxyzäöü').should == 'abcdefghijklmnopqrstuvwxyzäöü'
       end
-    it "
-
+      it "uses the substituter to replace characters" do
+        @tokenizer.substitutes_characters_with CharacterSubstituters::WestEuropean.new
+
+        @tokenizer.substitute_characters('abcdefghijklmnopqrstuvwxyzäöü').should == 'abcdefghijklmnopqrstuvwxyzaeoeue'
       end
-    it "
-      @tokenizer.
+      it "uses the european substituter as default" do
+        @tokenizer.substitutes_characters_with
+
+        @tokenizer.substitute_characters('abcdefghijklmnopqrstuvwxyzäöü').should == 'abcdefghijklmnopqrstuvwxyzaeoeue'
       end
     end
-
-
-
-
-
-
-
-
-
-
+
+    describe "removes_characters_after_splitting" do
+      context "without removes_characters_after_splitting called" do
+        it "has remove_after_normalizing_illegals" do
+          lambda { @tokenizer.remove_after_normalizing_illegals('any') }.should_not raise_error
+        end
+        it 'should define a remove_after_normalizing_illegals normalize_with_patterns does nothing' do
+          unchanging = stub :unchanging
+          @tokenizer.remove_after_normalizing_illegals unchanging
+        end
+      end
+      context "with removes_characters_after_splitting called" do
+        before(:each) do
+          @tokenizer.removes_characters_after_splitting(/[afo]/)
+        end
+        it "has remove_after_normalizing_illegals" do
+          lambda { @tokenizer.remove_after_normalizing_illegals('abcdefghijklmnop') }.should_not raise_error
+        end
+        it "removes illegal characters" do
+          @tokenizer.remove_after_normalizing_illegals('abcdefghijklmnop').should == 'bcdeghijklmnp'
+        end
       end
     end
-
-
-
-
-
-
-
-
-
+
+    describe "normalizes_words" do
+      context "without normalizes_words called" do
+        it "has normalize_with_patterns" do
+          lambda { @tokenizer.normalize_with_patterns('any') }.should_not raise_error
+        end
+        it 'should define a method normalize_with_patterns does nothing' do
+          unchanging = stub :unchanging
+          @tokenizer.normalize_with_patterns(unchanging).should == unchanging
+        end
+      end
+      context "with normalizes_words called" do
+        before(:each) do
+          @tokenizer.normalizes_words([
+            [/st\./, 'sankt'],
+            [/stras?s?e?/, 'str']
+          ])
+        end
+        it "has normalize_with_patterns" do
+          lambda { @tokenizer.normalize_with_patterns('a b/c.d') }.should_not raise_error
+        end
+        it "normalizes, but just the first one" do
+          @tokenizer.normalize_with_patterns('st. wegstrasse').should == 'sankt wegstrasse'
+        end
       end
     end
-
-
-
-
-
-
-
-
-
-
-
-
-
+
+    describe "splits_text_on" do
+      context "without splits_text_on called" do
+        it "has split" do
+          lambda { @tokenizer.split('any') }.should_not raise_error
+        end
+        it 'should define a method split that splits by default on \s' do
+          @tokenizer.split('a b/c.d').should == ['a', 'b/c.d']
+        end
+        it 'splits text on /\s/ by default' do
+          @tokenizer.split('this is a test').should == ['this', 'is', 'a', 'test']
+        end
+      end
+      context "with removes_characters called" do
+        before(:each) do
+          @tokenizer.splits_text_on(/[\s\.\/]/)
+        end
+        it "has split" do
+          lambda { @tokenizer.split('a b/c.d') }.should_not raise_error
+        end
+        it "removes illegal characters" do
+          @tokenizer.split('a b/c.d').should == ['a','b','c','d']
+        end
       end
     end
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+
+    describe "removes_characters" do
+      context "without removes_characters called" do
+        it "has remove_illegals" do
+          lambda { @tokenizer.remove_illegals('any') }.should_not raise_error
+        end
+        it 'should define a method remove_illegals that does nothing' do
+          unchanging = stub :unchanging
+          @tokenizer.remove_illegals unchanging
+        end
+      end
+      context "with removes_characters called" do
+        before(:each) do
+          @tokenizer.removes_characters(/[afo]/)
+        end
+        it "has remove_illegals" do
+          lambda { @tokenizer.remove_illegals('abcdefghijklmnop') }.should_not raise_error
+        end
+        it "removes illegal characters" do
+          @tokenizer.remove_illegals('abcdefghijklmnop').should == 'bcdeghijklmnp'
+        end
       end
     end
-
-
-
-
-
-
-
-
-
+
+    describe 'stopwords' do
+      context 'without stopwords given' do
+        it 'should define a method remove_stopwords' do
+          lambda { @tokenizer.remove_stopwords('from this text') }.should_not raise_error
+        end
+        it 'should define a method remove_stopwords that does nothing' do
+          @tokenizer.remove_stopwords('from this text').should == 'from this text'
+        end
+        it 'should define a method remove_non_single_stopwords' do
+          lambda { @tokenizer.remove_non_single_stopwords('from this text') }.should_not raise_error
+
+        end
+      end
+      context 'with stopwords given' do
+        before(:each) do
+          @tokenizer.stopwords(/r|e/)
+        end
+        it 'should define a method remove_stopwords' do
+          lambda { @tokenizer.remove_stopwords('from this text') }.should_not raise_error
+        end
+        it 'should define a method stopwords that removes stopwords' do
+          @tokenizer.remove_stopwords('from this text').should == 'fom this txt'
+        end
+        it 'should define a method remove_non_single_stopwords' do
+          lambda { @tokenizer.remove_non_single_stopwords('from this text') }.should_not raise_error
+        end
+        it 'should define a method remove_non_single_stopwords that removes non-single stopwords' do
+          @tokenizer.remove_non_single_stopwords('rerere rerere').should == ' '
+        end
+        it 'should define a method remove_non_single_stopwords that does not single stopwords' do
+          @tokenizer.remove_non_single_stopwords('rerere').should == 'rerere'
+        end
+      end
+      context 'error case' do
+        before(:each) do
+          @tokenizer.stopwords(/any/)
+        end
+        it 'should not remove non-single stopwords with a star' do
+          @tokenizer.remove_non_single_stopwords('a*').should == 'a*'
+        end
+        it 'should not remove non-single stopwords with a tilde' do
+          @tokenizer.remove_non_single_stopwords('a~').should == 'a~'
+        end
       end
     end
   end
+
 end
metadata
CHANGED
@@ -5,8 +5,8 @@ version: !ruby/object:Gem::Version
 segments:
 - 1
 - 2
-- 
-version: 1.2.
+- 4
+version: 1.2.4
 platform: ruby
 authors:
 - Florian Hanke
@@ -14,7 +14,7 @@ autorequire:
 bindir: bin
 cert_chain: []
 
-date: 2010-12-
+date: 2010-12-30 00:00:00 +01:00
 default_executable: picky
 dependencies:
 - !ruby/object:Gem::Dependency
@@ -73,8 +73,6 @@ files:
 - lib/picky/extensions/module.rb
 - lib/picky/extensions/object.rb
 - lib/picky/extensions/symbol.rb
-- lib/picky/helpers/cache.rb
-- lib/picky/helpers/gc.rb
 - lib/picky/helpers/measuring.rb
 - lib/picky/index/bundle.rb
 - lib/picky/index/file/basic.rb
@@ -169,8 +167,6 @@ files:
 - spec/lib/extensions/module_spec.rb
 - spec/lib/extensions/object_spec.rb
 - spec/lib/extensions/symbol_spec.rb
-- spec/lib/helpers/cache_spec.rb
-- spec/lib/helpers/gc_spec.rb
 - spec/lib/helpers/measuring_spec.rb
 - spec/lib/index/bundle_spec.rb
 - spec/lib/index/file/basic_spec.rb
@@ -191,6 +187,7 @@ files:
 - spec/lib/indexing/bundle_spec.rb
 - spec/lib/indexing/category_spec.rb
 - spec/lib/indexing/index_spec.rb
+- spec/lib/indexing/indexes_spec.rb
 - spec/lib/loader_spec.rb
 - spec/lib/loggers/search_spec.rb
 - spec/lib/query/allocation_spec.rb
@@ -276,8 +273,6 @@ test_files:
 - spec/lib/extensions/module_spec.rb
 - spec/lib/extensions/object_spec.rb
 - spec/lib/extensions/symbol_spec.rb
-- spec/lib/helpers/cache_spec.rb
-- spec/lib/helpers/gc_spec.rb
 - spec/lib/helpers/measuring_spec.rb
 - spec/lib/index/bundle_spec.rb
 - spec/lib/index/file/basic_spec.rb
@@ -298,6 +293,7 @@ test_files:
 - spec/lib/indexing/bundle_spec.rb
 - spec/lib/indexing/category_spec.rb
 - spec/lib/indexing/index_spec.rb
+- spec/lib/indexing/indexes_spec.rb
 - spec/lib/loader_spec.rb
 - spec/lib/loggers/search_spec.rb
 - spec/lib/query/allocation_spec.rb
data/lib/picky/helpers/cache.rb
DELETED
@@ -1,25 +0,0 @@
-# TODO Not used anymore? Remove.
-#
-module Helpers # :nodoc:all
-
-  module Cache
-    # This is a simple cache.
-    # The store needs to be able to answer to [] and []=.
-    #
-    def cached store, key, &block
-      # Get cached result
-      #
-      results = store[key]
-      return results if results
-
-      results = lambda(&block).call
-
-      # Store results
-      #
-      store[key] = results
-
-      results
-    end
-  end
-
-end
data/lib/picky/helpers/gc.rb
DELETED
data/spec/lib/helpers/cache_spec.rb
DELETED
@@ -1,35 +0,0 @@
-require 'spec_helper'
-
-describe Helpers::Cache do
-  include Helpers::Cache
-
-  describe "#cached" do
-    attr_reader :store, :key
-    before(:each) do
-      @store = {}
-      @key = 'some key'
-    end
-    describe "not yet cached" do
-      it "should cache" do
-        store.should_receive(:[]=).once.with(@key, 'value')
-        cached @store, @key do
-          'value'
-        end
-      end
-    end
-    describe "already cached" do
-      before(:each) do
-        cached @store, @key do
-          'value'
-        end
-      end
-      it "should not cache" do
-        store.should_receive(:[]=).never
-        cached @store, @key do
-          'value'
-        end
-      end
-    end
-  end
-
-end
data/spec/lib/helpers/gc_spec.rb
DELETED
@@ -1,71 +0,0 @@
-require 'spec_helper'
-
-describe Helpers::GC do
-  include Helpers::GC
-
-  before(:each) do
-    ::GC.stub!(:disable)
-    ::GC.stub!(:enable)
-    ::GC.stub!(:start)
-  end
-
-  describe "block calling" do
-    it 'should call the block' do
-      inside_block = mock :inside
-      inside_block.should_receive(:call).once
-
-      disabled do
-        inside_block.call
-      end
-    end
-    it 'should call the block' do
-      inside_block = mock :inside
-      inside_block.should_receive(:call).once
-
-      gc_disabled do
-        inside_block.call
-      end
-    end
-  end
-
-  describe "gc calls" do
-    after(:each) do
-      disabled {}
-    end
-    it 'should disable the garbage collector' do
-      ::GC.should_receive(:disable)
-    end
-    it 'should enable the garbage collector' do
-      ::GC.should_receive(:enable)
-    end
-    it 'should start the garbage collector' do
-      ::GC.should_receive(:start)
-    end
-    it 'should disable the gc, call the block, enable the gc and start the gc' do
-      ::GC.should_receive(:disable).ordered
-      ::GC.should_receive(:enable).ordered
-      ::GC.should_receive(:start).ordered
-    end
-  end
-
-  describe "gc calls" do
-    after(:each) do
-      gc_disabled {}
-    end
-    it 'should disable the garbage collector' do
-      ::GC.should_receive(:disable)
-    end
-    it 'should enable the garbage collector' do
-      ::GC.should_receive(:enable)
-    end
-    it 'should start the garbage collector' do
-      ::GC.should_receive(:start)
-    end
-    it 'should disable the gc, call the block, enable the gc and start the gc' do
-      ::GC.should_receive(:disable).ordered
-      ::GC.should_receive(:enable).ordered
-      ::GC.should_receive(:start).ordered
-    end
-  end
-
-end