picky 1.2.3 → 1.2.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/lib/picky/application.rb +1 -1
- data/lib/picky/cli.rb +6 -5
- data/lib/picky/extensions/array.rb +0 -2
- data/lib/picky/indexed/categories.rb +1 -3
- data/lib/picky/indexed/category.rb +1 -1
- data/lib/picky/indexed/index.rb +3 -1
- data/lib/picky/indexed/wrappers/exact_first.rb +1 -1
- data/lib/picky/indexes_api.rb +1 -1
- data/lib/picky/indexing/indexes.rb +1 -1
- data/lib/picky/loader.rb +0 -2
- data/lib/picky/query/allocations.rb +0 -1
- data/lib/picky/signals.rb +1 -4
- data/lib/picky/tokenizers/base.rb +13 -6
- data/lib/picky/tokenizers/index.rb +0 -13
- data/lib/picky/tokenizers/query.rb +1 -1
- data/spec/lib/application_spec.rb +9 -2
- data/spec/lib/cli_spec.rb +20 -0
- data/spec/lib/configuration/index_spec.rb +5 -5
- data/spec/lib/index/files_spec.rb +4 -4
- data/spec/lib/indexed/bundle_spec.rb +4 -4
- data/spec/lib/indexed/index_spec.rb +19 -2
- data/spec/lib/indexing/indexes_spec.rb +36 -0
- data/spec/lib/sources/db_spec.rb +1 -1
- data/spec/lib/tokenizers/base_spec.rb +164 -138
- metadata +5 -9
- data/lib/picky/helpers/cache.rb +0 -25
- data/lib/picky/helpers/gc.rb +0 -13
- data/spec/lib/helpers/cache_spec.rb +0 -35
- data/spec/lib/helpers/gc_spec.rb +0 -71
data/lib/picky/application.rb
CHANGED
@@ -179,7 +179,7 @@ class Application
|
|
179
179
|
# * source: The source the data comes from. See Sources::Base. # TODO Sources (all).
|
180
180
|
#
|
181
181
|
# Options:
|
182
|
-
# * result_identifier:
|
182
|
+
# * result_identifier: Use if you'd like a different identifier/name in the results JSON than the name of the index.
|
183
183
|
#
|
184
184
|
def index name, source, options = {}
|
185
185
|
IndexAPI.new name, source, options
|
data/lib/picky/cli.rb
CHANGED
@@ -9,10 +9,13 @@ module Picky
|
|
9
9
|
# Note: By default, help is displayed. I.e. when no command is given.
|
10
10
|
#
|
11
11
|
def execute selector = nil, *args
|
12
|
-
executor_class, *params =
|
12
|
+
executor_class, *params = executor_class_for selector
|
13
13
|
executor = executor_class.new
|
14
14
|
executor.execute selector, args, params
|
15
15
|
end
|
16
|
+
def executor_class_for selector = nil
|
17
|
+
selector && @@mapping[selector.to_sym] || [Help]
|
18
|
+
end
|
16
19
|
|
17
20
|
class Base
|
18
21
|
def usage name, params
|
@@ -63,12 +66,10 @@ module Picky
|
|
63
66
|
|
64
67
|
# Maps commands to the other gem's command.
|
65
68
|
#
|
66
|
-
# TODO Add optional params.
|
67
|
-
#
|
68
69
|
@@mapping = {
|
69
|
-
:generate => [Generate, '
|
70
|
+
:generate => [Generate, 'sinatra_client | unicorn_server | empty_unicorn_server', 'app_directory_name (optional)'],
|
70
71
|
:help => [Help],
|
71
|
-
:stats => [Statistics, '
|
72
|
+
:stats => [Statistics, 'logfile, e.g. log/search.log', 'port (optional)']
|
72
73
|
}
|
73
74
|
def self.mapping
|
74
75
|
@@mapping
|
data/lib/picky/indexed/categories.rb
CHANGED
@@ -60,13 +60,11 @@ module Indexed
|
|
60
60
|
|
61
61
|
# Returns possible Combinations for the token.
|
62
62
|
#
|
63
|
-
# The
|
63
|
+
# The preselected_categories param is an optimization.
|
64
64
|
#
|
65
65
|
# TODO Return [RemovedCategory(token, nil)]
|
66
66
|
# If the search is ...
|
67
67
|
#
|
68
|
-
# TODO Make categories also a collection class.
|
69
|
-
#
|
70
68
|
# TODO Return [] if not ok, nil if needs to be removed?
|
71
69
|
# Somehow unnice, but…
|
72
70
|
#
|
data/lib/picky/indexed/index.rb
CHANGED
@@ -18,7 +18,7 @@ module Indexed
|
|
18
18
|
@categories = Categories.new ignore_unassigned_tokens: ignore_unassigned_tokens
|
19
19
|
end
|
20
20
|
|
21
|
-
# TODO
|
21
|
+
# TODO Doc.
|
22
22
|
#
|
23
23
|
def define_category category_name, options = {}
|
24
24
|
new_category = Category.new category_name, self, options
|
@@ -26,7 +26,9 @@ module Indexed
|
|
26
26
|
new_category
|
27
27
|
end
|
28
28
|
|
29
|
+
# Return the possible combinations for this token.
|
29
30
|
#
|
31
|
+
# A combination is a tuple <token, index_bundle>.
|
30
32
|
#
|
31
33
|
def possible_combinations token
|
32
34
|
categories.possible_combinations_for token
|
data/lib/picky/indexes_api.rb
CHANGED
data/lib/picky/loader.rb
CHANGED
data/lib/picky/signals.rb
CHANGED
data/lib/picky/tokenizers/base.rb
CHANGED
@@ -82,6 +82,17 @@ module Tokenizers # :nodoc:all
|
|
82
82
|
substituter?? substituter.substitute(text) : text
|
83
83
|
end
|
84
84
|
|
85
|
+
# Reject tokens after tokenizing based on the given criteria.
|
86
|
+
#
|
87
|
+
# Note: Currently only for indexing. TODO Redesign and write for both!
|
88
|
+
#
|
89
|
+
def reject_token_if &condition
|
90
|
+
@reject_condition = condition
|
91
|
+
end
|
92
|
+
def reject tokens
|
93
|
+
tokens.reject! &@reject_condition
|
94
|
+
end
|
95
|
+
|
85
96
|
|
86
97
|
# Returns a number of tokens, generated from the given text.
|
87
98
|
#
|
@@ -111,6 +122,7 @@ module Tokenizers # :nodoc:all
|
|
111
122
|
# Defaults.
|
112
123
|
#
|
113
124
|
splits_text_on options[:splits_text_on] || /\s/
|
125
|
+
reject_token_if &(options[:reject_token_if] || :blank?)
|
114
126
|
end
|
115
127
|
|
116
128
|
# Hooks.
|
@@ -125,15 +137,10 @@ module Tokenizers # :nodoc:all
|
|
125
137
|
# Postprocessing.
|
126
138
|
#
|
127
139
|
def process tokens
|
128
|
-
reject tokens
|
140
|
+
reject tokens # Reject any tokens that don't meet criteria
|
129
141
|
tokens
|
130
142
|
end
|
131
143
|
|
132
|
-
# Rejects blank tokens.
|
133
|
-
#
|
134
|
-
def reject tokens
|
135
|
-
tokens.reject! &:blank?
|
136
|
-
end
|
137
144
|
# Converts words into real tokens.
|
138
145
|
#
|
139
146
|
def tokens_for words
|
data/lib/picky/tokenizers/index.rb
CHANGED
@@ -39,8 +39,6 @@ module Tokenizers
|
|
39
39
|
# 1. Split the text into words.
|
40
40
|
# 2. Normalize each word.
|
41
41
|
#
|
42
|
-
# TODO Rename into wordize? Or somesuch?
|
43
|
-
#
|
44
42
|
def pretokenize text
|
45
43
|
words = split text
|
46
44
|
words.collect! do |word|
|
@@ -56,16 +54,5 @@ module Tokenizers
|
|
56
54
|
symbolize text
|
57
55
|
end
|
58
56
|
|
59
|
-
# Rejects tokens if they are too short (or blank).
|
60
|
-
#
|
61
|
-
# Override in subclasses to redefine behaviour.
|
62
|
-
#
|
63
|
-
# TODO TODO TODO Make parametrizable! reject { |token| }
|
64
|
-
#
|
65
|
-
def reject tokens
|
66
|
-
tokens.reject! &:blank?
|
67
|
-
# tokens.reject! { |token| token.to_s.size < 2 }
|
68
|
-
end
|
69
|
-
|
70
57
|
end
|
71
58
|
end
|
data/lib/picky/tokenizers/query.rb
CHANGED
@@ -57,7 +57,7 @@ module Tokenizers
|
|
57
57
|
# TODO Perhaps move to Normalizer?
|
58
58
|
#
|
59
59
|
def normalize text
|
60
|
-
text = substitute_characters text # Substitute special characters
|
60
|
+
text = substitute_characters text # Substitute special characters
|
61
61
|
text.downcase! # Downcase all text
|
62
62
|
normalize_with_patterns text # normalize
|
63
63
|
text.to_sym # symbolize
|
data/spec/lib/application_spec.rb
CHANGED
@@ -24,13 +24,15 @@ describe Application do
|
|
24
24
|
end
|
25
25
|
it "should run ok" do
|
26
26
|
lambda {
|
27
|
-
#
|
27
|
+
# Here we just test if the API can be called ok.
|
28
28
|
#
|
29
29
|
class TestApplication < Application
|
30
30
|
default_indexing removes_characters: /[^a-zA-Z0-9\s\/\-\"\&\.]/,
|
31
31
|
stopwords: /\b(and|the|of|it|in|for)\b/,
|
32
32
|
splits_text_on: /[\s\/\-\"\&\.]/,
|
33
|
-
removes_characters_after_splitting: /[\.]
|
33
|
+
removes_characters_after_splitting: /[\.]/,
|
34
|
+
normalizes_words: [[/\$(\w+)/i, '\1 dollars']],
|
35
|
+
reject_token_if: lambda { |token| token.blank? || token == :amistad }
|
34
36
|
|
35
37
|
default_querying removes_characters: /[^a-zA-Z0-9äöü\s\/\-\,\&\"\~\*\:]/,
|
36
38
|
stopwords: /\b(and|the|of|it|in|for)\b/,
|
@@ -48,6 +50,11 @@ describe Application do
|
|
48
50
|
books_index.define_category :isbn,
|
49
51
|
partial: Partial::None.new # Partially searching on an ISBN makes not much sense.
|
50
52
|
|
53
|
+
geo_index = index :geo, Sources::CSV.new(:location, :north, :east, file: 'data/ch.csv', col_sep: ',')
|
54
|
+
geo_index.define_category :location
|
55
|
+
geo_index.define_ranged_category(:north1, 1, precision: 3, from: :north)
|
56
|
+
.define_ranged_category(:east1, 1, precision: 3, from: :east)
|
57
|
+
|
51
58
|
full = Query::Full.new books_index
|
52
59
|
live = Query::Live.new books_index
|
53
60
|
|
data/spec/lib/cli_spec.rb
CHANGED
@@ -10,6 +10,26 @@ require File.expand_path '../../../lib/picky/cli', __FILE__
|
|
10
10
|
#
|
11
11
|
describe Picky::CLI do
|
12
12
|
|
13
|
+
describe 'instance' do
|
14
|
+
before(:each) do
|
15
|
+
@cli = Picky::CLI.new
|
16
|
+
end
|
17
|
+
describe 'executor_class_for' do
|
18
|
+
it 'returns Help by default' do
|
19
|
+
@cli.executor_class_for.should == [Picky::CLI::Help]
|
20
|
+
end
|
21
|
+
it 'returns Generator for generate' do
|
22
|
+
@cli.executor_class_for(:generate).should == [Picky::CLI::Generate, "sinatra_client | unicorn_server | empty_unicorn_server", "app_directory_name (optional)"]
|
23
|
+
end
|
24
|
+
it 'returns Help for help' do
|
25
|
+
@cli.executor_class_for(:help).should == [Picky::CLI::Help]
|
26
|
+
end
|
27
|
+
it 'returns Statistics for stats' do
|
28
|
+
@cli.executor_class_for(:stats).should == [Picky::CLI::Statistics, "logfile, e.g. log/search.log", "port (optional)"]
|
29
|
+
end
|
30
|
+
end
|
31
|
+
end
|
32
|
+
|
13
33
|
describe Picky::CLI::Base do
|
14
34
|
before(:each) do
|
15
35
|
@executor = Picky::CLI::Base.new
|
data/spec/lib/configuration/index_spec.rb
CHANGED
@@ -24,7 +24,7 @@ describe 'Configuration::Index' do
|
|
24
24
|
@config.index_path(:some_bundle, :some_name).should_not equal(@config.index_path(:some_bundle, :some_name))
|
25
25
|
end
|
26
26
|
it "returns the right thing" do
|
27
|
-
@config.index_path(:some_bundle, :some_name).should == '
|
27
|
+
@config.index_path(:some_bundle, :some_name).should == 'spec/test_directory/index/test/some_index/some_category_some_bundle_some_name'
|
28
28
|
end
|
29
29
|
end
|
30
30
|
|
@@ -50,7 +50,7 @@ describe 'Configuration::Index' do
|
|
50
50
|
@config.index_root.should equal(@config.index_root)
|
51
51
|
end
|
52
52
|
it "returns the right thing" do
|
53
|
-
@config.index_root.should == '
|
53
|
+
@config.index_root.should == 'spec/test_directory/index'
|
54
54
|
end
|
55
55
|
end
|
56
56
|
describe "index_directory" do
|
@@ -58,7 +58,7 @@ describe 'Configuration::Index' do
|
|
58
58
|
@config.index_directory.should equal(@config.index_directory)
|
59
59
|
end
|
60
60
|
it "returns the right thing" do
|
61
|
-
@config.index_directory.should == '
|
61
|
+
@config.index_directory.should == 'spec/test_directory/index/test/some_index'
|
62
62
|
end
|
63
63
|
end
|
64
64
|
describe "prepared_index_path" do
|
@@ -66,12 +66,12 @@ describe 'Configuration::Index' do
|
|
66
66
|
@config.prepared_index_path.should equal(@config.prepared_index_path)
|
67
67
|
end
|
68
68
|
it "returns the right thing" do
|
69
|
-
@config.prepared_index_path.should == '
|
69
|
+
@config.prepared_index_path.should == 'spec/test_directory/index/test/some_index/prepared_some_category_index'
|
70
70
|
end
|
71
71
|
end
|
72
72
|
describe "prepare_index_directory" do
|
73
73
|
it "calls the right thing" do
|
74
|
-
FileUtils.should_receive(:mkdir_p).once.with '
|
74
|
+
FileUtils.should_receive(:mkdir_p).once.with 'spec/test_directory/index/test/some_index'
|
75
75
|
|
76
76
|
@config.prepare_index_directory
|
77
77
|
end
|
data/spec/lib/index/files_spec.rb
CHANGED
@@ -64,7 +64,7 @@ describe Index::Files do
|
|
64
64
|
it "uses the right file" do
|
65
65
|
Yajl::Parser.stub! :parse
|
66
66
|
|
67
|
-
File.should_receive(:open).once.with '
|
67
|
+
File.should_receive(:open).once.with 'spec/test_directory/index/test/some_index/some_category_some_name_index.json', 'r'
|
68
68
|
|
69
69
|
@files.load_index
|
70
70
|
end
|
@@ -73,7 +73,7 @@ describe Index::Files do
|
|
73
73
|
it "uses the right file" do
|
74
74
|
Yajl::Parser.stub! :parse
|
75
75
|
|
76
|
-
File.should_receive(:open).once.with '
|
76
|
+
File.should_receive(:open).once.with 'spec/test_directory/index/test/some_index/some_category_some_name_weights.json', 'r'
|
77
77
|
|
78
78
|
@files.load_weights
|
79
79
|
end
|
@@ -82,7 +82,7 @@ describe Index::Files do
|
|
82
82
|
it "uses the right file" do
|
83
83
|
Marshal.stub! :load
|
84
84
|
|
85
|
-
File.should_receive(:open).once.with '
|
85
|
+
File.should_receive(:open).once.with 'spec/test_directory/index/test/some_index/some_category_some_name_similarity.dump', 'r:binary'
|
86
86
|
|
87
87
|
@files.load_similarity
|
88
88
|
end
|
@@ -91,7 +91,7 @@ describe Index::Files do
|
|
91
91
|
it "uses the right file" do
|
92
92
|
Yajl::Parser.stub! :parse
|
93
93
|
|
94
|
-
File.should_receive(:open).once.with '
|
94
|
+
File.should_receive(:open).once.with 'spec/test_directory/index/test/some_index/some_category_some_name_configuration.json', 'r'
|
95
95
|
|
96
96
|
@files.load_configuration
|
97
97
|
end
|
data/spec/lib/indexed/bundle_spec.rb
CHANGED
@@ -60,7 +60,7 @@ describe Indexed::Bundle do
|
|
60
60
|
it "uses the right file" do
|
61
61
|
Yajl::Parser.stub! :parse
|
62
62
|
|
63
|
-
File.should_receive(:open).once.with '
|
63
|
+
File.should_receive(:open).once.with 'spec/test_directory/index/test/some_index/some_category_some_name_index.json', 'r'
|
64
64
|
|
65
65
|
@bundle.load_index
|
66
66
|
end
|
@@ -69,7 +69,7 @@ describe Indexed::Bundle do
|
|
69
69
|
it "uses the right file" do
|
70
70
|
Yajl::Parser.stub! :parse
|
71
71
|
|
72
|
-
File.should_receive(:open).once.with '
|
72
|
+
File.should_receive(:open).once.with 'spec/test_directory/index/test/some_index/some_category_some_name_weights.json', 'r'
|
73
73
|
|
74
74
|
@bundle.load_weights
|
75
75
|
end
|
@@ -78,7 +78,7 @@ describe Indexed::Bundle do
|
|
78
78
|
it "uses the right file" do
|
79
79
|
Marshal.stub! :load
|
80
80
|
|
81
|
-
File.should_receive(:open).once.with '
|
81
|
+
File.should_receive(:open).once.with 'spec/test_directory/index/test/some_index/some_category_some_name_similarity.dump', 'r:binary'
|
82
82
|
|
83
83
|
@bundle.load_similarity
|
84
84
|
end
|
@@ -87,7 +87,7 @@ describe Indexed::Bundle do
|
|
87
87
|
it "uses the right file" do
|
88
88
|
Yajl::Parser.stub! :parse
|
89
89
|
|
90
|
-
File.should_receive(:open).once.with '
|
90
|
+
File.should_receive(:open).once.with 'spec/test_directory/index/test/some_index/some_category_some_name_configuration.json', 'r'
|
91
91
|
|
92
92
|
@bundle.load_configuration
|
93
93
|
end
|
data/spec/lib/indexed/index_spec.rb
CHANGED
@@ -2,7 +2,24 @@ require 'spec_helper'
|
|
2
2
|
|
3
3
|
describe Indexed::Index do
|
4
4
|
|
5
|
-
context
|
5
|
+
context 'without stubbed categories' do
|
6
|
+
before(:each) do
|
7
|
+
@index = Indexed::Index.new :some_index_name
|
8
|
+
end
|
9
|
+
|
10
|
+
describe 'define_category' do
|
11
|
+
it 'adds a new category to the categories' do
|
12
|
+
@index.define_category :some_category_name
|
13
|
+
|
14
|
+
@index.categories.categories.size.should == 1
|
15
|
+
end
|
16
|
+
it 'returns the new category' do
|
17
|
+
@index.define_category(:some_category_name).should be_kind_of(Indexed::Category)
|
18
|
+
end
|
19
|
+
end
|
20
|
+
end
|
21
|
+
|
22
|
+
context "with stubbed categories" do
|
6
23
|
before(:each) do
|
7
24
|
@categories = stub :categories
|
8
25
|
|
@@ -12,7 +29,7 @@ describe Indexed::Index do
|
|
12
29
|
|
13
30
|
@index.stub! :categories => @categories
|
14
31
|
end
|
15
|
-
|
32
|
+
|
16
33
|
describe "load_from_cache" do
|
17
34
|
it "delegates to each category" do
|
18
35
|
@categories.should_receive(:load_from_cache).once.with
|
data/spec/lib/indexing/indexes_spec.rb
ADDED
@@ -0,0 +1,36 @@
|
|
1
|
+
require 'spec_helper'
|
2
|
+
|
3
|
+
describe Indexing::Indexes do
|
4
|
+
|
5
|
+
before(:each) do
|
6
|
+
@indexes = Indexing::Indexes.new
|
7
|
+
end
|
8
|
+
|
9
|
+
describe 'indexes' do
|
10
|
+
it 'exists' do
|
11
|
+
lambda { @indexes.indexes }.should_not raise_error
|
12
|
+
end
|
13
|
+
it 'is empty by default' do
|
14
|
+
@indexes.indexes.should be_empty
|
15
|
+
end
|
16
|
+
end
|
17
|
+
|
18
|
+
describe 'clear' do
|
19
|
+
it 'clears the indexes' do
|
20
|
+
@indexes.register :some_index
|
21
|
+
|
22
|
+
@indexes.clear
|
23
|
+
|
24
|
+
@indexes.indexes.should == []
|
25
|
+
end
|
26
|
+
end
|
27
|
+
|
28
|
+
describe 'register' do
|
29
|
+
it 'adds the given index to the indexes' do
|
30
|
+
@indexes.register :some_index
|
31
|
+
|
32
|
+
@indexes.indexes.should == [:some_index]
|
33
|
+
end
|
34
|
+
end
|
35
|
+
|
36
|
+
end
|
data/spec/lib/sources/db_spec.rb
CHANGED
@@ -53,7 +53,7 @@ describe Sources::DB do
|
|
53
53
|
end
|
54
54
|
context "with file" do
|
55
55
|
it "opens the config file relative to root" do
|
56
|
-
File.should_receive(:open).once.with '
|
56
|
+
File.should_receive(:open).once.with 'spec/test_directory/app/bla.yml'
|
57
57
|
|
58
58
|
@source.configure :file => 'app/bla.yml'
|
59
59
|
end
|
data/spec/lib/tokenizers/base_spec.rb
CHANGED
@@ -3,167 +3,193 @@
|
|
3
3
|
require 'spec_helper'
|
4
4
|
|
5
5
|
describe Tokenizers::Base do
|
6
|
-
|
7
|
-
before(:each) do
|
8
|
-
@tokenizer = Tokenizers::Base.new
|
9
|
-
end
|
10
6
|
|
11
|
-
|
12
|
-
|
13
|
-
@tokenizer.
|
14
|
-
end
|
15
|
-
it "uses the substituter to replace characters" do
|
16
|
-
@tokenizer.substitutes_characters_with CharacterSubstituters::WestEuropean.new
|
17
|
-
|
18
|
-
@tokenizer.substitute_characters('abcdefghijklmnopqrstuvwxyzäöü').should == 'abcdefghijklmnopqrstuvwxyzaeoeue'
|
7
|
+
context 'with special instance' do
|
8
|
+
before(:each) do
|
9
|
+
@tokenizer = Tokenizers::Base.new reject_token_if: lambda { |token| token.to_s.length < 2 || token == :hello }
|
19
10
|
end
|
20
|
-
it
|
21
|
-
@tokenizer.
|
22
|
-
|
23
|
-
@tokenizer.substitute_characters('abcdefghijklmnopqrstuvwxyzäöü').should == 'abcdefghijklmnopqrstuvwxyzaeoeue'
|
24
|
-
end
|
25
|
-
end
|
26
|
-
|
27
|
-
describe "removes_characters_after_splitting" do
|
28
|
-
context "without removes_characters_after_splitting called" do
|
29
|
-
it "has remove_after_normalizing_illegals" do
|
30
|
-
lambda { @tokenizer.remove_after_normalizing_illegals('any') }.should_not raise_error
|
31
|
-
end
|
32
|
-
it 'should define a remove_after_normalizing_illegals normalize_with_patterns does nothing' do
|
33
|
-
unchanging = stub :unchanging
|
34
|
-
@tokenizer.remove_after_normalizing_illegals unchanging
|
35
|
-
end
|
11
|
+
it 'rejects tokens with length < 2' do
|
12
|
+
@tokenizer.reject([:'', :a, :ab, :abc]).should == [:ab, :abc]
|
36
13
|
end
|
37
|
-
|
38
|
-
|
39
|
-
@tokenizer.removes_characters_after_splitting(/[afo]/)
|
40
|
-
end
|
41
|
-
it "has remove_after_normalizing_illegals" do
|
42
|
-
lambda { @tokenizer.remove_after_normalizing_illegals('abcdefghijklmnop') }.should_not raise_error
|
43
|
-
end
|
44
|
-
it "removes illegal characters" do
|
45
|
-
@tokenizer.remove_after_normalizing_illegals('abcdefghijklmnop').should == 'bcdeghijklmnp'
|
46
|
-
end
|
14
|
+
it 'rejects tokens that are called :hello' do
|
15
|
+
@tokenizer.reject([:hel, :hell, :hello]).should == [:hel, :hell]
|
47
16
|
end
|
48
17
|
end
|
49
18
|
|
50
|
-
|
51
|
-
|
52
|
-
|
53
|
-
lambda { @tokenizer.normalize_with_patterns('any') }.should_not raise_error
|
54
|
-
end
|
55
|
-
it 'should define a method normalize_with_patterns does nothing' do
|
56
|
-
unchanging = stub :unchanging
|
57
|
-
@tokenizer.normalize_with_patterns(unchanging).should == unchanging
|
58
|
-
end
|
59
|
-
end
|
60
|
-
context "with normalizes_words called" do
|
61
|
-
before(:each) do
|
62
|
-
@tokenizer.normalizes_words([
|
63
|
-
[/st\./, 'sankt'],
|
64
|
-
[/stras?s?e?/, 'str']
|
65
|
-
])
|
66
|
-
end
|
67
|
-
it "has normalize_with_patterns" do
|
68
|
-
lambda { @tokenizer.normalize_with_patterns('a b/c.d') }.should_not raise_error
|
69
|
-
end
|
70
|
-
it "normalizes, but just the first one" do
|
71
|
-
@tokenizer.normalize_with_patterns('st. wegstrasse').should == 'sankt wegstrasse'
|
72
|
-
end
|
19
|
+
context 'with normal instance' do
|
20
|
+
before(:each) do
|
21
|
+
@tokenizer = Tokenizers::Base.new
|
73
22
|
end
|
74
|
-
|
75
|
-
|
76
|
-
|
77
|
-
|
78
|
-
it "has split" do
|
79
|
-
lambda { @tokenizer.split('any') }.should_not raise_error
|
80
|
-
end
|
81
|
-
it 'should define a method split that splits by default on \s' do
|
82
|
-
@tokenizer.split('a b/c.d').should == ['a', 'b/c.d']
|
23
|
+
|
24
|
+
describe 'reject_token_if' do
|
25
|
+
it 'rejects empty tokens by default' do
|
26
|
+
@tokenizer.reject(['a', nil, '', 'b']).should == ['a', 'b']
|
83
27
|
end
|
84
|
-
it '
|
85
|
-
@tokenizer.
|
28
|
+
it 'rejects tokens based on the given rejection criteria if set' do
|
29
|
+
@tokenizer.reject_token_if &:nil?
|
30
|
+
|
31
|
+
@tokenizer.reject(['a', nil, '', 'b']).should == ['a', '', 'b']
|
86
32
|
end
|
87
33
|
end
|
88
|
-
|
89
|
-
|
90
|
-
|
34
|
+
|
35
|
+
describe "substitute(s)_characters*" do
|
36
|
+
it "doesn't substitute if there is no substituter" do
|
37
|
+
@tokenizer.substitute_characters('abcdefghijklmnopqrstuvwxyzäöü').should == 'abcdefghijklmnopqrstuvwxyzäöü'
|
91
38
|
end
|
92
|
-
it "
|
93
|
-
|
39
|
+
it "uses the substituter to replace characters" do
|
40
|
+
@tokenizer.substitutes_characters_with CharacterSubstituters::WestEuropean.new
|
41
|
+
|
42
|
+
@tokenizer.substitute_characters('abcdefghijklmnopqrstuvwxyzäöü').should == 'abcdefghijklmnopqrstuvwxyzaeoeue'
|
94
43
|
end
|
95
|
-
it "
|
96
|
-
@tokenizer.
|
44
|
+
it "uses the european substituter as default" do
|
45
|
+
@tokenizer.substitutes_characters_with
|
46
|
+
|
47
|
+
@tokenizer.substitute_characters('abcdefghijklmnopqrstuvwxyzäöü').should == 'abcdefghijklmnopqrstuvwxyzaeoeue'
|
97
48
|
end
|
98
49
|
end
|
99
|
-
|
100
|
-
|
101
|
-
|
102
|
-
|
103
|
-
|
104
|
-
|
105
|
-
|
106
|
-
|
107
|
-
|
108
|
-
|
50
|
+
|
51
|
+
describe "removes_characters_after_splitting" do
|
52
|
+
context "without removes_characters_after_splitting called" do
|
53
|
+
it "has remove_after_normalizing_illegals" do
|
54
|
+
lambda { @tokenizer.remove_after_normalizing_illegals('any') }.should_not raise_error
|
55
|
+
end
|
56
|
+
it 'should define a remove_after_normalizing_illegals normalize_with_patterns does nothing' do
|
57
|
+
unchanging = stub :unchanging
|
58
|
+
@tokenizer.remove_after_normalizing_illegals unchanging
|
59
|
+
end
|
60
|
+
end
|
61
|
+
context "with removes_characters_after_splitting called" do
|
62
|
+
before(:each) do
|
63
|
+
@tokenizer.removes_characters_after_splitting(/[afo]/)
|
64
|
+
end
|
65
|
+
it "has remove_after_normalizing_illegals" do
|
66
|
+
lambda { @tokenizer.remove_after_normalizing_illegals('abcdefghijklmnop') }.should_not raise_error
|
67
|
+
end
|
68
|
+
it "removes illegal characters" do
|
69
|
+
@tokenizer.remove_after_normalizing_illegals('abcdefghijklmnop').should == 'bcdeghijklmnp'
|
70
|
+
end
|
109
71
|
end
|
110
72
|
end
|
111
|
-
|
112
|
-
|
113
|
-
|
114
|
-
|
115
|
-
|
116
|
-
|
117
|
-
|
118
|
-
|
119
|
-
|
73
|
+
|
74
|
+
describe "normalizes_words" do
|
75
|
+
context "without normalizes_words called" do
|
76
|
+
it "has normalize_with_patterns" do
|
77
|
+
lambda { @tokenizer.normalize_with_patterns('any') }.should_not raise_error
|
78
|
+
end
|
79
|
+
it 'should define a method normalize_with_patterns does nothing' do
|
80
|
+
unchanging = stub :unchanging
|
81
|
+
@tokenizer.normalize_with_patterns(unchanging).should == unchanging
|
82
|
+
end
|
83
|
+
end
|
84
|
+
context "with normalizes_words called" do
|
85
|
+
before(:each) do
|
86
|
+
@tokenizer.normalizes_words([
|
87
|
+
[/st\./, 'sankt'],
|
88
|
+
[/stras?s?e?/, 'str']
|
89
|
+
])
|
90
|
+
end
|
91
|
+
it "has normalize_with_patterns" do
|
92
|
+
lambda { @tokenizer.normalize_with_patterns('a b/c.d') }.should_not raise_error
|
93
|
+
end
|
94
|
+
it "normalizes, but just the first one" do
|
95
|
+
@tokenizer.normalize_with_patterns('st. wegstrasse').should == 'sankt wegstrasse'
|
96
|
+
end
|
120
97
|
end
|
121
98
|
end
|
122
|
-
|
123
|
-
|
124
|
-
|
125
|
-
|
126
|
-
|
127
|
-
|
128
|
-
|
129
|
-
|
130
|
-
|
131
|
-
|
132
|
-
|
133
|
-
|
134
|
-
|
99
|
+
|
100
|
+
describe "splits_text_on" do
|
101
|
+
context "without splits_text_on called" do
|
102
|
+
it "has split" do
|
103
|
+
lambda { @tokenizer.split('any') }.should_not raise_error
|
104
|
+
end
|
105
|
+
it 'should define a method split that splits by default on \s' do
|
106
|
+
@tokenizer.split('a b/c.d').should == ['a', 'b/c.d']
|
107
|
+
end
|
108
|
+
it 'splits text on /\s/ by default' do
|
109
|
+
@tokenizer.split('this is a test').should == ['this', 'is', 'a', 'test']
|
110
|
+
end
|
111
|
+
end
|
112
|
+
context "with removes_characters called" do
|
113
|
+
before(:each) do
|
114
|
+
@tokenizer.splits_text_on(/[\s\.\/]/)
|
115
|
+
end
|
116
|
+
it "has split" do
|
117
|
+
lambda { @tokenizer.split('a b/c.d') }.should_not raise_error
|
118
|
+
end
|
119
|
+
it "removes illegal characters" do
|
120
|
+
@tokenizer.split('a b/c.d').should == ['a','b','c','d']
|
121
|
+
end
|
135
122
|
end
|
136
123
|
end
|
137
|
-
|
138
|
-
|
139
|
-
|
140
|
-
|
141
|
-
|
142
|
-
|
143
|
-
|
144
|
-
|
145
|
-
|
146
|
-
|
147
|
-
|
148
|
-
|
149
|
-
|
150
|
-
|
151
|
-
|
152
|
-
|
153
|
-
|
154
|
-
|
124
|
+
|
125
|
+
describe "removes_characters" do
|
126
|
+
context "without removes_characters called" do
|
127
|
+
it "has remove_illegals" do
|
128
|
+
lambda { @tokenizer.remove_illegals('any') }.should_not raise_error
|
129
|
+
end
|
130
|
+
it 'should define a method remove_illegals that does nothing' do
|
131
|
+
unchanging = stub :unchanging
|
132
|
+
@tokenizer.remove_illegals unchanging
|
133
|
+
end
|
134
|
+
end
|
135
|
+
context "with removes_characters called" do
|
136
|
+
before(:each) do
|
137
|
+
@tokenizer.removes_characters(/[afo]/)
|
138
|
+
end
|
139
|
+
it "has remove_illegals" do
|
140
|
+
lambda { @tokenizer.remove_illegals('abcdefghijklmnop') }.should_not raise_error
|
141
|
+
end
|
142
|
+
it "removes illegal characters" do
|
143
|
+
@tokenizer.remove_illegals('abcdefghijklmnop').should == 'bcdeghijklmnp'
|
144
|
+
end
|
155
145
|
end
|
156
146
|
end
|
157
|
-
|
158
|
-
|
159
|
-
|
160
|
-
|
161
|
-
|
162
|
-
|
163
|
-
|
164
|
-
|
165
|
-
|
147
|
+
|
148
|
+
describe 'stopwords' do
|
149
|
+
context 'without stopwords given' do
|
150
|
+
it 'should define a method remove_stopwords' do
|
151
|
+
lambda { @tokenizer.remove_stopwords('from this text') }.should_not raise_error
|
152
|
+
end
|
153
|
+
it 'should define a method remove_stopwords that does nothing' do
|
154
|
+
@tokenizer.remove_stopwords('from this text').should == 'from this text'
|
155
|
+
end
|
156
|
+
it 'should define a method remove_non_single_stopwords' do
|
157
|
+
lambda { @tokenizer.remove_non_single_stopwords('from this text') }.should_not raise_error
|
158
|
+
|
159
|
+
end
|
160
|
+
end
|
161
|
+
context 'with stopwords given' do
|
162
|
+
before(:each) do
|
163
|
+
@tokenizer.stopwords(/r|e/)
|
164
|
+
end
|
165
|
+
it 'should define a method remove_stopwords' do
|
166
|
+
lambda { @tokenizer.remove_stopwords('from this text') }.should_not raise_error
|
167
|
+
end
|
168
|
+
it 'should define a method stopwords that removes stopwords' do
|
169
|
+
@tokenizer.remove_stopwords('from this text').should == 'fom this txt'
|
170
|
+
end
|
171
|
+
it 'should define a method remove_non_single_stopwords' do
|
172
|
+
lambda { @tokenizer.remove_non_single_stopwords('from this text') }.should_not raise_error
|
173
|
+
end
|
174
|
+
it 'should define a method remove_non_single_stopwords that removes non-single stopwords' do
|
175
|
+
@tokenizer.remove_non_single_stopwords('rerere rerere').should == ' '
|
176
|
+
end
|
177
|
+
it 'should define a method remove_non_single_stopwords that does not single stopwords' do
|
178
|
+
@tokenizer.remove_non_single_stopwords('rerere').should == 'rerere'
|
179
|
+
end
|
180
|
+
end
|
181
|
+
context 'error case' do
|
182
|
+
before(:each) do
|
183
|
+
@tokenizer.stopwords(/any/)
|
184
|
+
end
|
185
|
+
it 'should not remove non-single stopwords with a star' do
|
186
|
+
@tokenizer.remove_non_single_stopwords('a*').should == 'a*'
|
187
|
+
end
|
188
|
+
it 'should not remove non-single stopwords with a tilde' do
|
189
|
+
@tokenizer.remove_non_single_stopwords('a~').should == 'a~'
|
190
|
+
end
|
166
191
|
end
|
167
192
|
end
|
168
193
|
end
|
194
|
+
|
169
195
|
end
|
metadata
CHANGED
@@ -5,8 +5,8 @@ version: !ruby/object:Gem::Version
|
|
5
5
|
segments:
|
6
6
|
- 1
|
7
7
|
- 2
|
8
|
-
-
|
9
|
-
version: 1.2.
|
8
|
+
- 4
|
9
|
+
version: 1.2.4
|
10
10
|
platform: ruby
|
11
11
|
authors:
|
12
12
|
- Florian Hanke
|
@@ -14,7 +14,7 @@ autorequire:
|
|
14
14
|
bindir: bin
|
15
15
|
cert_chain: []
|
16
16
|
|
17
|
-
date: 2010-12-
|
17
|
+
date: 2010-12-30 00:00:00 +01:00
|
18
18
|
default_executable: picky
|
19
19
|
dependencies:
|
20
20
|
- !ruby/object:Gem::Dependency
|
@@ -73,8 +73,6 @@ files:
|
|
73
73
|
- lib/picky/extensions/module.rb
|
74
74
|
- lib/picky/extensions/object.rb
|
75
75
|
- lib/picky/extensions/symbol.rb
|
76
|
-
- lib/picky/helpers/cache.rb
|
77
|
-
- lib/picky/helpers/gc.rb
|
78
76
|
- lib/picky/helpers/measuring.rb
|
79
77
|
- lib/picky/index/bundle.rb
|
80
78
|
- lib/picky/index/file/basic.rb
|
@@ -169,8 +167,6 @@ files:
|
|
169
167
|
- spec/lib/extensions/module_spec.rb
|
170
168
|
- spec/lib/extensions/object_spec.rb
|
171
169
|
- spec/lib/extensions/symbol_spec.rb
|
172
|
-
- spec/lib/helpers/cache_spec.rb
|
173
|
-
- spec/lib/helpers/gc_spec.rb
|
174
170
|
- spec/lib/helpers/measuring_spec.rb
|
175
171
|
- spec/lib/index/bundle_spec.rb
|
176
172
|
- spec/lib/index/file/basic_spec.rb
|
@@ -191,6 +187,7 @@ files:
|
|
191
187
|
- spec/lib/indexing/bundle_spec.rb
|
192
188
|
- spec/lib/indexing/category_spec.rb
|
193
189
|
- spec/lib/indexing/index_spec.rb
|
190
|
+
- spec/lib/indexing/indexes_spec.rb
|
194
191
|
- spec/lib/loader_spec.rb
|
195
192
|
- spec/lib/loggers/search_spec.rb
|
196
193
|
- spec/lib/query/allocation_spec.rb
|
@@ -276,8 +273,6 @@ test_files:
|
|
276
273
|
- spec/lib/extensions/module_spec.rb
|
277
274
|
- spec/lib/extensions/object_spec.rb
|
278
275
|
- spec/lib/extensions/symbol_spec.rb
|
279
|
-
- spec/lib/helpers/cache_spec.rb
|
280
|
-
- spec/lib/helpers/gc_spec.rb
|
281
276
|
- spec/lib/helpers/measuring_spec.rb
|
282
277
|
- spec/lib/index/bundle_spec.rb
|
283
278
|
- spec/lib/index/file/basic_spec.rb
|
@@ -298,6 +293,7 @@ test_files:
|
|
298
293
|
- spec/lib/indexing/bundle_spec.rb
|
299
294
|
- spec/lib/indexing/category_spec.rb
|
300
295
|
- spec/lib/indexing/index_spec.rb
|
296
|
+
- spec/lib/indexing/indexes_spec.rb
|
301
297
|
- spec/lib/loader_spec.rb
|
302
298
|
- spec/lib/loggers/search_spec.rb
|
303
299
|
- spec/lib/query/allocation_spec.rb
|
data/lib/picky/helpers/cache.rb
DELETED
@@ -1,25 +0,0 @@
|
|
1
|
-
# TODO Not used anymore? Remove.
|
2
|
-
#
|
3
|
-
module Helpers # :nodoc:all
|
4
|
-
|
5
|
-
module Cache
|
6
|
-
# This is a simple cache.
|
7
|
-
# The store needs to be able to answer to [] and []=.
|
8
|
-
#
|
9
|
-
def cached store, key, &block
|
10
|
-
# Get cached result
|
11
|
-
#
|
12
|
-
results = store[key]
|
13
|
-
return results if results
|
14
|
-
|
15
|
-
results = lambda(&block).call
|
16
|
-
|
17
|
-
# Store results
|
18
|
-
#
|
19
|
-
store[key] = results
|
20
|
-
|
21
|
-
results
|
22
|
-
end
|
23
|
-
end
|
24
|
-
|
25
|
-
end
|
data/lib/picky/helpers/gc.rb
DELETED
data/spec/lib/helpers/cache_spec.rb
DELETED
@@ -1,35 +0,0 @@
|
|
1
|
-
require 'spec_helper'
|
2
|
-
|
3
|
-
describe Helpers::Cache do
|
4
|
-
include Helpers::Cache
|
5
|
-
|
6
|
-
describe "#cached" do
|
7
|
-
attr_reader :store, :key
|
8
|
-
before(:each) do
|
9
|
-
@store = {}
|
10
|
-
@key = 'some key'
|
11
|
-
end
|
12
|
-
describe "not yet cached" do
|
13
|
-
it "should cache" do
|
14
|
-
store.should_receive(:[]=).once.with(@key, 'value')
|
15
|
-
cached @store, @key do
|
16
|
-
'value'
|
17
|
-
end
|
18
|
-
end
|
19
|
-
end
|
20
|
-
describe "already cached" do
|
21
|
-
before(:each) do
|
22
|
-
cached @store, @key do
|
23
|
-
'value'
|
24
|
-
end
|
25
|
-
end
|
26
|
-
it "should not cache" do
|
27
|
-
store.should_receive(:[]=).never
|
28
|
-
cached @store, @key do
|
29
|
-
'value'
|
30
|
-
end
|
31
|
-
end
|
32
|
-
end
|
33
|
-
end
|
34
|
-
|
35
|
-
end
|
data/spec/lib/helpers/gc_spec.rb
DELETED
@@ -1,71 +0,0 @@
|
|
1
|
-
require 'spec_helper'
|
2
|
-
|
3
|
-
describe Helpers::GC do
|
4
|
-
include Helpers::GC
|
5
|
-
|
6
|
-
before(:each) do
|
7
|
-
::GC.stub!(:disable)
|
8
|
-
::GC.stub!(:enable)
|
9
|
-
::GC.stub!(:start)
|
10
|
-
end
|
11
|
-
|
12
|
-
describe "block calling" do
|
13
|
-
it 'should call the block' do
|
14
|
-
inside_block = mock :inside
|
15
|
-
inside_block.should_receive(:call).once
|
16
|
-
|
17
|
-
disabled do
|
18
|
-
inside_block.call
|
19
|
-
end
|
20
|
-
end
|
21
|
-
it 'should call the block' do
|
22
|
-
inside_block = mock :inside
|
23
|
-
inside_block.should_receive(:call).once
|
24
|
-
|
25
|
-
gc_disabled do
|
26
|
-
inside_block.call
|
27
|
-
end
|
28
|
-
end
|
29
|
-
end
|
30
|
-
|
31
|
-
describe "gc calls" do
|
32
|
-
after(:each) do
|
33
|
-
disabled {}
|
34
|
-
end
|
35
|
-
it 'should disable the garbage collector' do
|
36
|
-
::GC.should_receive(:disable)
|
37
|
-
end
|
38
|
-
it 'should enable the garbage collector' do
|
39
|
-
::GC.should_receive(:enable)
|
40
|
-
end
|
41
|
-
it 'should start the garbage collector' do
|
42
|
-
::GC.should_receive(:start)
|
43
|
-
end
|
44
|
-
it 'should disable the gc, call the block, enable the gc and start the gc' do
|
45
|
-
::GC.should_receive(:disable).ordered
|
46
|
-
::GC.should_receive(:enable).ordered
|
47
|
-
::GC.should_receive(:start).ordered
|
48
|
-
end
|
49
|
-
end
|
50
|
-
|
51
|
-
describe "gc calls" do
|
52
|
-
after(:each) do
|
53
|
-
gc_disabled {}
|
54
|
-
end
|
55
|
-
it 'should disable the garbage collector' do
|
56
|
-
::GC.should_receive(:disable)
|
57
|
-
end
|
58
|
-
it 'should enable the garbage collector' do
|
59
|
-
::GC.should_receive(:enable)
|
60
|
-
end
|
61
|
-
it 'should start the garbage collector' do
|
62
|
-
::GC.should_receive(:start)
|
63
|
-
end
|
64
|
-
it 'should disable the gc, call the block, enable the gc and start the gc' do
|
65
|
-
::GC.should_receive(:disable).ordered
|
66
|
-
::GC.should_receive(:enable).ordered
|
67
|
-
::GC.should_receive(:start).ordered
|
68
|
-
end
|
69
|
-
end
|
70
|
-
|
71
|
-
end
|