picky 1.2.3 → 1.2.4

Sign up to get free protection for your applications and to get access to all the features.
@@ -179,7 +179,7 @@ class Application
179
179
  # * source: The source the data comes from. See Sources::Base. # TODO Sources (all).
180
180
  #
181
181
  # Options:
182
- # * result_identifier: # TODO Rename.
182
+ # * result_identifier: Use if you'd like a different identifier/name in the results JSON than the name of the index.
183
183
  #
184
184
  def index name, source, options = {}
185
185
  IndexAPI.new name, source, options
data/lib/picky/cli.rb CHANGED
@@ -9,10 +9,13 @@ module Picky
9
9
  # Note: Help is displayed by default, i.e. when no command is given.
10
10
  #
11
11
  def execute selector = nil, *args
12
- executor_class, *params = selector && @@mapping[selector.to_sym] || Help
12
+ executor_class, *params = executor_class_for selector
13
13
  executor = executor_class.new
14
14
  executor.execute selector, args, params
15
15
  end
16
+ def executor_class_for selector = nil
17
+ selector && @@mapping[selector.to_sym] || [Help]
18
+ end
16
19
 
17
20
  class Base
18
21
  def usage name, params
@@ -63,12 +66,10 @@ module Picky
63
66
 
64
67
  # Maps commands to the other gem's command.
65
68
  #
66
- # TODO Add optional params.
67
- #
68
69
  @@mapping = {
69
- :generate => [Generate, 'thing_to_generate: e.g. "unicorn_server"', :parameters],
70
+ :generate => [Generate, 'sinatra_client | unicorn_server | empty_unicorn_server', 'app_directory_name (optional)'],
70
71
  :help => [Help],
71
- :stats => [Statistics, 'logfile_to_use: e.g. log/search.log', 'port (optional)']
72
+ :stats => [Statistics, 'logfile, e.g. log/search.log', 'port (optional)']
72
73
  }
73
74
  def self.mapping
74
75
  @@mapping
@@ -35,8 +35,6 @@ class Array # :nodoc:all
35
35
  def sort_by_levenshtein! from
36
36
  from = from.to_s
37
37
  sort! do |this, that|
38
- # TODO Cache for speed?
39
- #
40
38
  Text::Levenshtein.distance(this.to_s, from) <=> Text::Levenshtein.distance(that.to_s, from)
41
39
  end
42
40
  end
@@ -60,13 +60,11 @@ module Indexed
60
60
 
61
61
  # Returns possible Combinations for the token.
62
62
  #
63
- # The categories param is an optimization.
63
+ # The preselected_categories param is an optimization.
64
64
  #
65
65
  # TODO Return [RemovedCategory(token, nil)]
66
66
  # If the search is ...
67
67
  #
68
- # TODO Make categories also a collection class.
69
- #
70
68
  # TODO Return [] if not ok, nil if needs to be removed?
71
69
  # Somehow unnice, but…
72
70
  #
@@ -65,7 +65,7 @@ module Indexed
65
65
  # Returns the right index bundle for this token.
66
66
  #
67
67
  def bundle_for token
68
- token.partial? ? partial : exact
68
+ token.partial?? partial : exact
69
69
  end
70
70
 
71
71
  # The partial strategy defines whether to really use the partial index.
@@ -18,7 +18,7 @@ module Indexed
18
18
  @categories = Categories.new ignore_unassigned_tokens: ignore_unassigned_tokens
19
19
  end
20
20
 
21
- # TODO Spec. Doc.
21
+ # TODO Doc.
22
22
  #
23
23
  def define_category category_name, options = {}
24
24
  new_category = Category.new category_name, self, options
@@ -26,7 +26,9 @@ module Indexed
26
26
  new_category
27
27
  end
28
28
 
29
+ # Return the possible combinations for this token.
29
30
  #
31
+ # A combination is a tuple <token, index_bundle>.
30
32
  #
31
33
  def possible_combinations token
32
34
  categories.possible_combinations_for token
@@ -2,7 +2,7 @@
2
2
  #
3
3
  module Indexed
4
4
 
5
- # FIXME and spec
5
+ # TODO Spec
6
6
  #
7
7
  module Wrappers
8
8
 
@@ -29,7 +29,7 @@ class IndexesAPI # :nodoc:all
29
29
  self.index_mapping[index.name] = index
30
30
 
31
31
  @indexing.register index.indexing
32
- @indexed.register index.indexed # TODO Even necessary?
32
+ @indexed.register index.indexed
33
33
  end
34
34
 
35
35
  def [] name
@@ -17,7 +17,7 @@ module Indexing
17
17
  clear
18
18
  end
19
19
 
20
- # TODO Spec.
20
+ # TODO Doc.
21
21
  #
22
22
  def clear
23
23
  @indexes = []
data/lib/picky/loader.rb CHANGED
@@ -100,8 +100,6 @@ module Loader # :nodoc:all
100
100
 
101
101
  # Requiring Helpers
102
102
  #
103
- load_relative 'helpers/gc'
104
- load_relative 'helpers/cache'
105
103
  load_relative 'helpers/measuring'
106
104
 
107
105
  # Character Substituters
@@ -63,7 +63,6 @@ module Query
63
63
  # But still TODO try for a faster one.
64
64
  #
65
65
  def random_ids amount = 1
66
- # TODO can there be no @allocations???
67
66
  return [] if @allocations.empty?
68
67
  ids = @allocations.first.ids
69
68
  indexes = Array.new(ids.size) { |id| id }.sort_by { rand }
data/lib/picky/signals.rb CHANGED
@@ -1,11 +1,8 @@
1
1
  # TODO Cleanup and move to project_prototype.
2
2
  #
3
3
  # Signal.trap 'USR1' do
4
- # Indexed.reload
4
+ # Indexes.reload
5
5
  # end
6
6
  # Signal.trap 'USR2' do
7
7
  # Loader.reload
8
- # end
9
- # Signal.trap 'INT' do
10
- # exit!
11
8
  # end
@@ -82,6 +82,17 @@ module Tokenizers # :nodoc:all
82
82
  substituter?? substituter.substitute(text) : text
83
83
  end
84
84
 
85
+ # Reject tokens after tokenizing based on the given criteria.
86
+ #
87
+ # Note: Currently only applied when indexing. TODO Redesign and implement for both indexing and querying!
88
+ #
89
+ def reject_token_if &condition
90
+ @reject_condition = condition
91
+ end
92
+ def reject tokens
93
+ tokens.reject! &@reject_condition
94
+ end
95
+
85
96
 
86
97
  # Returns a number of tokens, generated from the given text.
87
98
  #
@@ -111,6 +122,7 @@ module Tokenizers # :nodoc:all
111
122
  # Defaults.
112
123
  #
113
124
  splits_text_on options[:splits_text_on] || /\s/
125
+ reject_token_if &(options[:reject_token_if] || :blank?)
114
126
  end
115
127
 
116
128
  # Hooks.
@@ -125,15 +137,10 @@ module Tokenizers # :nodoc:all
125
137
  # Postprocessing.
126
138
  #
127
139
  def process tokens
128
- reject tokens # Reject any tokens that don't meet criteria
140
+ reject tokens # Reject any tokens that don't meet criteria
129
141
  tokens
130
142
  end
131
143
 
132
- # Rejects blank tokens.
133
- #
134
- def reject tokens
135
- tokens.reject! &:blank?
136
- end
137
144
  # Converts words into real tokens.
138
145
  #
139
146
  def tokens_for words
@@ -39,8 +39,6 @@ module Tokenizers
39
39
  # 1. Split the text into words.
40
40
  # 2. Normalize each word.
41
41
  #
42
- # TODO Rename into wordize? Or somesuch?
43
- #
44
42
  def pretokenize text
45
43
  words = split text
46
44
  words.collect! do |word|
@@ -56,16 +54,5 @@ module Tokenizers
56
54
  symbolize text
57
55
  end
58
56
 
59
- # Rejects tokens if they are too short (or blank).
60
- #
61
- # Override in subclasses to redefine behaviour.
62
- #
63
- # TODO TODO TODO Make parametrizable! reject { |token| }
64
- #
65
- def reject tokens
66
- tokens.reject! &:blank?
67
- # tokens.reject! { |token| token.to_s.size < 2 }
68
- end
69
-
70
57
  end
71
58
  end
@@ -57,7 +57,7 @@ module Tokenizers
57
57
  # TODO Perhaps move to Normalizer?
58
58
  #
59
59
  def normalize text
60
- text = substitute_characters text # Substitute special characters TODO Move to subclass
60
+ text = substitute_characters text # Substitute special characters
61
61
  text.downcase! # Downcase all text
62
62
  normalize_with_patterns text # normalize
63
63
  text.to_sym # symbolize
@@ -24,13 +24,15 @@ describe Application do
24
24
  end
25
25
  it "should run ok" do
26
26
  lambda {
27
- # TODO Add all possible cases.
27
+ # Here we just test if the API can be called ok.
28
28
  #
29
29
  class TestApplication < Application
30
30
  default_indexing removes_characters: /[^a-zA-Z0-9\s\/\-\"\&\.]/,
31
31
  stopwords: /\b(and|the|of|it|in|for)\b/,
32
32
  splits_text_on: /[\s\/\-\"\&\.]/,
33
- removes_characters_after_splitting: /[\.]/
33
+ removes_characters_after_splitting: /[\.]/,
34
+ normalizes_words: [[/\$(\w+)/i, '\1 dollars']],
35
+ reject_token_if: lambda { |token| token.blank? || token == :amistad }
34
36
 
35
37
  default_querying removes_characters: /[^a-zA-Z0-9äöü\s\/\-\,\&\"\~\*\:]/,
36
38
  stopwords: /\b(and|the|of|it|in|for)\b/,
@@ -48,6 +50,11 @@ describe Application do
48
50
  books_index.define_category :isbn,
49
51
  partial: Partial::None.new # Partially searching on an ISBN does not make much sense.
50
52
 
53
+ geo_index = index :geo, Sources::CSV.new(:location, :north, :east, file: 'data/ch.csv', col_sep: ',')
54
+ geo_index.define_category :location
55
+ geo_index.define_ranged_category(:north1, 1, precision: 3, from: :north)
56
+ .define_ranged_category(:east1, 1, precision: 3, from: :east)
57
+
51
58
  full = Query::Full.new books_index
52
59
  live = Query::Live.new books_index
53
60
 
data/spec/lib/cli_spec.rb CHANGED
@@ -10,6 +10,26 @@ require File.expand_path '../../../lib/picky/cli', __FILE__
10
10
  #
11
11
  describe Picky::CLI do
12
12
 
13
+ describe 'instance' do
14
+ before(:each) do
15
+ @cli = Picky::CLI.new
16
+ end
17
+ describe 'executor_class_for' do
18
+ it 'returns Help by default' do
19
+ @cli.executor_class_for.should == [Picky::CLI::Help]
20
+ end
21
+ it 'returns Generator for generate' do
22
+ @cli.executor_class_for(:generate).should == [Picky::CLI::Generate, "sinatra_client | unicorn_server | empty_unicorn_server", "app_directory_name (optional)"]
23
+ end
24
+ it 'returns Help for help' do
25
+ @cli.executor_class_for(:help).should == [Picky::CLI::Help]
26
+ end
27
+ it 'returns Statistics for stats' do
28
+ @cli.executor_class_for(:stats).should == [Picky::CLI::Statistics, "logfile, e.g. log/search.log", "port (optional)"]
29
+ end
30
+ end
31
+ end
32
+
13
33
  describe Picky::CLI::Base do
14
34
  before(:each) do
15
35
  @executor = Picky::CLI::Base.new
@@ -24,7 +24,7 @@ describe 'Configuration::Index' do
24
24
  @config.index_path(:some_bundle, :some_name).should_not equal(@config.index_path(:some_bundle, :some_name))
25
25
  end
26
26
  it "returns the right thing" do
27
- @config.index_path(:some_bundle, :some_name).should == 'some/search/root/index/test/some_index/some_category_some_bundle_some_name'
27
+ @config.index_path(:some_bundle, :some_name).should == 'spec/test_directory/index/test/some_index/some_category_some_bundle_some_name'
28
28
  end
29
29
  end
30
30
 
@@ -50,7 +50,7 @@ describe 'Configuration::Index' do
50
50
  @config.index_root.should equal(@config.index_root)
51
51
  end
52
52
  it "returns the right thing" do
53
- @config.index_root.should == 'some/search/root/index'
53
+ @config.index_root.should == 'spec/test_directory/index'
54
54
  end
55
55
  end
56
56
  describe "index_directory" do
@@ -58,7 +58,7 @@ describe 'Configuration::Index' do
58
58
  @config.index_directory.should equal(@config.index_directory)
59
59
  end
60
60
  it "returns the right thing" do
61
- @config.index_directory.should == 'some/search/root/index/test/some_index'
61
+ @config.index_directory.should == 'spec/test_directory/index/test/some_index'
62
62
  end
63
63
  end
64
64
  describe "prepared_index_path" do
@@ -66,12 +66,12 @@ describe 'Configuration::Index' do
66
66
  @config.prepared_index_path.should equal(@config.prepared_index_path)
67
67
  end
68
68
  it "returns the right thing" do
69
- @config.prepared_index_path.should == 'some/search/root/index/test/some_index/prepared_some_category_index'
69
+ @config.prepared_index_path.should == 'spec/test_directory/index/test/some_index/prepared_some_category_index'
70
70
  end
71
71
  end
72
72
  describe "prepare_index_directory" do
73
73
  it "calls the right thing" do
74
- FileUtils.should_receive(:mkdir_p).once.with 'some/search/root/index/test/some_index'
74
+ FileUtils.should_receive(:mkdir_p).once.with 'spec/test_directory/index/test/some_index'
75
75
 
76
76
  @config.prepare_index_directory
77
77
  end
@@ -64,7 +64,7 @@ describe Index::Files do
64
64
  it "uses the right file" do
65
65
  Yajl::Parser.stub! :parse
66
66
 
67
- File.should_receive(:open).once.with 'some/search/root/index/test/some_index/some_category_some_name_index.json', 'r'
67
+ File.should_receive(:open).once.with 'spec/test_directory/index/test/some_index/some_category_some_name_index.json', 'r'
68
68
 
69
69
  @files.load_index
70
70
  end
@@ -73,7 +73,7 @@ describe Index::Files do
73
73
  it "uses the right file" do
74
74
  Yajl::Parser.stub! :parse
75
75
 
76
- File.should_receive(:open).once.with 'some/search/root/index/test/some_index/some_category_some_name_weights.json', 'r'
76
+ File.should_receive(:open).once.with 'spec/test_directory/index/test/some_index/some_category_some_name_weights.json', 'r'
77
77
 
78
78
  @files.load_weights
79
79
  end
@@ -82,7 +82,7 @@ describe Index::Files do
82
82
  it "uses the right file" do
83
83
  Marshal.stub! :load
84
84
 
85
- File.should_receive(:open).once.with 'some/search/root/index/test/some_index/some_category_some_name_similarity.dump', 'r:binary'
85
+ File.should_receive(:open).once.with 'spec/test_directory/index/test/some_index/some_category_some_name_similarity.dump', 'r:binary'
86
86
 
87
87
  @files.load_similarity
88
88
  end
@@ -91,7 +91,7 @@ describe Index::Files do
91
91
  it "uses the right file" do
92
92
  Yajl::Parser.stub! :parse
93
93
 
94
- File.should_receive(:open).once.with 'some/search/root/index/test/some_index/some_category_some_name_configuration.json', 'r'
94
+ File.should_receive(:open).once.with 'spec/test_directory/index/test/some_index/some_category_some_name_configuration.json', 'r'
95
95
 
96
96
  @files.load_configuration
97
97
  end
@@ -60,7 +60,7 @@ describe Indexed::Bundle do
60
60
  it "uses the right file" do
61
61
  Yajl::Parser.stub! :parse
62
62
 
63
- File.should_receive(:open).once.with 'some/search/root/index/test/some_index/some_category_some_name_index.json', 'r'
63
+ File.should_receive(:open).once.with 'spec/test_directory/index/test/some_index/some_category_some_name_index.json', 'r'
64
64
 
65
65
  @bundle.load_index
66
66
  end
@@ -69,7 +69,7 @@ describe Indexed::Bundle do
69
69
  it "uses the right file" do
70
70
  Yajl::Parser.stub! :parse
71
71
 
72
- File.should_receive(:open).once.with 'some/search/root/index/test/some_index/some_category_some_name_weights.json', 'r'
72
+ File.should_receive(:open).once.with 'spec/test_directory/index/test/some_index/some_category_some_name_weights.json', 'r'
73
73
 
74
74
  @bundle.load_weights
75
75
  end
@@ -78,7 +78,7 @@ describe Indexed::Bundle do
78
78
  it "uses the right file" do
79
79
  Marshal.stub! :load
80
80
 
81
- File.should_receive(:open).once.with 'some/search/root/index/test/some_index/some_category_some_name_similarity.dump', 'r:binary'
81
+ File.should_receive(:open).once.with 'spec/test_directory/index/test/some_index/some_category_some_name_similarity.dump', 'r:binary'
82
82
 
83
83
  @bundle.load_similarity
84
84
  end
@@ -87,7 +87,7 @@ describe Indexed::Bundle do
87
87
  it "uses the right file" do
88
88
  Yajl::Parser.stub! :parse
89
89
 
90
- File.should_receive(:open).once.with 'some/search/root/index/test/some_index/some_category_some_name_configuration.json', 'r'
90
+ File.should_receive(:open).once.with 'spec/test_directory/index/test/some_index/some_category_some_name_configuration.json', 'r'
91
91
 
92
92
  @bundle.load_configuration
93
93
  end
@@ -2,7 +2,24 @@ require 'spec_helper'
2
2
 
3
3
  describe Indexed::Index do
4
4
 
5
- context "with categories" do
5
+ context 'without stubbed categories' do
6
+ before(:each) do
7
+ @index = Indexed::Index.new :some_index_name
8
+ end
9
+
10
+ describe 'define_category' do
11
+ it 'adds a new category to the categories' do
12
+ @index.define_category :some_category_name
13
+
14
+ @index.categories.categories.size.should == 1
15
+ end
16
+ it 'returns the new category' do
17
+ @index.define_category(:some_category_name).should be_kind_of(Indexed::Category)
18
+ end
19
+ end
20
+ end
21
+
22
+ context "with stubbed categories" do
6
23
  before(:each) do
7
24
  @categories = stub :categories
8
25
 
@@ -12,7 +29,7 @@ describe Indexed::Index do
12
29
 
13
30
  @index.stub! :categories => @categories
14
31
  end
15
-
32
+
16
33
  describe "load_from_cache" do
17
34
  it "delegates to each category" do
18
35
  @categories.should_receive(:load_from_cache).once.with
@@ -0,0 +1,36 @@
1
+ require 'spec_helper'
2
+
3
+ describe Indexing::Indexes do
4
+
5
+ before(:each) do
6
+ @indexes = Indexing::Indexes.new
7
+ end
8
+
9
+ describe 'indexes' do
10
+ it 'exists' do
11
+ lambda { @indexes.indexes }.should_not raise_error
12
+ end
13
+ it 'is empty by default' do
14
+ @indexes.indexes.should be_empty
15
+ end
16
+ end
17
+
18
+ describe 'clear' do
19
+ it 'clears the indexes' do
20
+ @indexes.register :some_index
21
+
22
+ @indexes.clear
23
+
24
+ @indexes.indexes.should == []
25
+ end
26
+ end
27
+
28
+ describe 'register' do
29
+ it 'adds the given index to the indexes' do
30
+ @indexes.register :some_index
31
+
32
+ @indexes.indexes.should == [:some_index]
33
+ end
34
+ end
35
+
36
+ end
@@ -53,7 +53,7 @@ describe Sources::DB do
53
53
  end
54
54
  context "with file" do
55
55
  it "opens the config file relative to root" do
56
- File.should_receive(:open).once.with 'some/search/root/app/bla.yml'
56
+ File.should_receive(:open).once.with 'spec/test_directory/app/bla.yml'
57
57
 
58
58
  @source.configure :file => 'app/bla.yml'
59
59
  end
@@ -3,167 +3,193 @@
3
3
  require 'spec_helper'
4
4
 
5
5
  describe Tokenizers::Base do
6
-
7
- before(:each) do
8
- @tokenizer = Tokenizers::Base.new
9
- end
10
6
 
11
- describe "substitute(s)_characters*" do
12
- it "doesn't substitute if there is no substituter" do
13
- @tokenizer.substitute_characters('abcdefghijklmnopqrstuvwxyzäöü').should == 'abcdefghijklmnopqrstuvwxyzäöü'
14
- end
15
- it "uses the substituter to replace characters" do
16
- @tokenizer.substitutes_characters_with CharacterSubstituters::WestEuropean.new
17
-
18
- @tokenizer.substitute_characters('abcdefghijklmnopqrstuvwxyzäöü').should == 'abcdefghijklmnopqrstuvwxyzaeoeue'
7
+ context 'with special instance' do
8
+ before(:each) do
9
+ @tokenizer = Tokenizers::Base.new reject_token_if: lambda { |token| token.to_s.length < 2 || token == :hello }
19
10
  end
20
- it "uses the european substituter as default" do
21
- @tokenizer.substitutes_characters_with
22
-
23
- @tokenizer.substitute_characters('abcdefghijklmnopqrstuvwxyzäöü').should == 'abcdefghijklmnopqrstuvwxyzaeoeue'
24
- end
25
- end
26
-
27
- describe "removes_characters_after_splitting" do
28
- context "without removes_characters_after_splitting called" do
29
- it "has remove_after_normalizing_illegals" do
30
- lambda { @tokenizer.remove_after_normalizing_illegals('any') }.should_not raise_error
31
- end
32
- it 'should define a remove_after_normalizing_illegals normalize_with_patterns does nothing' do
33
- unchanging = stub :unchanging
34
- @tokenizer.remove_after_normalizing_illegals unchanging
35
- end
11
+ it 'rejects tokens with length < 2' do
12
+ @tokenizer.reject([:'', :a, :ab, :abc]).should == [:ab, :abc]
36
13
  end
37
- context "with removes_characters_after_splitting called" do
38
- before(:each) do
39
- @tokenizer.removes_characters_after_splitting(/[afo]/)
40
- end
41
- it "has remove_after_normalizing_illegals" do
42
- lambda { @tokenizer.remove_after_normalizing_illegals('abcdefghijklmnop') }.should_not raise_error
43
- end
44
- it "removes illegal characters" do
45
- @tokenizer.remove_after_normalizing_illegals('abcdefghijklmnop').should == 'bcdeghijklmnp'
46
- end
14
+ it 'rejects tokens that are called :hello' do
15
+ @tokenizer.reject([:hel, :hell, :hello]).should == [:hel, :hell]
47
16
  end
48
17
  end
49
18
 
50
- describe "normalizes_words" do
51
- context "without normalizes_words called" do
52
- it "has normalize_with_patterns" do
53
- lambda { @tokenizer.normalize_with_patterns('any') }.should_not raise_error
54
- end
55
- it 'should define a method normalize_with_patterns does nothing' do
56
- unchanging = stub :unchanging
57
- @tokenizer.normalize_with_patterns(unchanging).should == unchanging
58
- end
59
- end
60
- context "with normalizes_words called" do
61
- before(:each) do
62
- @tokenizer.normalizes_words([
63
- [/st\./, 'sankt'],
64
- [/stras?s?e?/, 'str']
65
- ])
66
- end
67
- it "has normalize_with_patterns" do
68
- lambda { @tokenizer.normalize_with_patterns('a b/c.d') }.should_not raise_error
69
- end
70
- it "normalizes, but just the first one" do
71
- @tokenizer.normalize_with_patterns('st. wegstrasse').should == 'sankt wegstrasse'
72
- end
19
+ context 'with normal instance' do
20
+ before(:each) do
21
+ @tokenizer = Tokenizers::Base.new
73
22
  end
74
- end
75
-
76
- describe "splits_text_on" do
77
- context "without splits_text_on called" do
78
- it "has split" do
79
- lambda { @tokenizer.split('any') }.should_not raise_error
80
- end
81
- it 'should define a method split that splits by default on \s' do
82
- @tokenizer.split('a b/c.d').should == ['a', 'b/c.d']
23
+
24
+ describe 'reject_token_if' do
25
+ it 'rejects empty tokens by default' do
26
+ @tokenizer.reject(['a', nil, '', 'b']).should == ['a', 'b']
83
27
  end
84
- it 'splits text on /\s/ by default' do
85
- @tokenizer.split('this is a test').should == ['this', 'is', 'a', 'test']
28
+ it 'rejects tokens based on the given rejection criteria if set' do
29
+ @tokenizer.reject_token_if &:nil?
30
+
31
+ @tokenizer.reject(['a', nil, '', 'b']).should == ['a', '', 'b']
86
32
  end
87
33
  end
88
- context "with removes_characters called" do
89
- before(:each) do
90
- @tokenizer.splits_text_on(/[\s\.\/]/)
34
+
35
+ describe "substitute(s)_characters*" do
36
+ it "doesn't substitute if there is no substituter" do
37
+ @tokenizer.substitute_characters('abcdefghijklmnopqrstuvwxyzäöü').should == 'abcdefghijklmnopqrstuvwxyzäöü'
91
38
  end
92
- it "has split" do
93
- lambda { @tokenizer.split('a b/c.d') }.should_not raise_error
39
+ it "uses the substituter to replace characters" do
40
+ @tokenizer.substitutes_characters_with CharacterSubstituters::WestEuropean.new
41
+
42
+ @tokenizer.substitute_characters('abcdefghijklmnopqrstuvwxyzäöü').should == 'abcdefghijklmnopqrstuvwxyzaeoeue'
94
43
  end
95
- it "removes illegal characters" do
96
- @tokenizer.split('a b/c.d').should == ['a','b','c','d']
44
+ it "uses the european substituter as default" do
45
+ @tokenizer.substitutes_characters_with
46
+
47
+ @tokenizer.substitute_characters('abcdefghijklmnopqrstuvwxyzäöü').should == 'abcdefghijklmnopqrstuvwxyzaeoeue'
97
48
  end
98
49
  end
99
- end
100
-
101
- describe "removes_characters" do
102
- context "without removes_characters called" do
103
- it "has remove_illegals" do
104
- lambda { @tokenizer.remove_illegals('any') }.should_not raise_error
105
- end
106
- it 'should define a method remove_illegals that does nothing' do
107
- unchanging = stub :unchanging
108
- @tokenizer.remove_illegals unchanging
50
+
51
+ describe "removes_characters_after_splitting" do
52
+ context "without removes_characters_after_splitting called" do
53
+ it "has remove_after_normalizing_illegals" do
54
+ lambda { @tokenizer.remove_after_normalizing_illegals('any') }.should_not raise_error
55
+ end
56
+ it 'should define a remove_after_normalizing_illegals normalize_with_patterns does nothing' do
57
+ unchanging = stub :unchanging
58
+ @tokenizer.remove_after_normalizing_illegals unchanging
59
+ end
60
+ end
61
+ context "with removes_characters_after_splitting called" do
62
+ before(:each) do
63
+ @tokenizer.removes_characters_after_splitting(/[afo]/)
64
+ end
65
+ it "has remove_after_normalizing_illegals" do
66
+ lambda { @tokenizer.remove_after_normalizing_illegals('abcdefghijklmnop') }.should_not raise_error
67
+ end
68
+ it "removes illegal characters" do
69
+ @tokenizer.remove_after_normalizing_illegals('abcdefghijklmnop').should == 'bcdeghijklmnp'
70
+ end
109
71
  end
110
72
  end
111
- context "with removes_characters called" do
112
- before(:each) do
113
- @tokenizer.removes_characters(/[afo]/)
114
- end
115
- it "has remove_illegals" do
116
- lambda { @tokenizer.remove_illegals('abcdefghijklmnop') }.should_not raise_error
117
- end
118
- it "removes illegal characters" do
119
- @tokenizer.remove_illegals('abcdefghijklmnop').should == 'bcdeghijklmnp'
73
+
74
+ describe "normalizes_words" do
75
+ context "without normalizes_words called" do
76
+ it "has normalize_with_patterns" do
77
+ lambda { @tokenizer.normalize_with_patterns('any') }.should_not raise_error
78
+ end
79
+ it 'should define a method normalize_with_patterns does nothing' do
80
+ unchanging = stub :unchanging
81
+ @tokenizer.normalize_with_patterns(unchanging).should == unchanging
82
+ end
83
+ end
84
+ context "with normalizes_words called" do
85
+ before(:each) do
86
+ @tokenizer.normalizes_words([
87
+ [/st\./, 'sankt'],
88
+ [/stras?s?e?/, 'str']
89
+ ])
90
+ end
91
+ it "has normalize_with_patterns" do
92
+ lambda { @tokenizer.normalize_with_patterns('a b/c.d') }.should_not raise_error
93
+ end
94
+ it "normalizes, but just the first one" do
95
+ @tokenizer.normalize_with_patterns('st. wegstrasse').should == 'sankt wegstrasse'
96
+ end
120
97
  end
121
98
  end
122
- end
123
-
124
- describe 'stopwords' do
125
- context 'without stopwords given' do
126
- it 'should define a method remove_stopwords' do
127
- lambda { @tokenizer.remove_stopwords('from this text') }.should_not raise_error
128
- end
129
- it 'should define a method remove_stopwords that does nothing' do
130
- @tokenizer.remove_stopwords('from this text').should == 'from this text'
131
- end
132
- it 'should define a method remove_non_single_stopwords' do
133
- lambda { @tokenizer.remove_non_single_stopwords('from this text') }.should_not raise_error
134
-
99
+
100
+ describe "splits_text_on" do
101
+ context "without splits_text_on called" do
102
+ it "has split" do
103
+ lambda { @tokenizer.split('any') }.should_not raise_error
104
+ end
105
+ it 'should define a method split that splits by default on \s' do
106
+ @tokenizer.split('a b/c.d').should == ['a', 'b/c.d']
107
+ end
108
+ it 'splits text on /\s/ by default' do
109
+ @tokenizer.split('this is a test').should == ['this', 'is', 'a', 'test']
110
+ end
111
+ end
112
+ context "with removes_characters called" do
113
+ before(:each) do
114
+ @tokenizer.splits_text_on(/[\s\.\/]/)
115
+ end
116
+ it "has split" do
117
+ lambda { @tokenizer.split('a b/c.d') }.should_not raise_error
118
+ end
119
+ it "removes illegal characters" do
120
+ @tokenizer.split('a b/c.d').should == ['a','b','c','d']
121
+ end
135
122
  end
136
123
  end
137
- context 'with stopwords given' do
138
- before(:each) do
139
- @tokenizer.stopwords(/r|e/)
140
- end
141
- it 'should define a method remove_stopwords' do
142
- lambda { @tokenizer.remove_stopwords('from this text') }.should_not raise_error
143
- end
144
- it 'should define a method stopwords that removes stopwords' do
145
- @tokenizer.remove_stopwords('from this text').should == 'fom this txt'
146
- end
147
- it 'should define a method remove_non_single_stopwords' do
148
- lambda { @tokenizer.remove_non_single_stopwords('from this text') }.should_not raise_error
149
- end
150
- it 'should define a method remove_non_single_stopwords that removes non-single stopwords' do
151
- @tokenizer.remove_non_single_stopwords('rerere rerere').should == ' '
152
- end
153
- it 'should define a method remove_non_single_stopwords that does not single stopwords' do
154
- @tokenizer.remove_non_single_stopwords('rerere').should == 'rerere'
124
+
125
+ describe "removes_characters" do
126
+ context "without removes_characters called" do
127
+ it "has remove_illegals" do
128
+ lambda { @tokenizer.remove_illegals('any') }.should_not raise_error
129
+ end
130
+ it 'should define a method remove_illegals that does nothing' do
131
+ unchanging = stub :unchanging
132
+ @tokenizer.remove_illegals unchanging
133
+ end
134
+ end
135
+ context "with removes_characters called" do
136
+ before(:each) do
137
+ @tokenizer.removes_characters(/[afo]/)
138
+ end
139
+ it "has remove_illegals" do
140
+ lambda { @tokenizer.remove_illegals('abcdefghijklmnop') }.should_not raise_error
141
+ end
142
+ it "removes illegal characters" do
143
+ @tokenizer.remove_illegals('abcdefghijklmnop').should == 'bcdeghijklmnp'
144
+ end
155
145
  end
156
146
  end
157
- context 'error case' do
158
- before(:each) do
159
- @tokenizer.stopwords(/any/)
160
- end
161
- it 'should not remove non-single stopwords with a star' do
162
- @tokenizer.remove_non_single_stopwords('a*').should == 'a*'
163
- end
164
- it 'should not remove non-single stopwords with a tilde' do
165
- @tokenizer.remove_non_single_stopwords('a~').should == 'a~'
147
+
148
+ describe 'stopwords' do
149
+ context 'without stopwords given' do
150
+ it 'should define a method remove_stopwords' do
151
+ lambda { @tokenizer.remove_stopwords('from this text') }.should_not raise_error
152
+ end
153
+ it 'should define a method remove_stopwords that does nothing' do
154
+ @tokenizer.remove_stopwords('from this text').should == 'from this text'
155
+ end
156
+ it 'should define a method remove_non_single_stopwords' do
157
+ lambda { @tokenizer.remove_non_single_stopwords('from this text') }.should_not raise_error
158
+
159
+ end
160
+ end
161
+ context 'with stopwords given' do
162
+ before(:each) do
163
+ @tokenizer.stopwords(/r|e/)
164
+ end
165
+ it 'should define a method remove_stopwords' do
166
+ lambda { @tokenizer.remove_stopwords('from this text') }.should_not raise_error
167
+ end
168
+ it 'should define a method stopwords that removes stopwords' do
169
+ @tokenizer.remove_stopwords('from this text').should == 'fom this txt'
170
+ end
171
+ it 'should define a method remove_non_single_stopwords' do
172
+ lambda { @tokenizer.remove_non_single_stopwords('from this text') }.should_not raise_error
173
+ end
174
+ it 'should define a method remove_non_single_stopwords that removes non-single stopwords' do
175
+ @tokenizer.remove_non_single_stopwords('rerere rerere').should == ' '
176
+ end
177
+ it 'should define a method remove_non_single_stopwords that does not single stopwords' do
178
+ @tokenizer.remove_non_single_stopwords('rerere').should == 'rerere'
179
+ end
180
+ end
181
+ context 'error case' do
182
+ before(:each) do
183
+ @tokenizer.stopwords(/any/)
184
+ end
185
+ it 'should not remove non-single stopwords with a star' do
186
+ @tokenizer.remove_non_single_stopwords('a*').should == 'a*'
187
+ end
188
+ it 'should not remove non-single stopwords with a tilde' do
189
+ @tokenizer.remove_non_single_stopwords('a~').should == 'a~'
190
+ end
166
191
  end
167
192
  end
168
193
  end
194
+
169
195
  end
metadata CHANGED
@@ -5,8 +5,8 @@ version: !ruby/object:Gem::Version
5
5
  segments:
6
6
  - 1
7
7
  - 2
8
- - 3
9
- version: 1.2.3
8
+ - 4
9
+ version: 1.2.4
10
10
  platform: ruby
11
11
  authors:
12
12
  - Florian Hanke
@@ -14,7 +14,7 @@ autorequire:
14
14
  bindir: bin
15
15
  cert_chain: []
16
16
 
17
- date: 2010-12-28 00:00:00 +01:00
17
+ date: 2010-12-30 00:00:00 +01:00
18
18
  default_executable: picky
19
19
  dependencies:
20
20
  - !ruby/object:Gem::Dependency
@@ -73,8 +73,6 @@ files:
73
73
  - lib/picky/extensions/module.rb
74
74
  - lib/picky/extensions/object.rb
75
75
  - lib/picky/extensions/symbol.rb
76
- - lib/picky/helpers/cache.rb
77
- - lib/picky/helpers/gc.rb
78
76
  - lib/picky/helpers/measuring.rb
79
77
  - lib/picky/index/bundle.rb
80
78
  - lib/picky/index/file/basic.rb
@@ -169,8 +167,6 @@ files:
169
167
  - spec/lib/extensions/module_spec.rb
170
168
  - spec/lib/extensions/object_spec.rb
171
169
  - spec/lib/extensions/symbol_spec.rb
172
- - spec/lib/helpers/cache_spec.rb
173
- - spec/lib/helpers/gc_spec.rb
174
170
  - spec/lib/helpers/measuring_spec.rb
175
171
  - spec/lib/index/bundle_spec.rb
176
172
  - spec/lib/index/file/basic_spec.rb
@@ -191,6 +187,7 @@ files:
191
187
  - spec/lib/indexing/bundle_spec.rb
192
188
  - spec/lib/indexing/category_spec.rb
193
189
  - spec/lib/indexing/index_spec.rb
190
+ - spec/lib/indexing/indexes_spec.rb
194
191
  - spec/lib/loader_spec.rb
195
192
  - spec/lib/loggers/search_spec.rb
196
193
  - spec/lib/query/allocation_spec.rb
@@ -276,8 +273,6 @@ test_files:
276
273
  - spec/lib/extensions/module_spec.rb
277
274
  - spec/lib/extensions/object_spec.rb
278
275
  - spec/lib/extensions/symbol_spec.rb
279
- - spec/lib/helpers/cache_spec.rb
280
- - spec/lib/helpers/gc_spec.rb
281
276
  - spec/lib/helpers/measuring_spec.rb
282
277
  - spec/lib/index/bundle_spec.rb
283
278
  - spec/lib/index/file/basic_spec.rb
@@ -298,6 +293,7 @@ test_files:
298
293
  - spec/lib/indexing/bundle_spec.rb
299
294
  - spec/lib/indexing/category_spec.rb
300
295
  - spec/lib/indexing/index_spec.rb
296
+ - spec/lib/indexing/indexes_spec.rb
301
297
  - spec/lib/loader_spec.rb
302
298
  - spec/lib/loggers/search_spec.rb
303
299
  - spec/lib/query/allocation_spec.rb
@@ -1,25 +0,0 @@
1
- # TODO Not used anymore? Remove.
2
- #
3
- module Helpers # :nodoc:all
4
-
5
- module Cache
6
- # This is a simple cache.
7
- # The store needs to be able to answer to [] and []=.
8
- #
9
- def cached store, key, &block
10
- # Get cached result
11
- #
12
- results = store[key]
13
- return results if results
14
-
15
- results = lambda(&block).call
16
-
17
- # Store results
18
- #
19
- store[key] = results
20
-
21
- results
22
- end
23
- end
24
-
25
- end
@@ -1,13 +0,0 @@
1
- # TODO Not used anymore? Remove.
2
- #
3
- module Helpers
4
- module GC
5
- def gc_disabled &block
6
- ::GC.disable
7
- block.call
8
- ::GC.enable
9
- ::GC.start
10
- end
11
- alias disabled gc_disabled
12
- end
13
- end
@@ -1,35 +0,0 @@
1
- require 'spec_helper'
2
-
3
- describe Helpers::Cache do
4
- include Helpers::Cache
5
-
6
- describe "#cached" do
7
- attr_reader :store, :key
8
- before(:each) do
9
- @store = {}
10
- @key = 'some key'
11
- end
12
- describe "not yet cached" do
13
- it "should cache" do
14
- store.should_receive(:[]=).once.with(@key, 'value')
15
- cached @store, @key do
16
- 'value'
17
- end
18
- end
19
- end
20
- describe "already cached" do
21
- before(:each) do
22
- cached @store, @key do
23
- 'value'
24
- end
25
- end
26
- it "should not cache" do
27
- store.should_receive(:[]=).never
28
- cached @store, @key do
29
- 'value'
30
- end
31
- end
32
- end
33
- end
34
-
35
- end
@@ -1,71 +0,0 @@
1
- require 'spec_helper'
2
-
3
- describe Helpers::GC do
4
- include Helpers::GC
5
-
6
- before(:each) do
7
- ::GC.stub!(:disable)
8
- ::GC.stub!(:enable)
9
- ::GC.stub!(:start)
10
- end
11
-
12
- describe "block calling" do
13
- it 'should call the block' do
14
- inside_block = mock :inside
15
- inside_block.should_receive(:call).once
16
-
17
- disabled do
18
- inside_block.call
19
- end
20
- end
21
- it 'should call the block' do
22
- inside_block = mock :inside
23
- inside_block.should_receive(:call).once
24
-
25
- gc_disabled do
26
- inside_block.call
27
- end
28
- end
29
- end
30
-
31
- describe "gc calls" do
32
- after(:each) do
33
- disabled {}
34
- end
35
- it 'should disable the garbage collector' do
36
- ::GC.should_receive(:disable)
37
- end
38
- it 'should enable the garbage collector' do
39
- ::GC.should_receive(:enable)
40
- end
41
- it 'should start the garbage collector' do
42
- ::GC.should_receive(:start)
43
- end
44
- it 'should disable the gc, call the block, enable the gc and start the gc' do
45
- ::GC.should_receive(:disable).ordered
46
- ::GC.should_receive(:enable).ordered
47
- ::GC.should_receive(:start).ordered
48
- end
49
- end
50
-
51
- describe "gc calls" do
52
- after(:each) do
53
- gc_disabled {}
54
- end
55
- it 'should disable the garbage collector' do
56
- ::GC.should_receive(:disable)
57
- end
58
- it 'should enable the garbage collector' do
59
- ::GC.should_receive(:enable)
60
- end
61
- it 'should start the garbage collector' do
62
- ::GC.should_receive(:start)
63
- end
64
- it 'should disable the gc, call the block, enable the gc and start the gc' do
65
- ::GC.should_receive(:disable).ordered
66
- ::GC.should_receive(:enable).ordered
67
- ::GC.should_receive(:start).ordered
68
- end
69
- end
70
-
71
- end