picky 1.2.3 → 1.2.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -179,7 +179,7 @@ class Application
179
179
  # * source: The source the data comes from. See Sources::Base. # TODO Sources (all).
180
180
  #
181
181
  # Options:
182
- # * result_identifier: # TODO Rename.
182
+ # * result_identifier: Use if you'd like a different identifier/name in the results JSON than the name of the index.
183
183
  #
184
184
  def index name, source, options = {}
185
185
  IndexAPI.new name, source, options
data/lib/picky/cli.rb CHANGED
@@ -9,10 +9,13 @@ module Picky
9
9
  # Note: By default (i.e. when no command is given), help is displayed.
10
10
  #
11
11
  def execute selector = nil, *args
12
- executor_class, *params = selector && @@mapping[selector.to_sym] || Help
12
+ executor_class, *params = executor_class_for selector
13
13
  executor = executor_class.new
14
14
  executor.execute selector, args, params
15
15
  end
16
+ def executor_class_for selector = nil
17
+ selector && @@mapping[selector.to_sym] || [Help]
18
+ end
16
19
 
17
20
  class Base
18
21
  def usage name, params
@@ -63,12 +66,10 @@ module Picky
63
66
 
64
67
  # Maps commands to the other gem's command.
65
68
  #
66
- # TODO Add optional params.
67
- #
68
69
  @@mapping = {
69
- :generate => [Generate, 'thing_to_generate: e.g. "unicorn_server"', :parameters],
70
+ :generate => [Generate, 'sinatra_client | unicorn_server | empty_unicorn_server', 'app_directory_name (optional)'],
70
71
  :help => [Help],
71
- :stats => [Statistics, 'logfile_to_use: e.g. log/search.log', 'port (optional)']
72
+ :stats => [Statistics, 'logfile, e.g. log/search.log', 'port (optional)']
72
73
  }
73
74
  def self.mapping
74
75
  @@mapping
@@ -35,8 +35,6 @@ class Array # :nodoc:all
35
35
  def sort_by_levenshtein! from
36
36
  from = from.to_s
37
37
  sort! do |this, that|
38
- # TODO Cache for speed?
39
- #
40
38
  Text::Levenshtein.distance(this.to_s, from) <=> Text::Levenshtein.distance(that.to_s, from)
41
39
  end
42
40
  end
@@ -60,13 +60,11 @@ module Indexed
60
60
 
61
61
  # Returns possible Combinations for the token.
62
62
  #
63
- # The categories param is an optimization.
63
+ # The preselected_categories param is an optimization.
64
64
  #
65
65
  # TODO Return [RemovedCategory(token, nil)]
66
66
  # If the search is ...
67
67
  #
68
- # TODO Make categories also a collection class.
69
- #
70
68
  # TODO Return [] if not ok, nil if needs to be removed?
71
69
  # Somewhat inelegant, but…
72
70
  #
@@ -65,7 +65,7 @@ module Indexed
65
65
  # Returns the right index bundle for this token.
66
66
  #
67
67
  def bundle_for token
68
- token.partial? ? partial : exact
68
+ token.partial?? partial : exact
69
69
  end
70
70
 
71
71
  # The partial strategy defines whether to really use the partial index.
@@ -18,7 +18,7 @@ module Indexed
18
18
  @categories = Categories.new ignore_unassigned_tokens: ignore_unassigned_tokens
19
19
  end
20
20
 
21
- # TODO Spec. Doc.
21
+ # TODO Doc.
22
22
  #
23
23
  def define_category category_name, options = {}
24
24
  new_category = Category.new category_name, self, options
@@ -26,7 +26,9 @@ module Indexed
26
26
  new_category
27
27
  end
28
28
 
29
+ # Return the possible combinations for this token.
29
30
  #
31
+ # A combination is a tuple <token, index_bundle>.
30
32
  #
31
33
  def possible_combinations token
32
34
  categories.possible_combinations_for token
@@ -2,7 +2,7 @@
2
2
  #
3
3
  module Indexed
4
4
 
5
- # FIXME and spec
5
+ # TODO Spec
6
6
  #
7
7
  module Wrappers
8
8
 
@@ -29,7 +29,7 @@ class IndexesAPI # :nodoc:all
29
29
  self.index_mapping[index.name] = index
30
30
 
31
31
  @indexing.register index.indexing
32
- @indexed.register index.indexed # TODO Even necessary?
32
+ @indexed.register index.indexed
33
33
  end
34
34
 
35
35
  def [] name
@@ -17,7 +17,7 @@ module Indexing
17
17
  clear
18
18
  end
19
19
 
20
- # TODO Spec.
20
+ # TODO Doc.
21
21
  #
22
22
  def clear
23
23
  @indexes = []
data/lib/picky/loader.rb CHANGED
@@ -100,8 +100,6 @@ module Loader # :nodoc:all
100
100
 
101
101
  # Requiring Helpers
102
102
  #
103
- load_relative 'helpers/gc'
104
- load_relative 'helpers/cache'
105
103
  load_relative 'helpers/measuring'
106
104
 
107
105
  # Character Substituters
@@ -63,7 +63,6 @@ module Query
63
63
  # But still TODO try for a faster one.
64
64
  #
65
65
  def random_ids amount = 1
66
- # TODO can there be no @allocations???
67
66
  return [] if @allocations.empty?
68
67
  ids = @allocations.first.ids
69
68
  indexes = Array.new(ids.size) { |id| id }.sort_by { rand }
data/lib/picky/signals.rb CHANGED
@@ -1,11 +1,8 @@
1
1
  # TODO Cleanup and move to project_prototype.
2
2
  #
3
3
  # Signal.trap 'USR1' do
4
- # Indexed.reload
4
+ # Indexes.reload
5
5
  # end
6
6
  # Signal.trap 'USR2' do
7
7
  # Loader.reload
8
- # end
9
- # Signal.trap 'INT' do
10
- # exit!
11
8
  # end
@@ -82,6 +82,17 @@ module Tokenizers # :nodoc:all
82
82
  substituter?? substituter.substitute(text) : text
83
83
  end
84
84
 
85
+ # Reject tokens after tokenizing based on the given criteria.
86
+ #
87
+ # Note: Currently only for indexing. TODO Redesign and write for both!
88
+ #
89
+ def reject_token_if &condition
90
+ @reject_condition = condition
91
+ end
92
+ def reject tokens
93
+ tokens.reject! &@reject_condition
94
+ end
95
+
85
96
 
86
97
  # Returns a number of tokens, generated from the given text.
87
98
  #
@@ -111,6 +122,7 @@ module Tokenizers # :nodoc:all
111
122
  # Defaults.
112
123
  #
113
124
  splits_text_on options[:splits_text_on] || /\s/
125
+ reject_token_if &(options[:reject_token_if] || :blank?)
114
126
  end
115
127
 
116
128
  # Hooks.
@@ -125,15 +137,10 @@ module Tokenizers # :nodoc:all
125
137
  # Postprocessing.
126
138
  #
127
139
  def process tokens
128
- reject tokens # Reject any tokens that don't meet criteria
140
+ reject tokens # Reject any tokens that don't meet criteria
129
141
  tokens
130
142
  end
131
143
 
132
- # Rejects blank tokens.
133
- #
134
- def reject tokens
135
- tokens.reject! &:blank?
136
- end
137
144
  # Converts words into real tokens.
138
145
  #
139
146
  def tokens_for words
@@ -39,8 +39,6 @@ module Tokenizers
39
39
  # 1. Split the text into words.
40
40
  # 2. Normalize each word.
41
41
  #
42
- # TODO Rename into wordize? Or somesuch?
43
- #
44
42
  def pretokenize text
45
43
  words = split text
46
44
  words.collect! do |word|
@@ -56,16 +54,5 @@ module Tokenizers
56
54
  symbolize text
57
55
  end
58
56
 
59
- # Rejects tokens if they are too short (or blank).
60
- #
61
- # Override in subclasses to redefine behaviour.
62
- #
63
- # TODO TODO TODO Make parametrizable! reject { |token| }
64
- #
65
- def reject tokens
66
- tokens.reject! &:blank?
67
- # tokens.reject! { |token| token.to_s.size < 2 }
68
- end
69
-
70
57
  end
71
58
  end
@@ -57,7 +57,7 @@ module Tokenizers
57
57
  # TODO Perhaps move to Normalizer?
58
58
  #
59
59
  def normalize text
60
- text = substitute_characters text # Substitute special characters TODO Move to subclass
60
+ text = substitute_characters text # Substitute special characters
61
61
  text.downcase! # Downcase all text
62
62
  normalize_with_patterns text # normalize
63
63
  text.to_sym # symbolize
@@ -24,13 +24,15 @@ describe Application do
24
24
  end
25
25
  it "should run ok" do
26
26
  lambda {
27
- # TODO Add all possible cases.
27
+ # Here we just test if the API can be called ok.
28
28
  #
29
29
  class TestApplication < Application
30
30
  default_indexing removes_characters: /[^a-zA-Z0-9\s\/\-\"\&\.]/,
31
31
  stopwords: /\b(and|the|of|it|in|for)\b/,
32
32
  splits_text_on: /[\s\/\-\"\&\.]/,
33
- removes_characters_after_splitting: /[\.]/
33
+ removes_characters_after_splitting: /[\.]/,
34
+ normalizes_words: [[/\$(\w+)/i, '\1 dollars']],
35
+ reject_token_if: lambda { |token| token.blank? || token == :amistad }
34
36
 
35
37
  default_querying removes_characters: /[^a-zA-Z0-9äöü\s\/\-\,\&\"\~\*\:]/,
36
38
  stopwords: /\b(and|the|of|it|in|for)\b/,
@@ -48,6 +50,11 @@ describe Application do
48
50
  books_index.define_category :isbn,
49
51
  partial: Partial::None.new # Partially searching on an ISBN makes not much sense.
50
52
 
53
+ geo_index = index :geo, Sources::CSV.new(:location, :north, :east, file: 'data/ch.csv', col_sep: ',')
54
+ geo_index.define_category :location
55
+ geo_index.define_ranged_category(:north1, 1, precision: 3, from: :north)
56
+ .define_ranged_category(:east1, 1, precision: 3, from: :east)
57
+
51
58
  full = Query::Full.new books_index
52
59
  live = Query::Live.new books_index
53
60
 
data/spec/lib/cli_spec.rb CHANGED
@@ -10,6 +10,26 @@ require File.expand_path '../../../lib/picky/cli', __FILE__
10
10
  #
11
11
  describe Picky::CLI do
12
12
 
13
+ describe 'instance' do
14
+ before(:each) do
15
+ @cli = Picky::CLI.new
16
+ end
17
+ describe 'executor_class_for' do
18
+ it 'returns Help by default' do
19
+ @cli.executor_class_for.should == [Picky::CLI::Help]
20
+ end
21
+ it 'returns Generator for generate' do
22
+ @cli.executor_class_for(:generate).should == [Picky::CLI::Generate, "sinatra_client | unicorn_server | empty_unicorn_server", "app_directory_name (optional)"]
23
+ end
24
+ it 'returns Help for help' do
25
+ @cli.executor_class_for(:help).should == [Picky::CLI::Help]
26
+ end
27
+ it 'returns Statistics for stats' do
28
+ @cli.executor_class_for(:stats).should == [Picky::CLI::Statistics, "logfile, e.g. log/search.log", "port (optional)"]
29
+ end
30
+ end
31
+ end
32
+
13
33
  describe Picky::CLI::Base do
14
34
  before(:each) do
15
35
  @executor = Picky::CLI::Base.new
@@ -24,7 +24,7 @@ describe 'Configuration::Index' do
24
24
  @config.index_path(:some_bundle, :some_name).should_not equal(@config.index_path(:some_bundle, :some_name))
25
25
  end
26
26
  it "returns the right thing" do
27
- @config.index_path(:some_bundle, :some_name).should == 'some/search/root/index/test/some_index/some_category_some_bundle_some_name'
27
+ @config.index_path(:some_bundle, :some_name).should == 'spec/test_directory/index/test/some_index/some_category_some_bundle_some_name'
28
28
  end
29
29
  end
30
30
 
@@ -50,7 +50,7 @@ describe 'Configuration::Index' do
50
50
  @config.index_root.should equal(@config.index_root)
51
51
  end
52
52
  it "returns the right thing" do
53
- @config.index_root.should == 'some/search/root/index'
53
+ @config.index_root.should == 'spec/test_directory/index'
54
54
  end
55
55
  end
56
56
  describe "index_directory" do
@@ -58,7 +58,7 @@ describe 'Configuration::Index' do
58
58
  @config.index_directory.should equal(@config.index_directory)
59
59
  end
60
60
  it "returns the right thing" do
61
- @config.index_directory.should == 'some/search/root/index/test/some_index'
61
+ @config.index_directory.should == 'spec/test_directory/index/test/some_index'
62
62
  end
63
63
  end
64
64
  describe "prepared_index_path" do
@@ -66,12 +66,12 @@ describe 'Configuration::Index' do
66
66
  @config.prepared_index_path.should equal(@config.prepared_index_path)
67
67
  end
68
68
  it "returns the right thing" do
69
- @config.prepared_index_path.should == 'some/search/root/index/test/some_index/prepared_some_category_index'
69
+ @config.prepared_index_path.should == 'spec/test_directory/index/test/some_index/prepared_some_category_index'
70
70
  end
71
71
  end
72
72
  describe "prepare_index_directory" do
73
73
  it "calls the right thing" do
74
- FileUtils.should_receive(:mkdir_p).once.with 'some/search/root/index/test/some_index'
74
+ FileUtils.should_receive(:mkdir_p).once.with 'spec/test_directory/index/test/some_index'
75
75
 
76
76
  @config.prepare_index_directory
77
77
  end
@@ -64,7 +64,7 @@ describe Index::Files do
64
64
  it "uses the right file" do
65
65
  Yajl::Parser.stub! :parse
66
66
 
67
- File.should_receive(:open).once.with 'some/search/root/index/test/some_index/some_category_some_name_index.json', 'r'
67
+ File.should_receive(:open).once.with 'spec/test_directory/index/test/some_index/some_category_some_name_index.json', 'r'
68
68
 
69
69
  @files.load_index
70
70
  end
@@ -73,7 +73,7 @@ describe Index::Files do
73
73
  it "uses the right file" do
74
74
  Yajl::Parser.stub! :parse
75
75
 
76
- File.should_receive(:open).once.with 'some/search/root/index/test/some_index/some_category_some_name_weights.json', 'r'
76
+ File.should_receive(:open).once.with 'spec/test_directory/index/test/some_index/some_category_some_name_weights.json', 'r'
77
77
 
78
78
  @files.load_weights
79
79
  end
@@ -82,7 +82,7 @@ describe Index::Files do
82
82
  it "uses the right file" do
83
83
  Marshal.stub! :load
84
84
 
85
- File.should_receive(:open).once.with 'some/search/root/index/test/some_index/some_category_some_name_similarity.dump', 'r:binary'
85
+ File.should_receive(:open).once.with 'spec/test_directory/index/test/some_index/some_category_some_name_similarity.dump', 'r:binary'
86
86
 
87
87
  @files.load_similarity
88
88
  end
@@ -91,7 +91,7 @@ describe Index::Files do
91
91
  it "uses the right file" do
92
92
  Yajl::Parser.stub! :parse
93
93
 
94
- File.should_receive(:open).once.with 'some/search/root/index/test/some_index/some_category_some_name_configuration.json', 'r'
94
+ File.should_receive(:open).once.with 'spec/test_directory/index/test/some_index/some_category_some_name_configuration.json', 'r'
95
95
 
96
96
  @files.load_configuration
97
97
  end
@@ -60,7 +60,7 @@ describe Indexed::Bundle do
60
60
  it "uses the right file" do
61
61
  Yajl::Parser.stub! :parse
62
62
 
63
- File.should_receive(:open).once.with 'some/search/root/index/test/some_index/some_category_some_name_index.json', 'r'
63
+ File.should_receive(:open).once.with 'spec/test_directory/index/test/some_index/some_category_some_name_index.json', 'r'
64
64
 
65
65
  @bundle.load_index
66
66
  end
@@ -69,7 +69,7 @@ describe Indexed::Bundle do
69
69
  it "uses the right file" do
70
70
  Yajl::Parser.stub! :parse
71
71
 
72
- File.should_receive(:open).once.with 'some/search/root/index/test/some_index/some_category_some_name_weights.json', 'r'
72
+ File.should_receive(:open).once.with 'spec/test_directory/index/test/some_index/some_category_some_name_weights.json', 'r'
73
73
 
74
74
  @bundle.load_weights
75
75
  end
@@ -78,7 +78,7 @@ describe Indexed::Bundle do
78
78
  it "uses the right file" do
79
79
  Marshal.stub! :load
80
80
 
81
- File.should_receive(:open).once.with 'some/search/root/index/test/some_index/some_category_some_name_similarity.dump', 'r:binary'
81
+ File.should_receive(:open).once.with 'spec/test_directory/index/test/some_index/some_category_some_name_similarity.dump', 'r:binary'
82
82
 
83
83
  @bundle.load_similarity
84
84
  end
@@ -87,7 +87,7 @@ describe Indexed::Bundle do
87
87
  it "uses the right file" do
88
88
  Yajl::Parser.stub! :parse
89
89
 
90
- File.should_receive(:open).once.with 'some/search/root/index/test/some_index/some_category_some_name_configuration.json', 'r'
90
+ File.should_receive(:open).once.with 'spec/test_directory/index/test/some_index/some_category_some_name_configuration.json', 'r'
91
91
 
92
92
  @bundle.load_configuration
93
93
  end
@@ -2,7 +2,24 @@ require 'spec_helper'
2
2
 
3
3
  describe Indexed::Index do
4
4
 
5
- context "with categories" do
5
+ context 'without stubbed categories' do
6
+ before(:each) do
7
+ @index = Indexed::Index.new :some_index_name
8
+ end
9
+
10
+ describe 'define_category' do
11
+ it 'adds a new category to the categories' do
12
+ @index.define_category :some_category_name
13
+
14
+ @index.categories.categories.size.should == 1
15
+ end
16
+ it 'returns the new category' do
17
+ @index.define_category(:some_category_name).should be_kind_of(Indexed::Category)
18
+ end
19
+ end
20
+ end
21
+
22
+ context "with stubbed categories" do
6
23
  before(:each) do
7
24
  @categories = stub :categories
8
25
 
@@ -12,7 +29,7 @@ describe Indexed::Index do
12
29
 
13
30
  @index.stub! :categories => @categories
14
31
  end
15
-
32
+
16
33
  describe "load_from_cache" do
17
34
  it "delegates to each category" do
18
35
  @categories.should_receive(:load_from_cache).once.with
@@ -0,0 +1,36 @@
1
+ require 'spec_helper'
2
+
3
+ describe Indexing::Indexes do
4
+
5
+ before(:each) do
6
+ @indexes = Indexing::Indexes.new
7
+ end
8
+
9
+ describe 'indexes' do
10
+ it 'exists' do
11
+ lambda { @indexes.indexes }.should_not raise_error
12
+ end
13
+ it 'is empty by default' do
14
+ @indexes.indexes.should be_empty
15
+ end
16
+ end
17
+
18
+ describe 'clear' do
19
+ it 'clears the indexes' do
20
+ @indexes.register :some_index
21
+
22
+ @indexes.clear
23
+
24
+ @indexes.indexes.should == []
25
+ end
26
+ end
27
+
28
+ describe 'register' do
29
+ it 'adds the given index to the indexes' do
30
+ @indexes.register :some_index
31
+
32
+ @indexes.indexes.should == [:some_index]
33
+ end
34
+ end
35
+
36
+ end
@@ -53,7 +53,7 @@ describe Sources::DB do
53
53
  end
54
54
  context "with file" do
55
55
  it "opens the config file relative to root" do
56
- File.should_receive(:open).once.with 'some/search/root/app/bla.yml'
56
+ File.should_receive(:open).once.with 'spec/test_directory/app/bla.yml'
57
57
 
58
58
  @source.configure :file => 'app/bla.yml'
59
59
  end
@@ -3,167 +3,193 @@
3
3
  require 'spec_helper'
4
4
 
5
5
  describe Tokenizers::Base do
6
-
7
- before(:each) do
8
- @tokenizer = Tokenizers::Base.new
9
- end
10
6
 
11
- describe "substitute(s)_characters*" do
12
- it "doesn't substitute if there is no substituter" do
13
- @tokenizer.substitute_characters('abcdefghijklmnopqrstuvwxyzäöü').should == 'abcdefghijklmnopqrstuvwxyzäöü'
14
- end
15
- it "uses the substituter to replace characters" do
16
- @tokenizer.substitutes_characters_with CharacterSubstituters::WestEuropean.new
17
-
18
- @tokenizer.substitute_characters('abcdefghijklmnopqrstuvwxyzäöü').should == 'abcdefghijklmnopqrstuvwxyzaeoeue'
7
+ context 'with special instance' do
8
+ before(:each) do
9
+ @tokenizer = Tokenizers::Base.new reject_token_if: lambda { |token| token.to_s.length < 2 || token == :hello }
19
10
  end
20
- it "uses the european substituter as default" do
21
- @tokenizer.substitutes_characters_with
22
-
23
- @tokenizer.substitute_characters('abcdefghijklmnopqrstuvwxyzäöü').should == 'abcdefghijklmnopqrstuvwxyzaeoeue'
24
- end
25
- end
26
-
27
- describe "removes_characters_after_splitting" do
28
- context "without removes_characters_after_splitting called" do
29
- it "has remove_after_normalizing_illegals" do
30
- lambda { @tokenizer.remove_after_normalizing_illegals('any') }.should_not raise_error
31
- end
32
- it 'should define a remove_after_normalizing_illegals normalize_with_patterns does nothing' do
33
- unchanging = stub :unchanging
34
- @tokenizer.remove_after_normalizing_illegals unchanging
35
- end
11
+ it 'rejects tokens with length < 2' do
12
+ @tokenizer.reject([:'', :a, :ab, :abc]).should == [:ab, :abc]
36
13
  end
37
- context "with removes_characters_after_splitting called" do
38
- before(:each) do
39
- @tokenizer.removes_characters_after_splitting(/[afo]/)
40
- end
41
- it "has remove_after_normalizing_illegals" do
42
- lambda { @tokenizer.remove_after_normalizing_illegals('abcdefghijklmnop') }.should_not raise_error
43
- end
44
- it "removes illegal characters" do
45
- @tokenizer.remove_after_normalizing_illegals('abcdefghijklmnop').should == 'bcdeghijklmnp'
46
- end
14
+ it 'rejects tokens that are called :hello' do
15
+ @tokenizer.reject([:hel, :hell, :hello]).should == [:hel, :hell]
47
16
  end
48
17
  end
49
18
 
50
- describe "normalizes_words" do
51
- context "without normalizes_words called" do
52
- it "has normalize_with_patterns" do
53
- lambda { @tokenizer.normalize_with_patterns('any') }.should_not raise_error
54
- end
55
- it 'should define a method normalize_with_patterns does nothing' do
56
- unchanging = stub :unchanging
57
- @tokenizer.normalize_with_patterns(unchanging).should == unchanging
58
- end
59
- end
60
- context "with normalizes_words called" do
61
- before(:each) do
62
- @tokenizer.normalizes_words([
63
- [/st\./, 'sankt'],
64
- [/stras?s?e?/, 'str']
65
- ])
66
- end
67
- it "has normalize_with_patterns" do
68
- lambda { @tokenizer.normalize_with_patterns('a b/c.d') }.should_not raise_error
69
- end
70
- it "normalizes, but just the first one" do
71
- @tokenizer.normalize_with_patterns('st. wegstrasse').should == 'sankt wegstrasse'
72
- end
19
+ context 'with normal instance' do
20
+ before(:each) do
21
+ @tokenizer = Tokenizers::Base.new
73
22
  end
74
- end
75
-
76
- describe "splits_text_on" do
77
- context "without splits_text_on called" do
78
- it "has split" do
79
- lambda { @tokenizer.split('any') }.should_not raise_error
80
- end
81
- it 'should define a method split that splits by default on \s' do
82
- @tokenizer.split('a b/c.d').should == ['a', 'b/c.d']
23
+
24
+ describe 'reject_token_if' do
25
+ it 'rejects empty tokens by default' do
26
+ @tokenizer.reject(['a', nil, '', 'b']).should == ['a', 'b']
83
27
  end
84
- it 'splits text on /\s/ by default' do
85
- @tokenizer.split('this is a test').should == ['this', 'is', 'a', 'test']
28
+ it 'rejects tokens based on the given rejection criteria if set' do
29
+ @tokenizer.reject_token_if &:nil?
30
+
31
+ @tokenizer.reject(['a', nil, '', 'b']).should == ['a', '', 'b']
86
32
  end
87
33
  end
88
- context "with removes_characters called" do
89
- before(:each) do
90
- @tokenizer.splits_text_on(/[\s\.\/]/)
34
+
35
+ describe "substitute(s)_characters*" do
36
+ it "doesn't substitute if there is no substituter" do
37
+ @tokenizer.substitute_characters('abcdefghijklmnopqrstuvwxyzäöü').should == 'abcdefghijklmnopqrstuvwxyzäöü'
91
38
  end
92
- it "has split" do
93
- lambda { @tokenizer.split('a b/c.d') }.should_not raise_error
39
+ it "uses the substituter to replace characters" do
40
+ @tokenizer.substitutes_characters_with CharacterSubstituters::WestEuropean.new
41
+
42
+ @tokenizer.substitute_characters('abcdefghijklmnopqrstuvwxyzäöü').should == 'abcdefghijklmnopqrstuvwxyzaeoeue'
94
43
  end
95
- it "removes illegal characters" do
96
- @tokenizer.split('a b/c.d').should == ['a','b','c','d']
44
+ it "uses the european substituter as default" do
45
+ @tokenizer.substitutes_characters_with
46
+
47
+ @tokenizer.substitute_characters('abcdefghijklmnopqrstuvwxyzäöü').should == 'abcdefghijklmnopqrstuvwxyzaeoeue'
97
48
  end
98
49
  end
99
- end
100
-
101
- describe "removes_characters" do
102
- context "without removes_characters called" do
103
- it "has remove_illegals" do
104
- lambda { @tokenizer.remove_illegals('any') }.should_not raise_error
105
- end
106
- it 'should define a method remove_illegals that does nothing' do
107
- unchanging = stub :unchanging
108
- @tokenizer.remove_illegals unchanging
50
+
51
+ describe "removes_characters_after_splitting" do
52
+ context "without removes_characters_after_splitting called" do
53
+ it "has remove_after_normalizing_illegals" do
54
+ lambda { @tokenizer.remove_after_normalizing_illegals('any') }.should_not raise_error
55
+ end
56
+ it 'should define a remove_after_normalizing_illegals normalize_with_patterns does nothing' do
57
+ unchanging = stub :unchanging
58
+ @tokenizer.remove_after_normalizing_illegals unchanging
59
+ end
60
+ end
61
+ context "with removes_characters_after_splitting called" do
62
+ before(:each) do
63
+ @tokenizer.removes_characters_after_splitting(/[afo]/)
64
+ end
65
+ it "has remove_after_normalizing_illegals" do
66
+ lambda { @tokenizer.remove_after_normalizing_illegals('abcdefghijklmnop') }.should_not raise_error
67
+ end
68
+ it "removes illegal characters" do
69
+ @tokenizer.remove_after_normalizing_illegals('abcdefghijklmnop').should == 'bcdeghijklmnp'
70
+ end
109
71
  end
110
72
  end
111
- context "with removes_characters called" do
112
- before(:each) do
113
- @tokenizer.removes_characters(/[afo]/)
114
- end
115
- it "has remove_illegals" do
116
- lambda { @tokenizer.remove_illegals('abcdefghijklmnop') }.should_not raise_error
117
- end
118
- it "removes illegal characters" do
119
- @tokenizer.remove_illegals('abcdefghijklmnop').should == 'bcdeghijklmnp'
73
+
74
+ describe "normalizes_words" do
75
+ context "without normalizes_words called" do
76
+ it "has normalize_with_patterns" do
77
+ lambda { @tokenizer.normalize_with_patterns('any') }.should_not raise_error
78
+ end
79
+ it 'should define a method normalize_with_patterns does nothing' do
80
+ unchanging = stub :unchanging
81
+ @tokenizer.normalize_with_patterns(unchanging).should == unchanging
82
+ end
83
+ end
84
+ context "with normalizes_words called" do
85
+ before(:each) do
86
+ @tokenizer.normalizes_words([
87
+ [/st\./, 'sankt'],
88
+ [/stras?s?e?/, 'str']
89
+ ])
90
+ end
91
+ it "has normalize_with_patterns" do
92
+ lambda { @tokenizer.normalize_with_patterns('a b/c.d') }.should_not raise_error
93
+ end
94
+ it "normalizes, but just the first one" do
95
+ @tokenizer.normalize_with_patterns('st. wegstrasse').should == 'sankt wegstrasse'
96
+ end
120
97
  end
121
98
  end
122
- end
123
-
124
- describe 'stopwords' do
125
- context 'without stopwords given' do
126
- it 'should define a method remove_stopwords' do
127
- lambda { @tokenizer.remove_stopwords('from this text') }.should_not raise_error
128
- end
129
- it 'should define a method remove_stopwords that does nothing' do
130
- @tokenizer.remove_stopwords('from this text').should == 'from this text'
131
- end
132
- it 'should define a method remove_non_single_stopwords' do
133
- lambda { @tokenizer.remove_non_single_stopwords('from this text') }.should_not raise_error
134
-
99
+
100
+ describe "splits_text_on" do
101
+ context "without splits_text_on called" do
102
+ it "has split" do
103
+ lambda { @tokenizer.split('any') }.should_not raise_error
104
+ end
105
+ it 'should define a method split that splits by default on \s' do
106
+ @tokenizer.split('a b/c.d').should == ['a', 'b/c.d']
107
+ end
108
+ it 'splits text on /\s/ by default' do
109
+ @tokenizer.split('this is a test').should == ['this', 'is', 'a', 'test']
110
+ end
111
+ end
112
+ context "with removes_characters called" do
113
+ before(:each) do
114
+ @tokenizer.splits_text_on(/[\s\.\/]/)
115
+ end
116
+ it "has split" do
117
+ lambda { @tokenizer.split('a b/c.d') }.should_not raise_error
118
+ end
119
+ it "removes illegal characters" do
120
+ @tokenizer.split('a b/c.d').should == ['a','b','c','d']
121
+ end
135
122
  end
136
123
  end
137
- context 'with stopwords given' do
138
- before(:each) do
139
- @tokenizer.stopwords(/r|e/)
140
- end
141
- it 'should define a method remove_stopwords' do
142
- lambda { @tokenizer.remove_stopwords('from this text') }.should_not raise_error
143
- end
144
- it 'should define a method stopwords that removes stopwords' do
145
- @tokenizer.remove_stopwords('from this text').should == 'fom this txt'
146
- end
147
- it 'should define a method remove_non_single_stopwords' do
148
- lambda { @tokenizer.remove_non_single_stopwords('from this text') }.should_not raise_error
149
- end
150
- it 'should define a method remove_non_single_stopwords that removes non-single stopwords' do
151
- @tokenizer.remove_non_single_stopwords('rerere rerere').should == ' '
152
- end
153
- it 'should define a method remove_non_single_stopwords that does not single stopwords' do
154
- @tokenizer.remove_non_single_stopwords('rerere').should == 'rerere'
124
+
125
+ describe "removes_characters" do
126
+ context "without removes_characters called" do
127
+ it "has remove_illegals" do
128
+ lambda { @tokenizer.remove_illegals('any') }.should_not raise_error
129
+ end
130
+ it 'should define a method remove_illegals that does nothing' do
131
+ unchanging = stub :unchanging
132
+ @tokenizer.remove_illegals unchanging
133
+ end
134
+ end
135
+ context "with removes_characters called" do
136
+ before(:each) do
137
+ @tokenizer.removes_characters(/[afo]/)
138
+ end
139
+ it "has remove_illegals" do
140
+ lambda { @tokenizer.remove_illegals('abcdefghijklmnop') }.should_not raise_error
141
+ end
142
+ it "removes illegal characters" do
143
+ @tokenizer.remove_illegals('abcdefghijklmnop').should == 'bcdeghijklmnp'
144
+ end
155
145
  end
156
146
  end
157
- context 'error case' do
158
- before(:each) do
159
- @tokenizer.stopwords(/any/)
160
- end
161
- it 'should not remove non-single stopwords with a star' do
162
- @tokenizer.remove_non_single_stopwords('a*').should == 'a*'
163
- end
164
- it 'should not remove non-single stopwords with a tilde' do
165
- @tokenizer.remove_non_single_stopwords('a~').should == 'a~'
147
+
148
+ describe 'stopwords' do
149
+ context 'without stopwords given' do
150
+ it 'should define a method remove_stopwords' do
151
+ lambda { @tokenizer.remove_stopwords('from this text') }.should_not raise_error
152
+ end
153
+ it 'should define a method remove_stopwords that does nothing' do
154
+ @tokenizer.remove_stopwords('from this text').should == 'from this text'
155
+ end
156
+ it 'should define a method remove_non_single_stopwords' do
157
+ lambda { @tokenizer.remove_non_single_stopwords('from this text') }.should_not raise_error
158
+
159
+ end
160
+ end
161
+ context 'with stopwords given' do
162
+ before(:each) do
163
+ @tokenizer.stopwords(/r|e/)
164
+ end
165
+ it 'should define a method remove_stopwords' do
166
+ lambda { @tokenizer.remove_stopwords('from this text') }.should_not raise_error
167
+ end
168
+ it 'should define a method stopwords that removes stopwords' do
169
+ @tokenizer.remove_stopwords('from this text').should == 'fom this txt'
170
+ end
171
+ it 'should define a method remove_non_single_stopwords' do
172
+ lambda { @tokenizer.remove_non_single_stopwords('from this text') }.should_not raise_error
173
+ end
174
+ it 'should define a method remove_non_single_stopwords that removes non-single stopwords' do
175
+ @tokenizer.remove_non_single_stopwords('rerere rerere').should == ' '
176
+ end
177
+ it 'should define a method remove_non_single_stopwords that does not single stopwords' do
178
+ @tokenizer.remove_non_single_stopwords('rerere').should == 'rerere'
179
+ end
180
+ end
181
+ context 'error case' do
182
+ before(:each) do
183
+ @tokenizer.stopwords(/any/)
184
+ end
185
+ it 'should not remove non-single stopwords with a star' do
186
+ @tokenizer.remove_non_single_stopwords('a*').should == 'a*'
187
+ end
188
+ it 'should not remove non-single stopwords with a tilde' do
189
+ @tokenizer.remove_non_single_stopwords('a~').should == 'a~'
190
+ end
166
191
  end
167
192
  end
168
193
  end
194
+
169
195
  end
metadata CHANGED
@@ -5,8 +5,8 @@ version: !ruby/object:Gem::Version
5
5
  segments:
6
6
  - 1
7
7
  - 2
8
- - 3
9
- version: 1.2.3
8
+ - 4
9
+ version: 1.2.4
10
10
  platform: ruby
11
11
  authors:
12
12
  - Florian Hanke
@@ -14,7 +14,7 @@ autorequire:
14
14
  bindir: bin
15
15
  cert_chain: []
16
16
 
17
- date: 2010-12-28 00:00:00 +01:00
17
+ date: 2010-12-30 00:00:00 +01:00
18
18
  default_executable: picky
19
19
  dependencies:
20
20
  - !ruby/object:Gem::Dependency
@@ -73,8 +73,6 @@ files:
73
73
  - lib/picky/extensions/module.rb
74
74
  - lib/picky/extensions/object.rb
75
75
  - lib/picky/extensions/symbol.rb
76
- - lib/picky/helpers/cache.rb
77
- - lib/picky/helpers/gc.rb
78
76
  - lib/picky/helpers/measuring.rb
79
77
  - lib/picky/index/bundle.rb
80
78
  - lib/picky/index/file/basic.rb
@@ -169,8 +167,6 @@ files:
169
167
  - spec/lib/extensions/module_spec.rb
170
168
  - spec/lib/extensions/object_spec.rb
171
169
  - spec/lib/extensions/symbol_spec.rb
172
- - spec/lib/helpers/cache_spec.rb
173
- - spec/lib/helpers/gc_spec.rb
174
170
  - spec/lib/helpers/measuring_spec.rb
175
171
  - spec/lib/index/bundle_spec.rb
176
172
  - spec/lib/index/file/basic_spec.rb
@@ -191,6 +187,7 @@ files:
191
187
  - spec/lib/indexing/bundle_spec.rb
192
188
  - spec/lib/indexing/category_spec.rb
193
189
  - spec/lib/indexing/index_spec.rb
190
+ - spec/lib/indexing/indexes_spec.rb
194
191
  - spec/lib/loader_spec.rb
195
192
  - spec/lib/loggers/search_spec.rb
196
193
  - spec/lib/query/allocation_spec.rb
@@ -276,8 +273,6 @@ test_files:
276
273
  - spec/lib/extensions/module_spec.rb
277
274
  - spec/lib/extensions/object_spec.rb
278
275
  - spec/lib/extensions/symbol_spec.rb
279
- - spec/lib/helpers/cache_spec.rb
280
- - spec/lib/helpers/gc_spec.rb
281
276
  - spec/lib/helpers/measuring_spec.rb
282
277
  - spec/lib/index/bundle_spec.rb
283
278
  - spec/lib/index/file/basic_spec.rb
@@ -298,6 +293,7 @@ test_files:
298
293
  - spec/lib/indexing/bundle_spec.rb
299
294
  - spec/lib/indexing/category_spec.rb
300
295
  - spec/lib/indexing/index_spec.rb
296
+ - spec/lib/indexing/indexes_spec.rb
301
297
  - spec/lib/loader_spec.rb
302
298
  - spec/lib/loggers/search_spec.rb
303
299
  - spec/lib/query/allocation_spec.rb
@@ -1,25 +0,0 @@
1
- # TODO Not used anymore? Remove.
2
- #
3
- module Helpers # :nodoc:all
4
-
5
- module Cache
6
- # This is a simple cache.
7
- # The store needs to be able to answer to [] and []=.
8
- #
9
- def cached store, key, &block
10
- # Get cached result
11
- #
12
- results = store[key]
13
- return results if results
14
-
15
- results = lambda(&block).call
16
-
17
- # Store results
18
- #
19
- store[key] = results
20
-
21
- results
22
- end
23
- end
24
-
25
- end
@@ -1,13 +0,0 @@
1
- # TODO Not used anymore? Remove.
2
- #
3
- module Helpers
4
- module GC
5
- def gc_disabled &block
6
- ::GC.disable
7
- block.call
8
- ::GC.enable
9
- ::GC.start
10
- end
11
- alias disabled gc_disabled
12
- end
13
- end
@@ -1,35 +0,0 @@
1
- require 'spec_helper'
2
-
3
- describe Helpers::Cache do
4
- include Helpers::Cache
5
-
6
- describe "#cached" do
7
- attr_reader :store, :key
8
- before(:each) do
9
- @store = {}
10
- @key = 'some key'
11
- end
12
- describe "not yet cached" do
13
- it "should cache" do
14
- store.should_receive(:[]=).once.with(@key, 'value')
15
- cached @store, @key do
16
- 'value'
17
- end
18
- end
19
- end
20
- describe "already cached" do
21
- before(:each) do
22
- cached @store, @key do
23
- 'value'
24
- end
25
- end
26
- it "should not cache" do
27
- store.should_receive(:[]=).never
28
- cached @store, @key do
29
- 'value'
30
- end
31
- end
32
- end
33
- end
34
-
35
- end
@@ -1,71 +0,0 @@
1
- require 'spec_helper'
2
-
3
- describe Helpers::GC do
4
- include Helpers::GC
5
-
6
- before(:each) do
7
- ::GC.stub!(:disable)
8
- ::GC.stub!(:enable)
9
- ::GC.stub!(:start)
10
- end
11
-
12
- describe "block calling" do
13
- it 'should call the block' do
14
- inside_block = mock :inside
15
- inside_block.should_receive(:call).once
16
-
17
- disabled do
18
- inside_block.call
19
- end
20
- end
21
- it 'should call the block' do
22
- inside_block = mock :inside
23
- inside_block.should_receive(:call).once
24
-
25
- gc_disabled do
26
- inside_block.call
27
- end
28
- end
29
- end
30
-
31
- describe "gc calls" do
32
- after(:each) do
33
- disabled {}
34
- end
35
- it 'should disable the garbage collector' do
36
- ::GC.should_receive(:disable)
37
- end
38
- it 'should enable the garbage collector' do
39
- ::GC.should_receive(:enable)
40
- end
41
- it 'should start the garbage collector' do
42
- ::GC.should_receive(:start)
43
- end
44
- it 'should disable the gc, call the block, enable the gc and start the gc' do
45
- ::GC.should_receive(:disable).ordered
46
- ::GC.should_receive(:enable).ordered
47
- ::GC.should_receive(:start).ordered
48
- end
49
- end
50
-
51
- describe "gc calls" do
52
- after(:each) do
53
- gc_disabled {}
54
- end
55
- it 'should disable the garbage collector' do
56
- ::GC.should_receive(:disable)
57
- end
58
- it 'should enable the garbage collector' do
59
- ::GC.should_receive(:enable)
60
- end
61
- it 'should start the garbage collector' do
62
- ::GC.should_receive(:start)
63
- end
64
- it 'should disable the gc, call the block, enable the gc and start the gc' do
65
- ::GC.should_receive(:disable).ordered
66
- ::GC.should_receive(:enable).ordered
67
- ::GC.should_receive(:start).ordered
68
- end
69
- end
70
-
71
- end