picky 4.12.2 → 4.12.3

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,21 @@
1
+ # -*- coding: utf-8 -*-
2
+
3
+ module Picky
4
+
5
+ module CharacterSubstituters
6
+
7
+ class Base
8
+
9
+ def initialize
10
+ @chars = ActiveSupport::Multibyte.proxy_class
11
+ end
12
+
13
+ def to_s
14
+ self.class.name
15
+ end
16
+
17
+ end
18
+
19
+ end
20
+
21
+ end
@@ -0,0 +1,24 @@
1
+ # -*- coding: utf-8 -*-
2
+
3
+ module Picky
4
+
5
+ module CharacterSubstituters
6
+
7
+ class Polish < Base
8
+
9
+ def substitute text
10
+ trans = @chars.new(text).normalize :kd
11
+
12
+ trans.gsub! 'Ł', 'L'
13
+ trans.gsub! 'ł', 'l'
14
+
15
+ trans.unpack('U*').select { |cp|
16
+ cp < 0x0300 || cp > 0x035F
17
+ }.pack 'U*'
18
+ end
19
+
20
+ end
21
+
22
+ end
23
+
24
+ end
@@ -10,11 +10,7 @@ module Picky
10
10
  # ä, ö, ü => ae, oe, ue.
11
11
  # (and more, see specs)
12
12
  #
13
- class WestEuropean
14
-
15
- def initialize
16
- @chars = ActiveSupport::Multibyte.proxy_class
17
- end
13
+ class WestEuropean < Base
18
14
 
19
15
  # Substitutes occurrences of certain characters
20
16
  # (like Umlauts) with ASCII representations of them.
@@ -45,10 +41,6 @@ module Picky
45
41
  }.pack 'U*'
46
42
  end
47
43
 
48
- def to_s
49
- self.class.name
50
- end
51
-
52
44
  end
53
45
 
54
46
  end
data/lib/picky/loader.rb CHANGED
@@ -268,7 +268,9 @@ module Picky
268
268
  load_relative 'tokenizer/regexp_wrapper'
269
269
  load_relative 'tokenizer'
270
270
  # load_relative 'rack/harakiri' # Needs to be explicitly loaded/required.
271
+ load_relative 'character_substituters/base'
271
272
  load_relative 'character_substituters/west_european'
273
+ load_relative 'character_substituters/polish'
272
274
  load_relative 'splitters/automatic'
273
275
  load_generators
274
276
  load_inner_api
@@ -0,0 +1,74 @@
1
+ # encoding: utf-8
2
+ #
3
+ require 'spec_helper'
4
+
5
+ describe Picky::CharacterSubstituters::Polish do
6
+
7
+ let(:substituter) { described_class.new.tap { |s| s.substitute '' } }
8
+
9
+ # A bit of metaprogramming to avoid writing out the many near-identical "it" examples by hand.
10
+ #
11
+ def self.it_should_substitute special_character, normal_character
12
+ it "should substitute #{special_character} with #{normal_character}" do
13
+ substituter.substitute(special_character).should == normal_character
14
+ end
15
+ end
16
+ def self.it_should_not_substitute special_character
17
+ it "should not substitute #{special_character}" do
18
+ substituter.substitute(special_character).should == special_character
19
+ end
20
+ end
21
+
22
+ # The speed spec is placed first: when it ran after the other describe
23
+ # blocks, the order of execution made it exceed its time limits.
24
+ #
25
+ describe "speed" do
26
+ it "is fast" do
27
+ substituter.substitute 'ą' # Prerun
28
+ result = performance_of { substituter.substitute('ą') }
29
+ result.should < 0.00009
30
+ end
31
+ it "is fast" do
32
+ result = performance_of { substituter.substitute('abcdefghijklmnopqrstuvwxyz1234567890') }
33
+ result.should < 0.00015
34
+ end
35
+ end
36
+
37
+ describe 'to_s' do
38
+ it 'outputs correctly' do
39
+ substituter.to_s.should == 'Picky::CharacterSubstituters::Polish'
40
+ end
41
+ end
42
+
43
+ describe "normal characters" do
44
+ it_should_not_substitute('abcdefghijklmnopqrstuvwxyz1234567890')
45
+ end
46
+
47
+ describe "situations" do
48
+ it_should_substitute 'Michał Prawda', 'Michal Prawda'
49
+ it_should_substitute 'Brzęczyszczykiewicz', 'Brzeczyszczykiewicz'
50
+ end
51
+
52
+ describe "diacritics" do
53
+ #ĄąĘęĆ棳ŃńŚśÓ󏟯ż
54
+ it_should_substitute 'ą', 'a'
55
+ it_should_substitute 'Ą', 'A'
56
+ it_should_substitute 'ę', 'e'
57
+ it_should_substitute 'Ę', 'E'
58
+ it_should_substitute 'ć', 'c'
59
+ it_should_substitute 'Ć', 'C'
60
+ it_should_substitute 'ł', 'l'
61
+ it_should_substitute 'Ł', 'L'
62
+ it_should_substitute 'ń', 'n'
63
+ it_should_substitute 'Ń', 'N'
64
+ it_should_substitute 'ś', 's'
65
+ it_should_substitute 'Ś', 'S'
66
+ it_should_substitute 'ó', 'o'
67
+ it_should_substitute 'Ó', 'O'
68
+ it_should_substitute 'ź', 'z'
69
+ it_should_substitute 'Ź', 'Z'
70
+ it_should_substitute 'ż', 'z'
71
+ it_should_substitute 'Ż', 'Z'
72
+ end
73
+
74
+ end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: picky
3
3
  version: !ruby/object:Gem::Version
4
- version: 4.12.2
4
+ version: 4.12.3
5
5
  prerelease:
6
6
  platform: ruby
7
7
  authors:
@@ -9,7 +9,7 @@ authors:
9
9
  autorequire:
10
10
  bindir: bin
11
11
  cert_chain: []
12
- date: 2012-12-03 00:00:00.000000000 Z
12
+ date: 2012-12-06 00:00:00.000000000 Z
13
13
  dependencies:
14
14
  - !ruby/object:Gem::Dependency
15
15
  name: rspec
@@ -34,7 +34,7 @@ dependencies:
34
34
  requirements:
35
35
  - - ~>
36
36
  - !ruby/object:Gem::Version
37
- version: 4.12.2
37
+ version: 4.12.3
38
38
  type: :development
39
39
  prerelease: false
40
40
  version_requirements: !ruby/object:Gem::Requirement
@@ -42,7 +42,7 @@ dependencies:
42
42
  requirements:
43
43
  - - ~>
44
44
  - !ruby/object:Gem::Version
45
- version: 4.12.2
45
+ version: 4.12.3
46
46
  - !ruby/object:Gem::Dependency
47
47
  name: text
48
48
  requirement: !ruby/object:Gem::Requirement
@@ -163,6 +163,8 @@ files:
163
163
  - lib/picky/category_indexed.rb
164
164
  - lib/picky/category_indexing.rb
165
165
  - lib/picky/category_realtime.rb
166
+ - lib/picky/character_substituters/base.rb
167
+ - lib/picky/character_substituters/polish.rb
166
168
  - lib/picky/character_substituters/west_european.rb
167
169
  - lib/picky/console.rb
168
170
  - lib/picky/constants.rb
@@ -331,6 +333,7 @@ files:
331
333
  - spec/lib/category_indexing_spec.rb
332
334
  - spec/lib/category_realtime_spec.rb
333
335
  - spec/lib/category_spec.rb
336
+ - spec/lib/character_substituters/polish_spec.rb
334
337
  - spec/lib/character_substituters/west_european_spec.rb
335
338
  - spec/lib/extensions/array_spec.rb
336
339
  - spec/lib/extensions/hash_spec.rb
@@ -496,6 +499,7 @@ test_files:
496
499
  - spec/lib/category_indexing_spec.rb
497
500
  - spec/lib/category_realtime_spec.rb
498
501
  - spec/lib/category_spec.rb
502
+ - spec/lib/character_substituters/polish_spec.rb
499
503
  - spec/lib/character_substituters/west_european_spec.rb
500
504
  - spec/lib/extensions/array_spec.rb
501
505
  - spec/lib/extensions/hash_spec.rb