picky 4.12.2 → 4.12.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,21 @@
1
+ # -*- coding: utf-8 -*-
2
+
3
+ module Picky
4
+
5
+ module CharacterSubstituters
6
+
7
+ class Base
8
+
9
+ def initialize
10
+ @chars = ActiveSupport::Multibyte.proxy_class
11
+ end
12
+
13
+ def to_s
14
+ self.class.name
15
+ end
16
+
17
+ end
18
+
19
+ end
20
+
21
+ end
@@ -0,0 +1,24 @@
1
+ # -*- coding: utf-8 -*-
2
+
3
+ module Picky
4
+
5
+ module CharacterSubstituters
6
+
7
+ class Polish < Base
8
+
9
+ def substitute text
10
+ trans = @chars.new(text).normalize :kd
11
+
12
+ trans.gsub! 'Ł', 'L'
13
+ trans.gsub! 'ł', 'l'
14
+
15
+ trans.unpack('U*').select { |cp|
16
+ cp < 0x0300 || cp > 0x035F
17
+ }.pack 'U*'
18
+ end
19
+
20
+ end
21
+
22
+ end
23
+
24
+ end
@@ -10,11 +10,7 @@ module Picky
10
10
  # ä, ö, ü => ae, oe, ue.
11
11
  # (and more, see specs)
12
12
  #
13
- class WestEuropean
14
-
15
- def initialize
16
- @chars = ActiveSupport::Multibyte.proxy_class
17
- end
13
+ class WestEuropean < Base
18
14
 
19
15
  # Substitutes occurrences of certain characters
20
16
  # (like Umlauts) with ASCII representations of them.
@@ -45,10 +41,6 @@ module Picky
45
41
  }.pack 'U*'
46
42
  end
47
43
 
48
- def to_s
49
- self.class.name
50
- end
51
-
52
44
  end
53
45
 
54
46
  end
data/lib/picky/loader.rb CHANGED
@@ -268,7 +268,9 @@ module Picky
268
268
  load_relative 'tokenizer/regexp_wrapper'
269
269
  load_relative 'tokenizer'
270
270
  # load_relative 'rack/harakiri' # Needs to be explicitly loaded/required.
271
+ load_relative 'character_substituters/base'
271
272
  load_relative 'character_substituters/west_european'
273
+ load_relative 'character_substituters/polish'
272
274
  load_relative 'splitters/automatic'
273
275
  load_generators
274
276
  load_inner_api
@@ -0,0 +1,74 @@
1
+ # encoding: utf-8
2
+ #
3
+ require 'spec_helper'
4
+
5
+ describe Picky::CharacterSubstituters::Polish do
6
+
7
+ let(:substituter) { described_class.new.tap { |s| s.substitute '' } }
8
+
9
+ # A bit of metaprogramming to help with the myriads of its.
10
+ #
11
+ def self.it_should_substitute special_character, normal_character
12
+ it "should substitute #{special_character} with #{normal_character}" do
13
+ substituter.substitute(special_character).should == normal_character
14
+ end
15
+ end
16
+ def self.it_should_not_substitute special_character
17
+ it "should not substitute #{special_character}" do
18
+ substituter.substitute(special_character).should == special_character
19
+ end
20
+ end
21
+
22
+ # Speed spec at the top since the order of the describes made the
23
+ # speed spec trip. And not on mushrooms either.
24
+ #
25
+ describe "speed" do
26
+ it "is fast" do
27
+ substituter.substitute 'ą' # Prerun
28
+ result = performance_of { substituter.substitute('ą') }
29
+ result.should < 0.00009
30
+ end
31
+ it "is fast" do
32
+ result = performance_of { substituter.substitute('abcdefghijklmnopqrstuvwxyz1234567890') }
33
+ result.should < 0.00015
34
+ end
35
+ end
36
+
37
+ describe 'to_s' do
38
+ it 'outputs correctly' do
39
+ substituter.to_s.should == 'Picky::CharacterSubstituters::Polish'
40
+ end
41
+ end
42
+
43
+ describe "normal characters" do
44
+ it_should_not_substitute('abcdefghijklmnopqrstuvwxyz1234567890')
45
+ end
46
+
47
+ describe "situations" do
48
+ it_should_substitute 'Michał Prawda', 'Michal Prawda'
49
+ it_should_substitute 'Brzęczyszczykiewicz', 'Brzeczyszczykiewicz'
50
+ end
51
+
52
+ describe "diacritics" do
53
+ #ĄąĘęĆ棳ŃńŚśÓ󏟯ż
54
+ it_should_substitute 'ą', 'a'
55
+ it_should_substitute 'Ą', 'A'
56
+ it_should_substitute 'ę', 'e'
57
+ it_should_substitute 'Ę', 'E'
58
+ it_should_substitute 'ć', 'c'
59
+ it_should_substitute 'Ć', 'C'
60
+ it_should_substitute 'ł', 'l'
61
+ it_should_substitute 'Ł', 'L'
62
+ it_should_substitute 'ń', 'n'
63
+ it_should_substitute 'Ń', 'N'
64
+ it_should_substitute 'ś', 's'
65
+ it_should_substitute 'Ś', 'S'
66
+ it_should_substitute 'ó', 'o'
67
+ it_should_substitute 'Ó', 'O'
68
+ it_should_substitute 'ź', 'z'
69
+ it_should_substitute 'Ź', 'Z'
70
+ it_should_substitute 'ż', 'z'
71
+ it_should_substitute 'Ż', 'Z'
72
+ end
73
+
74
+ end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: picky
3
3
  version: !ruby/object:Gem::Version
4
- version: 4.12.2
4
+ version: 4.12.3
5
5
  prerelease:
6
6
  platform: ruby
7
7
  authors:
@@ -9,7 +9,7 @@ authors:
9
9
  autorequire:
10
10
  bindir: bin
11
11
  cert_chain: []
12
- date: 2012-12-03 00:00:00.000000000 Z
12
+ date: 2012-12-06 00:00:00.000000000 Z
13
13
  dependencies:
14
14
  - !ruby/object:Gem::Dependency
15
15
  name: rspec
@@ -34,7 +34,7 @@ dependencies:
34
34
  requirements:
35
35
  - - ~>
36
36
  - !ruby/object:Gem::Version
37
- version: 4.12.2
37
+ version: 4.12.3
38
38
  type: :development
39
39
  prerelease: false
40
40
  version_requirements: !ruby/object:Gem::Requirement
@@ -42,7 +42,7 @@ dependencies:
42
42
  requirements:
43
43
  - - ~>
44
44
  - !ruby/object:Gem::Version
45
- version: 4.12.2
45
+ version: 4.12.3
46
46
  - !ruby/object:Gem::Dependency
47
47
  name: text
48
48
  requirement: !ruby/object:Gem::Requirement
@@ -163,6 +163,8 @@ files:
163
163
  - lib/picky/category_indexed.rb
164
164
  - lib/picky/category_indexing.rb
165
165
  - lib/picky/category_realtime.rb
166
+ - lib/picky/character_substituters/base.rb
167
+ - lib/picky/character_substituters/polish.rb
166
168
  - lib/picky/character_substituters/west_european.rb
167
169
  - lib/picky/console.rb
168
170
  - lib/picky/constants.rb
@@ -331,6 +333,7 @@ files:
331
333
  - spec/lib/category_indexing_spec.rb
332
334
  - spec/lib/category_realtime_spec.rb
333
335
  - spec/lib/category_spec.rb
336
+ - spec/lib/character_substituters/polish_spec.rb
334
337
  - spec/lib/character_substituters/west_european_spec.rb
335
338
  - spec/lib/extensions/array_spec.rb
336
339
  - spec/lib/extensions/hash_spec.rb
@@ -496,6 +499,7 @@ test_files:
496
499
  - spec/lib/category_indexing_spec.rb
497
500
  - spec/lib/category_realtime_spec.rb
498
501
  - spec/lib/category_spec.rb
502
+ - spec/lib/character_substituters/polish_spec.rb
499
503
  - spec/lib/character_substituters/west_european_spec.rb
500
504
  - spec/lib/extensions/array_spec.rb
501
505
  - spec/lib/extensions/hash_spec.rb