picky 4.12.2 → 4.12.3
Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,24 @@
|
|
1
|
+
# -*- coding: utf-8 -*-
|
2
|
+
|
3
|
+
module Picky
|
4
|
+
|
5
|
+
module CharacterSubstituters
|
6
|
+
|
7
|
+
class Polish < Base
|
8
|
+
|
9
|
+
def substitute text
|
10
|
+
trans = @chars.new(text).normalize :kd
|
11
|
+
|
12
|
+
trans.gsub! 'Ł', 'L'
|
13
|
+
trans.gsub! 'ł', 'l'
|
14
|
+
|
15
|
+
trans.unpack('U*').select { |cp|
|
16
|
+
cp < 0x0300 || cp > 0x035F
|
17
|
+
}.pack 'U*'
|
18
|
+
end
|
19
|
+
|
20
|
+
end
|
21
|
+
|
22
|
+
end
|
23
|
+
|
24
|
+
end
|
@@ -10,11 +10,7 @@ module Picky
|
|
10
10
|
# ä, ö, ü => ae, oe, ue.
|
11
11
|
# (and more, see specs)
|
12
12
|
#
|
13
|
-
class WestEuropean
|
14
|
-
|
15
|
-
def initialize
|
16
|
-
@chars = ActiveSupport::Multibyte.proxy_class
|
17
|
-
end
|
13
|
+
class WestEuropean < Base
|
18
14
|
|
19
15
|
# Substitutes occurrences of certain characters
|
20
16
|
# (like Umlauts) with ASCII representations of them.
|
@@ -45,10 +41,6 @@ module Picky
|
|
45
41
|
}.pack 'U*'
|
46
42
|
end
|
47
43
|
|
48
|
-
def to_s
|
49
|
-
self.class.name
|
50
|
-
end
|
51
|
-
|
52
44
|
end
|
53
45
|
|
54
46
|
end
|
data/lib/picky/loader.rb
CHANGED
@@ -268,7 +268,9 @@ module Picky
|
|
268
268
|
load_relative 'tokenizer/regexp_wrapper'
|
269
269
|
load_relative 'tokenizer'
|
270
270
|
# load_relative 'rack/harakiri' # Needs to be explicitly loaded/required.
|
271
|
+
load_relative 'character_substituters/base'
|
271
272
|
load_relative 'character_substituters/west_european'
|
273
|
+
load_relative 'character_substituters/polish'
|
272
274
|
load_relative 'splitters/automatic'
|
273
275
|
load_generators
|
274
276
|
load_inner_api
|
@@ -0,0 +1,74 @@
|
|
1
|
+
# encoding: utf-8
|
2
|
+
#
|
3
|
+
require 'spec_helper'
|
4
|
+
|
5
|
+
describe Picky::CharacterSubstituters::Polish do
|
6
|
+
|
7
|
+
let(:substituter) { described_class.new.tap { |s| s.substitute '' } }
|
8
|
+
|
9
|
+
# A bit of metaprogramming to help with the myriads of its.
|
10
|
+
#
|
11
|
+
def self.it_should_substitute special_character, normal_character
|
12
|
+
it "should substitute #{special_character} with #{normal_character}" do
|
13
|
+
substituter.substitute(special_character).should == normal_character
|
14
|
+
end
|
15
|
+
end
|
16
|
+
def self.it_should_not_substitute special_character
|
17
|
+
it "should not substitute #{special_character}" do
|
18
|
+
substituter.substitute(special_character).should == special_character
|
19
|
+
end
|
20
|
+
end
|
21
|
+
|
22
|
+
# Speed spec at the top since the order of the describes made the
|
23
|
+
# speed spec trip. And not on mushrooms either.
|
24
|
+
#
|
25
|
+
describe "speed" do
|
26
|
+
it "is fast" do
|
27
|
+
substituter.substitute 'ą' # Prerun
|
28
|
+
result = performance_of { substituter.substitute('ą') }
|
29
|
+
result.should < 0.00009
|
30
|
+
end
|
31
|
+
it "is fast" do
|
32
|
+
result = performance_of { substituter.substitute('abcdefghijklmnopqrstuvwxyz1234567890') }
|
33
|
+
result.should < 0.00015
|
34
|
+
end
|
35
|
+
end
|
36
|
+
|
37
|
+
describe 'to_s' do
|
38
|
+
it 'outputs correctly' do
|
39
|
+
substituter.to_s.should == 'Picky::CharacterSubstituters::Polish'
|
40
|
+
end
|
41
|
+
end
|
42
|
+
|
43
|
+
describe "normal characters" do
|
44
|
+
it_should_not_substitute('abcdefghijklmnopqrstuvwxyz1234567890')
|
45
|
+
end
|
46
|
+
|
47
|
+
describe "situations" do
|
48
|
+
it_should_substitute 'Michał Prawda', 'Michal Prawda'
|
49
|
+
it_should_substitute 'Brzęczyszczykiewicz', 'Brzeczyszczykiewicz'
|
50
|
+
end
|
51
|
+
|
52
|
+
describe "diacritics" do
|
53
|
+
#ĄąĘęĆćŁłŃńŚśÓóŹźŻż
|
54
|
+
it_should_substitute 'ą', 'a'
|
55
|
+
it_should_substitute 'Ą', 'A'
|
56
|
+
it_should_substitute 'ę', 'e'
|
57
|
+
it_should_substitute 'Ę', 'E'
|
58
|
+
it_should_substitute 'ć', 'c'
|
59
|
+
it_should_substitute 'Ć', 'C'
|
60
|
+
it_should_substitute 'ł', 'l'
|
61
|
+
it_should_substitute 'Ł', 'L'
|
62
|
+
it_should_substitute 'ń', 'n'
|
63
|
+
it_should_substitute 'Ń', 'N'
|
64
|
+
it_should_substitute 'ś', 's'
|
65
|
+
it_should_substitute 'Ś', 'S'
|
66
|
+
it_should_substitute 'ó', 'o'
|
67
|
+
it_should_substitute 'Ó', 'O'
|
68
|
+
it_should_substitute 'ź', 'z'
|
69
|
+
it_should_substitute 'Ź', 'Z'
|
70
|
+
it_should_substitute 'ż', 'z'
|
71
|
+
it_should_substitute 'Ż', 'Z'
|
72
|
+
end
|
73
|
+
|
74
|
+
end
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: picky
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 4.12.2
|
4
|
+
version: 4.12.3
|
5
5
|
prerelease:
|
6
6
|
platform: ruby
|
7
7
|
authors:
|
@@ -9,7 +9,7 @@ authors:
|
|
9
9
|
autorequire:
|
10
10
|
bindir: bin
|
11
11
|
cert_chain: []
|
12
|
-
date: 2012-12-
|
12
|
+
date: 2012-12-06 00:00:00.000000000 Z
|
13
13
|
dependencies:
|
14
14
|
- !ruby/object:Gem::Dependency
|
15
15
|
name: rspec
|
@@ -34,7 +34,7 @@ dependencies:
|
|
34
34
|
requirements:
|
35
35
|
- - ~>
|
36
36
|
- !ruby/object:Gem::Version
|
37
|
-
version: 4.12.2
|
37
|
+
version: 4.12.3
|
38
38
|
type: :development
|
39
39
|
prerelease: false
|
40
40
|
version_requirements: !ruby/object:Gem::Requirement
|
@@ -42,7 +42,7 @@ dependencies:
|
|
42
42
|
requirements:
|
43
43
|
- - ~>
|
44
44
|
- !ruby/object:Gem::Version
|
45
|
-
version: 4.12.2
|
45
|
+
version: 4.12.3
|
46
46
|
- !ruby/object:Gem::Dependency
|
47
47
|
name: text
|
48
48
|
requirement: !ruby/object:Gem::Requirement
|
@@ -163,6 +163,8 @@ files:
|
|
163
163
|
- lib/picky/category_indexed.rb
|
164
164
|
- lib/picky/category_indexing.rb
|
165
165
|
- lib/picky/category_realtime.rb
|
166
|
+
- lib/picky/character_substituters/base.rb
|
167
|
+
- lib/picky/character_substituters/polish.rb
|
166
168
|
- lib/picky/character_substituters/west_european.rb
|
167
169
|
- lib/picky/console.rb
|
168
170
|
- lib/picky/constants.rb
|
@@ -331,6 +333,7 @@ files:
|
|
331
333
|
- spec/lib/category_indexing_spec.rb
|
332
334
|
- spec/lib/category_realtime_spec.rb
|
333
335
|
- spec/lib/category_spec.rb
|
336
|
+
- spec/lib/character_substituters/polish_spec.rb
|
334
337
|
- spec/lib/character_substituters/west_european_spec.rb
|
335
338
|
- spec/lib/extensions/array_spec.rb
|
336
339
|
- spec/lib/extensions/hash_spec.rb
|
@@ -496,6 +499,7 @@ test_files:
|
|
496
499
|
- spec/lib/category_indexing_spec.rb
|
497
500
|
- spec/lib/category_realtime_spec.rb
|
498
501
|
- spec/lib/category_spec.rb
|
502
|
+
- spec/lib/character_substituters/polish_spec.rb
|
499
503
|
- spec/lib/character_substituters/west_european_spec.rb
|
500
504
|
- spec/lib/extensions/array_spec.rb
|
501
505
|
- spec/lib/extensions/hash_spec.rb
|