picky 4.12.2 → 4.12.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,24 @@
|
|
1
|
+
# -*- coding: utf-8 -*-
|
2
|
+
|
3
|
+
module Picky
|
4
|
+
|
5
|
+
module CharacterSubstituters
|
6
|
+
|
7
|
+
class Polish < Base
|
8
|
+
|
9
|
+
def substitute text
|
10
|
+
trans = @chars.new(text).normalize :kd
|
11
|
+
|
12
|
+
trans.gsub! 'Ł', 'L'
|
13
|
+
trans.gsub! 'ł', 'l'
|
14
|
+
|
15
|
+
trans.unpack('U*').select { |cp|
|
16
|
+
cp < 0x0300 || cp > 0x035F
|
17
|
+
}.pack 'U*'
|
18
|
+
end
|
19
|
+
|
20
|
+
end
|
21
|
+
|
22
|
+
end
|
23
|
+
|
24
|
+
end
|
@@ -10,11 +10,7 @@ module Picky
|
|
10
10
|
# ä, ö, ü => ae, oe, ue.
|
11
11
|
# (and more, see specs)
|
12
12
|
#
|
13
|
-
class WestEuropean
|
14
|
-
|
15
|
-
def initialize
|
16
|
-
@chars = ActiveSupport::Multibyte.proxy_class
|
17
|
-
end
|
13
|
+
class WestEuropean < Base
|
18
14
|
|
19
15
|
# Substitutes occurrences of certain characters
|
20
16
|
# (like Umlauts) with ASCII representations of them.
|
@@ -45,10 +41,6 @@ module Picky
|
|
45
41
|
}.pack 'U*'
|
46
42
|
end
|
47
43
|
|
48
|
-
def to_s
|
49
|
-
self.class.name
|
50
|
-
end
|
51
|
-
|
52
44
|
end
|
53
45
|
|
54
46
|
end
|
data/lib/picky/loader.rb
CHANGED
@@ -268,7 +268,9 @@ module Picky
|
|
268
268
|
load_relative 'tokenizer/regexp_wrapper'
|
269
269
|
load_relative 'tokenizer'
|
270
270
|
# load_relative 'rack/harakiri' # Needs to be explicitly loaded/required.
|
271
|
+
load_relative 'character_substituters/base'
|
271
272
|
load_relative 'character_substituters/west_european'
|
273
|
+
load_relative 'character_substituters/polish'
|
272
274
|
load_relative 'splitters/automatic'
|
273
275
|
load_generators
|
274
276
|
load_inner_api
|
@@ -0,0 +1,74 @@
|
|
1
|
+
# encoding: utf-8
|
2
|
+
#
|
3
|
+
require 'spec_helper'
|
4
|
+
|
5
|
+
describe Picky::CharacterSubstituters::Polish do
|
6
|
+
|
7
|
+
let(:substituter) { described_class.new.tap { |s| s.substitute '' } }
|
8
|
+
|
9
|
+
# A bit of metaprogramming to help with the myriads of its.
|
10
|
+
#
|
11
|
+
def self.it_should_substitute special_character, normal_character
|
12
|
+
it "should substitute #{special_character} with #{normal_character}" do
|
13
|
+
substituter.substitute(special_character).should == normal_character
|
14
|
+
end
|
15
|
+
end
|
16
|
+
def self.it_should_not_substitute special_character
|
17
|
+
it "should not substitute #{special_character}" do
|
18
|
+
substituter.substitute(special_character).should == special_character
|
19
|
+
end
|
20
|
+
end
|
21
|
+
|
22
|
+
# Speed spec at the top since the order of the describes made the
|
23
|
+
# speed spec trip. And not on mushrooms either.
|
24
|
+
#
|
25
|
+
describe "speed" do
|
26
|
+
it "is fast" do
|
27
|
+
substituter.substitute 'ą' # Prerun
|
28
|
+
result = performance_of { substituter.substitute('ą') }
|
29
|
+
result.should < 0.00009
|
30
|
+
end
|
31
|
+
it "is fast" do
|
32
|
+
result = performance_of { substituter.substitute('abcdefghijklmnopqrstuvwxyz1234567890') }
|
33
|
+
result.should < 0.00015
|
34
|
+
end
|
35
|
+
end
|
36
|
+
|
37
|
+
describe 'to_s' do
|
38
|
+
it 'outputs correctly' do
|
39
|
+
substituter.to_s.should == 'Picky::CharacterSubstituters::Polish'
|
40
|
+
end
|
41
|
+
end
|
42
|
+
|
43
|
+
describe "normal characters" do
|
44
|
+
it_should_not_substitute('abcdefghijklmnopqrstuvwxyz1234567890')
|
45
|
+
end
|
46
|
+
|
47
|
+
describe "situations" do
|
48
|
+
it_should_substitute 'Michał Prawda', 'Michal Prawda'
|
49
|
+
it_should_substitute 'Brzęczyszczykiewicz', 'Brzeczyszczykiewicz'
|
50
|
+
end
|
51
|
+
|
52
|
+
describe "diacritics" do
|
53
|
+
#ĄąĘęĆ棳ŃńŚśÓ󏟯ż
|
54
|
+
it_should_substitute 'ą', 'a'
|
55
|
+
it_should_substitute 'Ą', 'A'
|
56
|
+
it_should_substitute 'ę', 'e'
|
57
|
+
it_should_substitute 'Ę', 'E'
|
58
|
+
it_should_substitute 'ć', 'c'
|
59
|
+
it_should_substitute 'Ć', 'C'
|
60
|
+
it_should_substitute 'ł', 'l'
|
61
|
+
it_should_substitute 'Ł', 'L'
|
62
|
+
it_should_substitute 'ń', 'n'
|
63
|
+
it_should_substitute 'Ń', 'N'
|
64
|
+
it_should_substitute 'ś', 's'
|
65
|
+
it_should_substitute 'Ś', 'S'
|
66
|
+
it_should_substitute 'ó', 'o'
|
67
|
+
it_should_substitute 'Ó', 'O'
|
68
|
+
it_should_substitute 'ź', 'z'
|
69
|
+
it_should_substitute 'Ź', 'Z'
|
70
|
+
it_should_substitute 'ż', 'z'
|
71
|
+
it_should_substitute 'Ż', 'Z'
|
72
|
+
end
|
73
|
+
|
74
|
+
end
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: picky
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 4.12.2
|
4
|
+
version: 4.12.3
|
5
5
|
prerelease:
|
6
6
|
platform: ruby
|
7
7
|
authors:
|
@@ -9,7 +9,7 @@ authors:
|
|
9
9
|
autorequire:
|
10
10
|
bindir: bin
|
11
11
|
cert_chain: []
|
12
|
-
date: 2012-12-
|
12
|
+
date: 2012-12-06 00:00:00.000000000 Z
|
13
13
|
dependencies:
|
14
14
|
- !ruby/object:Gem::Dependency
|
15
15
|
name: rspec
|
@@ -34,7 +34,7 @@ dependencies:
|
|
34
34
|
requirements:
|
35
35
|
- - ~>
|
36
36
|
- !ruby/object:Gem::Version
|
37
|
-
version: 4.12.2
|
37
|
+
version: 4.12.3
|
38
38
|
type: :development
|
39
39
|
prerelease: false
|
40
40
|
version_requirements: !ruby/object:Gem::Requirement
|
@@ -42,7 +42,7 @@ dependencies:
|
|
42
42
|
requirements:
|
43
43
|
- - ~>
|
44
44
|
- !ruby/object:Gem::Version
|
45
|
-
version: 4.12.2
|
45
|
+
version: 4.12.3
|
46
46
|
- !ruby/object:Gem::Dependency
|
47
47
|
name: text
|
48
48
|
requirement: !ruby/object:Gem::Requirement
|
@@ -163,6 +163,8 @@ files:
|
|
163
163
|
- lib/picky/category_indexed.rb
|
164
164
|
- lib/picky/category_indexing.rb
|
165
165
|
- lib/picky/category_realtime.rb
|
166
|
+
- lib/picky/character_substituters/base.rb
|
167
|
+
- lib/picky/character_substituters/polish.rb
|
166
168
|
- lib/picky/character_substituters/west_european.rb
|
167
169
|
- lib/picky/console.rb
|
168
170
|
- lib/picky/constants.rb
|
@@ -331,6 +333,7 @@ files:
|
|
331
333
|
- spec/lib/category_indexing_spec.rb
|
332
334
|
- spec/lib/category_realtime_spec.rb
|
333
335
|
- spec/lib/category_spec.rb
|
336
|
+
- spec/lib/character_substituters/polish_spec.rb
|
334
337
|
- spec/lib/character_substituters/west_european_spec.rb
|
335
338
|
- spec/lib/extensions/array_spec.rb
|
336
339
|
- spec/lib/extensions/hash_spec.rb
|
@@ -496,6 +499,7 @@ test_files:
|
|
496
499
|
- spec/lib/category_indexing_spec.rb
|
497
500
|
- spec/lib/category_realtime_spec.rb
|
498
501
|
- spec/lib/category_spec.rb
|
502
|
+
- spec/lib/character_substituters/polish_spec.rb
|
499
503
|
- spec/lib/character_substituters/west_european_spec.rb
|
500
504
|
- spec/lib/extensions/array_spec.rb
|
501
505
|
- spec/lib/extensions/hash_spec.rb
|