textutils 0.8.1 → 0.8.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/lib/textutils/helper/title_helper.rb +14 -3
- data/lib/textutils/version.rb +1 -1
- data/test/test_title_helper.rb +11 -1
- metadata +8 -8
@@ -45,12 +45,14 @@ module TextUtils
|
|
45
45
|
|
46
46
|
def strip_whitespaces( title )
|
47
47
|
# remove all whitespace and punctuation
|
48
|
-
title.gsub( /[ \t_
|
48
|
+
title.gsub( /[ \t_\-\.!()\[\]'"\/]/, '' )
|
49
49
|
end
|
50
50
|
|
51
51
|
def strip_special_chars( title )
|
52
52
|
# remove special chars (e.g. %°&)
|
53
|
-
|
53
|
+
# e.g. +Malta
|
54
|
+
# Minerva 8:60
|
55
|
+
title.gsub( /[%&°+:]/, '' )
|
54
56
|
end
|
55
57
|
|
56
58
|
def title_to_key( title )
|
@@ -118,24 +120,29 @@ module TextUtils
|
|
118
120
|
['ť', 't' ], # e.g. Měšťan
|
119
121
|
['ü', 'ue'],
|
120
122
|
['ú', 'u' ], # e.g. Fútbol
|
123
|
+
['ù', 'u' ], # e.g. Xyauyù (it)
|
121
124
|
['ū', 'u' ], # e.g. Sūduva
|
122
125
|
['ů', 'u' ], # e.g. Sládkův
|
123
126
|
['ı', 'u' ], # e.g. Bakı # use u?? (Baku) why-why not?
|
124
127
|
['ý', 'y' ], # e.g. Nefitrovaný
|
125
128
|
['ź', 'z' ], # e.g. Łódź
|
126
129
|
['ž', 'z' ], # e.g. Domžale, Petržalka
|
130
|
+
['ż', 'z' ], # e.g. Lomża (polish)
|
127
131
|
|
132
|
+
['Á', 'a' ], # e.g. Águila (es)
|
128
133
|
['Č', 'c' ], # e.g. České
|
129
134
|
['İ', 'i' ], # e.g. İnter
|
130
135
|
['Í', 'i' ], # e.g. ÍBV
|
131
136
|
['Ł', 'l' ], # e.g. Łódź
|
132
137
|
['Ö', 'oe' ], # e.g. Örebro
|
138
|
+
['Ø', 'o' ], # e.g. Nogne Ø Imperial Stout (no)
|
133
139
|
['Ř', 'r' ], # e.g. Řezák
|
134
140
|
['Ś', 's' ], # e.g. Śląsk
|
135
141
|
['Š', 's' ], # e.g. MŠK
|
136
142
|
['Ş', 's' ], # e.g. Şüvälan
|
137
143
|
['Ú', 'u' ], # e.g. Ústí, Újpest
|
138
|
-
['Ž', 'z' ] # e.g. Žilina
|
144
|
+
['Ž', 'z' ], # e.g. Žilina
|
145
|
+
['Ż', 'z' ] # e.g. Żywiec (polish)
|
139
146
|
]
|
140
147
|
|
141
148
|
alternatives.each do |alt|
|
@@ -187,6 +194,10 @@ module TextUtils
|
|
187
194
|
['ú', '(ú|u)'] ## e.g. Fútbol
|
188
195
|
]
|
189
196
|
|
197
|
+
### fix/todo: check for dot+space e.g. . and make dot optional
|
198
|
+
## e.g. U. de. G. or U de G or U.de.G ??
|
199
|
+
## collect some more (real-world) examples first!!!!!
|
200
|
+
|
190
201
|
alternatives.each do |alt|
|
191
202
|
title = title.gsub( alt[0], alt[1] )
|
192
203
|
end
|
data/lib/textutils/version.rb
CHANGED
data/test/test_title_helper.rb
CHANGED
@@ -24,7 +24,17 @@ class TestTitleHelper < MiniTest::Unit::TestCase
|
|
24
24
|
[ '‹Hirter› Pils', 'hirterpils' ],
|
25
25
|
[ '‹Villacher› Märzen', 'villachermaerzen' ],
|
26
26
|
[ 'Bock <Damm>', 'bockdamm' ],
|
27
|
-
[ '<Estrella> <Damm> Inedit', 'estrelladamminedit' ]
|
27
|
+
[ '<Estrella> <Damm> Inedit', 'estrelladamminedit' ],
|
28
|
+
[ 'Żubr', 'zubr' ],
|
29
|
+
[ 'Żywiec', 'zywiec' ],
|
30
|
+
[ 'Lomża Export', 'lomzaexport' ],
|
31
|
+
[ 'Nogne Ø Imperial Stout', 'nogneoimperialstout' ],
|
32
|
+
[ 'Xyauyù', 'xyauyu' ],
|
33
|
+
[ 'Águila', 'aguila' ],
|
34
|
+
[ '+Lupulus', 'lupulus' ],
|
35
|
+
[ '+Malta', 'malta' ],
|
36
|
+
[ 'Minerva 8:60', 'minerva860' ],
|
37
|
+
[ 'Hop Crisis!', 'hopcrisis' ]
|
28
38
|
]
|
29
39
|
|
30
40
|
txt_io.each do |txt|
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: textutils
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.8.1
|
4
|
+
version: 0.8.2
|
5
5
|
prerelease:
|
6
6
|
platform: ruby
|
7
7
|
authors:
|
@@ -9,11 +9,11 @@ authors:
|
|
9
9
|
autorequire:
|
10
10
|
bindir: bin
|
11
11
|
cert_chain: []
|
12
|
-
date: 2014-01-
|
12
|
+
date: 2014-01-22 00:00:00.000000000 Z
|
13
13
|
dependencies:
|
14
14
|
- !ruby/object:Gem::Dependency
|
15
15
|
name: logutils
|
16
|
-
requirement: &
|
16
|
+
requirement: &20656452 !ruby/object:Gem::Requirement
|
17
17
|
none: false
|
18
18
|
requirements:
|
19
19
|
- - ~>
|
@@ -21,10 +21,10 @@ dependencies:
|
|
21
21
|
version: '0.5'
|
22
22
|
type: :runtime
|
23
23
|
prerelease: false
|
24
|
-
version_requirements: *
|
24
|
+
version_requirements: *20656452
|
25
25
|
- !ruby/object:Gem::Dependency
|
26
26
|
name: rdoc
|
27
|
-
requirement: &
|
27
|
+
requirement: &20655864 !ruby/object:Gem::Requirement
|
28
28
|
none: false
|
29
29
|
requirements:
|
30
30
|
- - ~>
|
@@ -32,10 +32,10 @@ dependencies:
|
|
32
32
|
version: '4.0'
|
33
33
|
type: :development
|
34
34
|
prerelease: false
|
35
|
-
version_requirements: *
|
35
|
+
version_requirements: *20655864
|
36
36
|
- !ruby/object:Gem::Dependency
|
37
37
|
name: hoe
|
38
|
-
requirement: &
|
38
|
+
requirement: &20655132 !ruby/object:Gem::Requirement
|
39
39
|
none: false
|
40
40
|
requirements:
|
41
41
|
- - ~>
|
@@ -43,7 +43,7 @@ dependencies:
|
|
43
43
|
version: '3.7'
|
44
44
|
type: :development
|
45
45
|
prerelease: false
|
46
|
-
version_requirements: *
|
46
|
+
version_requirements: *20655132
|
47
47
|
description: textutils - Text Filters, Helpers, Readers and More
|
48
48
|
email: ruby-talk@ruby-lang.org
|
49
49
|
executables: []
|