textutils 0.8.6 → 0.8.7
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/Manifest.txt +3 -0
- data/lib/textutils/helper/title_helper.rb +21 -1
- data/lib/textutils/helper/value_helper.rb +55 -8
- data/lib/textutils/version.rb +1 -1
- data/test/helper.rb +3 -0
- data/test/test_taglist.rb +32 -0
- data/test/test_title_finder.rb +25 -0
- data/test/test_title_mapper.rb +50 -0
- metadata +20 -16
data/Manifest.txt
CHANGED
@@ -30,6 +30,9 @@ lib/textutils/version.rb
|
|
30
30
|
test/helper.rb
|
31
31
|
test/test_address_helper.rb
|
32
32
|
test/test_hypertext_helper.rb
|
33
|
+
test/test_taglist.rb
|
34
|
+
test/test_title_finder.rb
|
33
35
|
test/test_title_helper.rb
|
36
|
+
test/test_title_mapper.rb
|
34
37
|
test/test_unicode_helper.rb
|
35
38
|
test/test_values_reader.rb
|
@@ -157,15 +157,35 @@ module TextUtils
|
|
157
157
|
|
158
158
|
def title_esc_regex( title_unescaped )
|
159
159
|
|
160
|
-
## escape regex special chars e.g.
|
160
|
+
## escape regex special chars e.g.
|
161
|
+
# . to \. and
|
162
|
+
# ( to \(
|
163
|
+
# ) to \)
|
164
|
+
# ? to \? -- zero or one
|
165
|
+
# * to \* -- zero or more
|
166
|
+
# + to \+ -- one or more
|
167
|
+
# $ to \$ -- end of line
|
168
|
+
# ^ to \^ -- start of line etc.
|
169
|
+
|
170
|
+
### add { and } ???
|
171
|
+
### add [ and ] ???
|
172
|
+
### add \ too ???
|
173
|
+
### add | too ???
|
174
|
+
|
161
175
|
# e.g. Benfica Lis.
|
162
176
|
# e.g. Club Atlético Colón (Santa Fe)
|
177
|
+
# e.g. Bauer Anton (????)
|
163
178
|
|
164
179
|
## NB: cannot use Regexp.escape! will escape space '' to '\ '
|
165
180
|
## title = Regexp.escape( title_unescaped )
|
166
181
|
title = title_unescaped.gsub( '.', '\.' )
|
167
182
|
title = title.gsub( '(', '\(' )
|
168
183
|
title = title.gsub( ')', '\)' )
|
184
|
+
title = title.gsub( '?', '\?' )
|
185
|
+
title = title.gsub( '*', '\*' )
|
186
|
+
title = title.gsub( '+', '\+' )
|
187
|
+
title = title.gsub( '$', '\$' )
|
188
|
+
title = title.gsub( '^', '\^' )
|
169
189
|
|
170
190
|
## match accented char with or without accents
|
171
191
|
## add (ü|ue) etc.
|
@@ -45,7 +45,12 @@ module TextUtils
|
|
45
45
|
|
46
46
|
def is_region?( value )
|
47
47
|
# assume region code e.g. TX or N
|
48
|
-
|
48
|
+
#
|
49
|
+
# fix/todo: allow three letter regions too e.g. BRU (brussels); regex below still matches only one or two letters
|
50
|
+
match_result = value =~ /^[A-Z]{1,2}$/
|
51
|
+
# match found if 0,1,2,3 etc or no match if nil
|
52
|
+
# note: return bool e.g. false|true (not 0,1,2,3 etc. and nil)
|
53
|
+
match_result != nil
|
49
54
|
end
|
50
55
|
|
51
56
|
## fix/todo: use match_region_for_country! w/ !!! why? why not?
|
@@ -88,6 +93,8 @@ module TextUtils
|
|
88
93
|
end
|
89
94
|
end
|
90
95
|
|
96
|
+
######
|
97
|
+
## fix: move to worlddb?? why why not??
|
91
98
|
def match_metro_flag( value )
|
92
99
|
if value =~ /^metro$/ # metro(politan area)
|
93
100
|
yield( true )
|
@@ -97,6 +104,8 @@ module TextUtils
|
|
97
104
|
end
|
98
105
|
end
|
99
106
|
|
107
|
+
######
|
108
|
+
## fix: move to worlddb?? why why not??
|
100
109
|
def match_metro_pop( value )
|
101
110
|
if value =~ /^m:/ # m:
|
102
111
|
num = value[2..-1].gsub(/[ _]/, '').to_i # cut off m: prefix; allow space and _ in number
|
@@ -109,7 +118,9 @@ module TextUtils
|
|
109
118
|
|
110
119
|
|
111
120
|
|
112
|
-
|
121
|
+
#####
|
122
|
+
## fix: move to beerdb ??? why? why not??
|
123
|
+
|
113
124
|
def match_brewery( value )
|
114
125
|
if value =~ /^by:/ ## by: -brewed by/brewery
|
115
126
|
brewery_key = value[3..-1] ## cut off by: prefix
|
@@ -124,9 +135,13 @@ module TextUtils
|
|
124
135
|
|
125
136
|
def is_year?( value )
|
126
137
|
# founded/established year e.g. 1776
|
127
|
-
value =~ /^[0-9]{4}$/
|
138
|
+
match_result = value =~ /^[0-9]{4}$/
|
139
|
+
# match found if 0,1,2,3 etc or no match if nil
|
140
|
+
# note: return bool e.g. false|true (not 0,1,2,3 etc. and nil)
|
141
|
+
match_result != nil
|
128
142
|
end
|
129
143
|
|
144
|
+
|
130
145
|
def match_year( value )
|
131
146
|
if is_year?( value ) # founded/established year e.g. 1776
|
132
147
|
yield( value.to_i )
|
@@ -206,7 +221,10 @@ module TextUtils
|
|
206
221
|
# - must end w/ .com
|
207
222
|
#
|
208
223
|
# fix: support more url format (e.g. w/o www. - look for .com .country code etc.)
|
209
|
-
value =~ /^www\.|\.com$/
|
224
|
+
match_result = value =~ /^www\.|\.com$/
|
225
|
+
# match found if 0,1,2,3 etc or no match if nil
|
226
|
+
# note: return bool e.g. false|true (not 0,1,2,3 etc. and nil)
|
227
|
+
match_result != nil
|
210
228
|
end
|
211
229
|
|
212
230
|
def match_website( value )
|
@@ -223,18 +241,45 @@ module TextUtils
|
|
223
241
|
|
224
242
|
def is_address?( value )
|
225
243
|
# if value includes // assume address e.g. 3970 Weitra // Sparkasseplatz 160
|
226
|
-
value =~ /\/{2}/
|
244
|
+
match_result = value =~ /\/{2}/
|
245
|
+
# match found if 0,1,2,3 etc or no match if nil
|
246
|
+
# note: return bool e.g. false|true (not 0,1,2,3 etc. and nil)
|
247
|
+
match_result != nil
|
227
248
|
end
|
228
249
|
|
229
250
|
def is_taglist?( value )
|
230
|
-
|
251
|
+
### note: cannot start w/ number must be letter for now
|
252
|
+
## -- in the future allow free standing years (e.g. 1980 etc.?? why? why not?)
|
253
|
+
## e.g. not allowed 14 ha or 5_000 hl etc.
|
254
|
+
match_result = value =~ /^([a-z][a-z0-9\|_ ]*[a-z0-9]|[a-z])$/
|
255
|
+
# match found if 0,1,2,3 etc or no match if nil
|
256
|
+
# note: return bool e.g. false|true (not 0,1,2,3 etc. and nil)
|
257
|
+
match_result != nil
|
231
258
|
end
|
232
259
|
|
233
260
|
|
234
261
|
def find_grade( value ) # NB: returns ary [grade,value] / two values
|
235
262
|
grade = 4 # defaults to grade 4 e.g *** => 1, ** => 2, * => 3, -/- => 4
|
236
263
|
|
237
|
-
|
264
|
+
# NB: stars must end field/value or start field/value
|
265
|
+
# e.g.
|
266
|
+
# *** Anton Bauer or
|
267
|
+
# Anton Bauer ***
|
268
|
+
|
269
|
+
value = value.sub( /^\s*(\*{1,3})\s+/ ) do |_|
|
270
|
+
if $1 == '***'
|
271
|
+
grade = 1
|
272
|
+
elsif $1 == '**'
|
273
|
+
grade = 2
|
274
|
+
elsif $1 == '*'
|
275
|
+
grade = 3
|
276
|
+
else
|
277
|
+
# unknown grade; not possible, is it?
|
278
|
+
end
|
279
|
+
'' # remove * from title if found
|
280
|
+
end
|
281
|
+
|
282
|
+
value = value.sub( /\s+(\*{1,3})\s*$/ ) do |_|
|
238
283
|
if $1 == '***'
|
239
284
|
grade = 1
|
240
285
|
elsif $1 == '**'
|
@@ -267,7 +312,9 @@ module TextUtils
|
|
267
312
|
|
268
313
|
# fix/todo: add support for leading underscore _
|
269
314
|
# or allow keys starting w/ digits?
|
270
|
-
|
315
|
+
|
316
|
+
# NB: key must start w/ a-z letter (NB: minimum one letter possible)
|
317
|
+
if values[0] =~ /^([a-z][a-z0-9.]*[a-z0-9]|[a-z])$/
|
271
318
|
key_col = values[0]
|
272
319
|
title_col = values[1]
|
273
320
|
more_values = values[2..-1]
|
data/lib/textutils/version.rb
CHANGED
data/test/helper.rb
CHANGED
@@ -8,6 +8,9 @@ require 'minitest/autorun'
|
|
8
8
|
|
9
9
|
# include MiniTest::Unit # lets us use TestCase instead of MiniTest::Unit::TestCase
|
10
10
|
|
11
|
+
## make sure activesupport gets included/required
|
12
|
+
# note: just activesupport or active_support will NOT work
|
13
|
+
require 'active_support/all'
|
11
14
|
|
12
15
|
## our own code
|
13
16
|
|
@@ -0,0 +1,32 @@
|
|
1
|
+
# encoding: utf-8
|
2
|
+
|
3
|
+
|
4
|
+
require 'helper'
|
5
|
+
|
6
|
+
|
7
|
+
class TestTaglist < MiniTest::Unit::TestCase
|
8
|
+
|
9
|
+
include TextUtils::ValueHelper # lets us use is_taglist?, etc.
|
10
|
+
|
11
|
+
def test_taglist_starting_w_digit
|
12
|
+
## for now - taglist cannot start w/ number
|
13
|
+
assert is_taglist?( '20 ha' ) == false
|
14
|
+
assert is_taglist?( '5000 hl' ) == false
|
15
|
+
assert is_taglist?( '5_000 hl' ) == false
|
16
|
+
end
|
17
|
+
|
18
|
+
def test_taglist_upcase
|
19
|
+
## taglist cannot use upcase letters
|
20
|
+
assert is_taglist?( 'ABC' ) == false
|
21
|
+
end
|
22
|
+
|
23
|
+
def test_taglist
|
24
|
+
assert is_taglist?( 'a' )
|
25
|
+
assert is_taglist?( 'a|b|c' )
|
26
|
+
assert is_taglist?( 'a b c' )
|
27
|
+
assert is_taglist?( 'a_b_c' )
|
28
|
+
end
|
29
|
+
|
30
|
+
|
31
|
+
end # class TestTaglist
|
32
|
+
|
@@ -0,0 +1,25 @@
|
|
1
|
+
# encoding: utf-8
|
2
|
+
|
3
|
+
|
4
|
+
require 'helper'
|
5
|
+
|
6
|
+
|
7
|
+
class TestTitleFinder < MiniTest::Unit::TestCase
|
8
|
+
|
9
|
+
include TextUtils::ValueHelper # lets us use find_grade, etc.
|
10
|
+
|
11
|
+
def test_grade
|
12
|
+
|
13
|
+
assert_equal [1,'Anton Bauer'], find_grade( '*** Anton Bauer' )
|
14
|
+
assert_equal [2,'Anton Bauer'], find_grade( '** Anton Bauer' )
|
15
|
+
assert_equal [3,'Anton Bauer'], find_grade( '* Anton Bauer' )
|
16
|
+
assert_equal [4,'Anton Bauer'], find_grade( 'Anton Bauer' )
|
17
|
+
|
18
|
+
assert_equal [1,'Anton Bauer'], find_grade( 'Anton Bauer ***' )
|
19
|
+
|
20
|
+
end
|
21
|
+
|
22
|
+
|
23
|
+
|
24
|
+
end # class TestTitleFinder
|
25
|
+
|
@@ -0,0 +1,50 @@
|
|
1
|
+
# encoding: utf-8
|
2
|
+
|
3
|
+
|
4
|
+
require 'helper'
|
5
|
+
|
6
|
+
|
7
|
+
class TestTitleMapper < MiniTest::Unit::TestCase
|
8
|
+
|
9
|
+
WineryStruct = Struct.new(:key, :title, :synonyms)
|
10
|
+
|
11
|
+
def test_title_table
|
12
|
+
|
13
|
+
### todo/fix: auto-add year and remove (1971) or (????) etc. from title!!!!
|
14
|
+
|
15
|
+
titles_in = [
|
16
|
+
WineryStruct.new( 'antonbauer', 'Anton Bauer (1971)' ),
|
17
|
+
WineryStruct.new( 'josefbauer', 'Weingut Josef Bauer', 'Joe Bauer|Josef Bauer (????)' ),
|
18
|
+
WineryStruct.new( 'bernhardott', 'Weingut Ott', 'Weingut Bernhard Ott|Bernhard Ott (1972)' ),
|
19
|
+
WineryStruct.new( 'andreaspolsterer', 'Weingut Andreas B. Polsterer', 'Andreas B. Polsterer (1970)' )
|
20
|
+
]
|
21
|
+
|
22
|
+
## note: for regex the following must get escaped
|
23
|
+
# ( => \(
|
24
|
+
# ) => \)
|
25
|
+
# . => \.
|
26
|
+
# ? => \?
|
27
|
+
|
28
|
+
titles_out2 = [
|
29
|
+
['antonbauer', [ 'Anton Bauer \(1971\)', 'Anton Bauer']],
|
30
|
+
['josefbauer', [ 'Weingut Josef Bauer', 'Josef Bauer \(\?\?\?\?\)', 'Josef Bauer', 'Joe Bauer' ]],
|
31
|
+
['bernhardott', [ 'Weingut Bernhard Ott', 'Bernhard Ott \(1972\)', 'Bernhard Ott', 'Weingut Ott' ]],
|
32
|
+
['andreaspolsterer', [ 'Weingut Andreas B\. Polsterer', 'Andreas B\. Polsterer \(1970\)', 'Andreas B\. Polsterer' ]]
|
33
|
+
]
|
34
|
+
|
35
|
+
titles_out = TextUtils.build_title_table_for( titles_in )
|
36
|
+
|
37
|
+
puts 'titles_out:'
|
38
|
+
pp titles_out
|
39
|
+
puts titles_out.to_s
|
40
|
+
|
41
|
+
puts 'titles_out2:'
|
42
|
+
pp titles_out2
|
43
|
+
puts titles_out.to_s
|
44
|
+
|
45
|
+
assert_equal titles_out2.to_s, titles_out.to_s
|
46
|
+
|
47
|
+
end # method test_title_table
|
48
|
+
|
49
|
+
|
50
|
+
end # class TestTitleMapper
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: textutils
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.8.6
|
4
|
+
version: 0.8.7
|
5
5
|
prerelease:
|
6
6
|
platform: ruby
|
7
7
|
authors:
|
@@ -9,11 +9,11 @@ authors:
|
|
9
9
|
autorequire:
|
10
10
|
bindir: bin
|
11
11
|
cert_chain: []
|
12
|
-
date: 2014-02
|
12
|
+
date: 2014-03-02 00:00:00.000000000 Z
|
13
13
|
dependencies:
|
14
14
|
- !ruby/object:Gem::Dependency
|
15
15
|
name: logutils
|
16
|
-
requirement: &
|
16
|
+
requirement: &78669010 !ruby/object:Gem::Requirement
|
17
17
|
none: false
|
18
18
|
requirements:
|
19
19
|
- - ~>
|
@@ -21,37 +21,35 @@ dependencies:
|
|
21
21
|
version: '0.5'
|
22
22
|
type: :runtime
|
23
23
|
prerelease: false
|
24
|
-
version_requirements: *
|
24
|
+
version_requirements: *78669010
|
25
25
|
- !ruby/object:Gem::Dependency
|
26
26
|
name: rdoc
|
27
|
-
requirement: &
|
27
|
+
requirement: &78668430 !ruby/object:Gem::Requirement
|
28
28
|
none: false
|
29
29
|
requirements:
|
30
30
|
- - ~>
|
31
31
|
- !ruby/object:Gem::Version
|
32
|
-
version: '
|
32
|
+
version: '3.10'
|
33
33
|
type: :development
|
34
34
|
prerelease: false
|
35
|
-
version_requirements: *
|
35
|
+
version_requirements: *78668430
|
36
36
|
- !ruby/object:Gem::Dependency
|
37
37
|
name: hoe
|
38
|
-
requirement: &
|
38
|
+
requirement: &78667530 !ruby/object:Gem::Requirement
|
39
39
|
none: false
|
40
40
|
requirements:
|
41
41
|
- - ~>
|
42
42
|
- !ruby/object:Gem::Version
|
43
|
-
version: '3.
|
43
|
+
version: '3.3'
|
44
44
|
type: :development
|
45
45
|
prerelease: false
|
46
|
-
version_requirements: *
|
46
|
+
version_requirements: *78667530
|
47
47
|
description: textutils - Text Filters, Helpers, Readers and More
|
48
48
|
email: ruby-talk@ruby-lang.org
|
49
49
|
executables: []
|
50
50
|
extensions: []
|
51
51
|
extra_rdoc_files:
|
52
|
-
- History.md
|
53
52
|
- Manifest.txt
|
54
|
-
- README.md
|
55
53
|
files:
|
56
54
|
- History.md
|
57
55
|
- Manifest.txt
|
@@ -85,7 +83,10 @@ files:
|
|
85
83
|
- test/helper.rb
|
86
84
|
- test/test_address_helper.rb
|
87
85
|
- test/test_hypertext_helper.rb
|
86
|
+
- test/test_taglist.rb
|
87
|
+
- test/test_title_finder.rb
|
88
88
|
- test/test_title_helper.rb
|
89
|
+
- test/test_title_mapper.rb
|
89
90
|
- test/test_unicode_helper.rb
|
90
91
|
- test/test_values_reader.rb
|
91
92
|
- .gemtest
|
@@ -112,13 +113,16 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
112
113
|
version: '0'
|
113
114
|
requirements: []
|
114
115
|
rubyforge_project: textutils
|
115
|
-
rubygems_version: 1.8.
|
116
|
+
rubygems_version: 1.8.17
|
116
117
|
signing_key:
|
117
118
|
specification_version: 3
|
118
119
|
summary: textutils - Text Filters, Helpers, Readers and More
|
119
120
|
test_files:
|
120
|
-
- test/
|
121
|
-
- test/test_hypertext_helper.rb
|
122
|
-
- test/test_title_helper.rb
|
121
|
+
- test/test_title_finder.rb
|
123
122
|
- test/test_unicode_helper.rb
|
123
|
+
- test/test_title_mapper.rb
|
124
124
|
- test/test_values_reader.rb
|
125
|
+
- test/test_taglist.rb
|
126
|
+
- test/test_hypertext_helper.rb
|
127
|
+
- test/test_title_helper.rb
|
128
|
+
- test/test_address_helper.rb
|