textutils 0.8.6 → 0.8.7
Sign up to get free protection for your applications and to get access to all the features.
- data/Manifest.txt +3 -0
- data/lib/textutils/helper/title_helper.rb +21 -1
- data/lib/textutils/helper/value_helper.rb +55 -8
- data/lib/textutils/version.rb +1 -1
- data/test/helper.rb +3 -0
- data/test/test_taglist.rb +32 -0
- data/test/test_title_finder.rb +25 -0
- data/test/test_title_mapper.rb +50 -0
- metadata +20 -16
data/Manifest.txt
CHANGED
@@ -30,6 +30,9 @@ lib/textutils/version.rb
|
|
30
30
|
test/helper.rb
|
31
31
|
test/test_address_helper.rb
|
32
32
|
test/test_hypertext_helper.rb
|
33
|
+
test/test_taglist.rb
|
34
|
+
test/test_title_finder.rb
|
33
35
|
test/test_title_helper.rb
|
36
|
+
test/test_title_mapper.rb
|
34
37
|
test/test_unicode_helper.rb
|
35
38
|
test/test_values_reader.rb
|
@@ -157,15 +157,35 @@ module TextUtils
|
|
157
157
|
|
158
158
|
def title_esc_regex( title_unescaped )
|
159
159
|
|
160
|
-
## escape regex special chars e.g.
|
160
|
+
## escape regex special chars e.g.
|
161
|
+
# . to \. and
|
162
|
+
# ( to \(
|
163
|
+
# ) to \)
|
164
|
+
# ? to \? -- zero or one
|
165
|
+
# * to \* -- zero or more
|
166
|
+
# + to \+ -- one or more
|
167
|
+
# $ to \$ -- end of line
|
168
|
+
# ^ to \^ -- start of line etc.
|
169
|
+
|
170
|
+
### add { and } ???
|
171
|
+
### add [ and ] ???
|
172
|
+
### add \ too ???
|
173
|
+
### add | too ???
|
174
|
+
|
161
175
|
# e.g. Benfica Lis.
|
162
176
|
# e.g. Club Atlético Colón (Santa Fe)
|
177
|
+
# e.g. Bauer Anton (????)
|
163
178
|
|
164
179
|
## NB: cannot use Regexp.escape! it will escape space ' ' to '\ '
|
165
180
|
## title = Regexp.escape( title_unescaped )
|
166
181
|
title = title_unescaped.gsub( '.', '\.' )
|
167
182
|
title = title.gsub( '(', '\(' )
|
168
183
|
title = title.gsub( ')', '\)' )
|
184
|
+
title = title.gsub( '?', '\?' )
|
185
|
+
title = title.gsub( '*', '\*' )
|
186
|
+
title = title.gsub( '+', '\+' )
|
187
|
+
title = title.gsub( '$', '\$' )
|
188
|
+
title = title.gsub( '^', '\^' )
|
169
189
|
|
170
190
|
## match accented char with or without accents
|
171
191
|
## add (ü|ue) etc.
|
@@ -45,7 +45,12 @@ module TextUtils
|
|
45
45
|
|
46
46
|
def is_region?( value )
|
47
47
|
# assume region code e.g. TX or N
|
48
|
-
|
48
|
+
#
|
49
|
+
# fix: allow three letter regions too e.g. BRU (brussels)
|
50
|
+
match_result = value =~ /^[A-Z]{1,2}$/
|
51
|
+
# match found if 0,1,2,3 etc or no match if nil
|
52
|
+
# note: return bool e.g. false|true (not 0,1,2,3 etc. and nil)
|
53
|
+
match_result != nil
|
49
54
|
end
|
50
55
|
|
51
56
|
## fix/todo: use match_region_for_country! w/ !!! why? why not?
|
@@ -88,6 +93,8 @@ module TextUtils
|
|
88
93
|
end
|
89
94
|
end
|
90
95
|
|
96
|
+
######
|
97
|
+
## fix: move to worlddb?? why why not??
|
91
98
|
def match_metro_flag( value )
|
92
99
|
if value =~ /^metro$/ # metro(politan area)
|
93
100
|
yield( true )
|
@@ -97,6 +104,8 @@ module TextUtils
|
|
97
104
|
end
|
98
105
|
end
|
99
106
|
|
107
|
+
######
|
108
|
+
## fix: move to worlddb?? why why not??
|
100
109
|
def match_metro_pop( value )
|
101
110
|
if value =~ /^m:/ # m:
|
102
111
|
num = value[2..-1].gsub(/[ _]/, '').to_i # cut off m: prefix; allow space and _ in number
|
@@ -109,7 +118,9 @@ module TextUtils
|
|
109
118
|
|
110
119
|
|
111
120
|
|
112
|
-
|
121
|
+
#####
|
122
|
+
## fix: move to beerdb ??? why? why not??
|
123
|
+
|
113
124
|
def match_brewery( value )
|
114
125
|
if value =~ /^by:/ ## by: -brewed by/brewery
|
115
126
|
brewery_key = value[3..-1] ## cut off by: prefix
|
@@ -124,9 +135,13 @@ module TextUtils
|
|
124
135
|
|
125
136
|
def is_year?( value )
|
126
137
|
# founded/established year e.g. 1776
|
127
|
-
value =~ /^[0-9]{4}$/
|
138
|
+
match_result = value =~ /^[0-9]{4}$/
|
139
|
+
# match found if 0,1,2,3 etc or no match if nil
|
140
|
+
# note: return bool e.g. false|true (not 0,1,2,3 etc. and nil)
|
141
|
+
match_result != nil
|
128
142
|
end
|
129
143
|
|
144
|
+
|
130
145
|
def match_year( value )
|
131
146
|
if is_year?( value ) # founded/established year e.g. 1776
|
132
147
|
yield( value.to_i )
|
@@ -206,7 +221,10 @@ module TextUtils
|
|
206
221
|
# - must end w/ .com
|
207
222
|
#
|
208
223
|
# fix: support more url format (e.g. w/o www. - look for .com .country code etc.)
|
209
|
-
value =~ /^www\.|\.com$/
|
224
|
+
match_result = value =~ /^www\.|\.com$/
|
225
|
+
# match found if 0,1,2,3 etc or no match if nil
|
226
|
+
# note: return bool e.g. false|true (not 0,1,2,3 etc. and nil)
|
227
|
+
match_result != nil
|
210
228
|
end
|
211
229
|
|
212
230
|
def match_website( value )
|
@@ -223,18 +241,45 @@ module TextUtils
|
|
223
241
|
|
224
242
|
def is_address?( value )
|
225
243
|
# if value includes // assume address e.g. 3970 Weitra // Sparkasseplatz 160
|
226
|
-
value =~ /\/{2}/
|
244
|
+
match_result = value =~ /\/{2}/
|
245
|
+
# match found if 0,1,2,3 etc or no match if nil
|
246
|
+
# note: return bool e.g. false|true (not 0,1,2,3 etc. and nil)
|
247
|
+
match_result != nil
|
227
248
|
end
|
228
249
|
|
229
250
|
def is_taglist?( value )
|
230
|
-
|
251
|
+
### note: cannot start w/ number must be letter for now
|
252
|
+
## -- in the future allow free standing years (e.g. 1980 etc.?? why? why not?)
|
253
|
+
## e.g. not allowed 14 ha or 5_000 hl etc.
|
254
|
+
match_result = value =~ /^([a-z][a-z0-9\|_ ]*[a-z0-9]|[a-z])$/
|
255
|
+
# match found if 0,1,2,3 etc or no match if nil
|
256
|
+
# note: return bool e.g. false|true (not 0,1,2,3 etc. and nil)
|
257
|
+
match_result != nil
|
231
258
|
end
|
232
259
|
|
233
260
|
|
234
261
|
def find_grade( value ) # NB: returns ary [grade,value] / two values
|
235
262
|
grade = 4 # defaults to grade 4 e.g *** => 1, ** => 2, * => 3, -/- => 4
|
236
263
|
|
237
|
-
|
264
|
+
# NB: stars must end field/value or start field/value
|
265
|
+
# e.g.
|
266
|
+
# *** Anton Bauer or
|
267
|
+
# Anton Bauer ***
|
268
|
+
|
269
|
+
value = value.sub( /^\s*(\*{1,3})\s+/ ) do |_|
|
270
|
+
if $1 == '***'
|
271
|
+
grade = 1
|
272
|
+
elsif $1 == '**'
|
273
|
+
grade = 2
|
274
|
+
elsif $1 == '*'
|
275
|
+
grade = 3
|
276
|
+
else
|
277
|
+
# unknown grade; not possible, is it?
|
278
|
+
end
|
279
|
+
'' # remove * from title if found
|
280
|
+
end
|
281
|
+
|
282
|
+
value = value.sub( /\s+(\*{1,3})\s*$/ ) do |_|
|
238
283
|
if $1 == '***'
|
239
284
|
grade = 1
|
240
285
|
elsif $1 == '**'
|
@@ -267,7 +312,9 @@ module TextUtils
|
|
267
312
|
|
268
313
|
# fix/todo: add support for leading underscore _
|
269
314
|
# or allow keys starting w/ digits?
|
270
|
-
|
315
|
+
|
316
|
+
# NB: key must start w/ a-z letter (NB: minimum one letter possible)
|
317
|
+
if values[0] =~ /^([a-z][a-z0-9.]*[a-z0-9]|[a-z])$/
|
271
318
|
key_col = values[0]
|
272
319
|
title_col = values[1]
|
273
320
|
more_values = values[2..-1]
|
data/lib/textutils/version.rb
CHANGED
data/test/helper.rb
CHANGED
@@ -8,6 +8,9 @@ require 'minitest/autorun'
|
|
8
8
|
|
9
9
|
# include MiniTest::Unit # lets us use TestCase instead of MiniTest::Unit::TestCase
|
10
10
|
|
11
|
+
## make sure activesupport gets included/required
|
12
|
+
# note: just activesupport or active_support will NOT work
|
13
|
+
require 'active_support/all'
|
11
14
|
|
12
15
|
## our own code
|
13
16
|
|
@@ -0,0 +1,32 @@
|
|
1
|
+
# encoding: utf-8
|
2
|
+
|
3
|
+
|
4
|
+
require 'helper'
|
5
|
+
|
6
|
+
|
7
|
+
class TestTaglist < MiniTest::Unit::TestCase
|
8
|
+
|
9
|
+
include TextUtils::ValueHelper # lets us use is_taglist?, etc.
|
10
|
+
|
11
|
+
def test_taglist_starting_w_digit
|
12
|
+
## for now - taglist cannot start w/ number
|
13
|
+
assert is_taglist?( '20 ha' ) == false
|
14
|
+
assert is_taglist?( '5000 hl' ) == false
|
15
|
+
assert is_taglist?( '5_000 hl' ) == false
|
16
|
+
end
|
17
|
+
|
18
|
+
def test_taglist_upcase
|
19
|
+
## taglist cannot use upcase letters
|
20
|
+
assert is_taglist?( 'ABC' ) == false
|
21
|
+
end
|
22
|
+
|
23
|
+
def test_taglist
|
24
|
+
assert is_taglist?( 'a' )
|
25
|
+
assert is_taglist?( 'a|b|c' )
|
26
|
+
assert is_taglist?( 'a b c' )
|
27
|
+
assert is_taglist?( 'a_b_c' )
|
28
|
+
end
|
29
|
+
|
30
|
+
|
31
|
+
end # class TestTaglist
|
32
|
+
|
@@ -0,0 +1,25 @@
|
|
1
|
+
# encoding: utf-8
|
2
|
+
|
3
|
+
|
4
|
+
require 'helper'
|
5
|
+
|
6
|
+
|
7
|
+
class TestTitleFinder < MiniTest::Unit::TestCase
|
8
|
+
|
9
|
+
include TextUtils::ValueHelper # lets us use find_grade, etc.
|
10
|
+
|
11
|
+
def test_grade
|
12
|
+
|
13
|
+
assert_equal [1,'Anton Bauer'], find_grade( '*** Anton Bauer' )
|
14
|
+
assert_equal [2,'Anton Bauer'], find_grade( '** Anton Bauer' )
|
15
|
+
assert_equal [3,'Anton Bauer'], find_grade( '* Anton Bauer' )
|
16
|
+
assert_equal [4,'Anton Bauer'], find_grade( 'Anton Bauer' )
|
17
|
+
|
18
|
+
assert_equal [1,'Anton Bauer'], find_grade( 'Anton Bauer ***' )
|
19
|
+
|
20
|
+
end
|
21
|
+
|
22
|
+
|
23
|
+
|
24
|
+
end # class TestTitleFinder
|
25
|
+
|
@@ -0,0 +1,50 @@
|
|
1
|
+
# encoding: utf-8
|
2
|
+
|
3
|
+
|
4
|
+
require 'helper'
|
5
|
+
|
6
|
+
|
7
|
+
class TestTitleMapper < MiniTest::Unit::TestCase
|
8
|
+
|
9
|
+
WineryStruct = Struct.new(:key, :title, :synonyms)
|
10
|
+
|
11
|
+
def test_title_table
|
12
|
+
|
13
|
+
### todo/fix: auto-add year and remove (1971) or (????) etc. from title!!!!
|
14
|
+
|
15
|
+
titles_in = [
|
16
|
+
WineryStruct.new( 'antonbauer', 'Anton Bauer (1971)' ),
|
17
|
+
WineryStruct.new( 'josefbauer', 'Weingut Josef Bauer', 'Joe Bauer|Josef Bauer (????)' ),
|
18
|
+
WineryStruct.new( 'bernhardott', 'Weingut Ott', 'Weingut Bernhard Ott|Bernhard Ott (1972)' ),
|
19
|
+
WineryStruct.new( 'andreaspolsterer', 'Weingut Andreas B. Polsterer', 'Andreas B. Polsterer (1970)' )
|
20
|
+
]
|
21
|
+
|
22
|
+
## note: for regex the following must get escaped
|
23
|
+
# ( => \(
|
24
|
+
# ) => \)
|
25
|
+
# . => \.
|
26
|
+
# ? => \?
|
27
|
+
|
28
|
+
titles_out2 = [
|
29
|
+
['antonbauer', [ 'Anton Bauer \(1971\)', 'Anton Bauer']],
|
30
|
+
['josefbauer', [ 'Weingut Josef Bauer', 'Josef Bauer \(\?\?\?\?\)', 'Josef Bauer', 'Joe Bauer' ]],
|
31
|
+
['bernhardott', [ 'Weingut Bernhard Ott', 'Bernhard Ott \(1972\)', 'Bernhard Ott', 'Weingut Ott' ]],
|
32
|
+
['andreaspolsterer', [ 'Weingut Andreas B\. Polsterer', 'Andreas B\. Polsterer \(1970\)', 'Andreas B\. Polsterer' ]]
|
33
|
+
]
|
34
|
+
|
35
|
+
titles_out = TextUtils.build_title_table_for( titles_in )
|
36
|
+
|
37
|
+
puts 'titles_out:'
|
38
|
+
pp titles_out
|
39
|
+
puts titles_out.to_s
|
40
|
+
|
41
|
+
puts 'titles_out2:'
|
42
|
+
pp titles_out2
|
43
|
+
puts titles_out.to_s
|
44
|
+
|
45
|
+
assert_equal titles_out2.to_s, titles_out.to_s
|
46
|
+
|
47
|
+
end # method test_title_table
|
48
|
+
|
49
|
+
|
50
|
+
end # class TestTitleMapper
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: textutils
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.8.6
|
4
|
+
version: 0.8.7
|
5
5
|
prerelease:
|
6
6
|
platform: ruby
|
7
7
|
authors:
|
@@ -9,11 +9,11 @@ authors:
|
|
9
9
|
autorequire:
|
10
10
|
bindir: bin
|
11
11
|
cert_chain: []
|
12
|
-
date: 2014-02
|
12
|
+
date: 2014-03-02 00:00:00.000000000 Z
|
13
13
|
dependencies:
|
14
14
|
- !ruby/object:Gem::Dependency
|
15
15
|
name: logutils
|
16
|
-
requirement: &
|
16
|
+
requirement: &78669010 !ruby/object:Gem::Requirement
|
17
17
|
none: false
|
18
18
|
requirements:
|
19
19
|
- - ~>
|
@@ -21,37 +21,35 @@ dependencies:
|
|
21
21
|
version: '0.5'
|
22
22
|
type: :runtime
|
23
23
|
prerelease: false
|
24
|
-
version_requirements: *
|
24
|
+
version_requirements: *78669010
|
25
25
|
- !ruby/object:Gem::Dependency
|
26
26
|
name: rdoc
|
27
|
-
requirement: &
|
27
|
+
requirement: &78668430 !ruby/object:Gem::Requirement
|
28
28
|
none: false
|
29
29
|
requirements:
|
30
30
|
- - ~>
|
31
31
|
- !ruby/object:Gem::Version
|
32
|
-
version: '
|
32
|
+
version: '3.10'
|
33
33
|
type: :development
|
34
34
|
prerelease: false
|
35
|
-
version_requirements: *
|
35
|
+
version_requirements: *78668430
|
36
36
|
- !ruby/object:Gem::Dependency
|
37
37
|
name: hoe
|
38
|
-
requirement: &
|
38
|
+
requirement: &78667530 !ruby/object:Gem::Requirement
|
39
39
|
none: false
|
40
40
|
requirements:
|
41
41
|
- - ~>
|
42
42
|
- !ruby/object:Gem::Version
|
43
|
-
version: '3.
|
43
|
+
version: '3.3'
|
44
44
|
type: :development
|
45
45
|
prerelease: false
|
46
|
-
version_requirements: *
|
46
|
+
version_requirements: *78667530
|
47
47
|
description: textutils - Text Filters, Helpers, Readers and More
|
48
48
|
email: ruby-talk@ruby-lang.org
|
49
49
|
executables: []
|
50
50
|
extensions: []
|
51
51
|
extra_rdoc_files:
|
52
|
-
- History.md
|
53
52
|
- Manifest.txt
|
54
|
-
- README.md
|
55
53
|
files:
|
56
54
|
- History.md
|
57
55
|
- Manifest.txt
|
@@ -85,7 +83,10 @@ files:
|
|
85
83
|
- test/helper.rb
|
86
84
|
- test/test_address_helper.rb
|
87
85
|
- test/test_hypertext_helper.rb
|
86
|
+
- test/test_taglist.rb
|
87
|
+
- test/test_title_finder.rb
|
88
88
|
- test/test_title_helper.rb
|
89
|
+
- test/test_title_mapper.rb
|
89
90
|
- test/test_unicode_helper.rb
|
90
91
|
- test/test_values_reader.rb
|
91
92
|
- .gemtest
|
@@ -112,13 +113,16 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
112
113
|
version: '0'
|
113
114
|
requirements: []
|
114
115
|
rubyforge_project: textutils
|
115
|
-
rubygems_version: 1.8.
|
116
|
+
rubygems_version: 1.8.17
|
116
117
|
signing_key:
|
117
118
|
specification_version: 3
|
118
119
|
summary: textutils - Text Filters, Helpers, Readers and More
|
119
120
|
test_files:
|
120
|
-
- test/
|
121
|
-
- test/test_hypertext_helper.rb
|
122
|
-
- test/test_title_helper.rb
|
121
|
+
- test/test_title_finder.rb
|
123
122
|
- test/test_unicode_helper.rb
|
123
|
+
- test/test_title_mapper.rb
|
124
124
|
- test/test_values_reader.rb
|
125
|
+
- test/test_taglist.rb
|
126
|
+
- test/test_hypertext_helper.rb
|
127
|
+
- test/test_title_helper.rb
|
128
|
+
- test/test_address_helper.rb
|