textutils 0.8.6 → 0.8.7

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/Manifest.txt CHANGED
@@ -30,6 +30,9 @@ lib/textutils/version.rb
30
30
  test/helper.rb
31
31
  test/test_address_helper.rb
32
32
  test/test_hypertext_helper.rb
33
+ test/test_taglist.rb
34
+ test/test_title_finder.rb
33
35
  test/test_title_helper.rb
36
+ test/test_title_mapper.rb
34
37
  test/test_unicode_helper.rb
35
38
  test/test_values_reader.rb
@@ -157,15 +157,35 @@ module TextUtils
157
157
 
158
158
  def title_esc_regex( title_unescaped )
159
159
 
160
- ## escape regex special chars e.g. . to \. and ( to \( etc.
160
+ ## escape regex special chars e.g.
161
+ # . to \. and
162
+ # ( to \(
163
+ # ) to \)
164
+ # ? to \? -- zero or one
165
+ # * to \* -- zero or more
166
+ # + to \+ -- one or more
167
+ # $ to \$ -- end of line
168
+ # ^ to \^ -- start of line etc.
169
+
170
+ ### add { and } ???
171
+ ### add [ and ] ???
172
+ ### add \ too ???
173
+ ### add | too ???
174
+
161
175
  # e.g. Benfica Lis.
162
176
  # e.g. Club Atlético Colón (Santa Fe)
177
+ # e.g. Bauer Anton (????)
163
178
 
164
179
  ## NB: cannot use Regexp.escape! it will also escape a space ' ' to '\ '
165
180
  ## title = Regexp.escape( title_unescaped )
166
181
  title = title_unescaped.gsub( '.', '\.' )
167
182
  title = title.gsub( '(', '\(' )
168
183
  title = title.gsub( ')', '\)' )
184
+ title = title.gsub( '?', '\?' )
185
+ title = title.gsub( '*', '\*' )
186
+ title = title.gsub( '+', '\+' )
187
+ title = title.gsub( '$', '\$' )
188
+ title = title.gsub( '^', '\^' )
169
189
 
170
190
  ## match accented char with or without accents
171
191
  ## add (ü|ue) etc.
@@ -45,7 +45,12 @@ module TextUtils
45
45
 
46
46
  def is_region?( value )
47
47
  # assume region code e.g. TX or N
48
- value =~ /^[A-Z]{1,2}$/
48
+ #
49
+ # fix/todo: allow three letter regions too e.g. BRU (brussels) -- not yet supported; regex below still matches one or two letters only
50
+ match_result = value =~ /^[A-Z]{1,2}$/
51
+ # =~ returns the match position (0,1,2,3 etc.) if a match is found or nil if there is no match
52
+ # note: return bool e.g. false|true (not 0,1,2,3 etc. and nil)
53
+ match_result != nil
49
54
  end
50
55
 
51
56
  ## fix/todo: use match_region_for_country! w/ !!! why? why not?
@@ -88,6 +93,8 @@ module TextUtils
88
93
  end
89
94
  end
90
95
 
96
+ ######
97
+ ## fix: move to worlddb?? why why not??
91
98
  def match_metro_flag( value )
92
99
  if value =~ /^metro$/ # metro(politan area)
93
100
  yield( true )
@@ -97,6 +104,8 @@ module TextUtils
97
104
  end
98
105
  end
99
106
 
107
+ ######
108
+ ## fix: move to worlddb?? why why not??
100
109
  def match_metro_pop( value )
101
110
  if value =~ /^m:/ # m:
102
111
  num = value[2..-1].gsub(/[ _]/, '').to_i # cut off m: prefix; allow space and _ in number
@@ -109,7 +118,9 @@ module TextUtils
109
118
 
110
119
 
111
120
 
112
-
121
+ #####
122
+ ## fix: move to beerdb ??? why? why not??
123
+
113
124
  def match_brewery( value )
114
125
  if value =~ /^by:/ ## by: -brewed by/brewery
115
126
  brewery_key = value[3..-1] ## cut off by: prefix
@@ -124,9 +135,13 @@ module TextUtils
124
135
 
125
136
  def is_year?( value )
126
137
  # founded/established year e.g. 1776
127
- value =~ /^[0-9]{4}$/
138
+ match_result = value =~ /^[0-9]{4}$/
139
+ # =~ returns the match position (0,1,2,3 etc.) if a match is found or nil if there is no match
140
+ # note: return bool e.g. false|true (not 0,1,2,3 etc. and nil)
141
+ match_result != nil
128
142
  end
129
143
 
144
+
130
145
  def match_year( value )
131
146
  if is_year?( value ) # founded/established year e.g. 1776
132
147
  yield( value.to_i )
@@ -206,7 +221,10 @@ module TextUtils
206
221
  # - must end w/ .com
207
222
  #
208
223
  # fix: support more url format (e.g. w/o www. - look for .com .country code etc.)
209
- value =~ /^www\.|\.com$/
224
+ match_result = value =~ /^www\.|\.com$/
225
+ # =~ returns the match position (0,1,2,3 etc.) if a match is found or nil if there is no match
226
+ # note: return bool e.g. false|true (not 0,1,2,3 etc. and nil)
227
+ match_result != nil
210
228
  end
211
229
 
212
230
  def match_website( value )
@@ -223,18 +241,45 @@ module TextUtils
223
241
 
224
242
  def is_address?( value )
225
243
  # if value includes // assume address e.g. 3970 Weitra // Sparkasseplatz 160
226
- value =~ /\/{2}/
244
+ match_result = value =~ /\/{2}/
245
+ # =~ returns the match position (0,1,2,3 etc.) if a match is found or nil if there is no match
246
+ # note: return bool e.g. false|true (not 0,1,2,3 etc. and nil)
247
+ match_result != nil
227
248
  end
228
249
 
229
250
  def is_taglist?( value )
230
- value =~ /^[a-z0-9\|_ ]+$/
251
+ ### note: cannot start w/ a number; must start w/ a letter for now
252
+ ## -- in the future allow free standing years (e.g. 1980 etc.?? why? why not?)
253
+ ## e.g. not allowed 14 ha or 5_000 hl etc.
254
+ match_result = value =~ /^([a-z][a-z0-9\|_ ]*[a-z0-9]|[a-z])$/
255
+ # =~ returns the match position (0,1,2,3 etc.) if a match is found or nil if there is no match
256
+ # note: return bool e.g. false|true (not 0,1,2,3 etc. and nil)
257
+ match_result != nil
231
258
  end
232
259
 
233
260
 
234
261
  def find_grade( value ) # NB: returns ary [grade,value] / two values
235
262
  grade = 4 # defaults to grade 4 e.g *** => 1, ** => 2, * => 3, -/- => 4
236
263
 
237
- value = value.sub( /\s+(\*{1,3})\s*$/ ) do |_| # NB: stars must end field/value
264
+ # NB: stars must end field/value or start field/value
265
+ # e.g.
266
+ # *** Anton Bauer or
267
+ # Anton Bauer ***
268
+
269
+ value = value.sub( /^\s*(\*{1,3})\s+/ ) do |_|
270
+ if $1 == '***'
271
+ grade = 1
272
+ elsif $1 == '**'
273
+ grade = 2
274
+ elsif $1 == '*'
275
+ grade = 3
276
+ else
277
+ # unknown grade; not possible, is it?
278
+ end
279
+ '' # remove * from title if found
280
+ end
281
+
282
+ value = value.sub( /\s+(\*{1,3})\s*$/ ) do |_|
238
283
  if $1 == '***'
239
284
  grade = 1
240
285
  elsif $1 == '**'
@@ -267,7 +312,9 @@ module TextUtils
267
312
 
268
313
  # fix/todo: add support for leading underscore _
269
314
  # or allow keys starting w/ digits?
270
- if values[0] =~ /^([a-z][a-z0-9.]*[a-z0-9]|[a-z])$/ # NB: key must start w/ a-z letter (NB: minimum one letter possible)
315
+
316
+ # NB: key must start w/ a-z letter (NB: minimum one letter possible)
317
+ if values[0] =~ /^([a-z][a-z0-9.]*[a-z0-9]|[a-z])$/
271
318
  key_col = values[0]
272
319
  title_col = values[1]
273
320
  more_values = values[2..-1]
@@ -1,6 +1,6 @@
1
1
 
2
2
  module TextUtils
3
3
 
4
- VERSION = '0.8.6'
4
+ VERSION = '0.8.7'
5
5
 
6
6
  end # module TextUtils
data/test/helper.rb CHANGED
@@ -8,6 +8,9 @@ require 'minitest/autorun'
8
8
 
9
9
  # include MiniTest::Unit # lets us use TestCase instead of MiniTest::Unit::TestCase
10
10
 
11
+ ## make sure activesupport gets included/required
12
+ # note: just activesupport or active_support will NOT work
13
+ require 'active_support/all'
11
14
 
12
15
  ## our own code
13
16
 
@@ -0,0 +1,32 @@
1
+ # encoding: utf-8
2
+
3
+
4
+ require 'helper'
5
+
6
+
7
+ class TestTaglist < MiniTest::Unit::TestCase
8
+
9
+ include TextUtils::ValueHelper # lets us use is_taglist?, etc.
10
+
11
+ def test_taglist_starting_w_digit
12
+ ## for now - taglist cannot start w/ number
13
+ assert is_taglist?( '20 ha' ) == false
14
+ assert is_taglist?( '5000 hl' ) == false
15
+ assert is_taglist?( '5_000 hl' ) == false
16
+ end
17
+
18
+ def test_taglist_upcase
19
+ ## taglist cannot use upcase letters
20
+ assert is_taglist?( 'ABC' ) == false
21
+ end
22
+
23
+ def test_taglist
24
+ assert is_taglist?( 'a' )
25
+ assert is_taglist?( 'a|b|c' )
26
+ assert is_taglist?( 'a b c' )
27
+ assert is_taglist?( 'a_b_c' )
28
+ end
29
+
30
+
31
+ end # class TestTaglist
32
+
@@ -0,0 +1,25 @@
1
+ # encoding: utf-8
2
+
3
+
4
+ require 'helper'
5
+
6
+
7
+ class TestTitleFinder < MiniTest::Unit::TestCase
8
+
9
+ include TextUtils::ValueHelper # lets us use find_grade, etc.
10
+
11
+ def test_grade
12
+
13
+ assert_equal [1,'Anton Bauer'], find_grade( '*** Anton Bauer' )
14
+ assert_equal [2,'Anton Bauer'], find_grade( '** Anton Bauer' )
15
+ assert_equal [3,'Anton Bauer'], find_grade( '* Anton Bauer' )
16
+ assert_equal [4,'Anton Bauer'], find_grade( 'Anton Bauer' )
17
+
18
+ assert_equal [1,'Anton Bauer'], find_grade( 'Anton Bauer ***' )
19
+
20
+ end
21
+
22
+
23
+
24
+ end # class TestTitleFinder
25
+
@@ -0,0 +1,50 @@
1
+ # encoding: utf-8
2
+
3
+
4
+ require 'helper'
5
+
6
+
7
+ class TestTitleMapper < MiniTest::Unit::TestCase
8
+
9
+ WineryStruct = Struct.new(:key, :title, :synonyms)
10
+
11
+ def test_title_table
12
+
13
+ ### todo/fix: auto-add year and remove (1971) or (????) etc. from title!!!!
14
+
15
+ titles_in = [
16
+ WineryStruct.new( 'antonbauer', 'Anton Bauer (1971)' ),
17
+ WineryStruct.new( 'josefbauer', 'Weingut Josef Bauer', 'Joe Bauer|Josef Bauer (????)' ),
18
+ WineryStruct.new( 'bernhardott', 'Weingut Ott', 'Weingut Bernhard Ott|Bernhard Ott (1972)' ),
19
+ WineryStruct.new( 'andreaspolsterer', 'Weingut Andreas B. Polsterer', 'Andreas B. Polsterer (1970)' )
20
+ ]
21
+
22
+ ## note: for regex the following must get escaped
23
+ # ( => \(
24
+ # ) => \)
25
+ # . => \.
26
+ # ? => \?
27
+
28
+ titles_out2 = [
29
+ ['antonbauer', [ 'Anton Bauer \(1971\)', 'Anton Bauer']],
30
+ ['josefbauer', [ 'Weingut Josef Bauer', 'Josef Bauer \(\?\?\?\?\)', 'Josef Bauer', 'Joe Bauer' ]],
31
+ ['bernhardott', [ 'Weingut Bernhard Ott', 'Bernhard Ott \(1972\)', 'Bernhard Ott', 'Weingut Ott' ]],
32
+ ['andreaspolsterer', [ 'Weingut Andreas B\. Polsterer', 'Andreas B\. Polsterer \(1970\)', 'Andreas B\. Polsterer' ]]
33
+ ]
34
+
35
+ titles_out = TextUtils.build_title_table_for( titles_in )
36
+
37
+ puts 'titles_out:'
38
+ pp titles_out
39
+ puts titles_out.to_s
40
+
41
+ puts 'titles_out2:'
42
+ pp titles_out2
43
+ puts titles_out.to_s
44
+
45
+ assert_equal titles_out2.to_s, titles_out.to_s
46
+
47
+ end # method test_title_table
48
+
49
+
50
+ end # class TestTitleMapper
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: textutils
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.8.6
4
+ version: 0.8.7
5
5
  prerelease:
6
6
  platform: ruby
7
7
  authors:
@@ -9,11 +9,11 @@ authors:
9
9
  autorequire:
10
10
  bindir: bin
11
11
  cert_chain: []
12
- date: 2014-02-16 00:00:00.000000000 Z
12
+ date: 2014-03-02 00:00:00.000000000 Z
13
13
  dependencies:
14
14
  - !ruby/object:Gem::Dependency
15
15
  name: logutils
16
- requirement: &20840748 !ruby/object:Gem::Requirement
16
+ requirement: &78669010 !ruby/object:Gem::Requirement
17
17
  none: false
18
18
  requirements:
19
19
  - - ~>
@@ -21,37 +21,35 @@ dependencies:
21
21
  version: '0.5'
22
22
  type: :runtime
23
23
  prerelease: false
24
- version_requirements: *20840748
24
+ version_requirements: *78669010
25
25
  - !ruby/object:Gem::Dependency
26
26
  name: rdoc
27
- requirement: &20840412 !ruby/object:Gem::Requirement
27
+ requirement: &78668430 !ruby/object:Gem::Requirement
28
28
  none: false
29
29
  requirements:
30
30
  - - ~>
31
31
  - !ruby/object:Gem::Version
32
- version: '4.0'
32
+ version: '3.10'
33
33
  type: :development
34
34
  prerelease: false
35
- version_requirements: *20840412
35
+ version_requirements: *78668430
36
36
  - !ruby/object:Gem::Dependency
37
37
  name: hoe
38
- requirement: &20839944 !ruby/object:Gem::Requirement
38
+ requirement: &78667530 !ruby/object:Gem::Requirement
39
39
  none: false
40
40
  requirements:
41
41
  - - ~>
42
42
  - !ruby/object:Gem::Version
43
- version: '3.7'
43
+ version: '3.3'
44
44
  type: :development
45
45
  prerelease: false
46
- version_requirements: *20839944
46
+ version_requirements: *78667530
47
47
  description: textutils - Text Filters, Helpers, Readers and More
48
48
  email: ruby-talk@ruby-lang.org
49
49
  executables: []
50
50
  extensions: []
51
51
  extra_rdoc_files:
52
- - History.md
53
52
  - Manifest.txt
54
- - README.md
55
53
  files:
56
54
  - History.md
57
55
  - Manifest.txt
@@ -85,7 +83,10 @@ files:
85
83
  - test/helper.rb
86
84
  - test/test_address_helper.rb
87
85
  - test/test_hypertext_helper.rb
86
+ - test/test_taglist.rb
87
+ - test/test_title_finder.rb
88
88
  - test/test_title_helper.rb
89
+ - test/test_title_mapper.rb
89
90
  - test/test_unicode_helper.rb
90
91
  - test/test_values_reader.rb
91
92
  - .gemtest
@@ -112,13 +113,16 @@ required_rubygems_version: !ruby/object:Gem::Requirement
112
113
  version: '0'
113
114
  requirements: []
114
115
  rubyforge_project: textutils
115
- rubygems_version: 1.8.16
116
+ rubygems_version: 1.8.17
116
117
  signing_key:
117
118
  specification_version: 3
118
119
  summary: textutils - Text Filters, Helpers, Readers and More
119
120
  test_files:
120
- - test/test_address_helper.rb
121
- - test/test_hypertext_helper.rb
122
- - test/test_title_helper.rb
121
+ - test/test_title_finder.rb
123
122
  - test/test_unicode_helper.rb
123
+ - test/test_title_mapper.rb
124
124
  - test/test_values_reader.rb
125
+ - test/test_taglist.rb
126
+ - test/test_hypertext_helper.rb
127
+ - test/test_title_helper.rb
128
+ - test/test_address_helper.rb