textutils 0.8.6 → 0.8.7

Sign up to get free protection for your applications and to get access to all the features.
data/Manifest.txt CHANGED
@@ -30,6 +30,9 @@ lib/textutils/version.rb
30
30
  test/helper.rb
31
31
  test/test_address_helper.rb
32
32
  test/test_hypertext_helper.rb
33
+ test/test_taglist.rb
34
+ test/test_title_finder.rb
33
35
  test/test_title_helper.rb
36
+ test/test_title_mapper.rb
34
37
  test/test_unicode_helper.rb
35
38
  test/test_values_reader.rb
@@ -157,15 +157,35 @@ module TextUtils
157
157
 
158
158
  def title_esc_regex( title_unescaped )
159
159
 
160
- ## escape regex special chars e.g. . to \. and ( to \( etc.
160
+ ## escape regex special chars e.g.
161
+ # . to \. and
162
+ # ( to \(
163
+ # ) to \)
164
+ # ? to \? -- zero or one
165
+ # * to \* -- zero or more
166
+ # + to \+ -- one or more
167
+ # $ to \$ -- end of line
168
+ # ^ to \^ -- start of line etc.
169
+
170
+ ### add { and } ???
171
+ ### add [ and ] ???
172
+ ### add \ too ???
173
+ ### add | too ???
174
+
161
175
  # e.g. Benfica Lis.
162
176
  # e.g. Club Atlético Colón (Santa Fe)
177
+ # e.g. Bauer Anton (????)
163
178
 
164
179
  ## NB: cannot use Regexp.escape! it will escape space ' ' to '\ '
165
180
  ## title = Regexp.escape( title_unescaped )
166
181
  title = title_unescaped.gsub( '.', '\.' )
167
182
  title = title.gsub( '(', '\(' )
168
183
  title = title.gsub( ')', '\)' )
184
+ title = title.gsub( '?', '\?' )
185
+ title = title.gsub( '*', '\*' )
186
+ title = title.gsub( '+', '\+' )
187
+ title = title.gsub( '$', '\$' )
188
+ title = title.gsub( '^', '\^' )
169
189
 
170
190
  ## match accented char with or without accents
171
191
  ## add (ü|ue) etc.
@@ -45,7 +45,12 @@ module TextUtils
45
45
 
46
46
  def is_region?( value )
47
47
  # assume region code e.g. TX or N
48
- value =~ /^[A-Z]{1,2}$/
48
+ #
49
+ # fix/todo: allow three-letter regions too, e.g. BRU (Brussels) -- note: the regex below still accepts only one or two letters
50
+ match_result = value =~ /^[A-Z]{1,2}$/
51
+ # match found if 0,1,2,3 etc or no match if nil
52
+ # note: return bool e.g. false|true (not 0,1,2,3 etc. and nil)
53
+ match_result != nil
49
54
  end
50
55
 
51
56
  ## fix/todo: use match_region_for_country! w/ !!! why? why not?
@@ -88,6 +93,8 @@ module TextUtils
88
93
  end
89
94
  end
90
95
 
96
+ ######
97
+ ## fix: move to worlddb?? why? why not??
91
98
  def match_metro_flag( value )
92
99
  if value =~ /^metro$/ # metro(politan area)
93
100
  yield( true )
@@ -97,6 +104,8 @@ module TextUtils
97
104
  end
98
105
  end
99
106
 
107
+ ######
108
+ ## fix: move to worlddb?? why? why not??
100
109
  def match_metro_pop( value )
101
110
  if value =~ /^m:/ # m:
102
111
  num = value[2..-1].gsub(/[ _]/, '').to_i # cut off m: prefix; allow space and _ in number
@@ -109,7 +118,9 @@ module TextUtils
109
118
 
110
119
 
111
120
 
112
-
121
+ #####
122
+ ## fix: move to beerdb ??? why? why not??
123
+
113
124
  def match_brewery( value )
114
125
  if value =~ /^by:/ ## by: -brewed by/brewery
115
126
  brewery_key = value[3..-1] ## cut off by: prefix
@@ -124,9 +135,13 @@ module TextUtils
124
135
 
125
136
  def is_year?( value )
126
137
  # founded/established year e.g. 1776
127
- value =~ /^[0-9]{4}$/
138
+ match_result = value =~ /^[0-9]{4}$/
139
+ # match found if 0,1,2,3 etc or no match if nil
140
+ # note: return bool e.g. false|true (not 0,1,2,3 etc. and nil)
141
+ match_result != nil
128
142
  end
129
143
 
144
+
130
145
  def match_year( value )
131
146
  if is_year?( value ) # founded/established year e.g. 1776
132
147
  yield( value.to_i )
@@ -206,7 +221,10 @@ module TextUtils
206
221
  # - must end w/ .com
207
222
  #
208
223
  # fix: support more url format (e.g. w/o www. - look for .com .country code etc.)
209
- value =~ /^www\.|\.com$/
224
+ match_result = value =~ /^www\.|\.com$/
225
+ # match found if 0,1,2,3 etc or no match if nil
226
+ # note: return bool e.g. false|true (not 0,1,2,3 etc. and nil)
227
+ match_result != nil
210
228
  end
211
229
 
212
230
  def match_website( value )
@@ -223,18 +241,45 @@ module TextUtils
223
241
 
224
242
  def is_address?( value )
225
243
  # if value includes // assume address e.g. 3970 Weitra // Sparkasseplatz 160
226
- value =~ /\/{2}/
244
+ match_result = value =~ /\/{2}/
245
+ # match found if 0,1,2,3 etc or no match if nil
246
+ # note: return bool e.g. false|true (not 0,1,2,3 etc. and nil)
247
+ match_result != nil
227
248
  end
228
249
 
229
250
  def is_taglist?( value )
230
- value =~ /^[a-z0-9\|_ ]+$/
251
+ ### note: cannot start w/ number must be letter for now
252
+ ## -- in the future allow free standing years (e.g. 1980 etc.?? why? why not?)
253
+ ## e.g. not allowed 14 ha or 5_000 hl etc.
254
+ match_result = value =~ /^([a-z][a-z0-9\|_ ]*[a-z0-9]|[a-z])$/
255
+ # match found if 0,1,2,3 etc or no match if nil
256
+ # note: return bool e.g. false|true (not 0,1,2,3 etc. and nil)
257
+ match_result != nil
231
258
  end
232
259
 
233
260
 
234
261
  def find_grade( value ) # NB: returns ary [grade,value] / two values
235
262
  grade = 4 # defaults to grade 4 e.g *** => 1, ** => 2, * => 3, -/- => 4
236
263
 
237
- value = value.sub( /\s+(\*{1,3})\s*$/ ) do |_| # NB: stars must end field/value
264
+ # NB: stars must end field/value or start field/value
265
+ # e.g.
266
+ # *** Anton Bauer or
267
+ # Anton Bauer ***
268
+
269
+ value = value.sub( /^\s*(\*{1,3})\s+/ ) do |_|
270
+ if $1 == '***'
271
+ grade = 1
272
+ elsif $1 == '**'
273
+ grade = 2
274
+ elsif $1 == '*'
275
+ grade = 3
276
+ else
277
+ # unknown grade; not possible, is it?
278
+ end
279
+ '' # remove * from title if found
280
+ end
281
+
282
+ value = value.sub( /\s+(\*{1,3})\s*$/ ) do |_|
238
283
  if $1 == '***'
239
284
  grade = 1
240
285
  elsif $1 == '**'
@@ -267,7 +312,9 @@ module TextUtils
267
312
 
268
313
  # fix/todo: add support for leading underscore _
269
314
  # or allow keys starting w/ digits?
270
- if values[0] =~ /^([a-z][a-z0-9.]*[a-z0-9]|[a-z])$/ # NB: key must start w/ a-z letter (NB: minimum one letter possible)
315
+
316
+ # NB: key must start w/ a-z letter (NB: minimum one letter possible)
317
+ if values[0] =~ /^([a-z][a-z0-9.]*[a-z0-9]|[a-z])$/
271
318
  key_col = values[0]
272
319
  title_col = values[1]
273
320
  more_values = values[2..-1]
@@ -1,6 +1,6 @@
1
1
 
2
2
  module TextUtils
3
3
 
4
- VERSION = '0.8.6'
4
+ VERSION = '0.8.7'
5
5
 
6
6
  end # module TextUtils
data/test/helper.rb CHANGED
@@ -8,6 +8,9 @@ require 'minitest/autorun'
8
8
 
9
9
  # include MiniTest::Unit # lets us use TestCase instead of MiniTest::Unit::TestCase
10
10
 
11
+ ## make sure activesupport gets included/required
12
+ # note: just activesupport or active_support will NOT work
13
+ require 'active_support/all'
11
14
 
12
15
  ## our own code
13
16
 
@@ -0,0 +1,32 @@
1
+ # encoding: utf-8
2
+
3
+
4
+ require 'helper'
5
+
6
+
7
+ class TestTaglist < MiniTest::Unit::TestCase
8
+
9
+ include TextUtils::ValueHelper # lets us use is_taglist?, etc.
10
+
11
+ def test_taglist_starting_w_digit
12
+ ## for now - taglist cannot start w/ number
13
+ assert is_taglist?( '20 ha' ) == false
14
+ assert is_taglist?( '5000 hl' ) == false
15
+ assert is_taglist?( '5_000 hl' ) == false
16
+ end
17
+
18
+ def test_taglist_upcase
19
+ ## taglist cannot use upcase letters
20
+ assert is_taglist?( 'ABC' ) == false
21
+ end
22
+
23
+ def test_taglist
24
+ assert is_taglist?( 'a' )
25
+ assert is_taglist?( 'a|b|c' )
26
+ assert is_taglist?( 'a b c' )
27
+ assert is_taglist?( 'a_b_c' )
28
+ end
29
+
30
+
31
+ end # class TestTaglist
32
+
@@ -0,0 +1,25 @@
1
+ # encoding: utf-8
2
+
3
+
4
+ require 'helper'
5
+
6
+
7
+ class TestTitleFinder < MiniTest::Unit::TestCase
8
+
9
+ include TextUtils::ValueHelper # lets us use find_grade, etc.
10
+
11
+ def test_grade
12
+
13
+ assert_equal [1,'Anton Bauer'], find_grade( '*** Anton Bauer' )
14
+ assert_equal [2,'Anton Bauer'], find_grade( '** Anton Bauer' )
15
+ assert_equal [3,'Anton Bauer'], find_grade( '* Anton Bauer' )
16
+ assert_equal [4,'Anton Bauer'], find_grade( 'Anton Bauer' )
17
+
18
+ assert_equal [1,'Anton Bauer'], find_grade( 'Anton Bauer ***' )
19
+
20
+ end
21
+
22
+
23
+
24
+ end # class TestTitleFinder
25
+
@@ -0,0 +1,50 @@
1
+ # encoding: utf-8
2
+
3
+
4
+ require 'helper'
5
+
6
+
7
+ class TestTitleMapper < MiniTest::Unit::TestCase
8
+
9
+ WineryStruct = Struct.new(:key, :title, :synonyms)
10
+
11
+ def test_title_table
12
+
13
+ ### todo/fix: auto-add year and remove (1971) or (????) etc. from title!!!!
14
+
15
+ titles_in = [
16
+ WineryStruct.new( 'antonbauer', 'Anton Bauer (1971)' ),
17
+ WineryStruct.new( 'josefbauer', 'Weingut Josef Bauer', 'Joe Bauer|Josef Bauer (????)' ),
18
+ WineryStruct.new( 'bernhardott', 'Weingut Ott', 'Weingut Bernhard Ott|Bernhard Ott (1972)' ),
19
+ WineryStruct.new( 'andreaspolsterer', 'Weingut Andreas B. Polsterer', 'Andreas B. Polsterer (1970)' )
20
+ ]
21
+
22
+ ## note: for regex the following must get escaped
23
+ # ( => \(
24
+ # ) => \)
25
+ # . => \.
26
+ # ? => \?
27
+
28
+ titles_out2 = [
29
+ ['antonbauer', [ 'Anton Bauer \(1971\)', 'Anton Bauer']],
30
+ ['josefbauer', [ 'Weingut Josef Bauer', 'Josef Bauer \(\?\?\?\?\)', 'Josef Bauer', 'Joe Bauer' ]],
31
+ ['bernhardott', [ 'Weingut Bernhard Ott', 'Bernhard Ott \(1972\)', 'Bernhard Ott', 'Weingut Ott' ]],
32
+ ['andreaspolsterer', [ 'Weingut Andreas B\. Polsterer', 'Andreas B\. Polsterer \(1970\)', 'Andreas B\. Polsterer' ]]
33
+ ]
34
+
35
+ titles_out = TextUtils.build_title_table_for( titles_in )
36
+
37
+ puts 'titles_out:'
38
+ pp titles_out
39
+ puts titles_out.to_s
40
+
41
+ puts 'titles_out2:'
42
+ pp titles_out2
43
+ puts titles_out.to_s
44
+
45
+ assert_equal titles_out2.to_s, titles_out.to_s
46
+
47
+ end # method test_title_table
48
+
49
+
50
+ end # class TestTitleMapper
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: textutils
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.8.6
4
+ version: 0.8.7
5
5
  prerelease:
6
6
  platform: ruby
7
7
  authors:
@@ -9,11 +9,11 @@ authors:
9
9
  autorequire:
10
10
  bindir: bin
11
11
  cert_chain: []
12
- date: 2014-02-16 00:00:00.000000000 Z
12
+ date: 2014-03-02 00:00:00.000000000 Z
13
13
  dependencies:
14
14
  - !ruby/object:Gem::Dependency
15
15
  name: logutils
16
- requirement: &20840748 !ruby/object:Gem::Requirement
16
+ requirement: &78669010 !ruby/object:Gem::Requirement
17
17
  none: false
18
18
  requirements:
19
19
  - - ~>
@@ -21,37 +21,35 @@ dependencies:
21
21
  version: '0.5'
22
22
  type: :runtime
23
23
  prerelease: false
24
- version_requirements: *20840748
24
+ version_requirements: *78669010
25
25
  - !ruby/object:Gem::Dependency
26
26
  name: rdoc
27
- requirement: &20840412 !ruby/object:Gem::Requirement
27
+ requirement: &78668430 !ruby/object:Gem::Requirement
28
28
  none: false
29
29
  requirements:
30
30
  - - ~>
31
31
  - !ruby/object:Gem::Version
32
- version: '4.0'
32
+ version: '3.10'
33
33
  type: :development
34
34
  prerelease: false
35
- version_requirements: *20840412
35
+ version_requirements: *78668430
36
36
  - !ruby/object:Gem::Dependency
37
37
  name: hoe
38
- requirement: &20839944 !ruby/object:Gem::Requirement
38
+ requirement: &78667530 !ruby/object:Gem::Requirement
39
39
  none: false
40
40
  requirements:
41
41
  - - ~>
42
42
  - !ruby/object:Gem::Version
43
- version: '3.7'
43
+ version: '3.3'
44
44
  type: :development
45
45
  prerelease: false
46
- version_requirements: *20839944
46
+ version_requirements: *78667530
47
47
  description: textutils - Text Filters, Helpers, Readers and More
48
48
  email: ruby-talk@ruby-lang.org
49
49
  executables: []
50
50
  extensions: []
51
51
  extra_rdoc_files:
52
- - History.md
53
52
  - Manifest.txt
54
- - README.md
55
53
  files:
56
54
  - History.md
57
55
  - Manifest.txt
@@ -85,7 +83,10 @@ files:
85
83
  - test/helper.rb
86
84
  - test/test_address_helper.rb
87
85
  - test/test_hypertext_helper.rb
86
+ - test/test_taglist.rb
87
+ - test/test_title_finder.rb
88
88
  - test/test_title_helper.rb
89
+ - test/test_title_mapper.rb
89
90
  - test/test_unicode_helper.rb
90
91
  - test/test_values_reader.rb
91
92
  - .gemtest
@@ -112,13 +113,16 @@ required_rubygems_version: !ruby/object:Gem::Requirement
112
113
  version: '0'
113
114
  requirements: []
114
115
  rubyforge_project: textutils
115
- rubygems_version: 1.8.16
116
+ rubygems_version: 1.8.17
116
117
  signing_key:
117
118
  specification_version: 3
118
119
  summary: textutils - Text Filters, Helpers, Readers and More
119
120
  test_files:
120
- - test/test_address_helper.rb
121
- - test/test_hypertext_helper.rb
122
- - test/test_title_helper.rb
121
+ - test/test_title_finder.rb
123
122
  - test/test_unicode_helper.rb
123
+ - test/test_title_mapper.rb
124
124
  - test/test_values_reader.rb
125
+ - test/test_taglist.rb
126
+ - test/test_hypertext_helper.rb
127
+ - test/test_title_helper.rb
128
+ - test/test_address_helper.rb