textutils 1.2.3 → 1.2.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 13b50f88222d75883a9804ba452aa3dce5f4c517
4
- data.tar.gz: 08c6eee7b70caf74b716fd4ed3a8d1d1d0c10967
3
+ metadata.gz: 16e24e7bc0a1004bc3fca49b6f3bfcaa6ce2e5ce
4
+ data.tar.gz: 6aa074466c9c89b089ecf0ac6d75164050abd0cb
5
5
  SHA512:
6
- metadata.gz: 1ce49f78d634c23a8843893eb7afea9a3b8f9aa1fc9c713f6cd22ff47dadfbf4a893ce96b0f91ecfc118e6e65624b9474a1f0e80e83a6d7053a1894a19f3fe2b
7
- data.tar.gz: 5361f93e31737603810fd17b2816677274612632d3cae45aff9b96031e253e36ada6fa3ad084bded187bc527338570cde761fb2c3e96f52f647fad798abca263
6
+ metadata.gz: f386324301ffc37deba32eb32202edf9b8706ff1b14971ba5a41db22a2d9a314be64914b4296e359cea2620a6b54ad89ae1cfa6e7c9fa1322ec4ef29020fb688
7
+ data.tar.gz: d56300976f712b8bb9de8f1e2e571948f35783090d1d7f1a238736f8efd6a60aba915d5ff807cb942a38f47f80b8d6917e6e1e846371b72b62022c8b3ad029f0
@@ -18,7 +18,9 @@ lib/textutils/helper/hypertext_helper.rb
18
18
  lib/textutils/helper/tag_helper.rb
19
19
  lib/textutils/helper/title_helper.rb
20
20
  lib/textutils/helper/unicode_helper.rb
21
- lib/textutils/helper/value_helper.rb
21
+ lib/textutils/helper/value_helper_i.rb
22
+ lib/textutils/helper/value_helper_ii.rb
23
+ lib/textutils/helper/value_helper_iii_numbers.rb
22
24
  lib/textutils/helper/xml_helper.rb
23
25
  lib/textutils/page.rb
24
26
  lib/textutils/parser/name_parser.rb
@@ -43,7 +43,9 @@ require 'textutils/helper/unicode_helper'
43
43
  require 'textutils/helper/tag_helper'
44
44
  require 'textutils/helper/title_helper'
45
45
  require 'textutils/helper/address_helper'
46
- require 'textutils/helper/value_helper'
46
+ require 'textutils/helper/value_helper_i'
47
+ require 'textutils/helper/value_helper_ii'
48
+ require 'textutils/helper/value_helper_iii_numbers'
47
49
 
48
50
  require 'textutils/utils'
49
51
  require 'textutils/core_ext/file'
@@ -0,0 +1,139 @@
1
+ # encoding: utf-8
2
+
3
+
4
+ module TextUtils
5
+ module ValueHelper
6
+
7
+ # if it looks like a key (only a-z lower case allowed); assume it's a key
8
+ # - also allow . in keys e.g. world.quali.america, at.cup, etc.
9
+ # - also allow 0-9 in keys e.g. at.2, at.3.1, etc.
10
+ # - also allow leading digits e.g. 1850muenchen, 3kronen, etc.
11
+
12
+ TITLE_KEY_REGEX = /^(
13
+ [a-z][a-z0-9.]*[a-z0-9]
14
+ |
15
+ [a-z] # allow single letter keys e.g. n,s,etc.
16
+ |
17
+ [1-9][0-9]*[a-z]+ # NOTE: also allow starts with leading digits e.g. 1850muenchen, 3kronen etc.;
18
+ # *MUST* be followed by letter;
19
+ # note: leading zero for now *NOT* allowed
20
+ )$
21
+ /x
22
+
23
+
24
+ def find_key_n_title( values ) # note: returns ary [attribs,more_values] / two values
25
+ # todo/fix:
26
+ ## change title to name
27
+ ## change synonyms to alt_names (!!!)
28
+ ## => use new method e.g. find_key_n_name(s) - why?? why not??
29
+
30
+
31
+ ## fix: add/configure logger for ActiveRecord!!!
32
+ logger = LogKernel::Logger.root
33
+
34
+
35
+ ### support autogenerate key from first title value
36
+ if values[0] =~ TITLE_KEY_REGEX
37
+ key_col = values[0]
38
+ title_col = values[1]
39
+ more_values = values[2..-1]
40
+ else
41
+ key_col = '<auto>'
42
+ title_col = values[0]
43
+ more_values = values[1..-1]
44
+ end
45
+
46
+ attribs = {}
47
+
48
+ ## check title_col for grade (e.g. ***/**/*) and use returned stripped title_col if exits
49
+ grade, title_col = find_grade( title_col )
50
+
51
+ # NB: for now - do NOT include default grade e.g. if grade (***/**/*) not present; attrib will not be present too
52
+ if grade == 1 || grade == 2 || grade == 3 # grade found/present
53
+ logger.debug " found grade #{grade} in title"
54
+ attribs[:grade] = grade
55
+ end
56
+
57
+
58
+ ## fix/todo: add find parts ??
59
+ # e.g. ‹Estrella› ‹Damm› Inedit
60
+ # becomes => title: 'Estrella Damm Inedit' and parts: ['Estrella','Damm']
61
+
62
+
63
+
64
+ ## title (split of optional tree hierarchy)
65
+ ## e.g. Leverkusen › Köln/Bonn › Nordrhein-Westfalen
66
+ ## Gelsenkirchen › Ruhrgebiet › Nordrhein-Westfalen
67
+ ## München [Munich] › Bayern etc.
68
+
69
+ ## fix!!!! - trailing hierarchy get *ignored* for now!!! - fix!!
70
+ ## pass along in :tree (or :hierarchy) ??
71
+
72
+
73
+ ## note: must include leading and trailing space for now (fix!! later)
74
+ ## hack for avoiding conflict w/ parts; fix: read/parse parts first
75
+ ## todo: also allow > (as an alternative to ›)
76
+
77
+ title_tree = title_col.split( /[ ]+[›][ ]+/ )
78
+
79
+ ## title (split of optional synonyms)
80
+ # e.g. FC Bayern Muenchen|Bayern Muenchen|Bayern
81
+ # München [Munich]
82
+ titles = NameTokenizer.new.tokenize( title_tree[0] )
83
+
84
+ attribs[ :title ] = titles[0]
85
+
86
+ ## add optional synonyms if present
87
+ attribs[ :synonyms ] = titles[1..-1].join('|') if titles.size > 1
88
+
89
+ if key_col == '<auto>'
90
+ ## autogenerate key from first title
91
+ key_col = TextUtils.title_to_key( titles[0] )
92
+ logger.debug " autogen key »#{key_col}« from title »#{titles[0]}«"
93
+ end
94
+
95
+ attribs[ :key ] = key_col
96
+
97
+ [attribs, more_values]
98
+ end
99
+
100
+
101
+ def find_grade( value ) # NB: returns ary [grade,value] / two values
102
+ grade = 4 # defaults to grade 4 e.g *** => 1, ** => 2, * => 3, -/- => 4
103
+
104
+ # NB: stars must end field/value or start field/value
105
+ # e.g.
106
+ # *** Anton Bauer or
107
+ # Anton Bauer ***
108
+
109
+ value = value.sub( /^\s*(\*{1,3})\s+/ ) do |_|
110
+ if $1 == '***'
111
+ grade = 1
112
+ elsif $1 == '**'
113
+ grade = 2
114
+ elsif $1 == '*'
115
+ grade = 3
116
+ else
117
+ # unknown grade; not possible, is'it?
118
+ end
119
+ '' # remove * from title if found
120
+ end
121
+
122
+ value = value.sub( /\s+(\*{1,3})\s*$/ ) do |_|
123
+ if $1 == '***'
124
+ grade = 1
125
+ elsif $1 == '**'
126
+ grade = 2
127
+ elsif $1 == '*'
128
+ grade = 3
129
+ else
130
+ # unknown grade; not possible, is'it?
131
+ end
132
+ '' # remove * from title if found
133
+ end
134
+
135
+ [grade,value]
136
+ end
137
+
138
+ end # module ValueHelper
139
+ end # module TextUtils
@@ -0,0 +1,83 @@
1
+ # encoding: utf-8
2
+
3
+
4
+ module TextUtils
5
+ module ValueHelper
6
+
7
+ #####
8
+ ## fix!!!!: move to beerdb ??? why? why not?? - yes, move to beerdb-models
9
+
10
+ def match_brewery( value )
11
+ if value =~ /^by:/ ## by: -brewed by/brewery
12
+ brewery_key = value[3..-1] ## cut off by: prefix
13
+ brewery = BeerDb::Model::Brewery.find_by_key!( brewery_key )
14
+ yield( brewery )
15
+ true # bingo - match found
16
+ else
17
+ false # no match found
18
+ end
19
+ end
20
+
21
+
22
+ def is_year?( value )
23
+ # founded/established year e.g. 1776
24
+ match_result = value =~ /^[0-9]{4}$/
25
+ # match found if 0,1,2,3 etc or no match if nil
26
+ # note: return bool e.g. false|true (not 0,1,2,3 etc. and nil)
27
+ match_result != nil
28
+ end
29
+
30
+
31
+ def match_year( value )
32
+ if is_year?( value ) # founded/established year e.g. 1776
33
+ yield( value.to_i )
34
+ true # bingo - match found
35
+ else
36
+ false # no match found
37
+ end
38
+ end
39
+
40
+
41
+ def is_address?( value )
42
+ # if value includes // assume address e.g. 3970 Weitra // Sparkasseplatz 160
43
+ match_result = value =~ /\/{2}/
44
+ # match found if 0,1,2,3 etc or no match if nil
45
+ # note: return bool e.g. false|true (not 0,1,2,3 etc. and nil)
46
+ match_result != nil
47
+ end
48
+
49
+ def is_taglist?( value )
50
+ ### note: cannot start w/ number must be letter for now
51
+ ## -- in the future allow free standing years (e.g. 1980 etc.?? why? why not?)
52
+ ## e.g. not allowed 14 ha or 5_000 hl etc.
53
+ match_result = value =~ /^([a-z][a-z0-9\|_ ]*[a-z0-9]|[a-z])$/
54
+ # match found if 0,1,2,3 etc or no match if nil
55
+ # note: return bool e.g. false|true (not 0,1,2,3 etc. and nil)
56
+ match_result != nil
57
+ end
58
+
59
+
60
+ def is_website?( value )
61
+ # check for url/internet address e.g. www.ottakringer.at
62
+ # - must start w/ www. or
63
+ # - must end w/ .com
64
+ #
65
+ # fix: support more url format (e.g. w/o www. - look for .com .country code etc.)
66
+ match_result = value =~ /^www\.|\.com$/
67
+ # match found if 0,1,2,3 etc or no match if nil
68
+ # note: return bool e.g. false|true (not 0,1,2,3 etc. and nil)
69
+ match_result != nil
70
+ end
71
+
72
+ def match_website( value )
73
+ if is_website?( value ) # check for url/internet address e.g. www.ottakringer.at
74
+ # fix: support more url format (e.g. w/o www. - look for .com .country code etc.)
75
+ yield( value )
76
+ true # bingo - match found
77
+ else
78
+ false # no match found
79
+ end
80
+ end
81
+
82
+ end # module ValueHelper
83
+ end # module TextUtils
@@ -0,0 +1,78 @@
1
+ # encoding: utf-8
2
+
3
+
4
+ # match numbers (units)
5
+ # e.g km_squared, abv, etc.
6
+
7
+ module TextUtils
8
+ module ValueHelper
9
+
10
+
11
+ def match_number( value )
12
+ ## numeric
13
+ ## note: can use any _ or spaces inside digits e.g. 1_000_000 or 1 000 000)
14
+ if value =~ /^([0-9][0-9 _]+[0-9])|([0-9]{1,2})$/
15
+ num = value.gsub(/[ _]/, '').to_i
16
+ yield( num )
17
+ true # bingo - match found
18
+ else
19
+ false # no match found
20
+ end
21
+ end
22
+
23
+
24
+ ###########################
25
+ ## numbers w/ units
26
+
27
+ def match_km_squared( value )
28
+ ## allow numbers like 453 km² or 45_000 km2
29
+ if value =~ /^([0-9][0-9 _]+[0-9]|[0-9]{1,2})(?:\s*(?:km2|km²)\s*)$/
30
+ num = value.gsub( 'km2', '').gsub( 'km²', '' ).gsub(/[ _]/, '').to_i
31
+ yield( num )
32
+ true # bingo - match found
33
+ else
34
+ false # no match found
35
+ end
36
+ end
37
+
38
+ def match_abv( value ) # alcohol by volume (abv) e.g. 5.2%
39
+ if value =~ /^<?\s*(\d+(?:\.\d+)?)\s*%$/
40
+ # nb: allow leading < e.g. <0.5%
41
+ yield( $1.to_f ) # convert to decimal? how? use float?
42
+ true # bingo - match found
43
+ else
44
+ false # no match found
45
+ end
46
+ end
47
+
48
+ def match_og( value ) # plato (stammwuerze/gravity?) e.g. 11.2°
49
+ if value =~ /^(\d+(?:\.\d+)?)°$/
50
+ # nb: no whitespace allowed between ° and number e.g. 11.2°
51
+ yield( $1.to_f ) # convert to decimal? how? use float?
52
+ true # bingo - match found
53
+ else
54
+ false # no match found
55
+ end
56
+ end
57
+
58
+ def match_kcal( value )
59
+ if value =~ /^(\d+(?:\.\d+)?)\s*kcal(?:\/100ml)?$/ # kcal
60
+ # nb: allow 44.4 kcal/100ml or 44.4 kcal or 44.4kcal
61
+ yield( $1.to_f ) # convert to decimal? how? use float?
62
+ true # bingo - match found
63
+ else
64
+ false # no match found
65
+ end
66
+ end
67
+
68
+ def match_hl( value ) # hector liters (hl) 1hl = 100l
69
+ if value =~ /^(?:([0-9][0-9_ ]+[0-9]|[0-9]{1,2})\s*hl)$/ # e.g. 20_000 hl or 50hl etc.
70
+ yield( $1.gsub( /[ _]/, '' ).to_i )
71
+ true # bingo - match found
72
+ else
73
+ false # no match found
74
+ end
75
+ end
76
+
77
+ end # module ValueHelper
78
+ end # module TextUtils
@@ -2,6 +2,55 @@
2
2
 
3
3
  # fix: move into TextUtils namespace/module!! ??
4
4
 
5
+
6
+ class NameTokenizer ## - rename to NameScanner, NameSplitter, NameSeparator, etc.
7
+
8
+ ## split (single) string value into array of names
9
+ ## e.g.
10
+ ## 'München [Munich]' => ['München', '[Munich]']
11
+ ## 'Wr. Neustadt | Wiener Neustadt' => ['Wr. Neustadt', 'Wiener Neustadt']
12
+ include LogUtils::Logging
13
+
14
+ def tokenize( value ) ## rename to/use split - why? why not??
15
+ names = []
16
+
17
+ # 1) split by | (pipe) -- remove leading n trailing whitespaces
18
+ parts = value.split( /[ \t]*\|[ \t]*/ )
19
+
20
+ # 2) split "inline" translations e.g. München [Munich]
21
+
22
+ ## todo: add support for Munich [en] e.g. trailing lang tag
23
+ ## todo: add support for bullet (official bi-lingual names w/ tags ??) - see brussels - why, why not??
24
+
25
+ parts.each do |part|
26
+ s = StringScanner.new( part )
27
+ s.skip( /[ \t]+/) # skip whitespaces
28
+
29
+ while s.eos? == false
30
+ if s.check( /\[/ )
31
+ ## scan everything until the end of bracket (e.g.])
32
+ name = s.scan( /\[[^\]]+\]/)
33
+ ## todo/fix: if name nil - issue warning??
34
+ # starting w/ [ but no closing ] found !!!! - possible? fix!!
35
+ else
36
+ ## scan everything until the begin of bracket (e.g.[)
37
+ name = s.scan( /[^\[]+/)
38
+ name = name.rstrip ## remove trailing spaces (if present)
39
+ end
40
+ names << name
41
+
42
+ s.skip( /[ \t]+/) # skip whitespaces
43
+ logger.debug( "[NameTokenizer] eos?: #{s.eos?}, rest: >#{s.rest}<" )
44
+ end
45
+ end # each part
46
+
47
+ logger.debug( "[NameTokenizer] names=#{names.inspect}")
48
+ names
49
+ end # method split
50
+ end # class NameTokenizer
51
+
52
+
53
+ =begin
5
54
  class NameParser
6
55
 
7
56
  include LogUtils::Logging
@@ -9,4 +58,4 @@ class NameParser
9
58
  ## to be done
10
59
 
11
60
  end # class NameParser
12
-
61
+ =end
@@ -4,7 +4,7 @@ module TextUtils
4
4
 
5
5
  MAJOR = 1 ## todo: namespace inside version or something - why? why not??
6
6
  MINOR = 2
7
- PATCH = 3
7
+ PATCH = 4
8
8
  VERSION = [MAJOR,MINOR,PATCH].join('.')
9
9
 
10
10
  def self.version
@@ -1,5 +1,9 @@
1
1
  # encoding: utf-8
2
2
 
3
+ ###
4
+ # to run use
5
+ # ruby -I ./lib -I ./test test/test_title_finder.rb
6
+
3
7
 
4
8
  require 'helper'
5
9
 
@@ -8,18 +12,43 @@ class TestTitleFinder < Minitest::Test
8
12
 
9
13
  include TextUtils::ValueHelper # lets us use find_grade, etc.
10
14
 
11
- def test_grade
12
15
 
16
+ def test_find_key_n_title
17
+ attribs, _ = find_key_n_title( ['München [Munich]'] ) ## skip returned more_values (use _)
18
+ assert_equal 'muenchen', attribs[:key]
19
+ assert_equal 'München', attribs[:title]
20
+ assert_equal '[Munich]', attribs[:synonyms]
21
+ end
22
+
23
+ def test_find_key_n_title_w_tree
24
+ attribs, _ = find_key_n_title( ['München [Munich] › Oberbayern › Bayern'] ) ## skip returned more_values (use _)
25
+ assert_equal 'muenchen', attribs[:key]
26
+ assert_equal 'München', attribs[:title]
27
+ assert_equal '[Munich]', attribs[:synonyms]
28
+ end
29
+
30
+
31
+ def test_title_tokenizer
32
+ names = NameTokenizer.new.tokenize( 'München [Munich]' )
33
+ assert_equal 2, names.size
34
+ assert_equal 'München', names[0]
35
+ assert_equal '[Munich]', names[1]
36
+
37
+ names = NameTokenizer.new.tokenize( 'FC Bayern Muenchen|Bayern Muenchen|Bayern' )
38
+ assert_equal 3, names.size
39
+ assert_equal 'FC Bayern Muenchen', names[0]
40
+ assert_equal 'Bayern Muenchen', names[1]
41
+ assert_equal 'Bayern', names[2]
42
+ end
43
+
44
+ def test_grade
13
45
  assert_equal [1,'Anton Bauer'], find_grade( '*** Anton Bauer' )
14
46
  assert_equal [2,'Anton Bauer'], find_grade( '** Anton Bauer' )
15
47
  assert_equal [3,'Anton Bauer'], find_grade( '* Anton Bauer' )
16
48
  assert_equal [4,'Anton Bauer'], find_grade( 'Anton Bauer' )
17
49
 
18
50
  assert_equal [1,'Anton Bauer'], find_grade( 'Anton Bauer ***' )
19
-
20
51
  end
21
52
 
22
-
23
-
24
53
  end # class TestTitleFinder
25
54
 
@@ -62,7 +62,8 @@ class TestTitleHelper < Minitest::Test
62
62
  [ "Ṣan'ā' [Sana'a]", 'sana'],
63
63
  [ "P'yŏngyang [Pyongyang]", 'pyongyang' ],
64
64
  [ 'Kāṭhmāḍaũ [Kathmandu]', 'kathmadau' ],
65
- [ "Pe\u{030C}awar", 'pexawar'] ## note: use unicode literal; Pex̌awar -- see en.wikipedia.org/wiki/Peshawar
65
+ [ "Pe\u{030C}awar", 'pexawar'], ## note: use unicode literal; Pex̌awar -- see en.wikipedia.org/wiki/Peshawar
66
+ [ '1850 München', '1850muenchen'],
66
67
  ]
67
68
 
68
69
  txt_io.each do |txt|
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: textutils
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.2.3
4
+ version: 1.2.4
5
5
  platform: ruby
6
6
  authors:
7
7
  - Gerald Bauer
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2015-06-11 00:00:00.000000000 Z
11
+ date: 2015-06-14 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: props
@@ -124,7 +124,9 @@ files:
124
124
  - lib/textutils/helper/tag_helper.rb
125
125
  - lib/textutils/helper/title_helper.rb
126
126
  - lib/textutils/helper/unicode_helper.rb
127
- - lib/textutils/helper/value_helper.rb
127
+ - lib/textutils/helper/value_helper_i.rb
128
+ - lib/textutils/helper/value_helper_ii.rb
129
+ - lib/textutils/helper/value_helper_iii_numbers.rb
128
130
  - lib/textutils/helper/xml_helper.rb
129
131
  - lib/textutils/page.rb
130
132
  - lib/textutils/parser/name_parser.rb
@@ -1,249 +0,0 @@
1
- # encoding: utf-8
2
-
3
-
4
- module TextUtils
5
- module ValueHelper
6
-
7
- #####
8
- ## fix: move to beerdb ??? why? why not?? - yes, move to beerdb-models
9
-
10
- def match_brewery( value )
11
- if value =~ /^by:/ ## by: -brewed by/brewery
12
- brewery_key = value[3..-1] ## cut off by: prefix
13
- brewery = BeerDb::Model::Brewery.find_by_key!( brewery_key )
14
- yield( brewery )
15
- true # bingo - match found
16
- else
17
- false # no match found
18
- end
19
- end
20
-
21
-
22
- def is_year?( value )
23
- # founded/established year e.g. 1776
24
- match_result = value =~ /^[0-9]{4}$/
25
- # match found if 0,1,2,3 etc or no match if nil
26
- # note: return bool e.g. false|true (not 0,1,2,3 etc. and nil)
27
- match_result != nil
28
- end
29
-
30
-
31
- def match_year( value )
32
- if is_year?( value ) # founded/established year e.g. 1776
33
- yield( value.to_i )
34
- true # bingo - match found
35
- else
36
- false # no match found
37
- end
38
- end
39
-
40
-
41
- def match_km_squared( value )
42
- ## allow numbers like 453 km² or 45_000 km2
43
- if value =~ /^([0-9][0-9 _]+[0-9]|[0-9]{1,2})(?:\s*(?:km2|km²)\s*)$/
44
- num = value.gsub( 'km2', '').gsub( 'km²', '' ).gsub(/[ _]/, '').to_i
45
- yield( num )
46
- true # bingo - match found
47
- else
48
- false # no match found
49
- end
50
- end
51
-
52
- def match_number( value )
53
- ## numeric (nb: can use any _ or spaces inside digits e.g. 1_000_000 or 1 000 000)
54
- if value =~ /^([0-9][0-9 _]+[0-9])|([0-9]{1,2})$/
55
- num = value.gsub(/[ _]/, '').to_i
56
- yield( num )
57
- true # bingo - match found
58
- else
59
- false # no match found
60
- end
61
- end
62
-
63
-
64
- def match_abv( value ) # alcohol by volume (abv) e.g. 5.2%
65
- if value =~ /^<?\s*(\d+(?:\.\d+)?)\s*%$/
66
- # nb: allow leading < e.g. <0.5%
67
- yield( $1.to_f ) # convert to decimal? how? use float?
68
- true # bingo - match found
69
- else
70
- false # no match found
71
- end
72
- end
73
-
74
- def match_og( value ) # plato (stammwuerze/gravity?) e.g. 11.2°
75
- if value =~ /^(\d+(?:\.\d+)?)°$/
76
- # nb: no whitespace allowed between ° and number e.g. 11.2°
77
- yield( $1.to_f ) # convert to decimal? how? use float?
78
- true # bingo - match found
79
- else
80
- false # no match found
81
- end
82
- end
83
-
84
- def match_kcal( value )
85
- if value =~ /^(\d+(?:\.\d+)?)\s*kcal(?:\/100ml)?$/ # kcal
86
- # nb: allow 44.4 kcal/100ml or 44.4 kcal or 44.4kcal
87
- yield( $1.to_f ) # convert to decimal? how? use float?
88
- true # bingo - match found
89
- else
90
- false # no match found
91
- end
92
- end
93
-
94
- def match_hl( value ) # hector liters (hl) 1hl = 100l
95
- if value =~ /^(?:([0-9][0-9_ ]+[0-9]|[0-9]{1,2})\s*hl)$/ # e.g. 20_000 hl or 50hl etc.
96
- yield( $1.gsub( /[ _]/, '' ).to_i )
97
- true # bingo - match found
98
- else
99
- false # no match found
100
- end
101
- end
102
-
103
-
104
- def is_website?( value )
105
- # check for url/internet address e.g. www.ottakringer.at
106
- # - must start w/ www. or
107
- # - must end w/ .com
108
- #
109
- # fix: support more url format (e.g. w/o www. - look for .com .country code etc.)
110
- match_result = value =~ /^www\.|\.com$/
111
- # match found if 0,1,2,3 etc or no match if nil
112
- # note: return bool e.g. false|true (not 0,1,2,3 etc. and nil)
113
- match_result != nil
114
- end
115
-
116
- def match_website( value )
117
- if is_website?( value ) # check for url/internet address e.g. www.ottakringer.at
118
- # fix: support more url format (e.g. w/o www. - look for .com .country code etc.)
119
- yield( value )
120
- true # bingo - match found
121
- else
122
- false # no match found
123
- end
124
- end
125
-
126
-
127
-
128
- def is_address?( value )
129
- # if value includes // assume address e.g. 3970 Weitra // Sparkasseplatz 160
130
- match_result = value =~ /\/{2}/
131
- # match found if 0,1,2,3 etc or no match if nil
132
- # note: return bool e.g. false|true (not 0,1,2,3 etc. and nil)
133
- match_result != nil
134
- end
135
-
136
- def is_taglist?( value )
137
- ### note: cannot start w/ number must be letter for now
138
- ## -- in the future allow free standing years (e.g. 1980 etc.?? why? why not?)
139
- ## e.g. not allowed 14 ha or 5_000 hl etc.
140
- match_result = value =~ /^([a-z][a-z0-9\|_ ]*[a-z0-9]|[a-z])$/
141
- # match found if 0,1,2,3 etc or no match if nil
142
- # note: return bool e.g. false|true (not 0,1,2,3 etc. and nil)
143
- match_result != nil
144
- end
145
-
146
-
147
- def find_grade( value ) # NB: returns ary [grade,value] / two values
148
- grade = 4 # defaults to grade 4 e.g *** => 1, ** => 2, * => 3, -/- => 4
149
-
150
- # NB: stars must end field/value or start field/value
151
- # e.g.
152
- # *** Anton Bauer or
153
- # Anton Bauer ***
154
-
155
- value = value.sub( /^\s*(\*{1,3})\s+/ ) do |_|
156
- if $1 == '***'
157
- grade = 1
158
- elsif $1 == '**'
159
- grade = 2
160
- elsif $1 == '*'
161
- grade = 3
162
- else
163
- # unknown grade; not possible, is'it?
164
- end
165
- '' # remove * from title if found
166
- end
167
-
168
- value = value.sub( /\s+(\*{1,3})\s*$/ ) do |_|
169
- if $1 == '***'
170
- grade = 1
171
- elsif $1 == '**'
172
- grade = 2
173
- elsif $1 == '*'
174
- grade = 3
175
- else
176
- # unknown grade; not possible, is'it?
177
- end
178
- '' # remove * from title if found
179
- end
180
-
181
- [grade,value]
182
- end
183
-
184
-
185
- def find_key_n_title( values ) # NB: returns ary [attribs,more_values] / two values
186
-
187
- ## fix: add/configure logger for ActiveRecord!!!
188
- logger = LogKernel::Logger.root
189
-
190
- ### todo/fix: allow check - do NOT allow mixed use of with key and w/o key
191
- ## either use keys or do NOT use keys; do NOT mix in a single fixture file
192
-
193
- ### support autogenerate key from first title value
194
-
195
- # if it looks like a key (only a-z lower case allowed); assume it's a key
196
- # - also allow . in keys e.g. world.quali.america, at.cup, etc.
197
- # - also allow 0-9 in keys e.g. at.2, at.3.1, etc.
198
-
199
- # fix/todo: add support for leading underscore _
200
- # or allow keys starting w/ digits?
201
-
202
- # NB: key must start w/ a-z letter (NB: minimum one letter possible)
203
- if values[0] =~ /^([a-z][a-z0-9.]*[a-z0-9]|[a-z])$/
204
- key_col = values[0]
205
- title_col = values[1]
206
- more_values = values[2..-1]
207
- else
208
- key_col = '<auto>'
209
- title_col = values[0]
210
- more_values = values[1..-1]
211
- end
212
-
213
- attribs = {}
214
-
215
- ## check title_col for grade (e.g. ***/**/*) and use returned stripped title_col if exits
216
- grade, title_col = find_grade( title_col )
217
-
218
- # NB: for now - do NOT include default grade e.g. if grade (***/**/*) not present; attrib will not be present too
219
- if grade == 1 || grade == 2 || grade == 3 # grade found/present
220
- logger.debug " found grade #{grade} in title"
221
- attribs[:grade] = grade
222
- end
223
-
224
- ## fix/todo: add find parts ??
225
- # e.g. ‹Estrella› ‹Damm› Inedit
226
- # becomes => title: 'Estrella Damm Inedit' and parts: ['Estrella','Damm']
227
-
228
- ## title (split of optional synonyms)
229
- # e.g. FC Bayern Muenchen|Bayern Muenchen|Bayern
230
- titles = title_col.split('|')
231
-
232
- attribs[ :title ] = titles[0]
233
-
234
- ## add optional synonyms if present
235
- attribs[ :synonyms ] = titles[1..-1].join('|') if titles.size > 1
236
-
237
- if key_col == '<auto>'
238
- ## autogenerate key from first title
239
- key_col = TextUtils.title_to_key( titles[0] )
240
- logger.debug " autogen key »#{key_col}« from title »#{titles[0]}«"
241
- end
242
-
243
- attribs[ :key ] = key_col
244
-
245
- [attribs, more_values]
246
- end
247
-
248
- end # module ValueHelper
249
- end # module TextUtils