textutils 0.5.13 → 0.6.0

Sign up to get free protection for your applications and to get access to all the features.
data/Manifest.txt CHANGED
@@ -8,6 +8,7 @@ lib/textutils/filter/comment_filter.rb
8
8
  lib/textutils/filter/erb_django_filter.rb
9
9
  lib/textutils/filter/erb_filter.rb
10
10
  lib/textutils/helper/address_helper.rb
11
+ lib/textutils/helper/tag_helper.rb
11
12
  lib/textutils/helper/title_helper.rb
12
13
  lib/textutils/helper/unicode_helper.rb
13
14
  lib/textutils/helper/value_helper.rb
@@ -4,31 +4,33 @@
4
4
  module TextUtils
5
5
  module AddressHelper
6
6
 
7
- def normalize_address( old_address_line )
7
+ def normalize_addr( old_address )
8
8
  # for now only checks german 5-digit zip code
9
9
  #
10
10
  # e.g. Alte Plauener Straße 24 // 95028 Hof becomes
11
11
  # 95028 Hof // Alte Plauener Straße 24
12
12
 
13
- new_address_line = old_address_line # default - do nothing - just path through
13
+ new_address = old_address # default - do nothing - just pass through
14
14
 
15
- lines = old_address_line.split( '//' )
15
+ lines = old_address.split( '//' )
16
16
 
17
17
  if lines.size == 2 # two lines / check for switching lines
18
18
  line1 = lines[0].strip
19
19
  line2 = lines[1].strip
20
20
  if line2 =~ /^[0-9]{5}\s/
21
- new_address_line = "#{line2} // #{line1}" # swap - let line w/ 5-digit zip code go first
21
+ new_address = "#{line2} // #{line1}" # swap - let line w/ 5-digit zip code go first
22
22
  end
23
23
  end
24
24
 
25
- new_address_line
25
+ new_address
26
26
  end
27
-
27
+
28
28
 
29
29
  # todo/fix: add _in_adr or _in_addr to name - why? why not?
30
+ # -- make country_key optional - why? why not?
31
+ # n move to second pos; use opts={} why? why not?
30
32
 
31
- def find_city_for_country( country_key, address )
33
+ def find_city_in_addr( address, country_key )
32
34
 
33
35
  return nil if address.blank? # do NOT process nil or empty address lines; sorry
34
36
 
@@ -0,0 +1,35 @@
1
+ # encoding: utf-8
2
+
3
+ module TextUtils
4
+ module TagHelper
5
+
6
+
7
+ def find_tags( value )
8
+ # logger.debug " found tags: >>#{value}<<"
9
+
10
+ tag_keys = value.split('|')
11
+
12
+ ## unify; replace _ w/ space; remove leading n trailing whitespace
13
+ tag_keys = tag_keys.map do |key|
14
+ key = key.gsub( '_', ' ' )
15
+ key = key.strip
16
+ key
17
+ end
18
+
19
+ tag_keys # return tag keys as ary
20
+ end
21
+
22
+ def find_tags_in_attribs!( attribs )
23
+ # NB: will remove :tags from attribs hash
24
+
25
+ if attribs[:tags].present?
26
+ tag_keys = find_tags( attribs[:tags] )
27
+ attribs.delete(:tags)
28
+ tag_keys # return tag keys as ary of strings
29
+ else
30
+ [] # nothing found; return empty ary
31
+ end
32
+ end
33
+
34
+ end # module TagHelper
35
+ end # module TextUtils
@@ -5,16 +5,79 @@ module TextUtils
5
5
  module ValueHelper
6
6
 
7
7
 
8
+ def match_country( value )
9
+ if value =~ /^country:/ ## country:
10
+ country_key = value[8..-1] ## cut off country: prefix
11
+ country = Country.find_by_key!( country_key )
12
+ yield( country )
13
+ true # bingo - match found
14
+ else
15
+ false # no match found
16
+ end
17
+ end
18
+
19
+
8
20
  def is_region?( value )
9
21
  # assume region code e.g. TX or N
10
22
  value =~ /^[A-Z]{1,2}$/
11
23
  end
12
24
 
25
+ ## fix/todo: use match_region_for_country! w/ !!! why? why not?
26
+ def match_region_for_country( value, country_id ) ## NB: required country_id
27
+ if value =~ /^region:/ ## region:
28
+ region_key = value[7..-1] ## cut off region: prefix
29
+ region = Region.find_by_key_and_country_id!( region_key, country_id )
30
+ yield( region )
31
+ true # bingo - match found
32
+ elsif is_region?( value ) ## assume region code e.g. TX or N
33
+ region = Region.find_by_key_and_country_id!( value.downcase, country_id )
34
+ yield( region )
35
+ true # bingo - match found
36
+ else
37
+ false # no match found
38
+ end
39
+ end
40
+
41
+
42
+ def match_city( value )
43
+ if value =~ /^city:/ ## city:
44
+ city_key = value[5..-1] ## cut off city: prefix
45
+ city = City.find_by_key( city_key )
46
+ yield( city ) # NB: might be nil (city not found)
47
+ true # bingo - match found
48
+ else
49
+ false # no match found
50
+ end
51
+ end
52
+
53
+
54
+ def match_brewery( value )
55
+ if value =~ /^by:/ ## by: -brewed by/brewery
56
+ brewery_key = value[3..-1] ## cut off by: prefix
57
+ brewery = Brewery.find_by_key!( brewery_key )
58
+ yield( brewery )
59
+ true # bingo - match found
60
+ else
61
+ false # no match found
62
+ end
63
+ end
64
+
65
+
13
66
  def is_year?( value )
14
67
  # founded/established year e.g. 1776
15
68
  value =~ /^[0-9]{4}$/
16
69
  end
17
70
 
71
+ def match_year( value )
72
+ if is_year?( value ) # founded/established year e.g. 1776
73
+ yield( value.to_i )
74
+ true # bingo - match found
75
+ else
76
+ false # no match found
77
+ end
78
+ end
79
+
80
+
18
81
  def is_website?( value )
19
82
  # check for url/internet address e.g. www.ottakringer.at
20
83
  # - must start w/ www. or
@@ -24,6 +87,18 @@ module TextUtils
24
87
  value =~ /^www\.|\.com$/
25
88
  end
26
89
 
90
+ def match_website( value )
91
+ if is_website?( value ) # check for url/internet address e.g. www.ottakringer.at
92
+ # fix: support more url formats (e.g. w/o www. - look for .com .country code etc.)
93
+ yield( value )
94
+ true # bingo - match found
95
+ else
96
+ false # no match found
97
+ end
98
+ end
99
+
100
+
101
+
27
102
  def is_address?( value )
28
103
  # if value includes // assume address e.g. 3970 Weitra // Sparkasseplatz 160
29
104
  value =~ /\/{2}/
@@ -34,10 +109,10 @@ module TextUtils
34
109
  end
35
110
 
36
111
 
37
- def find_grade( text ) # NB: returns ary [grade,text] / two values
112
+ def find_grade( value ) # NB: returns ary [grade,value] / two values
38
113
  grade = 4 # defaults to grade 4 e.g *** => 1, ** => 2, * => 3, -/- => 4
39
114
 
40
- text = text.sub( /\s+(\*{1,3})\s*$/ ) do |_| # NB: stars must end field/value
115
+ value = value.sub( /\s+(\*{1,3})\s*$/ ) do |_| # NB: stars must end field/value
41
116
  if $1 == '***'
42
117
  grade = 1
43
118
  elsif $1 == '**'
@@ -50,10 +125,66 @@ module TextUtils
50
125
  '' # remove * from title if found
51
126
  end
52
127
 
53
- [grade,text]
128
+ [grade,value]
54
129
  end
55
130
 
56
131
 
132
+ def find_key_n_title( values ) # NB: returns ary [attribs,more_values] / two values
133
+
134
+ ## fix: add/configure logger for ActiveRecord!!!
135
+ logger = LogKernel::Logger.root
136
+
137
+ ### todo/fix: allow check - do NOT allow mixed use of with key and w/o key
138
+ ## either use keys or do NOT use keys; do NOT mix in a single fixture file
139
+
140
+ ### support autogenerate key from first title value
141
+
142
+ # if it looks like a key (only a-z lower case allowed); assume it's a key
143
+ # - also allow . in keys e.g. world.quali.america, at.cup, etc.
144
+ # - also allow 0-9 in keys e.g. at.2, at.3.1, etc.
145
+
146
+ # fix/todo: add support for leading underscore _
147
+ # or allow keys starting w/ digits?
148
+ if values[0] =~ /^([a-z][a-z0-9.]*[a-z0-9]|[a-z])$/ # NB: key must start w/ a-z letter (NB: minimum one letter possible)
149
+ key_col = values[0]
150
+ title_col = values[1]
151
+ more_values = values[2..-1]
152
+ else
153
+ key_col = '<auto>'
154
+ title_col = values[0]
155
+ more_values = values[1..-1]
156
+ end
157
+
158
+ attribs = {}
159
+
160
+ ## check title_col for grade (e.g. ***/**/*) and use returned stripped title_col if exists
161
+ grade, title_col = find_grade( title_col )
162
+
163
+ # NB: for now - do NOT include default grade e.g. if grade (***/**/*) not present; attrib will not be present too
164
+ if grade == 1 || grade == 2 || grade == 3 # grade found/present
165
+ logger.debug " found grade #{grade} in title"
166
+ attribs[:grade] = grade
167
+ end
168
+
169
+ ## title (split off optional synonyms)
170
+ # e.g. FC Bayern Muenchen|Bayern Muenchen|Bayern
171
+ titles = title_col.split('|')
172
+
173
+ attribs[ :title ] = titles[0]
174
+
175
+ ## add optional synonyms if present
176
+ attribs[ :synonyms ] = titles[1..-1].join('|') if titles.size > 1
177
+
178
+ if key_col == '<auto>'
179
+ ## autogenerate key from first title
180
+ key_col = TextUtils.title_to_key( titles[0] )
181
+ logger.debug " autogen key »#{key_col}« from title »#{titles[0]}«"
182
+ end
183
+
184
+ attribs[ :key ] = key_col
185
+
186
+ [attribs, more_values]
187
+ end
57
188
 
58
189
  end # module ValueHelper
59
190
  end # module TextUtils
@@ -3,6 +3,38 @@
3
3
 
4
4
  # fix: move into TextUtils namespace/module!!
5
5
 
6
+
7
+ ## todo/fix: find a better name than HashReaderV2 (HashReaderPlus?) ??
8
+
9
+ class HashReaderV2
10
+ include LogUtils::Logging
11
+
12
+ def initialize( name, include_path )
13
+ @name = name
14
+ @include_path = include_path
15
+ end
16
+
17
+ attr_reader :name
18
+ attr_reader :include_path
19
+
20
+ def each
21
+ path = "#{include_path}/#{name}.yml"
22
+ reader = HashReader.new( path )
23
+
24
+ logger.info "parsing data '#{name}' (#{path})..."
25
+
26
+ reader.each do |key, value|
27
+ yield( key, value )
28
+ end
29
+
30
+ ## fix: move Prop table to props gem - why? why not??
31
+ WorldDb::Models::Prop.create_from_fixture!( name, path )
32
+ end
33
+
34
+ end # class HashReaderV2
35
+
36
+
37
+
6
38
  class HashReader
7
39
 
8
40
  include LogUtils::Logging
@@ -2,17 +2,50 @@
2
2
 
3
3
  # fix: move into TextUtils namespace/module!!
4
4
 
5
+ ## todo/fix: find a better name than ValuesReaderV2 (ValuesReaderPlus?) ??
6
+
7
+ class ValuesReaderV2
8
+ include LogUtils::Logging
9
+
10
+ def initialize( name, include_path, more_attribs={} )
11
+ @name = name
12
+ @include_path = include_path
13
+ @more_attribs = more_attribs
14
+ end
15
+
16
+ attr_reader :name
17
+ attr_reader :include_path
18
+ attr_reader :more_attribs
19
+
20
+ def each_line
21
+ path = "#{include_path}/#{name}.txt"
22
+ reader = ValuesReader.new( path, more_attribs )
23
+
24
+ logger.info "parsing data '#{name}' (#{path})..."
25
+
26
+ reader.each_line do |attribs, values|
27
+ yield( attribs, values )
28
+ end
29
+
30
+ ## fix: move Prop table to props gem - why? why not??
31
+ WorldDb::Models::Prop.create_from_fixture!( name, path )
32
+ end
33
+
34
+ end # class ValuesReaderV2
35
+
36
+
37
+
5
38
  class ValuesReader
6
39
 
7
40
  include LogUtils::Logging
8
41
 
9
- include TextUtils::ValueHelper # e.g. includes find_grade()
42
+ include TextUtils::ValueHelper # e.g. includes find_grade, find_key_n_title
10
43
 
11
44
 
12
- def initialize( path, more_values={} )
45
+ def initialize( path, more_attribs={} )
13
46
  @path = path
14
47
 
15
- @more_values = more_values
48
+ @more_attribs = more_attribs
16
49
 
17
50
  @data = File.read_utf8( @path )
18
51
  end
@@ -29,8 +62,8 @@ class ValuesReader
29
62
  def each_line # support multi line records
30
63
 
31
64
  inside_line = false # todo: find a better name? e.g. line_found?
32
- attribs = {} # rename to new_attributes?
33
- more_cols = [] # rename to more_values?
65
+ attribs = {}
66
+ more_values = []
34
67
 
35
68
 
36
69
  @data.each_line do |line|
@@ -73,15 +106,15 @@ class ValuesReader
73
106
  # allows you to use any chars
74
107
  logger.debug " multi-line record - add key-value »#{line}«"
75
108
 
76
- more_cols.unshift( line.dup ) # add value upfront to array (first value); lets us keep (optional) tags as last entry; fix!! see valuereaderEx v2
109
+ more_values.unshift( line.dup ) # add value upfront to array (first value); lets us keep (optional) tags as last entry; fix!! see valuereaderEx v2
77
110
  next
78
111
  else
79
112
  # NB: new record clears/ends multi-line record
80
-
113
+
81
114
  if inside_line # check if we already processed a line? if yes; yield last line
82
- yield( attribs, more_cols )
83
- attribs = {}
84
- more_cols = []
115
+ yield( attribs, more_values )
116
+ attribs = {}
117
+ more_values = []
85
118
  end
86
119
  inside_line = true
87
120
  end
@@ -91,14 +124,14 @@ class ValuesReader
91
124
  line = line.gsub( '\,', '♣' ) # use black club suit/=shamrock char for escaped separator
92
125
 
93
126
  ## use generic separator (allow us to configure separator)
94
- line = line.gsub( ',', '♦') # use black diamond suit for separator
127
+ line = line.gsub( ',', '♦')
95
128
 
96
129
  ## restore escaped commas (before split)
97
130
  line = line.gsub( '♣', ',' )
98
131
 
99
132
  logger.debug "line: »#{line}«"
100
133
 
101
- values = line.split( '♦' )
134
+ values = line.split( '♦' )
102
135
 
103
136
  # pass 1) remove leading and trailing whitespace for values
104
137
 
@@ -115,67 +148,18 @@ class ValuesReader
115
148
  true
116
149
  end
117
150
  end
118
-
119
- logger.debug " values: »#{values.join('« »')}«"
120
-
121
-
122
- ### todo/fix: allow check - do NOT allow mixed use of with key and w/o key
123
- ## either use keys or do NOT use keys; do NOT mix in a single fixture file
124
-
125
-
126
- ### support autogenerate key from first title value
127
-
128
- # if it looks like a key (only a-z lower case allowed); assume it's a key
129
- # - also allow . in keys e.g. world.quali.america, at.cup, etc.
130
- # - also allow 0-9 in keys e.g. at.2, at.3.1, etc.
131
-
132
- # fix/todo: add support for leading underscore _
133
- # or allow keys starting w/ digits?
134
- if values[0] =~ /^([a-z][a-z0-9.]*[a-z0-9]|[a-z])$/ # NB: key must start w/ a-z letter (NB: minimum one letter possible)
135
- key_col = values[0]
136
- title_col = values[1]
137
- more_cols = values[2..-1]
138
- else
139
- key_col = '<auto>'
140
- title_col = values[0]
141
- more_cols = values[1..-1]
142
- end
143
151
 
144
- attribs = {}
152
+ logger.debug " values: »#{values.join('« »')}«"
145
153
 
146
- ## check title_col for grade (e.g. ***/**/*) and use returned stripped title_col if exits
147
- grade, title_col = find_grade( title_col )
154
+ attribs, more_values = find_key_n_title( values )
148
155
 
149
- # NB: for now - do NOT include default grade e.g. if grade (***/**/*) not present; attrib will not be present too
150
- if grade == 1 || grade == 2 || grade == 3 # grade found/present
151
- logger.debug " found grade #{grade} in title"
152
- attribs[:grade] = grade
153
- end
156
+ attribs = attribs.merge( @more_attribs ) # e.g. merge country_id and other defaults if present
154
157
 
155
- ## title (split of optional synonyms)
156
- # e.g. FC Bayern Muenchen|Bayern Muenchen|Bayern
157
- titles = title_col.split('|')
158
-
159
- attribs[ :title ] = titles[0]
160
-
161
- ## add optional synonyms if present
162
- attribs[ :synonyms ] = titles[1..-1].join('|') if titles.size > 1
163
-
164
- if key_col == '<auto>'
165
- ## autogenerate key from first title
166
- key_col = TextUtils.title_to_key( titles[0] )
167
- logger.debug " autogen key »#{key_col}« from title »#{titles[0]}«, textutils version #{TextUtils::VERSION}"
168
- end
169
-
170
- attribs[ :key ] = key_col
171
-
172
- attribs = attribs.merge( @more_values ) # e.g. merge country_id and other defaults if present
173
-
174
158
  end # each lines
175
159
 
176
160
  # do NOT forget to yield last line (if present/processed)
177
161
  if inside_line
178
- yield( attribs, more_cols )
162
+ yield( attribs, more_values )
179
163
  end
180
164
 
181
165
 
@@ -1,6 +1,6 @@
1
1
 
2
2
  module TextUtils
3
3
 
4
- VERSION = '0.5.13'
4
+ VERSION = '0.6.0'
5
5
 
6
6
  end # module TextUtils
data/lib/textutils.rb CHANGED
@@ -22,6 +22,7 @@ require 'textutils/filter/erb_django_filter'
22
22
  require 'textutils/filter/erb_filter'
23
23
 
24
24
  require 'textutils/helper/unicode_helper'
25
+ require 'textutils/helper/tag_helper'
25
26
  require 'textutils/helper/title_helper'
26
27
  require 'textutils/helper/address_helper'
27
28
  require 'textutils/helper/value_helper'
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: textutils
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.5.13
4
+ version: 0.6.0
5
5
  prerelease:
6
6
  platform: ruby
7
7
  authors:
@@ -9,11 +9,11 @@ authors:
9
9
  autorequire:
10
10
  bindir: bin
11
11
  cert_chain: []
12
- date: 2013-05-14 00:00:00.000000000 Z
12
+ date: 2013-05-20 00:00:00.000000000 Z
13
13
  dependencies:
14
14
  - !ruby/object:Gem::Dependency
15
15
  name: logutils
16
- requirement: &83466980 !ruby/object:Gem::Requirement
16
+ requirement: &84354180 !ruby/object:Gem::Requirement
17
17
  none: false
18
18
  requirements:
19
19
  - - ~>
@@ -21,10 +21,10 @@ dependencies:
21
21
  version: '0.5'
22
22
  type: :runtime
23
23
  prerelease: false
24
- version_requirements: *83466980
24
+ version_requirements: *84354180
25
25
  - !ruby/object:Gem::Dependency
26
26
  name: rdoc
27
- requirement: &83466720 !ruby/object:Gem::Requirement
27
+ requirement: &84353960 !ruby/object:Gem::Requirement
28
28
  none: false
29
29
  requirements:
30
30
  - - ~>
@@ -32,10 +32,10 @@ dependencies:
32
32
  version: '3.10'
33
33
  type: :development
34
34
  prerelease: false
35
- version_requirements: *83466720
35
+ version_requirements: *84353960
36
36
  - !ruby/object:Gem::Dependency
37
37
  name: hoe
38
- requirement: &83466490 !ruby/object:Gem::Requirement
38
+ requirement: &84353740 !ruby/object:Gem::Requirement
39
39
  none: false
40
40
  requirements:
41
41
  - - ~>
@@ -43,7 +43,7 @@ dependencies:
43
43
  version: '3.3'
44
44
  type: :development
45
45
  prerelease: false
46
- version_requirements: *83466490
46
+ version_requirements: *84353740
47
47
  description: textutils - Text Filters, Helpers, Readers and More
48
48
  email: webslideshow@googlegroups.com
49
49
  executables: []
@@ -61,6 +61,7 @@ files:
61
61
  - lib/textutils/filter/erb_django_filter.rb
62
62
  - lib/textutils/filter/erb_filter.rb
63
63
  - lib/textutils/helper/address_helper.rb
64
+ - lib/textutils/helper/tag_helper.rb
64
65
  - lib/textutils/helper/title_helper.rb
65
66
  - lib/textutils/helper/unicode_helper.rb
66
67
  - lib/textutils/helper/value_helper.rb