textutils 0.5.13 → 0.6.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/Manifest.txt CHANGED
@@ -8,6 +8,7 @@ lib/textutils/filter/comment_filter.rb
8
8
  lib/textutils/filter/erb_django_filter.rb
9
9
  lib/textutils/filter/erb_filter.rb
10
10
  lib/textutils/helper/address_helper.rb
11
+ lib/textutils/helper/tag_helper.rb
11
12
  lib/textutils/helper/title_helper.rb
12
13
  lib/textutils/helper/unicode_helper.rb
13
14
  lib/textutils/helper/value_helper.rb
@@ -4,31 +4,33 @@
4
4
  module TextUtils
5
5
  module AddressHelper
6
6
 
7
- def normalize_address( old_address_line )
7
+ def normalize_addr( old_address )
8
8
  # for now only checks german 5-digit zip code
9
9
  #
10
10
  # e.g. Alte Plauener Straße 24 // 95028 Hof becomes
11
11
  # 95028 Hof // Alte Plauener Straße 24
12
12
 
13
- new_address_line = old_address_line # default - do nothing - just path through
13
+ new_address = old_address # default - do nothing - just pass through
14
14
 
15
- lines = old_address_line.split( '//' )
15
+ lines = old_address.split( '//' )
16
16
 
17
17
  if lines.size == 2 # two lines / check for switching lines
18
18
  line1 = lines[0].strip
19
19
  line2 = lines[1].strip
20
20
  if line2 =~ /^[0-9]{5}\s/
21
- new_address_line = "#{line2} // #{line1}" # swap - let line w/ 5-digit zip code go first
21
+ new_address = "#{line2} // #{line1}" # swap - let line w/ 5-digit zip code go first
22
22
  end
23
23
  end
24
24
 
25
- new_address_line
25
+ new_address
26
26
  end
27
-
27
+
28
28
 
29
29
  # todo/fix: add _in_adr or _in_addr to name - why? why not?
30
+ # -- make country_key optional - why? why not?
31
+ # n move to second pos; use opts={} why? why not?
30
32
 
31
- def find_city_for_country( country_key, address )
33
+ def find_city_in_addr( address, country_key )
32
34
 
33
35
  return nil if address.blank? # do NOT process nil or empty address lines; sorry
34
36
 
@@ -0,0 +1,35 @@
1
+ # encoding: utf-8
2
+
3
+ module TextUtils
4
+ module TagHelper
5
+
6
+
7
+ def find_tags( value )
8
+ # logger.debug " found tags: >>#{value}<<"
9
+
10
+ tag_keys = value.split('|')
11
+
12
+ ## unify; replace _ w/ space; remove leading and trailing whitespace
13
+ tag_keys = tag_keys.map do |key|
14
+ key = key.gsub( '_', ' ' )
15
+ key = key.strip
16
+ key
17
+ end
18
+
19
+ tag_keys # return tag keys as ary
20
+ end
21
+
22
+ def find_tags_in_attribs!( attribs )
23
+ # NB: will remove :tags from attribs hash
24
+
25
+ if attribs[:tags].present?
26
+ tag_keys = find_tags( attribs[:tags] )
27
+ attribs.delete(:tags)
28
+ tag_keys # return tag keys as ary of strings
29
+ else
30
+ [] # nothing found; return empty ary
31
+ end
32
+ end
33
+
34
+ end # module TagHelper
35
+ end # module TextUtils
@@ -5,16 +5,79 @@ module TextUtils
5
5
  module ValueHelper
6
6
 
7
7
 
8
+ def match_country( value )
9
+ if value =~ /^country:/ ## country:
10
+ country_key = value[8..-1] ## cut off country: prefix
11
+ country = Country.find_by_key!( country_key )
12
+ yield( country )
13
+ true # bingo - match found
14
+ else
15
+ false # no match found
16
+ end
17
+ end
18
+
19
+
8
20
  def is_region?( value )
9
21
  # assume region code e.g. TX or N
10
22
  value =~ /^[A-Z]{1,2}$/
11
23
  end
12
24
 
25
+ ## fix/todo: use match_region_for_country! w/ !!! why? why not?
26
+ def match_region_for_country( value, country_id ) ## NB: required country_id
27
+ if value =~ /^region:/ ## region:
28
+ region_key = value[7..-1] ## cut off region: prefix
29
+ region = Region.find_by_key_and_country_id!( region_key, country_id )
30
+ yield( region )
31
+ true # bingo - match found
32
+ elsif is_region?( value ) ## assume region code e.g. TX or N
33
+ region = Region.find_by_key_and_country_id!( value.downcase, country_id )
34
+ yield( region )
35
+ true # bingo - match found
36
+ else
37
+ false # no match found
38
+ end
39
+ end
40
+
41
+
42
+ def match_city( value )
43
+ if value =~ /^city:/ ## city:
44
+ city_key = value[5..-1] ## cut off city: prefix
45
+ city = City.find_by_key( city_key )
46
+ yield( city ) # NB: might be nil (city not found)
47
+ true # bingo - match found
48
+ else
49
+ false # no match found
50
+ end
51
+ end
52
+
53
+
54
+ def match_brewery( value )
55
+ if value =~ /^by:/ ## by: -brewed by/brewery
56
+ brewery_key = value[3..-1] ## cut off by: prefix
57
+ brewery = Brewery.find_by_key!( brewery_key )
58
+ yield( brewery )
59
+ true # bingo - match found
60
+ else
61
+ false # no match found
62
+ end
63
+ end
64
+
65
+
13
66
  def is_year?( value )
14
67
  # founded/established year e.g. 1776
15
68
  value =~ /^[0-9]{4}$/
16
69
  end
17
70
 
71
+ def match_year( value )
72
+ if is_year?( value ) # founded/established year e.g. 1776
73
+ yield( value.to_i )
74
+ true # bingo - match found
75
+ else
76
+ false # no match found
77
+ end
78
+ end
79
+
80
+
18
81
  def is_website?( value )
19
82
  # check for url/internet address e.g. www.ottakringer.at
20
83
  # - must start w/ www. or
@@ -24,6 +87,18 @@ module TextUtils
24
87
  value =~ /^www\.|\.com$/
25
88
  end
26
89
 
90
+ def match_website( value )
91
+ if is_website?( value ) # check for url/internet address e.g. www.ottakringer.at
92
+ # fix: support more url format (e.g. w/o www. - look for .com .country code etc.)
93
+ yield( value )
94
+ true # bingo - match found
95
+ else
96
+ false # no match found
97
+ end
98
+ end
99
+
100
+
101
+
27
102
  def is_address?( value )
28
103
  # if value includes // assume address e.g. 3970 Weitra // Sparkasseplatz 160
29
104
  value =~ /\/{2}/
@@ -34,10 +109,10 @@ module TextUtils
34
109
  end
35
110
 
36
111
 
37
- def find_grade( text ) # NB: returns ary [grade,text] / two values
112
+ def find_grade( value ) # NB: returns ary [grade,value] / two values
38
113
  grade = 4 # defaults to grade 4 e.g *** => 1, ** => 2, * => 3, -/- => 4
39
114
 
40
- text = text.sub( /\s+(\*{1,3})\s*$/ ) do |_| # NB: stars must end field/value
115
+ value = value.sub( /\s+(\*{1,3})\s*$/ ) do |_| # NB: stars must end field/value
41
116
  if $1 == '***'
42
117
  grade = 1
43
118
  elsif $1 == '**'
@@ -50,10 +125,66 @@ module TextUtils
50
125
  '' # remove * from title if found
51
126
  end
52
127
 
53
- [grade,text]
128
+ [grade,value]
54
129
  end
55
130
 
56
131
 
132
+ def find_key_n_title( values ) # NB: returns ary [attribs,more_values] / two values
133
+
134
+ ## fix: add/configure logger for ActiveRecord!!!
135
+ logger = LogKernel::Logger.root
136
+
137
+ ### todo/fix: allow check - do NOT allow mixed use of with key and w/o key
138
+ ## either use keys or do NOT use keys; do NOT mix in a single fixture file
139
+
140
+ ### support autogenerate key from first title value
141
+
142
+ # if it looks like a key (only a-z lower case allowed); assume it's a key
143
+ # - also allow . in keys e.g. world.quali.america, at.cup, etc.
144
+ # - also allow 0-9 in keys e.g. at.2, at.3.1, etc.
145
+
146
+ # fix/todo: add support for leading underscore _
147
+ # or allow keys starting w/ digits?
148
+ if values[0] =~ /^([a-z][a-z0-9.]*[a-z0-9]|[a-z])$/ # NB: key must start w/ a-z letter (NB: minimum one letter possible)
149
+ key_col = values[0]
150
+ title_col = values[1]
151
+ more_values = values[2..-1]
152
+ else
153
+ key_col = '<auto>'
154
+ title_col = values[0]
155
+ more_values = values[1..-1]
156
+ end
157
+
158
+ attribs = {}
159
+
160
+ ## check title_col for grade (e.g. ***/**/*) and use returned stripped title_col if it exists
161
+ grade, title_col = find_grade( title_col )
162
+
163
+ # NB: for now - do NOT include default grade e.g. if grade (***/**/*) not present; attrib will not be present too
164
+ if grade == 1 || grade == 2 || grade == 3 # grade found/present
165
+ logger.debug " found grade #{grade} in title"
166
+ attribs[:grade] = grade
167
+ end
168
+
169
+ ## title (split of optional synonyms)
170
+ # e.g. FC Bayern Muenchen|Bayern Muenchen|Bayern
171
+ titles = title_col.split('|')
172
+
173
+ attribs[ :title ] = titles[0]
174
+
175
+ ## add optional synonyms if present
176
+ attribs[ :synonyms ] = titles[1..-1].join('|') if titles.size > 1
177
+
178
+ if key_col == '<auto>'
179
+ ## autogenerate key from first title
180
+ key_col = TextUtils.title_to_key( titles[0] )
181
+ logger.debug " autogen key »#{key_col}« from title »#{titles[0]}«"
182
+ end
183
+
184
+ attribs[ :key ] = key_col
185
+
186
+ [attribs, more_values]
187
+ end
57
188
 
58
189
  end # module ValueHelper
59
190
  end # module TextUtils
@@ -3,6 +3,38 @@
3
3
 
4
4
  # fix: move into TextUtils namespace/module!!
5
5
 
6
+
7
+ ## todo/fix: find a better name than HashReaderV2 (HashReaderPlus?) ??
8
+
9
+ class HashReaderV2
10
+ include LogUtils::Logging
11
+
12
+ def initialize( name, include_path )
13
+ @name = name
14
+ @include_path = include_path
15
+ end
16
+
17
+ attr_reader :name
18
+ attr_reader :include_path
19
+
20
+ def each
21
+ path = "#{include_path}/#{name}.yml"
22
+ reader = HashReader.new( path )
23
+
24
+ logger.info "parsing data '#{name}' (#{path})..."
25
+
26
+ reader.each do |key, value|
27
+ yield( key, value )
28
+ end
29
+
30
+ ## fix: move Prop table to props gem - why? why not??
31
+ WorldDb::Models::Prop.create_from_fixture!( name, path )
32
+ end
33
+
34
+ end # class HashReaderV2
35
+
36
+
37
+
6
38
  class HashReader
7
39
 
8
40
  include LogUtils::Logging
@@ -2,17 +2,50 @@
2
2
 
3
3
  # fix: move into TextUtils namespace/module!!
4
4
 
5
+ ## todo/fix: find a better name than ValuesReaderV2 (ValuesReaderPlus?) ??
6
+
7
+ class ValuesReaderV2
8
+ include LogUtils::Logging
9
+
10
+ def initialize( name, include_path, more_attribs={} )
11
+ @name = name
12
+ @include_path = include_path
13
+ @more_attribs = more_attribs
14
+ end
15
+
16
+ attr_reader :name
17
+ attr_reader :include_path
18
+ attr_reader :more_attribs
19
+
20
+ def each_line
21
+ path = "#{include_path}/#{name}.txt"
22
+ reader = ValuesReader.new( path, more_attribs )
23
+
24
+ logger.info "parsing data '#{name}' (#{path})..."
25
+
26
+ reader.each_line do |attribs, values|
27
+ yield( attribs, values )
28
+ end
29
+
30
+ ## fix: move Prop table to props gem - why? why not??
31
+ WorldDb::Models::Prop.create_from_fixture!( name, path )
32
+ end
33
+
34
+ end # class ValuesReaderV2
35
+
36
+
37
+
5
38
  class ValuesReader
6
39
 
7
40
  include LogUtils::Logging
8
41
 
9
- include TextUtils::ValueHelper # e.g. includes find_grade()
42
+ include TextUtils::ValueHelper # e.g. includes find_grade, find_key_n_title
10
43
 
11
44
 
12
- def initialize( path, more_values={} )
45
+ def initialize( path, more_attribs={} )
13
46
  @path = path
14
47
 
15
- @more_values = more_values
48
+ @more_attribs = more_attribs
16
49
 
17
50
  @data = File.read_utf8( @path )
18
51
  end
@@ -29,8 +62,8 @@ class ValuesReader
29
62
  def each_line # support multi line records
30
63
 
31
64
  inside_line = false # todo: find a better name? e.g. line_found?
32
- attribs = {} # rename to new_attributes?
33
- more_cols = [] # rename to more_values?
65
+ attribs = {}
66
+ more_values = []
34
67
 
35
68
 
36
69
  @data.each_line do |line|
@@ -73,15 +106,15 @@ class ValuesReader
73
106
  # allows you to use any chars
74
107
  logger.debug " multi-line record - add key-value »#{line}«"
75
108
 
76
- more_cols.unshift( line.dup ) # add value upfront to array (first value); lets us keep (optional) tags as last entry; fix!! see valuereaderEx v2
109
+ more_values.unshift( line.dup ) # add value upfront to array (first value); lets us keep (optional) tags as last entry; fix!! see valuereaderEx v2
77
110
  next
78
111
  else
79
112
  # NB: new record clears/ends multi-line record
80
-
113
+
81
114
  if inside_line # check if we already processed a line? if yes; yield last line
82
- yield( attribs, more_cols )
83
- attribs = {}
84
- more_cols = []
115
+ yield( attribs, more_values )
116
+ attribs = {}
117
+ more_values = []
85
118
  end
86
119
  inside_line = true
87
120
  end
@@ -91,14 +124,14 @@ class ValuesReader
91
124
  line = line.gsub( '\,', '♣' ) # use black club suit/=shamrock char for escaped separator
92
125
 
93
126
  ## use generic separator (allow us to configure separator)
94
- line = line.gsub( ',', '') # use black diamond suit for separator
127
+ line = line.gsub( ',', '')
95
128
 
96
129
  ## restore escaped commas (before split)
97
130
  line = line.gsub( '♣', ',' )
98
131
 
99
132
  logger.debug "line: »#{line}«"
100
133
 
101
- values = line.split( '' )
134
+ values = line.split( '' )
102
135
 
103
136
  # pass 1) remove leading and trailing whitespace for values
104
137
 
@@ -115,67 +148,18 @@ class ValuesReader
115
148
  true
116
149
  end
117
150
  end
118
-
119
- logger.debug " values: »#{values.join('« »')}«"
120
-
121
-
122
- ### todo/fix: allow check - do NOT allow mixed use of with key and w/o key
123
- ## either use keys or do NOT use keys; do NOT mix in a single fixture file
124
-
125
-
126
- ### support autogenerate key from first title value
127
-
128
- # if it looks like a key (only a-z lower case allowed); assume it's a key
129
- # - also allow . in keys e.g. world.quali.america, at.cup, etc.
130
- # - also allow 0-9 in keys e.g. at.2, at.3.1, etc.
131
-
132
- # fix/todo: add support for leading underscore _
133
- # or allow keys starting w/ digits?
134
- if values[0] =~ /^([a-z][a-z0-9.]*[a-z0-9]|[a-z])$/ # NB: key must start w/ a-z letter (NB: minimum one letter possible)
135
- key_col = values[0]
136
- title_col = values[1]
137
- more_cols = values[2..-1]
138
- else
139
- key_col = '<auto>'
140
- title_col = values[0]
141
- more_cols = values[1..-1]
142
- end
143
151
 
144
- attribs = {}
152
+ logger.debug " values: »#{values.join('« »')}«"
145
153
 
146
- ## check title_col for grade (e.g. ***/**/*) and use returned stripped title_col if exits
147
- grade, title_col = find_grade( title_col )
154
+ attribs, more_values = find_key_n_title( values )
148
155
 
149
- # NB: for now - do NOT include default grade e.g. if grade (***/**/*) not present; attrib will not be present too
150
- if grade == 1 || grade == 2 || grade == 3 # grade found/present
151
- logger.debug " found grade #{grade} in title"
152
- attribs[:grade] = grade
153
- end
156
+ attribs = attribs.merge( @more_attribs ) # e.g. merge country_id and other defaults if present
154
157
 
155
- ## title (split of optional synonyms)
156
- # e.g. FC Bayern Muenchen|Bayern Muenchen|Bayern
157
- titles = title_col.split('|')
158
-
159
- attribs[ :title ] = titles[0]
160
-
161
- ## add optional synonyms if present
162
- attribs[ :synonyms ] = titles[1..-1].join('|') if titles.size > 1
163
-
164
- if key_col == '<auto>'
165
- ## autogenerate key from first title
166
- key_col = TextUtils.title_to_key( titles[0] )
167
- logger.debug " autogen key »#{key_col}« from title »#{titles[0]}«, textutils version #{TextUtils::VERSION}"
168
- end
169
-
170
- attribs[ :key ] = key_col
171
-
172
- attribs = attribs.merge( @more_values ) # e.g. merge country_id and other defaults if present
173
-
174
158
  end # each lines
175
159
 
176
160
  # do NOT forget to yield last line (if present/processed)
177
161
  if inside_line
178
- yield( attribs, more_cols )
162
+ yield( attribs, more_values )
179
163
  end
180
164
 
181
165
 
@@ -1,6 +1,6 @@
1
1
 
2
2
  module TextUtils
3
3
 
4
- VERSION = '0.5.13'
4
+ VERSION = '0.6.0'
5
5
 
6
6
  end # module TextUtils
data/lib/textutils.rb CHANGED
@@ -22,6 +22,7 @@ require 'textutils/filter/erb_django_filter'
22
22
  require 'textutils/filter/erb_filter'
23
23
 
24
24
  require 'textutils/helper/unicode_helper'
25
+ require 'textutils/helper/tag_helper'
25
26
  require 'textutils/helper/title_helper'
26
27
  require 'textutils/helper/address_helper'
27
28
  require 'textutils/helper/value_helper'
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: textutils
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.5.13
4
+ version: 0.6.0
5
5
  prerelease:
6
6
  platform: ruby
7
7
  authors:
@@ -9,11 +9,11 @@ authors:
9
9
  autorequire:
10
10
  bindir: bin
11
11
  cert_chain: []
12
- date: 2013-05-14 00:00:00.000000000 Z
12
+ date: 2013-05-20 00:00:00.000000000 Z
13
13
  dependencies:
14
14
  - !ruby/object:Gem::Dependency
15
15
  name: logutils
16
- requirement: &83466980 !ruby/object:Gem::Requirement
16
+ requirement: &84354180 !ruby/object:Gem::Requirement
17
17
  none: false
18
18
  requirements:
19
19
  - - ~>
@@ -21,10 +21,10 @@ dependencies:
21
21
  version: '0.5'
22
22
  type: :runtime
23
23
  prerelease: false
24
- version_requirements: *83466980
24
+ version_requirements: *84354180
25
25
  - !ruby/object:Gem::Dependency
26
26
  name: rdoc
27
- requirement: &83466720 !ruby/object:Gem::Requirement
27
+ requirement: &84353960 !ruby/object:Gem::Requirement
28
28
  none: false
29
29
  requirements:
30
30
  - - ~>
@@ -32,10 +32,10 @@ dependencies:
32
32
  version: '3.10'
33
33
  type: :development
34
34
  prerelease: false
35
- version_requirements: *83466720
35
+ version_requirements: *84353960
36
36
  - !ruby/object:Gem::Dependency
37
37
  name: hoe
38
- requirement: &83466490 !ruby/object:Gem::Requirement
38
+ requirement: &84353740 !ruby/object:Gem::Requirement
39
39
  none: false
40
40
  requirements:
41
41
  - - ~>
@@ -43,7 +43,7 @@ dependencies:
43
43
  version: '3.3'
44
44
  type: :development
45
45
  prerelease: false
46
- version_requirements: *83466490
46
+ version_requirements: *84353740
47
47
  description: textutils - Text Filters, Helpers, Readers and More
48
48
  email: webslideshow@googlegroups.com
49
49
  executables: []
@@ -61,6 +61,7 @@ files:
61
61
  - lib/textutils/filter/erb_django_filter.rb
62
62
  - lib/textutils/filter/erb_filter.rb
63
63
  - lib/textutils/helper/address_helper.rb
64
+ - lib/textutils/helper/tag_helper.rb
64
65
  - lib/textutils/helper/title_helper.rb
65
66
  - lib/textutils/helper/unicode_helper.rb
66
67
  - lib/textutils/helper/value_helper.rb