textutils 0.7.1 → 0.8.0

Sign up to get free protection for your applications and to get access to all the features.
File without changes
data/Manifest.txt CHANGED
@@ -1,6 +1,6 @@
1
- History.markdown
1
+ History.md
2
2
  Manifest.txt
3
- README.markdown
3
+ README.md
4
4
  Rakefile
5
5
  lib/textutils.rb
6
6
  lib/textutils/classifier.rb
@@ -29,3 +29,4 @@ test/helper.rb
29
29
  test/test_hypertext_helper.rb
30
30
  test/test_title_helper.rb
31
31
  test/test_unicode_helper.rb
32
+ test/test_values_reader.rb
@@ -1,4 +1,6 @@
1
- # `textutils` - Text Filters, Helpers, Readers and More in Ruby
1
+ # `textutils`
2
+
3
+ Text Filters, Helpers, Readers and More in Ruby
2
4
 
3
5
  * home :: [github.com/rubylibs/textutils](https://github.com/rubylibs/textutils)
4
6
  * bugs :: [github.com/rubylibs/textutils/issues](https://github.com/rubylibs/textutils/issues)
@@ -7,6 +9,7 @@
7
9
  * forum :: [ruby-talk@ruby-lang.org](https://www.ruby-lang.org/en/community/mailing-lists/)
8
10
 
9
11
 
12
+
10
13
  ## Filters
11
14
 
12
15
  ### `comments_percent_style` Filter
data/Rakefile CHANGED
@@ -14,8 +14,8 @@ Hoe.spec 'textutils' do
14
14
  self.email = 'ruby-talk@ruby-lang.org'
15
15
 
16
16
  # switch extension to .markdown for github formatting
17
- self.readme_file = 'README.markdown'
18
- self.history_file = 'History.markdown'
17
+ self.readme_file = 'README.md'
18
+ self.history_file = 'History.md'
19
19
 
20
20
  self.extra_deps = [
21
21
  ['logutils', '~> 0.5'] # e.g. >= 0.5 <= 1.0
@@ -10,13 +10,20 @@ module TextUtils
10
10
  # lets us use "classic" web helpers a la rails
11
11
  # find a good name for sub module - Reader? Fixtures? Values? Parser?
12
12
 
13
+ def strip_part_markers( title ) # use different name e.g. strip_name_markers/strip_name_enclosure etc.??
14
+ # remove optional part markers
15
+ # e.g. Bock ‹Damm› becomes => Bock Damm
16
+ # ‹Estrella› ‹Damm› Inedit becomes => Estrella Damm Inedit
13
17
 
18
+ # todo: also allow regular <> for easy typing/input ??? why? why not? used for anything else already?
19
+ title.gsub( /[‹›]/, '' )
20
+ end
14
21
 
15
22
  def strip_translations( title )
16
23
  # remove optional english translation in square brackets ([])
17
24
  # e.g. Wien [Vienna] => Wien
18
25
 
19
- title.gsub( /\[.+\]/, '' )
26
+ title.gsub( /\[[^\]]+\]/, '' )
20
27
  end
21
28
 
22
29
  def strip_subtitles( title )
@@ -24,7 +31,7 @@ module TextUtils
24
31
  # e.g. Las Palmas (de Gran Canaria) => Las Palmas
25
32
  # Palma (de Mallorca) => Palma
26
33
 
27
- title.gsub( /\(.+\)/, '' )
34
+ title.gsub( /\([^\)]+\)/, '' )
28
35
  end
29
36
 
30
37
  def strip_tags( title ) # todo: use an alias or rename for better name ??
@@ -34,7 +41,7 @@ module TextUtils
34
41
  #
35
42
  # todo: use for autotags? e.g. {Bio} => bio
36
43
 
37
- title.gsub( /\{.+\}/, '' )
44
+ title.gsub( /\{[^\}]+\}/, '' )
38
45
  end
39
46
 
40
47
  def strip_whitespaces( title )
@@ -54,6 +61,8 @@ module TextUtils
54
61
  ## NB: downcase does NOT work for accented chars (thus, include in alternatives)
55
62
  key = title.downcase
56
63
 
64
+ key = strip_part_markers( key ) # e.g. ‹Estrella› ‹Damm› Inedit becomes => Estrella Damm Inedit
65
+
57
66
  key = strip_translations( key )
58
67
 
59
68
  key = strip_subtitles( key )
@@ -12,7 +12,7 @@ module TextUtils
12
12
  def match_country( value )
13
13
  if value =~ /^country:/ # country:
14
14
  country_key = value[8..-1] # cut off country: prefix
15
- country = WorldDb::Models::Country.find_by_key!( country_key )
15
+ country = WorldDb::Model::Country.find_by_key!( country_key )
16
16
  yield( country )
17
17
  true # bingo - match found
18
18
  else
@@ -23,7 +23,7 @@ module TextUtils
23
23
  def match_supra( value )
24
24
  if value =~ /^supra:/ # supra:
25
25
  country_key = value[6..-1] # cut off supra: prefix
26
- country = WorldDb::Models::Country.find_by_key!( country_key )
26
+ country = WorldDb::Model::Country.find_by_key!( country_key )
27
27
  yield( country )
28
28
  true # bingo - match found
29
29
  else
@@ -52,11 +52,11 @@ module TextUtils
52
52
  def match_region_for_country( value, country_id ) ## NB: required country_id
53
53
  if value =~ /^region:/ ## region:
54
54
  region_key = value[7..-1] ## cut off region: prefix
55
- region = WorldDb::Models::Region.find_by_key_and_country_id!( region_key, country_id )
55
+ region = WorldDb::Model::Region.find_by_key_and_country_id!( region_key, country_id )
56
56
  yield( region )
57
57
  true # bingo - match found
58
58
  elsif is_region?( value ) ## assume region code e.g. TX or N
59
- region = WorldDb::Models::Region.find_by_key_and_country_id!( value.downcase, country_id )
59
+ region = WorldDb::Model::Region.find_by_key_and_country_id!( value.downcase, country_id )
60
60
  yield( region )
61
61
  true # bingo - match found
62
62
  else
@@ -68,7 +68,7 @@ module TextUtils
68
68
  def match_city( value ) # NB: might be nil (city not found)
69
69
  if value =~ /^city:/ ## city:
70
70
  city_key = value[5..-1] ## cut off city: prefix
71
- city = WorldDb::Models::City.find_by_key( city_key )
71
+ city = WorldDb::Model::City.find_by_key( city_key )
72
72
  yield( city ) # NB: might be nil (city not found)
73
73
  true # bingo - match found
74
74
  else
@@ -80,7 +80,7 @@ module TextUtils
80
80
  def match_metro( value )
81
81
  if value =~ /^metro:/ ## metro:
82
82
  city_key = value[6..-1] ## cut off metro: prefix
83
- city = WorldDb::Models::City.find_by_key!( city_key ) # NB: parent city/metro required, that is, lookup w/ !
83
+ city = WorldDb::Model::City.find_by_key!( city_key ) # NB: parent city/metro required, that is, lookup w/ !
84
84
  yield( city )
85
85
  true # bingo - match found
86
86
  else
@@ -113,7 +113,7 @@ module TextUtils
113
113
  def match_brewery( value )
114
114
  if value =~ /^by:/ ## by: -brewed by/brewery
115
115
  brewery_key = value[3..-1] ## cut off by: prefix
116
- brewery = BeerDb::Models::Brewery.find_by_key!( brewery_key )
116
+ brewery = BeerDb::Model::Brewery.find_by_key!( brewery_key )
117
117
  yield( brewery )
118
118
  true # bingo - match found
119
119
  else
@@ -288,6 +288,10 @@ module TextUtils
288
288
  attribs[:grade] = grade
289
289
  end
290
290
 
291
+ ## fix/todo: add find parts ??
292
+ # e.g. ‹Estrella› ‹Damm› Inedit
293
+ # becomes => title: 'Estrella Damm Inedit' and parts: ['Estrella','Damm']
294
+
291
295
  ## title (split of optional synonyms)
292
296
  # e.g. FC Bayern Muenchen|Bayern Muenchen|Bayern
293
297
  titles = title_col.split('|')
@@ -58,11 +58,20 @@ class ValuesReader
58
58
 
59
59
 
60
60
  def initialize( path, more_attribs={} )
61
- @path = path
62
-
63
61
  @more_attribs = more_attribs
64
62
 
65
- @data = File.read_utf8( @path )
63
+ ### workaround/hack
64
+ # if path includes newline assume it's a string buffer not a file name
65
+ # fix: use from_file and from_string etc. for ctor
66
+ # check what is the best convention (follow ???)
67
+
68
+ if path =~ /\n/m
69
+ @path = 'stringio' # what name to use ???
70
+ @data = path.dup # make a duplicate ?? why? why not?
71
+ else
72
+ @path = path
73
+ @data = File.read_utf8( @path )
74
+ end
66
75
  end
67
76
 
68
77
 
@@ -76,13 +85,12 @@ class ValuesReader
76
85
 
77
86
  def each_line # support multi line records
78
87
 
79
- inside_line = false # todo: find a better name? e.g. line_found?
80
- attribs = {}
81
- more_values = []
82
-
88
+ inside_record = false
89
+ blank_counter = 0 # count of number of blank lines (note: 1+ blank lines clear multi-line record)
90
+ values = []
83
91
 
84
92
  @data.each_line do |line|
85
-
93
+
86
94
  ## allow alternative comment lines
87
95
  ## e.g. -- comment or
88
96
  ## % comment
@@ -91,7 +99,10 @@ class ValuesReader
91
99
  ## NB: for now alternative comment lines not allowed as end of line style e.g
92
100
  ## some data, more data -- comment here
93
101
 
94
- if line =~ /^\s*#/ || line =~ /^\s*--/ || line =~ /^\s*%/
102
+ if line =~ /^\s*#/ ||
103
+ line =~ /^\s*--/ ||
104
+ line =~ /^\s*%/ ||
105
+ line =~ /^\s*__/
95
106
  # skip comments and do NOT copy to result (keep comments secret!)
96
107
  logger.debug 'skipping comment line'
97
108
  next
@@ -99,7 +110,8 @@ class ValuesReader
99
110
 
100
111
  if line =~ /^\s*$/
101
112
  # kommentar oder leerzeile überspringen
102
- logger.debug 'skipping blank line'
113
+ blank_counter += 1
114
+ logger.debug "skipping blank line (#{blank_counter})"
103
115
  next
104
116
  end
105
117
 
@@ -114,71 +126,109 @@ class ValuesReader
114
126
  line = line.strip
115
127
 
116
128
 
129
+ if line =~ /^-\s/ # check for group headers e.g. - St. James Brewery
130
+ logger.info " skip group header #{line} for now (fix/add soon)"
131
+ next
132
+ elsif line =~ /^\[([a-z][a-z]+)\]/
117
133
  ### check for multiline record
118
- ## must start with key and colon e.g. brands:
119
- if line =~ /^[a-z][a-z0-9.]*[a-z0-9]:/
120
- # NB: every additional line is one value e.g. city:wien, etc.
121
- # allows you to use any chars
122
- logger.debug " multi-line record - add key-value »#{line}«"
123
-
124
- more_values.unshift( line.dup ) # add value upfront to array (first value); lets us keep (optional) tags as last entry; fix!! see valuereaderEx v2
125
- next
126
- else
127
- # NB: new record clears/ends multi-line record
128
-
129
- if inside_line # check if we already processed a line? if yes; yield last line
134
+ ## must start with key e.g. [guiness]
135
+ ## for now only supports key with letter a-z (no digits/numbers or underscore or dots)
136
+
137
+ if values.length > 0 # check if we already processed a record? if yes; yield last record (before reset)
138
+ attribs, more_values = find_key_n_title( values )
139
+ attribs = attribs.merge( @more_attribs ) # e.g. merge country_id and other defaults if present
130
140
  yield( attribs, more_values )
131
- attribs = {}
132
- more_values = []
141
+ values = []
142
+ end
143
+
144
+ inside_record = true
145
+ blank_counter = 0
146
+
147
+ # NB: every additional line is one value e.g. city:wien, etc.
148
+ # allows you to use any chars
149
+ logger.debug " multi-line record w/ key »#{$1}«"
150
+
151
+ values = [$1.dup] # add key as first value in ary
152
+ elsif inside_record && blank_counter == 0 && line =~ /\/{2}/ # check address line (must contain //)
153
+ values += [line.dup] # assume single value column (no need to escape commas)
154
+ elsif inside_record && blank_counter == 0 && line =~ /^[a-z][a-z0-9.]*[a-z0-9]:/ # check key: value pair
155
+ values += [line.dup] # assume single value column (no need to escape commas)
156
+ else
157
+ if inside_record && blank_counter == 0 # continue adding more values
158
+ values += find_values( line )
159
+ else # assume single-line (stand-alone / classic csv) record
160
+ if values.length > 0
161
+ attribs, more_values = find_key_n_title( values )
162
+ attribs = attribs.merge( @more_attribs ) # e.g. merge country_id and other defaults if present
163
+ yield( attribs, more_values )
164
+ values = []
165
+ end
166
+ inside_record = false
167
+ blank_counter = 0
168
+ values = find_values( line )
133
169
  end
134
- inside_line = true
135
170
  end
136
171
 
172
+ end # each lines
137
173
 
138
- ### guard escaped commas (e.g. \,)
139
- line = line.gsub( '\,', '♣' ) # use black club suit/=shamrock char for escaped separator
140
-
141
- ## use generic separator (allow us to configure separator)
142
- line = line.gsub( ',', '›')
143
-
144
- ## restore escaped commas (before split)
145
- line = line.gsub( '♣', ',' )
174
+ # do NOT forget to yield last line (if present/processed)
175
+ if values.length > 0
176
+ attribs, more_values = find_key_n_title( values )
177
+ attribs = attribs.merge( @more_attribs ) # e.g. merge country_id and other defaults if present
178
+ yield( attribs, more_values )
179
+ end
146
180
 
147
- logger.debug "line: »#{line}«"
181
+ end # method each_line
148
182
 
149
- values = line.split( '›' )
150
-
151
- # pass 1) remove leading and trailing whitespace for values
152
183
 
153
- values = values.map { |value| value.strip }
184
+ ### todo:
185
+ ## move to helper for reuse a la find_key_n_title ???
186
+ ## use different/better name ?? e.g. find_values_in_line or split_line_into_values ??
187
+ def find_values( line )
188
+ ## note returns an array of values (strings)
154
189
 
155
- ##### todo remove support of comment column? (NB: must NOT include commas)
156
- # pass 2) remove comment columns
157
-
158
- values = values.select do |value|
159
- if value =~ /^#/ ## start with # treat it as a comment column; e.g. remove it
160
- logger.debug " removing column with value »#{value}«"
161
- false
162
- else
163
- true
164
- end
165
- end
190
+ meta_comma = '«KOMMA»'
191
+ meta_separator = »'
166
192
 
167
- logger.debug " values: »#{values.join('« »')}«"
193
+ # guard escaped commas
194
+ # e.g. convert \, to «KOMMA»
195
+ line = line.gsub( '\,', meta_comma )
168
196
 
169
- attribs, more_values = find_key_n_title( values )
197
+ # note: use generic separator (allow us to configure separator)
198
+ # e.g « »
199
+ line = line.gsub( ',', meta_separator )
170
200
 
171
- attribs = attribs.merge( @more_attribs ) # e.g. merge country_id and other defaults if present
201
+ # restore escaped commas (before split)
202
+ line = line.gsub( meta_comma, ',' )
172
203
 
173
- end # each lines
204
+ logger.debug "line: |»#{line}«|"
174
205
 
175
- # do NOT forget to yield last line (if present/processed)
176
- if inside_line
177
- yield( attribs, more_values )
178
- end
206
+ values = line.split( meta_separator )
179
207
 
208
+ # pass 1) remove leading and trailing whitespace for values
180
209
 
181
- end # method each_line
210
+ values = values.map { |value| value.strip }
211
+
212
+
213
+ ##### todo/fix:
214
+ # !!!REMOVE!!!
215
+ # remove support of comment column? (NB: must NOT include commas)
216
+ # pass 2) remove comment columns
217
+ #
218
+ # todo/fix: check if still possible ?? - add an example here how it looks like/works
219
+
220
+ values = values.select do |value|
221
+ if value =~ /^#/ ## start with # treat it as a comment column; e.g. remove it
222
+ logger.info " removing column with value »#{value}«"
223
+ false
224
+ else
225
+ true
226
+ end
227
+ end
228
+
229
+ logger.debug " values: |»#{values.join('« »')}«|"
230
+ values
231
+ end
182
232
 
183
233
 
184
234
  end # class ValuesReader
@@ -1,6 +1,7 @@
1
1
 
2
2
  module TextUtils
3
3
 
4
- VERSION = '0.7.1'
4
+ VERSION = '0.8.0'
5
5
 
6
6
  end # module TextUtils
7
+
@@ -8,6 +8,7 @@
8
8
 
9
9
  require 'helper'
10
10
 
11
+
11
12
  class TestTitleHelper < MiniTest::Unit::TestCase
12
13
 
13
14
  def test_title_to_key
@@ -15,7 +16,9 @@ class TestTitleHelper < MiniTest::Unit::TestCase
15
16
  txt_io = [
16
17
  [ 'São Paulo', 'saopaulo' ],
17
18
  [ 'São Gonçalo', 'saogoncalo' ],
18
- [ 'Výčepní', 'vycepni' ]
19
+ [ 'Výčepní', 'vycepni' ],
20
+ [ 'Bock ‹Damm›', 'bockdamm' ],
21
+ [ '‹Estrella› ‹Damm› Inedit', 'estrelladamminedit' ]
19
22
  ]
20
23
 
21
24
  txt_io.each do |txt|
@@ -0,0 +1,311 @@
1
+ # encoding: utf-8
2
+
3
+ ###
4
+ # to run use
5
+ # ruby -I ./lib -I ./test test/test_values_reader.rb
6
+ # or better
7
+ # rake test
8
+
9
+ require 'helper'
10
+
11
+ class TestValuesReader < MiniTest::Unit::TestCase
12
+
13
+ def test_escape_comma
14
+ # note: double-escaped comma e.g. \\, becomes literal \,
15
+ txt =<<EOS
16
+ [fuller]
17
+ Fuller\\, Smith & Turner, 1845
18
+ The Griffin Brewery // Chiswick Lane South // London, W4 2QB
19
+ brands: Fuller's
20
+
21
+ fuller, Fuller\\, Smith & Turner, 1845, The Griffin Brewery // Chiswick Lane South // London\\, W4 2QB
22
+ EOS
23
+
24
+ pp txt
25
+
26
+ reader = ValuesReader.new( txt )
27
+
28
+ i = 0
29
+ reader.each_line do |attribs, values|
30
+ i += 1
31
+
32
+ puts "attribs:"
33
+ pp attribs
34
+ puts "values:"
35
+ pp values
36
+
37
+ if i == 1
38
+ assert_equal attribs[:key], 'fuller'
39
+ assert_equal attribs[:title], 'Fuller, Smith & Turner'
40
+ assert_equal attribs[:grade], nil
41
+ assert_equal attribs[:synonyms], nil
42
+
43
+ assert_equal values[0], '1845'
44
+ assert_equal values[1], 'The Griffin Brewery // Chiswick Lane South // London, W4 2QB'
45
+ assert_equal values[2], "brands: Fuller's"
46
+ elsif i == 2
47
+ assert_equal attribs[:key], 'fuller'
48
+ assert_equal attribs[:title], 'Fuller, Smith & Turner'
49
+ assert_equal attribs[:grade], nil
50
+ assert_equal attribs[:synonyms], nil
51
+
52
+ assert_equal values[0], '1845'
53
+ assert_equal values[1], 'The Griffin Brewery // Chiswick Lane South // London, W4 2QB'
54
+ else
55
+ assert_equal true, false # should not get here
56
+ end
57
+ end
58
+ end # test_escape_comma
59
+
60
+
61
+ def test_mixed
62
+ txt =<<EOS
63
+ ##########
64
+ # Wien Umbgebung
65
+
66
+ [schwechat]
67
+ Brauerei Schwechat (Brau Union) **, 1796
68
+ www.schwechater.at
69
+ 2320 Schwechat // Mautner Markhof-Straße 11
70
+ brands: Schwechater
71
+ brau_union # Part of Brau Union
72
+
73
+
74
+ #############
75
+ # Waldviertel
76
+
77
+ zwettler, Zwettler Brauerei|Privatbrauerei Zwettl **, 1709, www.zwettler.at, 3910 Zwettl // Syrnauer Straße 22-25
78
+ weitra, Weitra Bräu Bierwerkstatt|Brauerei Weitra *, 1321, www.bierwerkstatt.at, 3970 Weitra // Sparkasseplatz 160, zwettler # Part of Zwettler
79
+
80
+ #############
81
+ # Weinviertel
82
+
83
+ [hubertus]
84
+ Hubertus Bräu *, 1454
85
+ www.hubertus.at
86
+ 2136 Laa/Thaya // Hubertusgasse 1
87
+ brands: Hubertus
88
+
89
+ egger, Privatbrauerei Fritz Egger **, 1978, www.egger-bier.at, 3105 Unterradlberg // Tiroler Straße 18
90
+ EOS
91
+
92
+ reader = ValuesReader.new( txt )
93
+
94
+ i = 0
95
+ reader.each_line do |attribs, values|
96
+ i += 1
97
+
98
+ puts "attribs:"
99
+ pp attribs
100
+ puts "values:"
101
+ pp values
102
+
103
+ if i == 1
104
+ assert_equal attribs[:key], 'schwechat'
105
+ assert_equal attribs[:title], 'Brauerei Schwechat (Brau Union)'
106
+ assert_equal attribs[:grade], 2
107
+ assert_equal attribs[:synonyms], nil
108
+
109
+ assert_equal values[0], '1796'
110
+ assert_equal values[1], 'www.schwechater.at'
111
+ assert_equal values[2], '2320 Schwechat // Mautner Markhof-Straße 11'
112
+ assert_equal values[3], 'brands: Schwechater'
113
+ assert_equal values[-1], 'brau_union'
114
+ elsif i == 2
115
+ assert_equal attribs[:key], 'zwettler'
116
+ assert_equal attribs[:title], 'Zwettler Brauerei'
117
+ assert_equal attribs[:grade], 2
118
+ assert_equal attribs[:synonyms], 'Privatbrauerei Zwettl'
119
+
120
+ assert_equal values[0], '1709'
121
+ assert_equal values[1], 'www.zwettler.at'
122
+ assert_equal values[2], '3910 Zwettl // Syrnauer Straße 22-25'
123
+ elsif i == 3
124
+ assert_equal attribs[:key], 'weitra'
125
+ assert_equal attribs[:title], 'Weitra Bräu Bierwerkstatt'
126
+ assert_equal attribs[:grade], 3
127
+ assert_equal attribs[:synonyms], 'Brauerei Weitra'
128
+
129
+ assert_equal values[0], '1321'
130
+ assert_equal values[1], 'www.bierwerkstatt.at'
131
+ assert_equal values[2], '3970 Weitra // Sparkasseplatz 160'
132
+ assert_equal values[-1], 'zwettler'
133
+ elsif i == 4
134
+ assert_equal attribs[:key], 'hubertus'
135
+ assert_equal attribs[:title], 'Hubertus Bräu'
136
+ assert_equal attribs[:grade], 3
137
+ assert_equal attribs[:synonyms], nil
138
+
139
+ assert_equal values[0], '1454'
140
+ assert_equal values[1], 'www.hubertus.at'
141
+ assert_equal values[2], '2136 Laa/Thaya // Hubertusgasse 1'
142
+ assert_equal values[3], 'brands: Hubertus'
143
+ elsif i == 5
144
+ assert_equal attribs[:key], 'egger'
145
+ assert_equal attribs[:title], 'Privatbrauerei Fritz Egger'
146
+ assert_equal attribs[:grade], 2
147
+ assert_equal attribs[:synonyms], nil
148
+
149
+ assert_equal values[0], '1978'
150
+ assert_equal values[1], 'www.egger-bier.at'
151
+ assert_equal values[2], '3105 Unterradlberg // Tiroler Straße 18'
152
+ else
153
+ assert_equal true, false # should not get here
154
+ end
155
+ end
156
+ end # test_mixed
157
+
158
+
159
+ def test_multi_line_records
160
+ txt =<<EOS
161
+ ##########
162
+ # Wien Umbgebung
163
+
164
+ [schwechat]
165
+ Brauerei Schwechat (Brau Union) **, 1796
166
+ www.schwechater.at
167
+ 2320 Schwechat // Mautner Markhof-Straße 11
168
+ brands: Schwechater
169
+ brau_union # Part of Brau Union
170
+
171
+
172
+ #############
173
+ # Waldviertel
174
+
175
+ [zwettler]
176
+ Zwettler Brauerei|Privatbrauerei Zwettl **, 1709
177
+ www.zwettler.at
178
+ 3910 Zwettl // Syrnauer Straße 22-25
179
+ brands: Zwettler
180
+ EOS
181
+
182
+ reader = ValuesReader.new( txt )
183
+
184
+ i = 0
185
+ reader.each_line do |attribs, values|
186
+ i += 1
187
+
188
+ puts "attribs:"
189
+ pp attribs
190
+ puts "values:"
191
+ pp values
192
+
193
+ if i == 1
194
+ assert_equal attribs[:key], 'schwechat'
195
+ assert_equal attribs[:title], 'Brauerei Schwechat (Brau Union)'
196
+ assert_equal attribs[:grade], 2
197
+ assert_equal attribs[:synonyms], nil
198
+
199
+ assert_equal values[0], '1796'
200
+ assert_equal values[1], 'www.schwechater.at'
201
+ assert_equal values[2], '2320 Schwechat // Mautner Markhof-Straße 11'
202
+ assert_equal values[3], 'brands: Schwechater'
203
+ assert_equal values[-1], 'brau_union'
204
+ elsif i == 2
205
+ assert_equal attribs[:key], 'zwettler'
206
+ assert_equal attribs[:title], 'Zwettler Brauerei'
207
+ assert_equal attribs[:grade], 2
208
+ assert_equal attribs[:synonyms], 'Privatbrauerei Zwettl'
209
+
210
+ assert_equal values[0], '1709'
211
+ assert_equal values[1], 'www.zwettler.at'
212
+ assert_equal values[2], '3910 Zwettl // Syrnauer Straße 22-25'
213
+ assert_equal values[3], 'brands: Zwettler'
214
+ else
215
+ assert_equal true, false # should not get here
216
+ end
217
+ end
218
+ end # test_multi_line_records
219
+
220
+
221
+ def test_classic_csv_records
222
+
223
+ txt =<<EOS
224
+ arsenal, Arsenal|Arsenal FC|FC Arsenal, ARS, city:london
225
+ manunited, Manchester United|Man Utd|Manchester U., MUN, city:manchester
226
+ liverpool, Liverpool|Liverpool FC|FC Liverpool, LIV, city:liverpool
227
+ EOS
228
+
229
+ reader = ValuesReader.new( txt )
230
+
231
+ i = 0
232
+ reader.each_line do |attribs, values|
233
+ i += 1
234
+
235
+ puts "attribs:"
236
+ pp attribs
237
+ puts "values:"
238
+ pp values
239
+
240
+ if i == 1
241
+ assert_equal attribs[:key], 'arsenal'
242
+ assert_equal attribs[:title], 'Arsenal'
243
+ assert_equal attribs[:synonyms], 'Arsenal FC|FC Arsenal'
244
+
245
+ assert_equal values[0], 'ARS'
246
+ assert_equal values[1], 'city:london'
247
+ elsif i == 2
248
+ elsif i == 3
249
+ else
250
+ assert_equal true, false # should not get here
251
+ end
252
+ end
253
+ end # test_classic_csv_records
254
+
255
+
256
+ def test_autogen_keys
257
+ txt =<<EOS
258
+ Ottakringer Helles, 5.2 %, 11.8°
259
+ Ottakringer Gold Fassl Spezial, 5.6 %, 12.7°
260
+ Ottakringer (Gold Fassl) Pils, 4.6 %, 11.2°
261
+ Ottakringer (Gold Fassl) Pur {Bio}, 5.2 %, 11.8°, bio
262
+ EOS
263
+
264
+ reader = ValuesReader.new( txt )
265
+
266
+ i = 0
267
+ reader.each_line do |attribs, values|
268
+ i += 1
269
+
270
+ puts "attribs:"
271
+ pp attribs
272
+ puts "values:"
273
+ pp values
274
+
275
+ if i == 1
276
+ assert_equal attribs[:key], 'ottakringerhelles'
277
+ assert_equal attribs[:title], 'Ottakringer Helles'
278
+ assert_equal attribs[:synonyms], nil
279
+
280
+ assert_equal values[0], '5.2 %'
281
+ assert_equal values[1], '11.8°'
282
+ elsif i == 2
283
+ assert_equal attribs[:key], 'ottakringergoldfasslspezial'
284
+ assert_equal attribs[:title], 'Ottakringer Gold Fassl Spezial'
285
+ assert_equal attribs[:synonyms], nil
286
+
287
+ assert_equal values[0], '5.6 %'
288
+ assert_equal values[1], '12.7°'
289
+ elsif i == 3
290
+ assert_equal attribs[:key], 'ottakringerpils'
291
+ assert_equal attribs[:title], 'Ottakringer (Gold Fassl) Pils'
292
+ assert_equal attribs[:synonyms], nil
293
+
294
+ assert_equal values[0], '4.6 %'
295
+ assert_equal values[1], '11.2°'
296
+ elsif i == 4
297
+ assert_equal attribs[:key], 'ottakringerpur'
298
+ assert_equal attribs[:title], 'Ottakringer (Gold Fassl) Pur {Bio}'
299
+ assert_equal attribs[:synonyms], nil
300
+
301
+ assert_equal values[0], '5.2 %'
302
+ assert_equal values[1], '11.8°'
303
+ assert_equal values[-1], 'bio'
304
+ else
305
+ assert_equal true, false # should not get here
306
+ end
307
+ end
308
+ end # test_autogen_keys
309
+
310
+
311
+ end # class TestValuesReader
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: textutils
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.7.1
4
+ version: 0.8.0
5
5
  prerelease:
6
6
  platform: ruby
7
7
  authors:
@@ -9,11 +9,11 @@ authors:
9
9
  autorequire:
10
10
  bindir: bin
11
11
  cert_chain: []
12
- date: 2013-12-25 00:00:00.000000000 Z
12
+ date: 2014-01-17 00:00:00.000000000 Z
13
13
  dependencies:
14
14
  - !ruby/object:Gem::Dependency
15
15
  name: logutils
16
- requirement: &21849324 !ruby/object:Gem::Requirement
16
+ requirement: &3086976 !ruby/object:Gem::Requirement
17
17
  none: false
18
18
  requirements:
19
19
  - - ~>
@@ -21,10 +21,10 @@ dependencies:
21
21
  version: '0.5'
22
22
  type: :runtime
23
23
  prerelease: false
24
- version_requirements: *21849324
24
+ version_requirements: *3086976
25
25
  - !ruby/object:Gem::Dependency
26
26
  name: rdoc
27
- requirement: &21848640 !ruby/object:Gem::Requirement
27
+ requirement: &3085764 !ruby/object:Gem::Requirement
28
28
  none: false
29
29
  requirements:
30
30
  - - ~>
@@ -32,10 +32,10 @@ dependencies:
32
32
  version: '4.0'
33
33
  type: :development
34
34
  prerelease: false
35
- version_requirements: *21848640
35
+ version_requirements: *3085764
36
36
  - !ruby/object:Gem::Dependency
37
37
  name: hoe
38
- requirement: &21848040 !ruby/object:Gem::Requirement
38
+ requirement: &3082920 !ruby/object:Gem::Requirement
39
39
  none: false
40
40
  requirements:
41
41
  - - ~>
@@ -43,17 +43,19 @@ dependencies:
43
43
  version: '3.7'
44
44
  type: :development
45
45
  prerelease: false
46
- version_requirements: *21848040
46
+ version_requirements: *3082920
47
47
  description: textutils - Text Filters, Helpers, Readers and More
48
48
  email: ruby-talk@ruby-lang.org
49
49
  executables: []
50
50
  extensions: []
51
51
  extra_rdoc_files:
52
+ - History.md
52
53
  - Manifest.txt
54
+ - README.md
53
55
  files:
54
- - History.markdown
56
+ - History.md
55
57
  - Manifest.txt
56
- - README.markdown
58
+ - README.md
57
59
  - Rakefile
58
60
  - lib/textutils.rb
59
61
  - lib/textutils/classifier.rb
@@ -82,6 +84,7 @@ files:
82
84
  - test/test_hypertext_helper.rb
83
85
  - test/test_title_helper.rb
84
86
  - test/test_unicode_helper.rb
87
+ - test/test_values_reader.rb
85
88
  - .gemtest
86
89
  homepage: https://github.com/rubylibs/textutils
87
90
  licenses:
@@ -89,7 +92,7 @@ licenses:
89
92
  post_install_message:
90
93
  rdoc_options:
91
94
  - --main
92
- - README.markdown
95
+ - README.md
93
96
  require_paths:
94
97
  - lib
95
98
  required_ruby_version: !ruby/object:Gem::Requirement
@@ -114,3 +117,4 @@ test_files:
114
117
  - test/test_hypertext_helper.rb
115
118
  - test/test_title_helper.rb
116
119
  - test/test_unicode_helper.rb
120
+ - test/test_values_reader.rb