textutils 0.7.1 → 0.8.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
File without changes
data/Manifest.txt CHANGED
@@ -1,6 +1,6 @@
1
- History.markdown
1
+ History.md
2
2
  Manifest.txt
3
- README.markdown
3
+ README.md
4
4
  Rakefile
5
5
  lib/textutils.rb
6
6
  lib/textutils/classifier.rb
@@ -29,3 +29,4 @@ test/helper.rb
29
29
  test/test_hypertext_helper.rb
30
30
  test/test_title_helper.rb
31
31
  test/test_unicode_helper.rb
32
+ test/test_values_reader.rb
@@ -1,4 +1,6 @@
1
- # `textutils` - Text Filters, Helpers, Readers and More in Ruby
1
+ # `textutils`
2
+
3
+ Text Filters, Helpers, Readers and More in Ruby
2
4
 
3
5
  * home :: [github.com/rubylibs/textutils](https://github.com/rubylibs/textutils)
4
6
  * bugs :: [github.com/rubylibs/textutils/issues](https://github.com/rubylibs/textutils/issues)
@@ -7,6 +9,7 @@
7
9
  * forum :: [ruby-talk@ruby-lang.org](https://www.ruby-lang.org/en/community/mailing-lists/)
8
10
 
9
11
 
12
+
10
13
  ## Filters
11
14
 
12
15
  ### `comments_percent_style` Filter
data/Rakefile CHANGED
@@ -14,8 +14,8 @@ Hoe.spec 'textutils' do
14
14
  self.email = 'ruby-talk@ruby-lang.org'
15
15
 
16
16
  # switch extension to .markdown for github formatting
17
- self.readme_file = 'README.markdown'
18
- self.history_file = 'History.markdown'
17
+ self.readme_file = 'README.md'
18
+ self.history_file = 'History.md'
19
19
 
20
20
  self.extra_deps = [
21
21
  ['logutils', '~> 0.5'] # e.g. >= 0.5 <= 1.0
@@ -10,13 +10,20 @@ module TextUtils
10
10
  # lets us use "classic" web helpers a la rails
11
11
  # find a good name for sub module - Reader? Fixtures? Values? Parser?
12
12
 
13
+ def strip_part_markers( title ) # use different name e.g. strip_name_markers/strip_name_enclosure etc.??
14
+ # remove optional part markers
15
+ # e.g. Bock ‹Damm› becomes => Bock Damm
16
+ # ‹Estrella› ‹Damm› Inedit becomes => Estrella Damm Inedit
13
17
 
18
+ # todo: also allow regular <> for easy typing/input ??? why? why not? used for anything else already?
19
+ title.gsub( /[‹›]/, '' )
20
+ end
14
21
 
15
22
  def strip_translations( title )
16
23
  # remove optional english translation in square brackets ([])
17
24
  # e.g. Wien [Vienna] => Wien
18
25
 
19
- title.gsub( /\[.+\]/, '' )
26
+ title.gsub( /\[[^\]]+\]/, '' )
20
27
  end
21
28
 
22
29
  def strip_subtitles( title )
@@ -24,7 +31,7 @@ module TextUtils
24
31
  # e.g. Las Palmas (de Gran Canaria) => Las Palmas
25
32
  # Palma (de Mallorca) => Palma
26
33
 
27
- title.gsub( /\(.+\)/, '' )
34
+ title.gsub( /\([^\)]+\)/, '' )
28
35
  end
29
36
 
30
37
  def strip_tags( title ) # todo: use an alias or rename for better name ??
@@ -34,7 +41,7 @@ module TextUtils
34
41
  #
35
42
  # todo: use for autotags? e.g. {Bio} => bio
36
43
 
37
- title.gsub( /\{.+\}/, '' )
44
+ title.gsub( /\{[^\}]+\}/, '' )
38
45
  end
39
46
 
40
47
  def strip_whitespaces( title )
@@ -54,6 +61,8 @@ module TextUtils
54
61
  ## NB: downcase does NOT work for accented chars (thus, include in alternatives)
55
62
  key = title.downcase
56
63
 
64
+ key = strip_part_markers( key ) # e.g. ‹Estrella› ‹Damm› Inedit becomes => Estrella Damm Inedit
65
+
57
66
  key = strip_translations( key )
58
67
 
59
68
  key = strip_subtitles( key )
@@ -12,7 +12,7 @@ module TextUtils
12
12
  def match_country( value )
13
13
  if value =~ /^country:/ # country:
14
14
  country_key = value[8..-1] # cut off country: prefix
15
- country = WorldDb::Models::Country.find_by_key!( country_key )
15
+ country = WorldDb::Model::Country.find_by_key!( country_key )
16
16
  yield( country )
17
17
  true # bingo - match found
18
18
  else
@@ -23,7 +23,7 @@ module TextUtils
23
23
  def match_supra( value )
24
24
  if value =~ /^supra:/ # supra:
25
25
  country_key = value[6..-1] # cut off supra: prefix
26
- country = WorldDb::Models::Country.find_by_key!( country_key )
26
+ country = WorldDb::Model::Country.find_by_key!( country_key )
27
27
  yield( country )
28
28
  true # bingo - match found
29
29
  else
@@ -52,11 +52,11 @@ module TextUtils
52
52
  def match_region_for_country( value, country_id ) ## NB: required country_id
53
53
  if value =~ /^region:/ ## region:
54
54
  region_key = value[7..-1] ## cut off region: prefix
55
- region = WorldDb::Models::Region.find_by_key_and_country_id!( region_key, country_id )
55
+ region = WorldDb::Model::Region.find_by_key_and_country_id!( region_key, country_id )
56
56
  yield( region )
57
57
  true # bingo - match found
58
58
  elsif is_region?( value ) ## assume region code e.g. TX or N
59
- region = WorldDb::Models::Region.find_by_key_and_country_id!( value.downcase, country_id )
59
+ region = WorldDb::Model::Region.find_by_key_and_country_id!( value.downcase, country_id )
60
60
  yield( region )
61
61
  true # bingo - match found
62
62
  else
@@ -68,7 +68,7 @@ module TextUtils
68
68
  def match_city( value ) # NB: might be nil (city not found)
69
69
  if value =~ /^city:/ ## city:
70
70
  city_key = value[5..-1] ## cut off city: prefix
71
- city = WorldDb::Models::City.find_by_key( city_key )
71
+ city = WorldDb::Model::City.find_by_key( city_key )
72
72
  yield( city ) # NB: might be nil (city not found)
73
73
  true # bingo - match found
74
74
  else
@@ -80,7 +80,7 @@ module TextUtils
80
80
  def match_metro( value )
81
81
  if value =~ /^metro:/ ## metro:
82
82
  city_key = value[6..-1] ## cut off metro: prefix
83
- city = WorldDb::Models::City.find_by_key!( city_key ) # NB: parent city/metro required, that is, lookup w/ !
83
+ city = WorldDb::Model::City.find_by_key!( city_key ) # NB: parent city/metro required, that is, lookup w/ !
84
84
  yield( city )
85
85
  true # bingo - match found
86
86
  else
@@ -113,7 +113,7 @@ module TextUtils
113
113
  def match_brewery( value )
114
114
  if value =~ /^by:/ ## by: -brewed by/brewery
115
115
  brewery_key = value[3..-1] ## cut off by: prefix
116
- brewery = BeerDb::Models::Brewery.find_by_key!( brewery_key )
116
+ brewery = BeerDb::Model::Brewery.find_by_key!( brewery_key )
117
117
  yield( brewery )
118
118
  true # bingo - match found
119
119
  else
@@ -288,6 +288,10 @@ module TextUtils
288
288
  attribs[:grade] = grade
289
289
  end
290
290
 
291
+ ## fix/todo: add find parts ??
292
+ # e.g. ‹Estrella› ‹Damm› Inedit
293
+ # becomes => title: 'Estrella Damm Inedit' and parts: ['Estrella','Damm']
294
+
291
295
  ## title (split of optional synonyms)
292
296
  # e.g. FC Bayern Muenchen|Bayern Muenchen|Bayern
293
297
  titles = title_col.split('|')
@@ -58,11 +58,20 @@ class ValuesReader
58
58
 
59
59
 
60
60
  def initialize( path, more_attribs={} )
61
- @path = path
62
-
63
61
  @more_attribs = more_attribs
64
62
 
65
- @data = File.read_utf8( @path )
63
+ ### workaround/hack
64
+ # if path includes newline assume it's a string buffer not a file name
65
+ # fix: use from_file and from_string etc. for ctor
66
+ # check what is the best convention (follow ???)
67
+
68
+ if path =~ /\n/m
69
+ @path = 'stringio' # what name to use ???
70
+ @data = path.dup # make a duplicate ?? why? why not?
71
+ else
72
+ @path = path
73
+ @data = File.read_utf8( @path )
74
+ end
66
75
  end
67
76
 
68
77
 
@@ -76,13 +85,12 @@ class ValuesReader
76
85
 
77
86
  def each_line # support multi line records
78
87
 
79
- inside_line = false # todo: find a better name? e.g. line_found?
80
- attribs = {}
81
- more_values = []
82
-
88
+ inside_record = false
89
+ blank_counter = 0 # count of number of blank lines (note: 1+ blank lines clear multi-line record)
90
+ values = []
83
91
 
84
92
  @data.each_line do |line|
85
-
93
+
86
94
  ## allow alternative comment lines
87
95
  ## e.g. -- comment or
88
96
  ## % comment
@@ -91,7 +99,10 @@ class ValuesReader
91
99
  ## NB: for now alternative comment lines not allowed as end of line style e.g
92
100
  ## some data, more data -- comment here
93
101
 
94
- if line =~ /^\s*#/ || line =~ /^\s*--/ || line =~ /^\s*%/
102
+ if line =~ /^\s*#/ ||
103
+ line =~ /^\s*--/ ||
104
+ line =~ /^\s*%/ ||
105
+ line =~ /^\s*__/
95
106
  # skip comments and do NOT copy to result (keep comments secret!)
96
107
  logger.debug 'skipping comment line'
97
108
  next
@@ -99,7 +110,8 @@ class ValuesReader
99
110
 
100
111
  if line =~ /^\s*$/
101
112
  # kommentar oder leerzeile überspringen
102
- logger.debug 'skipping blank line'
113
+ blank_counter += 1
114
+ logger.debug "skipping blank line (#{blank_counter})"
103
115
  next
104
116
  end
105
117
 
@@ -114,71 +126,109 @@ class ValuesReader
114
126
  line = line.strip
115
127
 
116
128
 
129
+ if line =~ /^-\s/ # check for group headers e.g. - St. James Brewery
130
+ logger.info " skip group header #{line} for now (fix/add soon)"
131
+ next
132
+ elsif line =~ /^\[([a-z][a-z]+)\]/
117
133
  ### check for multiline record
118
- ## must start with key and colon e.g. brands:
119
- if line =~ /^[a-z][a-z0-9.]*[a-z0-9]:/
120
- # NB: every additional line is one value e.g. city:wien, etc.
121
- # allows you to use any chars
122
- logger.debug " multi-line record - add key-value »#{line}«"
123
-
124
- more_values.unshift( line.dup ) # add value upfront to array (first value); lets us keep (optional) tags as last entry; fix!! see valuereaderEx v2
125
- next
126
- else
127
- # NB: new record clears/ends multi-line record
128
-
129
- if inside_line # check if we already processed a line? if yes; yield last line
134
+ ## must start with key e.g. [guiness]
135
+ ## for now only supports key with letter a-z (no digits/numbers or underscore or dots)
136
+
137
+ if values.length > 0 # check if we already processed a record? if yes; yield last record (before reset)
138
+ attribs, more_values = find_key_n_title( values )
139
+ attribs = attribs.merge( @more_attribs ) # e.g. merge country_id and other defaults if present
130
140
  yield( attribs, more_values )
131
- attribs = {}
132
- more_values = []
141
+ values = []
142
+ end
143
+
144
+ inside_record = true
145
+ blank_counter = 0
146
+
147
+ # NB: every additional line is one value e.g. city:wien, etc.
148
+ # allows you to use any chars
149
+ logger.debug " multi-line record w/ key »#{$1}«"
150
+
151
+ values = [$1.dup] # add key as first value in ary
152
+ elsif inside_record && blank_counter == 0 && line =~ /\/{2}/ # check address line (must contain //)
153
+ values += [line.dup] # assume single value column (no need to escape commas)
154
+ elsif inside_record && blank_counter == 0 && line =~ /^[a-z][a-z0-9.]*[a-z0-9]:/ # check key: value pair
155
+ values += [line.dup] # assume single value column (no need to escape commas)
156
+ else
157
+ if inside_record && blank_counter == 0 # continue adding more values
158
+ values += find_values( line )
159
+ else # assume single-line (stand-alone / classic csv) record
160
+ if values.length > 0
161
+ attribs, more_values = find_key_n_title( values )
162
+ attribs = attribs.merge( @more_attribs ) # e.g. merge country_id and other defaults if present
163
+ yield( attribs, more_values )
164
+ values = []
165
+ end
166
+ inside_record = false
167
+ blank_counter = 0
168
+ values = find_values( line )
133
169
  end
134
- inside_line = true
135
170
  end
136
171
 
172
+ end # each lines
137
173
 
138
- ### guard escaped commas (e.g. \,)
139
- line = line.gsub( '\,', '♣' ) # use black club suit/=shamrock char for escaped separator
140
-
141
- ## use generic separator (allow us to configure separator)
142
- line = line.gsub( ',', '›')
143
-
144
- ## restore escaped commas (before split)
145
- line = line.gsub( '♣', ',' )
174
+ # do NOT forget to yield last line (if present/processed)
175
+ if values.length > 0
176
+ attribs, more_values = find_key_n_title( values )
177
+ attribs = attribs.merge( @more_attribs ) # e.g. merge country_id and other defaults if present
178
+ yield( attribs, more_values )
179
+ end
146
180
 
147
- logger.debug "line: »#{line}«"
181
+ end # method each_line
148
182
 
149
- values = line.split( '›' )
150
-
151
- # pass 1) remove leading and trailing whitespace for values
152
183
 
153
- values = values.map { |value| value.strip }
184
+ ### todo:
185
+ ## move to helper for reuse a la find_key_n_title ???
186
+ ## use different/better name ?? e.g. find_values_in_line or split_line_into_values ??
187
+ def find_values( line )
188
+ ## note returns an array of values (strings)
154
189
 
155
- ##### todo remove support of comment column? (NB: must NOT include commas)
156
- # pass 2) remove comment columns
157
-
158
- values = values.select do |value|
159
- if value =~ /^#/ ## start with # treat it as a comment column; e.g. remove it
160
- logger.debug " removing column with value »#{value}«"
161
- false
162
- else
163
- true
164
- end
165
- end
190
+ meta_comma = '«KOMMA»'
191
+ meta_separator = »'
166
192
 
167
- logger.debug " values: »#{values.join('« »')}«"
193
+ # guard escaped commas
194
+ # e.g. convert \, to «KOMMA»
195
+ line = line.gsub( '\,', meta_comma )
168
196
 
169
- attribs, more_values = find_key_n_title( values )
197
+ # note: use generic separator (allow us to configure separator)
198
+ # e.g « »
199
+ line = line.gsub( ',', meta_separator )
170
200
 
171
- attribs = attribs.merge( @more_attribs ) # e.g. merge country_id and other defaults if present
201
+ # restore escaped commas (before split)
202
+ line = line.gsub( meta_comma, ',' )
172
203
 
173
- end # each lines
204
+ logger.debug "line: |»#{line}«|"
174
205
 
175
- # do NOT forget to yield last line (if present/processed)
176
- if inside_line
177
- yield( attribs, more_values )
178
- end
206
+ values = line.split( meta_separator )
179
207
 
208
+ # pass 1) remove leading and trailing whitespace for values
180
209
 
181
- end # method each_line
210
+ values = values.map { |value| value.strip }
211
+
212
+
213
+ ##### todo/fix:
214
+ # !!!REMOVE!!!
215
+ # remove support of comment column? (NB: must NOT include commas)
216
+ # pass 2) remove comment columns
217
+ #
218
+ # todo/fix: check if still possible ?? - add an example here how it looks like/works
219
+
220
+ values = values.select do |value|
221
+ if value =~ /^#/ ## start with # treat it as a comment column; e.g. remove it
222
+ logger.info " removing column with value »#{value}«"
223
+ false
224
+ else
225
+ true
226
+ end
227
+ end
228
+
229
+ logger.debug " values: |»#{values.join('« »')}«|"
230
+ values
231
+ end
182
232
 
183
233
 
184
234
  end # class ValuesReader
@@ -1,6 +1,7 @@
1
1
 
2
2
  module TextUtils
3
3
 
4
- VERSION = '0.7.1'
4
+ VERSION = '0.8.0'
5
5
 
6
6
  end # module TextUtils
7
+
@@ -8,6 +8,7 @@
8
8
 
9
9
  require 'helper'
10
10
 
11
+
11
12
  class TestTitleHelper < MiniTest::Unit::TestCase
12
13
 
13
14
  def test_title_to_key
@@ -15,7 +16,9 @@ class TestTitleHelper < MiniTest::Unit::TestCase
15
16
  txt_io = [
16
17
  [ 'São Paulo', 'saopaulo' ],
17
18
  [ 'São Gonçalo', 'saogoncalo' ],
18
- [ 'Výčepní', 'vycepni' ]
19
+ [ 'Výčepní', 'vycepni' ],
20
+ [ 'Bock ‹Damm›', 'bockdamm' ],
21
+ [ '‹Estrella› ‹Damm› Inedit', 'estrelladamminedit' ]
19
22
  ]
20
23
 
21
24
  txt_io.each do |txt|
@@ -0,0 +1,311 @@
1
+ # encoding: utf-8
2
+
3
+ ###
4
+ # to run use
5
+ # ruby -I ./lib -I ./test test/test_helper.rb
6
+ # or better
7
+ # rake test
8
+
9
+ require 'helper'
10
+
11
+ class TestValuesReader < MiniTest::Unit::TestCase
12
+
13
+ def test_escape_comma
14
+ # note: double-escaped comma, e.g. \\, becomes literal \,
15
+ txt =<<EOS
16
+ [fuller]
17
+ Fuller\\, Smith & Turner, 1845
18
+ The Griffin Brewery // Chiswick Lane South // London, W4 2QB
19
+ brands: Fuller's
20
+
21
+ fuller, Fuller\\, Smith & Turner, 1845, The Griffin Brewery // Chiswick Lane South // London\\, W4 2QB
22
+ EOS
23
+
24
+ pp txt
25
+
26
+ reader = ValuesReader.new( txt )
27
+
28
+ i = 0
29
+ reader.each_line do |attribs, values|
30
+ i += 1
31
+
32
+ puts "attribs:"
33
+ pp attribs
34
+ puts "values:"
35
+ pp values
36
+
37
+ if i == 1
38
+ assert_equal attribs[:key], 'fuller'
39
+ assert_equal attribs[:title], 'Fuller, Smith & Turner'
40
+ assert_equal attribs[:grade], nil
41
+ assert_equal attribs[:synonyms], nil
42
+
43
+ assert_equal values[0], '1845'
44
+ assert_equal values[1], 'The Griffin Brewery // Chiswick Lane South // London, W4 2QB'
45
+ assert_equal values[2], "brands: Fuller's"
46
+ elsif i == 2
47
+ assert_equal attribs[:key], 'fuller'
48
+ assert_equal attribs[:title], 'Fuller, Smith & Turner'
49
+ assert_equal attribs[:grade], nil
50
+ assert_equal attribs[:synonyms], nil
51
+
52
+ assert_equal values[0], '1845'
53
+ assert_equal values[1], 'The Griffin Brewery // Chiswick Lane South // London, W4 2QB'
54
+ else
55
+ assert_equal true, false # should not get here
56
+ end
57
+ end
58
+ end # test_escape_comma
59
+
60
+
61
+ def test_mixed
62
+ txt =<<EOS
63
+ ##########
64
+ # Wien Umbgebung
65
+
66
+ [schwechat]
67
+ Brauerei Schwechat (Brau Union) **, 1796
68
+ www.schwechater.at
69
+ 2320 Schwechat // Mautner Markhof-Straße 11
70
+ brands: Schwechater
71
+ brau_union # Part of Brau Union
72
+
73
+
74
+ #############
75
+ # Waldviertel
76
+
77
+ zwettler, Zwettler Brauerei|Privatbrauerei Zwettl **, 1709, www.zwettler.at, 3910 Zwettl // Syrnauer Straße 22-25
78
+ weitra, Weitra Bräu Bierwerkstatt|Brauerei Weitra *, 1321, www.bierwerkstatt.at, 3970 Weitra // Sparkasseplatz 160, zwettler # Part of Zwettler
79
+
80
+ #############
81
+ # Weinviertel
82
+
83
+ [hubertus]
84
+ Hubertus Bräu *, 1454
85
+ www.hubertus.at
86
+ 2136 Laa/Thaya // Hubertusgasse 1
87
+ brands: Hubertus
88
+
89
+ egger, Privatbrauerei Fritz Egger **, 1978, www.egger-bier.at, 3105 Unterradlberg // Tiroler Straße 18
90
+ EOS
91
+
92
+ reader = ValuesReader.new( txt )
93
+
94
+ i = 0
95
+ reader.each_line do |attribs, values|
96
+ i += 1
97
+
98
+ puts "attribs:"
99
+ pp attribs
100
+ puts "values:"
101
+ pp values
102
+
103
+ if i == 1
104
+ assert_equal attribs[:key], 'schwechat'
105
+ assert_equal attribs[:title], 'Brauerei Schwechat (Brau Union)'
106
+ assert_equal attribs[:grade], 2
107
+ assert_equal attribs[:synonyms], nil
108
+
109
+ assert_equal values[0], '1796'
110
+ assert_equal values[1], 'www.schwechater.at'
111
+ assert_equal values[2], '2320 Schwechat // Mautner Markhof-Straße 11'
112
+ assert_equal values[3], 'brands: Schwechater'
113
+ assert_equal values[-1], 'brau_union'
114
+ elsif i == 2
115
+ assert_equal attribs[:key], 'zwettler'
116
+ assert_equal attribs[:title], 'Zwettler Brauerei'
117
+ assert_equal attribs[:grade], 2
118
+ assert_equal attribs[:synonyms], 'Privatbrauerei Zwettl'
119
+
120
+ assert_equal values[0], '1709'
121
+ assert_equal values[1], 'www.zwettler.at'
122
+ assert_equal values[2], '3910 Zwettl // Syrnauer Straße 22-25'
123
+ elsif i == 3
124
+ assert_equal attribs[:key], 'weitra'
125
+ assert_equal attribs[:title], 'Weitra Bräu Bierwerkstatt'
126
+ assert_equal attribs[:grade], 3
127
+ assert_equal attribs[:synonyms], 'Brauerei Weitra'
128
+
129
+ assert_equal values[0], '1321'
130
+ assert_equal values[1], 'www.bierwerkstatt.at'
131
+ assert_equal values[2], '3970 Weitra // Sparkasseplatz 160'
132
+ assert_equal values[-1], 'zwettler'
133
+ elsif i == 4
134
+ assert_equal attribs[:key], 'hubertus'
135
+ assert_equal attribs[:title], 'Hubertus Bräu'
136
+ assert_equal attribs[:grade], 3
137
+ assert_equal attribs[:synonyms], nil
138
+
139
+ assert_equal values[0], '1454'
140
+ assert_equal values[1], 'www.hubertus.at'
141
+ assert_equal values[2], '2136 Laa/Thaya // Hubertusgasse 1'
142
+ assert_equal values[3], 'brands: Hubertus'
143
+ elsif i == 5
144
+ assert_equal attribs[:key], 'egger'
145
+ assert_equal attribs[:title], 'Privatbrauerei Fritz Egger'
146
+ assert_equal attribs[:grade], 2
147
+ assert_equal attribs[:synonyms], nil
148
+
149
+ assert_equal values[0], '1978'
150
+ assert_equal values[1], 'www.egger-bier.at'
151
+ assert_equal values[2], '3105 Unterradlberg // Tiroler Straße 18'
152
+ else
153
+ assert_equal true, false # should not get here
154
+ end
155
+ end
156
+ end # test_mixed
157
+
158
+
159
+ def test_multi_line_records
160
+ txt =<<EOS
161
+ ##########
162
+ # Wien Umbgebung
163
+
164
+ [schwechat]
165
+ Brauerei Schwechat (Brau Union) **, 1796
166
+ www.schwechater.at
167
+ 2320 Schwechat // Mautner Markhof-Straße 11
168
+ brands: Schwechater
169
+ brau_union # Part of Brau Union
170
+
171
+
172
+ #############
173
+ # Waldviertel
174
+
175
+ [zwettler]
176
+ Zwettler Brauerei|Privatbrauerei Zwettl **, 1709
177
+ www.zwettler.at
178
+ 3910 Zwettl // Syrnauer Straße 22-25
179
+ brands: Zwettler
180
+ EOS
181
+
182
+ reader = ValuesReader.new( txt )
183
+
184
+ i = 0
185
+ reader.each_line do |attribs, values|
186
+ i += 1
187
+
188
+ puts "attribs:"
189
+ pp attribs
190
+ puts "values:"
191
+ pp values
192
+
193
+ if i == 1
194
+ assert_equal attribs[:key], 'schwechat'
195
+ assert_equal attribs[:title], 'Brauerei Schwechat (Brau Union)'
196
+ assert_equal attribs[:grade], 2
197
+ assert_equal attribs[:synonyms], nil
198
+
199
+ assert_equal values[0], '1796'
200
+ assert_equal values[1], 'www.schwechater.at'
201
+ assert_equal values[2], '2320 Schwechat // Mautner Markhof-Straße 11'
202
+ assert_equal values[3], 'brands: Schwechater'
203
+ assert_equal values[-1], 'brau_union'
204
+ elsif i == 2
205
+ assert_equal attribs[:key], 'zwettler'
206
+ assert_equal attribs[:title], 'Zwettler Brauerei'
207
+ assert_equal attribs[:grade], 2
208
+ assert_equal attribs[:synonyms], 'Privatbrauerei Zwettl'
209
+
210
+ assert_equal values[0], '1709'
211
+ assert_equal values[1], 'www.zwettler.at'
212
+ assert_equal values[2], '3910 Zwettl // Syrnauer Straße 22-25'
213
+ assert_equal values[3], 'brands: Zwettler'
214
+ else
215
+ assert_equal true, false # should not get here
216
+ end
217
+ end
218
+ end # test_multi_line_records
219
+
220
+
221
+ def test_classic_csv_records
222
+
223
+ txt =<<EOS
224
+ arsenal, Arsenal|Arsenal FC|FC Arsenal, ARS, city:london
225
+ manunited, Manchester United|Man Utd|Manchester U., MUN, city:manchester
226
+ liverpool, Liverpool|Liverpool FC|FC Liverpool, LIV, city:liverpool
227
+ EOS
228
+
229
+ reader = ValuesReader.new( txt )
230
+
231
+ i = 0
232
+ reader.each_line do |attribs, values|
233
+ i += 1
234
+
235
+ puts "attribs:"
236
+ pp attribs
237
+ puts "values:"
238
+ pp values
239
+
240
+ if i == 1
241
+ assert_equal attribs[:key], 'arsenal'
242
+ assert_equal attribs[:title], 'Arsenal'
243
+ assert_equal attribs[:synonyms], 'Arsenal FC|FC Arsenal'
244
+
245
+ assert_equal values[0], 'ARS'
246
+ assert_equal values[1], 'city:london'
247
+ elsif i == 2
248
+ elsif i == 3
249
+ else
250
+ assert_equal true, false # should not get here
251
+ end
252
+ end
253
+ end # test_classic_csv_records
254
+
255
+
256
+ def test_autogen_keys
257
+ txt =<<EOS
258
+ Ottakringer Helles, 5.2 %, 11.8°
259
+ Ottakringer Gold Fassl Spezial, 5.6 %, 12.7°
260
+ Ottakringer (Gold Fassl) Pils, 4.6 %, 11.2°
261
+ Ottakringer (Gold Fassl) Pur {Bio}, 5.2 %, 11.8°, bio
262
+ EOS
263
+
264
+ reader = ValuesReader.new( txt )
265
+
266
+ i = 0
267
+ reader.each_line do |attribs, values|
268
+ i += 1
269
+
270
+ puts "attribs:"
271
+ pp attribs
272
+ puts "values:"
273
+ pp values
274
+
275
+ if i == 1
276
+ assert_equal attribs[:key], 'ottakringerhelles'
277
+ assert_equal attribs[:title], 'Ottakringer Helles'
278
+ assert_equal attribs[:synonyms], nil
279
+
280
+ assert_equal values[0], '5.2 %'
281
+ assert_equal values[1], '11.8°'
282
+ elsif i == 2
283
+ assert_equal attribs[:key], 'ottakringergoldfasslspezial'
284
+ assert_equal attribs[:title], 'Ottakringer Gold Fassl Spezial'
285
+ assert_equal attribs[:synonyms], nil
286
+
287
+ assert_equal values[0], '5.6 %'
288
+ assert_equal values[1], '12.7°'
289
+ elsif i == 3
290
+ assert_equal attribs[:key], 'ottakringerpils'
291
+ assert_equal attribs[:title], 'Ottakringer (Gold Fassl) Pils'
292
+ assert_equal attribs[:synonyms], nil
293
+
294
+ assert_equal values[0], '4.6 %'
295
+ assert_equal values[1], '11.2°'
296
+ elsif i == 4
297
+ assert_equal attribs[:key], 'ottakringerpur'
298
+ assert_equal attribs[:title], 'Ottakringer (Gold Fassl) Pur {Bio}'
299
+ assert_equal attribs[:synonyms], nil
300
+
301
+ assert_equal values[0], '5.2 %'
302
+ assert_equal values[1], '11.8°'
303
+ assert_equal values[-1], 'bio'
304
+ else
305
+ assert_equal true, false # should not get here
306
+ end
307
+ end
308
+ end # test_autogen_keys
309
+
310
+
311
+ end # class TestValuesReader
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: textutils
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.7.1
4
+ version: 0.8.0
5
5
  prerelease:
6
6
  platform: ruby
7
7
  authors:
@@ -9,11 +9,11 @@ authors:
9
9
  autorequire:
10
10
  bindir: bin
11
11
  cert_chain: []
12
- date: 2013-12-25 00:00:00.000000000 Z
12
+ date: 2014-01-17 00:00:00.000000000 Z
13
13
  dependencies:
14
14
  - !ruby/object:Gem::Dependency
15
15
  name: logutils
16
- requirement: &21849324 !ruby/object:Gem::Requirement
16
+ requirement: &3086976 !ruby/object:Gem::Requirement
17
17
  none: false
18
18
  requirements:
19
19
  - - ~>
@@ -21,10 +21,10 @@ dependencies:
21
21
  version: '0.5'
22
22
  type: :runtime
23
23
  prerelease: false
24
- version_requirements: *21849324
24
+ version_requirements: *3086976
25
25
  - !ruby/object:Gem::Dependency
26
26
  name: rdoc
27
- requirement: &21848640 !ruby/object:Gem::Requirement
27
+ requirement: &3085764 !ruby/object:Gem::Requirement
28
28
  none: false
29
29
  requirements:
30
30
  - - ~>
@@ -32,10 +32,10 @@ dependencies:
32
32
  version: '4.0'
33
33
  type: :development
34
34
  prerelease: false
35
- version_requirements: *21848640
35
+ version_requirements: *3085764
36
36
  - !ruby/object:Gem::Dependency
37
37
  name: hoe
38
- requirement: &21848040 !ruby/object:Gem::Requirement
38
+ requirement: &3082920 !ruby/object:Gem::Requirement
39
39
  none: false
40
40
  requirements:
41
41
  - - ~>
@@ -43,17 +43,19 @@ dependencies:
43
43
  version: '3.7'
44
44
  type: :development
45
45
  prerelease: false
46
- version_requirements: *21848040
46
+ version_requirements: *3082920
47
47
  description: textutils - Text Filters, Helpers, Readers and More
48
48
  email: ruby-talk@ruby-lang.org
49
49
  executables: []
50
50
  extensions: []
51
51
  extra_rdoc_files:
52
+ - History.md
52
53
  - Manifest.txt
54
+ - README.md
53
55
  files:
54
- - History.markdown
56
+ - History.md
55
57
  - Manifest.txt
56
- - README.markdown
58
+ - README.md
57
59
  - Rakefile
58
60
  - lib/textutils.rb
59
61
  - lib/textutils/classifier.rb
@@ -82,6 +84,7 @@ files:
82
84
  - test/test_hypertext_helper.rb
83
85
  - test/test_title_helper.rb
84
86
  - test/test_unicode_helper.rb
87
+ - test/test_values_reader.rb
85
88
  - .gemtest
86
89
  homepage: https://github.com/rubylibs/textutils
87
90
  licenses:
@@ -89,7 +92,7 @@ licenses:
89
92
  post_install_message:
90
93
  rdoc_options:
91
94
  - --main
92
- - README.markdown
95
+ - README.md
93
96
  require_paths:
94
97
  - lib
95
98
  required_ruby_version: !ruby/object:Gem::Requirement
@@ -114,3 +117,4 @@ test_files:
114
117
  - test/test_hypertext_helper.rb
115
118
  - test/test_title_helper.rb
116
119
  - test/test_unicode_helper.rb
120
+ - test/test_values_reader.rb