textutils 0.7.1 → 0.8.0
Sign up to get free protection for your applications and to get access to all the features.
- data/{History.markdown → History.md} +0 -0
- data/Manifest.txt +3 -2
- data/{README.markdown → README.md} +4 -1
- data/Rakefile +2 -2
- data/lib/textutils/helper/title_helper.rb +12 -3
- data/lib/textutils/helper/value_helper.rb +11 -7
- data/lib/textutils/reader/values_reader.rb +108 -58
- data/lib/textutils/version.rb +2 -1
- data/test/test_title_helper.rb +4 -1
- data/test/test_values_reader.rb +311 -0
- metadata +15 -11
File without changes
|
data/Manifest.txt
CHANGED
@@ -1,6 +1,6 @@
|
|
1
|
-
History.
|
1
|
+
History.md
|
2
2
|
Manifest.txt
|
3
|
-
README.
|
3
|
+
README.md
|
4
4
|
Rakefile
|
5
5
|
lib/textutils.rb
|
6
6
|
lib/textutils/classifier.rb
|
@@ -29,3 +29,4 @@ test/helper.rb
|
|
29
29
|
test/test_hypertext_helper.rb
|
30
30
|
test/test_title_helper.rb
|
31
31
|
test/test_unicode_helper.rb
|
32
|
+
test/test_values_reader.rb
|
@@ -1,4 +1,6 @@
|
|
1
|
-
# `textutils`
|
1
|
+
# `textutils`
|
2
|
+
|
3
|
+
Text Filters, Helpers, Readers and More in Ruby
|
2
4
|
|
3
5
|
* home :: [github.com/rubylibs/textutils](https://github.com/rubylibs/textutils)
|
4
6
|
* bugs :: [github.com/rubylibs/textutils/issues](https://github.com/rubylibs/textutils/issues)
|
@@ -7,6 +9,7 @@
|
|
7
9
|
* forum :: [ruby-talk@ruby-lang.org](https://www.ruby-lang.org/en/community/mailing-lists/)
|
8
10
|
|
9
11
|
|
12
|
+
|
10
13
|
## Filters
|
11
14
|
|
12
15
|
### `comments_percent_style` Filter
|
data/Rakefile
CHANGED
@@ -14,8 +14,8 @@ Hoe.spec 'textutils' do
|
|
14
14
|
self.email = 'ruby-talk@ruby-lang.org'
|
15
15
|
|
16
16
|
# switch extension to .markdown for github formatting
|
17
|
-
self.readme_file = 'README.
|
18
|
-
self.history_file = 'History.
|
17
|
+
self.readme_file = 'README.md'
|
18
|
+
self.history_file = 'History.md'
|
19
19
|
|
20
20
|
self.extra_deps = [
|
21
21
|
['logutils', '~> 0.5'] # e.g. >= 0.5 <= 1.0
|
@@ -10,13 +10,20 @@ module TextUtils
|
|
10
10
|
# lets us use "classic" web helpers a la rails
|
11
11
|
# find a good name for sub module - Reader? Fixtures? Values? Parser?
|
12
12
|
|
13
|
+
def strip_part_markers( title ) # use different name e.g. strip_name_markers/strip_name_enclosure etc.??
|
14
|
+
# remove optional part markers
|
15
|
+
# e.g. Bock ‹Damm› becomes => Bock Damm
|
16
|
+
# ‹Estrella› ‹Damm› Inedit becomes => Estrella Damm Inedit
|
13
17
|
|
18
|
+
# todo: also allow regular <> for easy typing/input ??? why? why not? used for anything else already?
|
19
|
+
title.gsub( /[‹›]/, '' )
|
20
|
+
end
|
14
21
|
|
15
22
|
def strip_translations( title )
|
16
23
|
# remove optional english translation in square brackets ([])
|
17
24
|
# e.g. Wien [Vienna] => Wien
|
18
25
|
|
19
|
-
title.gsub( /\[
|
26
|
+
title.gsub( /\[[^\]]+\]/, '' )
|
20
27
|
end
|
21
28
|
|
22
29
|
def strip_subtitles( title )
|
@@ -24,7 +31,7 @@ module TextUtils
|
|
24
31
|
# e.g. Las Palmas (de Gran Canaria) => Las Palmas
|
25
32
|
# Palma (de Mallorca) => Palma
|
26
33
|
|
27
|
-
title.gsub( /\(
|
34
|
+
title.gsub( /\([^\)]+\)/, '' )
|
28
35
|
end
|
29
36
|
|
30
37
|
def strip_tags( title ) # todo: use an alias or rename for better name ??
|
@@ -34,7 +41,7 @@ module TextUtils
|
|
34
41
|
#
|
35
42
|
# todo: use for autotags? e.g. {Bio} => bio
|
36
43
|
|
37
|
-
title.gsub( /\{
|
44
|
+
title.gsub( /\{[^\}]+\}/, '' )
|
38
45
|
end
|
39
46
|
|
40
47
|
def strip_whitespaces( title )
|
@@ -54,6 +61,8 @@ module TextUtils
|
|
54
61
|
## NB: downcase does NOT work for accented chars (thus, include in alternatives)
|
55
62
|
key = title.downcase
|
56
63
|
|
64
|
+
key = strip_part_markers( key ) # e.g. ‹Estrella› ‹Damm› Inedit becomes => Estrella Damm Inedit
|
65
|
+
|
57
66
|
key = strip_translations( key )
|
58
67
|
|
59
68
|
key = strip_subtitles( key )
|
@@ -12,7 +12,7 @@ module TextUtils
|
|
12
12
|
def match_country( value )
|
13
13
|
if value =~ /^country:/ # country:
|
14
14
|
country_key = value[8..-1] # cut off country: prefix
|
15
|
-
country = WorldDb::
|
15
|
+
country = WorldDb::Model::Country.find_by_key!( country_key )
|
16
16
|
yield( country )
|
17
17
|
true # bingo - match found
|
18
18
|
else
|
@@ -23,7 +23,7 @@ module TextUtils
|
|
23
23
|
def match_supra( value )
|
24
24
|
if value =~ /^supra:/ # supra:
|
25
25
|
country_key = value[6..-1] # cut off supra: prefix
|
26
|
-
country = WorldDb::
|
26
|
+
country = WorldDb::Model::Country.find_by_key!( country_key )
|
27
27
|
yield( country )
|
28
28
|
true # bingo - match found
|
29
29
|
else
|
@@ -52,11 +52,11 @@ module TextUtils
|
|
52
52
|
def match_region_for_country( value, country_id ) ## NB: required country_id
|
53
53
|
if value =~ /^region:/ ## region:
|
54
54
|
region_key = value[7..-1] ## cut off region: prefix
|
55
|
-
region = WorldDb::
|
55
|
+
region = WorldDb::Model::Region.find_by_key_and_country_id!( region_key, country_id )
|
56
56
|
yield( region )
|
57
57
|
true # bingo - match found
|
58
58
|
elsif is_region?( value ) ## assume region code e.g. TX or N
|
59
|
-
region = WorldDb::
|
59
|
+
region = WorldDb::Model::Region.find_by_key_and_country_id!( value.downcase, country_id )
|
60
60
|
yield( region )
|
61
61
|
true # bingo - match found
|
62
62
|
else
|
@@ -68,7 +68,7 @@ module TextUtils
|
|
68
68
|
def match_city( value ) # NB: might be nil (city not found)
|
69
69
|
if value =~ /^city:/ ## city:
|
70
70
|
city_key = value[5..-1] ## cut off city: prefix
|
71
|
-
city = WorldDb::
|
71
|
+
city = WorldDb::Model::City.find_by_key( city_key )
|
72
72
|
yield( city ) # NB: might be nil (city not found)
|
73
73
|
true # bingo - match found
|
74
74
|
else
|
@@ -80,7 +80,7 @@ module TextUtils
|
|
80
80
|
def match_metro( value )
|
81
81
|
if value =~ /^metro:/ ## metro:
|
82
82
|
city_key = value[6..-1] ## cut off metro: prefix
|
83
|
-
city = WorldDb::
|
83
|
+
city = WorldDb::Model::City.find_by_key!( city_key ) # NB: parent city/metro required, that is, lookup w/ !
|
84
84
|
yield( city )
|
85
85
|
true # bingo - match found
|
86
86
|
else
|
@@ -113,7 +113,7 @@ module TextUtils
|
|
113
113
|
def match_brewery( value )
|
114
114
|
if value =~ /^by:/ ## by: -brewed by/brewery
|
115
115
|
brewery_key = value[3..-1] ## cut off by: prefix
|
116
|
-
brewery = BeerDb::
|
116
|
+
brewery = BeerDb::Model::Brewery.find_by_key!( brewery_key )
|
117
117
|
yield( brewery )
|
118
118
|
true # bingo - match found
|
119
119
|
else
|
@@ -288,6 +288,10 @@ module TextUtils
|
|
288
288
|
attribs[:grade] = grade
|
289
289
|
end
|
290
290
|
|
291
|
+
## fix/todo: add find parts ??
|
292
|
+
# e.g. ‹Estrella› ‹Damm› Inedit
|
293
|
+
# becomes => title: 'Estrella Damm Inedit' and parts: ['Estrella','Damm']
|
294
|
+
|
291
295
|
## title (split of optional synonyms)
|
292
296
|
# e.g. FC Bayern Muenchen|Bayern Muenchen|Bayern
|
293
297
|
titles = title_col.split('|')
|
@@ -58,11 +58,20 @@ class ValuesReader
|
|
58
58
|
|
59
59
|
|
60
60
|
def initialize( path, more_attribs={} )
|
61
|
-
@path = path
|
62
|
-
|
63
61
|
@more_attribs = more_attribs
|
64
62
|
|
65
|
-
|
63
|
+
### workaround/hack
|
64
|
+
# if path includes newline assume it's a string buffer not a file name
|
65
|
+
# fix: use from_file and from_string etc. for ctor
|
66
|
+
# check what is the best convention (follow ???)
|
67
|
+
|
68
|
+
if path =~ /\n/m
|
69
|
+
@path = 'stringio' # what name to use ???
|
70
|
+
@data = path.dup # make a duplicate ?? why? why not?
|
71
|
+
else
|
72
|
+
@path = path
|
73
|
+
@data = File.read_utf8( @path )
|
74
|
+
end
|
66
75
|
end
|
67
76
|
|
68
77
|
|
@@ -76,13 +85,12 @@ class ValuesReader
|
|
76
85
|
|
77
86
|
def each_line # support multi line records
|
78
87
|
|
79
|
-
|
80
|
-
|
81
|
-
|
82
|
-
|
88
|
+
inside_record = false
|
89
|
+
blank_counter = 0 # count of number of blank lines (note: 1+ blank lines clear multi-line record)
|
90
|
+
values = []
|
83
91
|
|
84
92
|
@data.each_line do |line|
|
85
|
-
|
93
|
+
|
86
94
|
## allow alternative comment lines
|
87
95
|
## e.g. -- comment or
|
88
96
|
## % comment
|
@@ -91,7 +99,10 @@ class ValuesReader
|
|
91
99
|
## NB: for now alternative comment lines not allowed as end of line style e.g
|
92
100
|
## some data, more data -- comment here
|
93
101
|
|
94
|
-
if line =~ /^\s*#/
|
102
|
+
if line =~ /^\s*#/ ||
|
103
|
+
line =~ /^\s*--/ ||
|
104
|
+
line =~ /^\s*%/ ||
|
105
|
+
line =~ /^\s*__/
|
95
106
|
# skip komments and do NOT copy to result (keep comments secret!)
|
96
107
|
logger.debug 'skipping comment line'
|
97
108
|
next
|
@@ -99,7 +110,8 @@ class ValuesReader
|
|
99
110
|
|
100
111
|
if line =~ /^\s*$/
|
101
112
|
# skip comment or blank line
|
102
|
-
|
113
|
+
blank_counter += 1
|
114
|
+
logger.debug "skipping blank line (#{blank_counter})"
|
103
115
|
next
|
104
116
|
end
|
105
117
|
|
@@ -114,71 +126,109 @@ class ValuesReader
|
|
114
126
|
line = line.strip
|
115
127
|
|
116
128
|
|
129
|
+
if line =~ /^-\s/ # check for group headers e.g. - St. James Brewery
|
130
|
+
logger.info " skip group header #{line} for now (fix/add soon)"
|
131
|
+
next
|
132
|
+
elsif line =~ /^\[([a-z][a-z]+)\]/
|
117
133
|
### check for multiline record
|
118
|
-
## must start with key
|
119
|
-
|
120
|
-
|
121
|
-
|
122
|
-
|
123
|
-
|
124
|
-
more_values.unshift( line.dup ) # add value upfront to array (first value); lets us keep (optional) tags as last entry; fix!! see valuereaderEx v2
|
125
|
-
next
|
126
|
-
else
|
127
|
-
# NB: new record clears/ends multi-line record
|
128
|
-
|
129
|
-
if inside_line # check if we already processed a line? if yes; yield last line
|
134
|
+
## must start with key e.g. [guiness]
|
135
|
+
## for now only supports key with letter a-z (no digits/numbers or underscore or dots)
|
136
|
+
|
137
|
+
if values.length > 0 # check if we already processed a record? if yes; yield last record (before reset)
|
138
|
+
attribs, more_values = find_key_n_title( values )
|
139
|
+
attribs = attribs.merge( @more_attribs ) # e.g. merge country_id and other defaults if present
|
130
140
|
yield( attribs, more_values )
|
131
|
-
|
132
|
-
|
141
|
+
values = []
|
142
|
+
end
|
143
|
+
|
144
|
+
inside_record = true
|
145
|
+
blank_counter = 0
|
146
|
+
|
147
|
+
# NB: every additional line is one value e.g. city:wien, etc.
|
148
|
+
# allows you to use any chars
|
149
|
+
logger.debug " multi-line record w/ key »#{$1}«"
|
150
|
+
|
151
|
+
values = [$1.dup] # add key as first value in ary
|
152
|
+
elsif inside_record && blank_counter == 0 && line =~ /\/{2}/ # check address line (must contain //)
|
153
|
+
values += [line.dup] # assume single value column (no need to escape commas)
|
154
|
+
elsif inside_record && blank_counter == 0 && line =~ /^[a-z][a-z0-9.]*[a-z0-9]:/ # check key: value pair
|
155
|
+
values += [line.dup] # assume single value column (no need to escape commas)
|
156
|
+
else
|
157
|
+
if inside_record && blank_counter == 0 # continue adding more values
|
158
|
+
values += find_values( line )
|
159
|
+
else # assume single-line (stand-alone / classic csv) record
|
160
|
+
if values.length > 0
|
161
|
+
attribs, more_values = find_key_n_title( values )
|
162
|
+
attribs = attribs.merge( @more_attribs ) # e.g. merge country_id and other defaults if present
|
163
|
+
yield( attribs, more_values )
|
164
|
+
values = []
|
165
|
+
end
|
166
|
+
inside_record = false
|
167
|
+
blank_counter = 0
|
168
|
+
values = find_values( line )
|
133
169
|
end
|
134
|
-
inside_line = true
|
135
170
|
end
|
136
171
|
|
172
|
+
end # each lines
|
137
173
|
|
138
|
-
|
139
|
-
|
140
|
-
|
141
|
-
|
142
|
-
|
143
|
-
|
144
|
-
## restore escaped commas (before split)
|
145
|
-
line = line.gsub( '♣', ',' )
|
174
|
+
# do NOT forget to yield last line (if present/processed)
|
175
|
+
if values.length > 0
|
176
|
+
attribs, more_values = find_key_n_title( values )
|
177
|
+
attribs = attribs.merge( @more_attribs ) # e.g. merge country_id and other defaults if present
|
178
|
+
yield( attribs, more_values )
|
179
|
+
end
|
146
180
|
|
147
|
-
|
181
|
+
end # method each_line
|
148
182
|
|
149
|
-
values = line.split( '›' )
|
150
|
-
|
151
|
-
# pass 1) remove leading and trailing whitespace for values
|
152
183
|
|
153
|
-
|
184
|
+
### todo:
|
185
|
+
## move to helper for reuse a la find_key_n_title ???
|
186
|
+
## use different/better name ?? e.g. find_values_in_line or split_line_into_values ??
|
187
|
+
def find_values( line )
|
188
|
+
## note returns an array of values (strings)
|
154
189
|
|
155
|
-
|
156
|
-
|
157
|
-
|
158
|
-
values = values.select do |value|
|
159
|
-
if value =~ /^#/ ## start with # treat it as a comment column; e.g. remove it
|
160
|
-
logger.debug " removing column with value »#{value}«"
|
161
|
-
false
|
162
|
-
else
|
163
|
-
true
|
164
|
-
end
|
165
|
-
end
|
190
|
+
meta_comma = '«KOMMA»'
|
191
|
+
meta_separator = '« »'
|
166
192
|
|
167
|
-
|
193
|
+
# guard escaped commas
|
194
|
+
# e.g. convert \, to «KOMMA»
|
195
|
+
line = line.gsub( '\,', meta_comma )
|
168
196
|
|
169
|
-
|
197
|
+
# note: use generic separator (allow us to configure separator)
|
198
|
+
# e.g « »
|
199
|
+
line = line.gsub( ',', meta_separator )
|
170
200
|
|
171
|
-
|
201
|
+
# restore escaped commas (before split)
|
202
|
+
line = line.gsub( meta_comma, ',' )
|
172
203
|
|
173
|
-
|
204
|
+
logger.debug "line: |»#{line}«|"
|
174
205
|
|
175
|
-
|
176
|
-
if inside_line
|
177
|
-
yield( attribs, more_values )
|
178
|
-
end
|
206
|
+
values = line.split( meta_separator )
|
179
207
|
|
208
|
+
# pass 1) remove leading and trailing whitespace for values
|
180
209
|
|
181
|
-
|
210
|
+
values = values.map { |value| value.strip }
|
211
|
+
|
212
|
+
|
213
|
+
##### todo/fix:
|
214
|
+
# !!!REMOVE!!!
|
215
|
+
# remove support of comment column? (NB: must NOT include commas)
|
216
|
+
# pass 2) remove comment columns
|
217
|
+
#
|
218
|
+
# todo/fix: check if still possible ?? - add an example here how it looks like/works
|
219
|
+
|
220
|
+
values = values.select do |value|
|
221
|
+
if value =~ /^#/ ## start with # treat it as a comment column; e.g. remove it
|
222
|
+
logger.info " removing column with value »#{value}«"
|
223
|
+
false
|
224
|
+
else
|
225
|
+
true
|
226
|
+
end
|
227
|
+
end
|
228
|
+
|
229
|
+
logger.debug " values: |»#{values.join('« »')}«|"
|
230
|
+
values
|
231
|
+
end
|
182
232
|
|
183
233
|
|
184
234
|
end # class ValuesReader
|
data/lib/textutils/version.rb
CHANGED
data/test/test_title_helper.rb
CHANGED
@@ -8,6 +8,7 @@
|
|
8
8
|
|
9
9
|
require 'helper'
|
10
10
|
|
11
|
+
|
11
12
|
class TestTitleHelper < MiniTest::Unit::TestCase
|
12
13
|
|
13
14
|
def test_title_to_key
|
@@ -15,7 +16,9 @@ class TestTitleHelper < MiniTest::Unit::TestCase
|
|
15
16
|
txt_io = [
|
16
17
|
[ 'São Paulo', 'saopaulo' ],
|
17
18
|
[ 'São Gonçalo', 'saogoncalo' ],
|
18
|
-
[ 'Výčepní', 'vycepni' ]
|
19
|
+
[ 'Výčepní', 'vycepni' ],
|
20
|
+
[ 'Bock ‹Damm›', 'bockdamm' ],
|
21
|
+
[ '‹Estrella› ‹Damm› Inedit', 'estrelladamminedit' ]
|
19
22
|
]
|
20
23
|
|
21
24
|
txt_io.each do |txt|
|
@@ -0,0 +1,311 @@
|
|
1
|
+
# encoding: utf-8
|
2
|
+
|
3
|
+
###
|
4
|
+
# to run use
|
5
|
+
# ruby -I ./lib -I ./test test/test_values_reader.rb
|
6
|
+
# or better
|
7
|
+
# rake test
|
8
|
+
|
9
|
+
require 'helper'
|
10
|
+
|
11
|
+
class TestValuesReader < MiniTest::Unit::TestCase
|
12
|
+
|
13
|
+
def test_escape_comma
|
14
|
+
# note: double escape comma e.g. \\, becomes literal \,
|
15
|
+
txt =<<EOS
|
16
|
+
[fuller]
|
17
|
+
Fuller\\, Smith & Turner, 1845
|
18
|
+
The Griffin Brewery // Chiswick Lane South // London, W4 2QB
|
19
|
+
brands: Fuller's
|
20
|
+
|
21
|
+
fuller, Fuller\\, Smith & Turner, 1845, The Griffin Brewery // Chiswick Lane South // London\\, W4 2QB
|
22
|
+
EOS
|
23
|
+
|
24
|
+
pp txt
|
25
|
+
|
26
|
+
reader = ValuesReader.new( txt )
|
27
|
+
|
28
|
+
i = 0
|
29
|
+
reader.each_line do |attribs, values|
|
30
|
+
i += 1
|
31
|
+
|
32
|
+
puts "attribs:"
|
33
|
+
pp attribs
|
34
|
+
puts "values:"
|
35
|
+
pp values
|
36
|
+
|
37
|
+
if i == 1
|
38
|
+
assert_equal attribs[:key], 'fuller'
|
39
|
+
assert_equal attribs[:title], 'Fuller, Smith & Turner'
|
40
|
+
assert_equal attribs[:grade], nil
|
41
|
+
assert_equal attribs[:synonyms], nil
|
42
|
+
|
43
|
+
assert_equal values[0], '1845'
|
44
|
+
assert_equal values[1], 'The Griffin Brewery // Chiswick Lane South // London, W4 2QB'
|
45
|
+
assert_equal values[2], "brands: Fuller's"
|
46
|
+
elsif i == 2
|
47
|
+
assert_equal attribs[:key], 'fuller'
|
48
|
+
assert_equal attribs[:title], 'Fuller, Smith & Turner'
|
49
|
+
assert_equal attribs[:grade], nil
|
50
|
+
assert_equal attribs[:synonyms], nil
|
51
|
+
|
52
|
+
assert_equal values[0], '1845'
|
53
|
+
assert_equal values[1], 'The Griffin Brewery // Chiswick Lane South // London, W4 2QB'
|
54
|
+
else
|
55
|
+
assert_equal true, false # should not get here
|
56
|
+
end
|
57
|
+
end
|
58
|
+
end # test_escape_comma
|
59
|
+
|
60
|
+
|
61
|
+
def test_mixed
|
62
|
+
txt =<<EOS
|
63
|
+
##########
|
64
|
+
# Wien Umbgebung
|
65
|
+
|
66
|
+
[schwechat]
|
67
|
+
Brauerei Schwechat (Brau Union) **, 1796
|
68
|
+
www.schwechater.at
|
69
|
+
2320 Schwechat // Mautner Markhof-Straße 11
|
70
|
+
brands: Schwechater
|
71
|
+
brau_union # Part of Brau Union
|
72
|
+
|
73
|
+
|
74
|
+
#############
|
75
|
+
# Waldviertel
|
76
|
+
|
77
|
+
zwettler, Zwettler Brauerei|Privatbrauerei Zwettl **, 1709, www.zwettler.at, 3910 Zwettl // Syrnauer Straße 22-25
|
78
|
+
weitra, Weitra Bräu Bierwerkstatt|Brauerei Weitra *, 1321, www.bierwerkstatt.at, 3970 Weitra // Sparkasseplatz 160, zwettler # Part of Zwettler
|
79
|
+
|
80
|
+
#############
|
81
|
+
# Weinviertel
|
82
|
+
|
83
|
+
[hubertus]
|
84
|
+
Hubertus Bräu *, 1454
|
85
|
+
www.hubertus.at
|
86
|
+
2136 Laa/Thaya // Hubertusgasse 1
|
87
|
+
brands: Hubertus
|
88
|
+
|
89
|
+
egger, Privatbrauerei Fritz Egger **, 1978, www.egger-bier.at, 3105 Unterradlberg // Tiroler Straße 18
|
90
|
+
EOS
|
91
|
+
|
92
|
+
reader = ValuesReader.new( txt )
|
93
|
+
|
94
|
+
i = 0
|
95
|
+
reader.each_line do |attribs, values|
|
96
|
+
i += 1
|
97
|
+
|
98
|
+
puts "attribs:"
|
99
|
+
pp attribs
|
100
|
+
puts "values:"
|
101
|
+
pp values
|
102
|
+
|
103
|
+
if i == 1
|
104
|
+
assert_equal attribs[:key], 'schwechat'
|
105
|
+
assert_equal attribs[:title], 'Brauerei Schwechat (Brau Union)'
|
106
|
+
assert_equal attribs[:grade], 2
|
107
|
+
assert_equal attribs[:synonyms], nil
|
108
|
+
|
109
|
+
assert_equal values[0], '1796'
|
110
|
+
assert_equal values[1], 'www.schwechater.at'
|
111
|
+
assert_equal values[2], '2320 Schwechat // Mautner Markhof-Straße 11'
|
112
|
+
assert_equal values[3], 'brands: Schwechater'
|
113
|
+
assert_equal values[-1], 'brau_union'
|
114
|
+
elsif i == 2
|
115
|
+
assert_equal attribs[:key], 'zwettler'
|
116
|
+
assert_equal attribs[:title], 'Zwettler Brauerei'
|
117
|
+
assert_equal attribs[:grade], 2
|
118
|
+
assert_equal attribs[:synonyms], 'Privatbrauerei Zwettl'
|
119
|
+
|
120
|
+
assert_equal values[0], '1709'
|
121
|
+
assert_equal values[1], 'www.zwettler.at'
|
122
|
+
assert_equal values[2], '3910 Zwettl // Syrnauer Straße 22-25'
|
123
|
+
elsif i == 3
|
124
|
+
assert_equal attribs[:key], 'weitra'
|
125
|
+
assert_equal attribs[:title], 'Weitra Bräu Bierwerkstatt'
|
126
|
+
assert_equal attribs[:grade], 3
|
127
|
+
assert_equal attribs[:synonyms], 'Brauerei Weitra'
|
128
|
+
|
129
|
+
assert_equal values[0], '1321'
|
130
|
+
assert_equal values[1], 'www.bierwerkstatt.at'
|
131
|
+
assert_equal values[2], '3970 Weitra // Sparkasseplatz 160'
|
132
|
+
assert_equal values[-1], 'zwettler'
|
133
|
+
elsif i == 4
|
134
|
+
assert_equal attribs[:key], 'hubertus'
|
135
|
+
assert_equal attribs[:title], 'Hubertus Bräu'
|
136
|
+
assert_equal attribs[:grade], 3
|
137
|
+
assert_equal attribs[:synonyms], nil
|
138
|
+
|
139
|
+
assert_equal values[0], '1454'
|
140
|
+
assert_equal values[1], 'www.hubertus.at'
|
141
|
+
assert_equal values[2], '2136 Laa/Thaya // Hubertusgasse 1'
|
142
|
+
assert_equal values[3], 'brands: Hubertus'
|
143
|
+
elsif i == 5
|
144
|
+
assert_equal attribs[:key], 'egger'
|
145
|
+
assert_equal attribs[:title], 'Privatbrauerei Fritz Egger'
|
146
|
+
assert_equal attribs[:grade], 2
|
147
|
+
assert_equal attribs[:synonyms], nil
|
148
|
+
|
149
|
+
assert_equal values[0], '1978'
|
150
|
+
assert_equal values[1], 'www.egger-bier.at'
|
151
|
+
assert_equal values[2], '3105 Unterradlberg // Tiroler Straße 18'
|
152
|
+
else
|
153
|
+
assert_equal true, false # should not get here
|
154
|
+
end
|
155
|
+
end
|
156
|
+
end # test_mixed
|
157
|
+
|
158
|
+
|
159
|
+
def test_multi_line_records
|
160
|
+
txt =<<EOS
|
161
|
+
##########
|
162
|
+
# Wien Umbgebung
|
163
|
+
|
164
|
+
[schwechat]
|
165
|
+
Brauerei Schwechat (Brau Union) **, 1796
|
166
|
+
www.schwechater.at
|
167
|
+
2320 Schwechat // Mautner Markhof-Straße 11
|
168
|
+
brands: Schwechater
|
169
|
+
brau_union # Part of Brau Union
|
170
|
+
|
171
|
+
|
172
|
+
#############
|
173
|
+
# Waldviertel
|
174
|
+
|
175
|
+
[zwettler]
|
176
|
+
Zwettler Brauerei|Privatbrauerei Zwettl **, 1709
|
177
|
+
www.zwettler.at
|
178
|
+
3910 Zwettl // Syrnauer Straße 22-25
|
179
|
+
brands: Zwettler
|
180
|
+
EOS
|
181
|
+
|
182
|
+
reader = ValuesReader.new( txt )
|
183
|
+
|
184
|
+
i = 0
|
185
|
+
reader.each_line do |attribs, values|
|
186
|
+
i += 1
|
187
|
+
|
188
|
+
puts "attribs:"
|
189
|
+
pp attribs
|
190
|
+
puts "values:"
|
191
|
+
pp values
|
192
|
+
|
193
|
+
if i == 1
|
194
|
+
assert_equal attribs[:key], 'schwechat'
|
195
|
+
assert_equal attribs[:title], 'Brauerei Schwechat (Brau Union)'
|
196
|
+
assert_equal attribs[:grade], 2
|
197
|
+
assert_equal attribs[:synonyms], nil
|
198
|
+
|
199
|
+
assert_equal values[0], '1796'
|
200
|
+
assert_equal values[1], 'www.schwechater.at'
|
201
|
+
assert_equal values[2], '2320 Schwechat // Mautner Markhof-Straße 11'
|
202
|
+
assert_equal values[3], 'brands: Schwechater'
|
203
|
+
assert_equal values[-1], 'brau_union'
|
204
|
+
elsif i == 2
|
205
|
+
assert_equal attribs[:key], 'zwettler'
|
206
|
+
assert_equal attribs[:title], 'Zwettler Brauerei'
|
207
|
+
assert_equal attribs[:grade], 2
|
208
|
+
assert_equal attribs[:synonyms], 'Privatbrauerei Zwettl'
|
209
|
+
|
210
|
+
assert_equal values[0], '1709'
|
211
|
+
assert_equal values[1], 'www.zwettler.at'
|
212
|
+
assert_equal values[2], '3910 Zwettl // Syrnauer Straße 22-25'
|
213
|
+
assert_equal values[3], 'brands: Zwettler'
|
214
|
+
else
|
215
|
+
assert_equal true, false # should not get here
|
216
|
+
end
|
217
|
+
end
|
218
|
+
end # test_multi_line_records
|
219
|
+
|
220
|
+
|
221
|
+
def test_classic_csv_records
|
222
|
+
|
223
|
+
txt =<<EOS
|
224
|
+
arsenal, Arsenal|Arsenal FC|FC Arsenal, ARS, city:london
|
225
|
+
manunited, Manchester United|Man Utd|Manchester U., MUN, city:manchester
|
226
|
+
liverpool, Liverpool|Liverpool FC|FC Liverpool, LIV, city:liverpool
|
227
|
+
EOS
|
228
|
+
|
229
|
+
reader = ValuesReader.new( txt )
|
230
|
+
|
231
|
+
i = 0
|
232
|
+
reader.each_line do |attribs, values|
|
233
|
+
i += 1
|
234
|
+
|
235
|
+
puts "attribs:"
|
236
|
+
pp attribs
|
237
|
+
puts "values:"
|
238
|
+
pp values
|
239
|
+
|
240
|
+
if i == 1
|
241
|
+
assert_equal attribs[:key], 'arsenal'
|
242
|
+
assert_equal attribs[:title], 'Arsenal'
|
243
|
+
assert_equal attribs[:synonyms], 'Arsenal FC|FC Arsenal'
|
244
|
+
|
245
|
+
assert_equal values[0], 'ARS'
|
246
|
+
assert_equal values[1], 'city:london'
|
247
|
+
elsif i == 2
|
248
|
+
elsif i == 3
|
249
|
+
else
|
250
|
+
assert_equal true, false # should not get here
|
251
|
+
end
|
252
|
+
end
|
253
|
+
end # test_classic_csv_records
|
254
|
+
|
255
|
+
|
256
|
+
def test_autogen_keys
|
257
|
+
txt =<<EOS
|
258
|
+
Ottakringer Helles, 5.2 %, 11.8°
|
259
|
+
Ottakringer Gold Fassl Spezial, 5.6 %, 12.7°
|
260
|
+
Ottakringer (Gold Fassl) Pils, 4.6 %, 11.2°
|
261
|
+
Ottakringer (Gold Fassl) Pur {Bio}, 5.2 %, 11.8°, bio
|
262
|
+
EOS
|
263
|
+
|
264
|
+
reader = ValuesReader.new( txt )
|
265
|
+
|
266
|
+
i = 0
|
267
|
+
reader.each_line do |attribs, values|
|
268
|
+
i += 1
|
269
|
+
|
270
|
+
puts "attribs:"
|
271
|
+
pp attribs
|
272
|
+
puts "values:"
|
273
|
+
pp values
|
274
|
+
|
275
|
+
if i == 1
|
276
|
+
assert_equal attribs[:key], 'ottakringerhelles'
|
277
|
+
assert_equal attribs[:title], 'Ottakringer Helles'
|
278
|
+
assert_equal attribs[:synonyms], nil
|
279
|
+
|
280
|
+
assert_equal values[0], '5.2 %'
|
281
|
+
assert_equal values[1], '11.8°'
|
282
|
+
elsif i == 2
|
283
|
+
assert_equal attribs[:key], 'ottakringergoldfasslspezial'
|
284
|
+
assert_equal attribs[:title], 'Ottakringer Gold Fassl Spezial'
|
285
|
+
assert_equal attribs[:synonyms], nil
|
286
|
+
|
287
|
+
assert_equal values[0], '5.6 %'
|
288
|
+
assert_equal values[1], '12.7°'
|
289
|
+
elsif i == 3
|
290
|
+
assert_equal attribs[:key], 'ottakringerpils'
|
291
|
+
assert_equal attribs[:title], 'Ottakringer (Gold Fassl) Pils'
|
292
|
+
assert_equal attribs[:synonyms], nil
|
293
|
+
|
294
|
+
assert_equal values[0], '4.6 %'
|
295
|
+
assert_equal values[1], '11.2°'
|
296
|
+
elsif i == 4
|
297
|
+
assert_equal attribs[:key], 'ottakringerpur'
|
298
|
+
assert_equal attribs[:title], 'Ottakringer (Gold Fassl) Pur {Bio}'
|
299
|
+
assert_equal attribs[:synonyms], nil
|
300
|
+
|
301
|
+
assert_equal values[0], '5.2 %'
|
302
|
+
assert_equal values[1], '11.8°'
|
303
|
+
assert_equal values[-1], 'bio'
|
304
|
+
else
|
305
|
+
assert_equal true, false # should not get here
|
306
|
+
end
|
307
|
+
end
|
308
|
+
end # test_autogen_keys
|
309
|
+
|
310
|
+
|
311
|
+
end # class TestValuesReader
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: textutils
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.
|
4
|
+
version: 0.8.0
|
5
5
|
prerelease:
|
6
6
|
platform: ruby
|
7
7
|
authors:
|
@@ -9,11 +9,11 @@ authors:
|
|
9
9
|
autorequire:
|
10
10
|
bindir: bin
|
11
11
|
cert_chain: []
|
12
|
-
date:
|
12
|
+
date: 2014-01-17 00:00:00.000000000 Z
|
13
13
|
dependencies:
|
14
14
|
- !ruby/object:Gem::Dependency
|
15
15
|
name: logutils
|
16
|
-
requirement: &
|
16
|
+
requirement: &3086976 !ruby/object:Gem::Requirement
|
17
17
|
none: false
|
18
18
|
requirements:
|
19
19
|
- - ~>
|
@@ -21,10 +21,10 @@ dependencies:
|
|
21
21
|
version: '0.5'
|
22
22
|
type: :runtime
|
23
23
|
prerelease: false
|
24
|
-
version_requirements: *
|
24
|
+
version_requirements: *3086976
|
25
25
|
- !ruby/object:Gem::Dependency
|
26
26
|
name: rdoc
|
27
|
-
requirement: &
|
27
|
+
requirement: &3085764 !ruby/object:Gem::Requirement
|
28
28
|
none: false
|
29
29
|
requirements:
|
30
30
|
- - ~>
|
@@ -32,10 +32,10 @@ dependencies:
|
|
32
32
|
version: '4.0'
|
33
33
|
type: :development
|
34
34
|
prerelease: false
|
35
|
-
version_requirements: *
|
35
|
+
version_requirements: *3085764
|
36
36
|
- !ruby/object:Gem::Dependency
|
37
37
|
name: hoe
|
38
|
-
requirement: &
|
38
|
+
requirement: &3082920 !ruby/object:Gem::Requirement
|
39
39
|
none: false
|
40
40
|
requirements:
|
41
41
|
- - ~>
|
@@ -43,17 +43,19 @@ dependencies:
|
|
43
43
|
version: '3.7'
|
44
44
|
type: :development
|
45
45
|
prerelease: false
|
46
|
-
version_requirements: *
|
46
|
+
version_requirements: *3082920
|
47
47
|
description: textutils - Text Filters, Helpers, Readers and More
|
48
48
|
email: ruby-talk@ruby-lang.org
|
49
49
|
executables: []
|
50
50
|
extensions: []
|
51
51
|
extra_rdoc_files:
|
52
|
+
- History.md
|
52
53
|
- Manifest.txt
|
54
|
+
- README.md
|
53
55
|
files:
|
54
|
-
- History.
|
56
|
+
- History.md
|
55
57
|
- Manifest.txt
|
56
|
-
- README.
|
58
|
+
- README.md
|
57
59
|
- Rakefile
|
58
60
|
- lib/textutils.rb
|
59
61
|
- lib/textutils/classifier.rb
|
@@ -82,6 +84,7 @@ files:
|
|
82
84
|
- test/test_hypertext_helper.rb
|
83
85
|
- test/test_title_helper.rb
|
84
86
|
- test/test_unicode_helper.rb
|
87
|
+
- test/test_values_reader.rb
|
85
88
|
- .gemtest
|
86
89
|
homepage: https://github.com/rubylibs/textutils
|
87
90
|
licenses:
|
@@ -89,7 +92,7 @@ licenses:
|
|
89
92
|
post_install_message:
|
90
93
|
rdoc_options:
|
91
94
|
- --main
|
92
|
-
- README.
|
95
|
+
- README.md
|
93
96
|
require_paths:
|
94
97
|
- lib
|
95
98
|
required_ruby_version: !ruby/object:Gem::Requirement
|
@@ -114,3 +117,4 @@ test_files:
|
|
114
117
|
- test/test_hypertext_helper.rb
|
115
118
|
- test/test_title_helper.rb
|
116
119
|
- test/test_unicode_helper.rb
|
120
|
+
- test/test_values_reader.rb
|