textutils 0.7.1 → 0.8.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/{History.markdown → History.md} +0 -0
- data/Manifest.txt +3 -2
- data/{README.markdown → README.md} +4 -1
- data/Rakefile +2 -2
- data/lib/textutils/helper/title_helper.rb +12 -3
- data/lib/textutils/helper/value_helper.rb +11 -7
- data/lib/textutils/reader/values_reader.rb +108 -58
- data/lib/textutils/version.rb +2 -1
- data/test/test_title_helper.rb +4 -1
- data/test/test_values_reader.rb +311 -0
- metadata +15 -11
File without changes
|
data/Manifest.txt
CHANGED
@@ -1,6 +1,6 @@
|
|
1
|
-
History.
|
1
|
+
History.md
|
2
2
|
Manifest.txt
|
3
|
-
README.
|
3
|
+
README.md
|
4
4
|
Rakefile
|
5
5
|
lib/textutils.rb
|
6
6
|
lib/textutils/classifier.rb
|
@@ -29,3 +29,4 @@ test/helper.rb
|
|
29
29
|
test/test_hypertext_helper.rb
|
30
30
|
test/test_title_helper.rb
|
31
31
|
test/test_unicode_helper.rb
|
32
|
+
test/test_values_reader.rb
|
@@ -1,4 +1,6 @@
|
|
1
|
-
# `textutils`
|
1
|
+
# `textutils`
|
2
|
+
|
3
|
+
Text Filters, Helpers, Readers and More in Ruby
|
2
4
|
|
3
5
|
* home :: [github.com/rubylibs/textutils](https://github.com/rubylibs/textutils)
|
4
6
|
* bugs :: [github.com/rubylibs/textutils/issues](https://github.com/rubylibs/textutils/issues)
|
@@ -7,6 +9,7 @@
|
|
7
9
|
* forum :: [ruby-talk@ruby-lang.org](www.ruby-lang.org/en/community/mailing-lists/)
|
8
10
|
|
9
11
|
|
12
|
+
|
10
13
|
## Filters
|
11
14
|
|
12
15
|
### `comments_percent_style` Filter
|
data/Rakefile
CHANGED
@@ -14,8 +14,8 @@ Hoe.spec 'textutils' do
|
|
14
14
|
self.email = 'ruby-talk@ruby-lang.org'
|
15
15
|
|
16
16
|
# switch extension to .markdown for github formatting
|
17
|
-
self.readme_file = 'README.
|
18
|
-
self.history_file = 'History.
|
17
|
+
self.readme_file = 'README.md'
|
18
|
+
self.history_file = 'History.md'
|
19
19
|
|
20
20
|
self.extra_deps = [
|
21
21
|
['logutils', '~> 0.5'] # e.g. >= 0.5 <= 1.0
|
@@ -10,13 +10,20 @@ module TextUtils
|
|
10
10
|
# lets us use "classic" web helpers a la rails
|
11
11
|
# find a good name for sub module - Reader? Fixtures? Values? Parser?
|
12
12
|
|
13
|
+
def strip_part_markers( title ) # use different name e.g. strip_name_markers/strip_name_enclosure etc.??
|
14
|
+
# remove optional part markers
|
15
|
+
# e.g. Bock ‹Damm› becomes => Bock Damm
|
16
|
+
# ‹Estrella› ‹Damm› Inedit becomes => Estrella Damm Inedit
|
13
17
|
|
18
|
+
# todo: also allow regular <> for easy typing/input ??? why? why not? used for anything else already?
|
19
|
+
title.gsub( /[‹›]/, '' )
|
20
|
+
end
|
14
21
|
|
15
22
|
def strip_translations( title )
|
16
23
|
# remove optional english translation in square brackets ([])
|
17
24
|
# e.g. Wien [Vienna] => Wien
|
18
25
|
|
19
|
-
title.gsub( /\[
|
26
|
+
title.gsub( /\[[^\]]+\]/, '' )
|
20
27
|
end
|
21
28
|
|
22
29
|
def strip_subtitles( title )
|
@@ -24,7 +31,7 @@ module TextUtils
|
|
24
31
|
# e.g. Las Palmas (de Gran Canaria) => Las Palmas
|
25
32
|
# Palma (de Mallorca) => Palma
|
26
33
|
|
27
|
-
title.gsub( /\(
|
34
|
+
title.gsub( /\([^\)]+\)/, '' )
|
28
35
|
end
|
29
36
|
|
30
37
|
def strip_tags( title ) # todo: use an alias or rename for better name ??
|
@@ -34,7 +41,7 @@ module TextUtils
|
|
34
41
|
#
|
35
42
|
# todo: use for autotags? e.g. {Bio} => bio
|
36
43
|
|
37
|
-
title.gsub( /\{
|
44
|
+
title.gsub( /\{[^\}]+\}/, '' )
|
38
45
|
end
|
39
46
|
|
40
47
|
def strip_whitespaces( title )
|
@@ -54,6 +61,8 @@ module TextUtils
|
|
54
61
|
## NB: downcase does NOT work for accented chars (thus, include in alternatives)
|
55
62
|
key = title.downcase
|
56
63
|
|
64
|
+
key = strip_part_markers( key ) # e.g. ‹Estrella› ‹Damm› Inedit becomes => Estrella Damm Inedit
|
65
|
+
|
57
66
|
key = strip_translations( key )
|
58
67
|
|
59
68
|
key = strip_subtitles( key )
|
@@ -12,7 +12,7 @@ module TextUtils
|
|
12
12
|
def match_country( value )
|
13
13
|
if value =~ /^country:/ # country:
|
14
14
|
country_key = value[8..-1] # cut off country: prefix
|
15
|
-
country = WorldDb::
|
15
|
+
country = WorldDb::Model::Country.find_by_key!( country_key )
|
16
16
|
yield( country )
|
17
17
|
true # bingo - match found
|
18
18
|
else
|
@@ -23,7 +23,7 @@ module TextUtils
|
|
23
23
|
def match_supra( value )
|
24
24
|
if value =~ /^supra:/ # supra:
|
25
25
|
country_key = value[6..-1] # cut off supra: prefix
|
26
|
-
country = WorldDb::
|
26
|
+
country = WorldDb::Model::Country.find_by_key!( country_key )
|
27
27
|
yield( country )
|
28
28
|
true # bingo - match found
|
29
29
|
else
|
@@ -52,11 +52,11 @@ module TextUtils
|
|
52
52
|
def match_region_for_country( value, country_id ) ## NB: required country_id
|
53
53
|
if value =~ /^region:/ ## region:
|
54
54
|
region_key = value[7..-1] ## cut off region: prefix
|
55
|
-
region = WorldDb::
|
55
|
+
region = WorldDb::Model::Region.find_by_key_and_country_id!( region_key, country_id )
|
56
56
|
yield( region )
|
57
57
|
true # bingo - match found
|
58
58
|
elsif is_region?( value ) ## assume region code e.g. TX or N
|
59
|
-
region = WorldDb::
|
59
|
+
region = WorldDb::Model::Region.find_by_key_and_country_id!( value.downcase, country_id )
|
60
60
|
yield( region )
|
61
61
|
true # bingo - match found
|
62
62
|
else
|
@@ -68,7 +68,7 @@ module TextUtils
|
|
68
68
|
def match_city( value ) # NB: might be nil (city not found)
|
69
69
|
if value =~ /^city:/ ## city:
|
70
70
|
city_key = value[5..-1] ## cut off city: prefix
|
71
|
-
city = WorldDb::
|
71
|
+
city = WorldDb::Model::City.find_by_key( city_key )
|
72
72
|
yield( city ) # NB: might be nil (city not found)
|
73
73
|
true # bingo - match found
|
74
74
|
else
|
@@ -80,7 +80,7 @@ module TextUtils
|
|
80
80
|
def match_metro( value )
|
81
81
|
if value =~ /^metro:/ ## metro:
|
82
82
|
city_key = value[6..-1] ## cut off metro: prefix
|
83
|
-
city = WorldDb::
|
83
|
+
city = WorldDb::Model::City.find_by_key!( city_key ) # NB: parent city/metro required, that is, lookup w/ !
|
84
84
|
yield( city )
|
85
85
|
true # bingo - match found
|
86
86
|
else
|
@@ -113,7 +113,7 @@ module TextUtils
|
|
113
113
|
def match_brewery( value )
|
114
114
|
if value =~ /^by:/ ## by: -brewed by/brewery
|
115
115
|
brewery_key = value[3..-1] ## cut off by: prefix
|
116
|
-
brewery = BeerDb::
|
116
|
+
brewery = BeerDb::Model::Brewery.find_by_key!( brewery_key )
|
117
117
|
yield( brewery )
|
118
118
|
true # bingo - match found
|
119
119
|
else
|
@@ -288,6 +288,10 @@ module TextUtils
|
|
288
288
|
attribs[:grade] = grade
|
289
289
|
end
|
290
290
|
|
291
|
+
## fix/todo: add find parts ??
|
292
|
+
# e.g. ‹Estrella› ‹Damm› Inedit
|
293
|
+
# becomes => title: 'Estrella Damm Inedit' and parts: ['Estrella','Damm']
|
294
|
+
|
291
295
|
## title (split of optional synonyms)
|
292
296
|
# e.g. FC Bayern Muenchen|Bayern Muenchen|Bayern
|
293
297
|
titles = title_col.split('|')
|
@@ -58,11 +58,20 @@ class ValuesReader
|
|
58
58
|
|
59
59
|
|
60
60
|
def initialize( path, more_attribs={} )
|
61
|
-
@path = path
|
62
|
-
|
63
61
|
@more_attribs = more_attribs
|
64
62
|
|
65
|
-
|
63
|
+
### workaround/hack
|
64
|
+
# if path includes newline assume it's a string buffer not a file name
|
65
|
+
# fix: use from_file and from_string etc. for ctor
|
66
|
+
# check what is the best convention (follow ???)
|
67
|
+
|
68
|
+
if path =~ /\n/m
|
69
|
+
@path = 'stringio' # what name to use ???
|
70
|
+
@data = path.dup # make a duplicate ?? why? why not?
|
71
|
+
else
|
72
|
+
@path = path
|
73
|
+
@data = File.read_utf8( @path )
|
74
|
+
end
|
66
75
|
end
|
67
76
|
|
68
77
|
|
@@ -76,13 +85,12 @@ class ValuesReader
|
|
76
85
|
|
77
86
|
def each_line # support multi line records
|
78
87
|
|
79
|
-
|
80
|
-
|
81
|
-
|
82
|
-
|
88
|
+
inside_record = false
|
89
|
+
blank_counter = 0 # count of number of blank lines (note: 1+ blank lines clear multi-line record)
|
90
|
+
values = []
|
83
91
|
|
84
92
|
@data.each_line do |line|
|
85
|
-
|
93
|
+
|
86
94
|
## allow alternative comment lines
|
87
95
|
## e.g. -- comment or
|
88
96
|
## % comment
|
@@ -91,7 +99,10 @@ class ValuesReader
|
|
91
99
|
## NB: for now alternative comment lines not allowed as end of line style e.g
|
92
100
|
## some data, more data -- comment here
|
93
101
|
|
94
|
-
if line =~ /^\s*#/
|
102
|
+
if line =~ /^\s*#/ ||
|
103
|
+
line =~ /^\s*--/ ||
|
104
|
+
line =~ /^\s*%/ ||
|
105
|
+
line =~ /^\s*__/
|
95
106
|
# skip comments and do NOT copy to result (keep comments secret!)
|
96
107
|
logger.debug 'skipping comment line'
|
97
108
|
next
|
@@ -99,7 +110,8 @@ class ValuesReader
|
|
99
110
|
|
100
111
|
if line =~ /^\s*$/
|
101
112
|
# skip comment or blank line
|
102
|
-
|
113
|
+
blank_counter += 1
|
114
|
+
logger.debug "skipping blank line (#{blank_counter})"
|
103
115
|
next
|
104
116
|
end
|
105
117
|
|
@@ -114,71 +126,109 @@ class ValuesReader
|
|
114
126
|
line = line.strip
|
115
127
|
|
116
128
|
|
129
|
+
if line =~ /^-\s/ # check for group headers e.g. - St. James Brewery
|
130
|
+
logger.info " skip group header #{line} for now (fix/add soon)"
|
131
|
+
next
|
132
|
+
elsif line =~ /^\[([a-z][a-z]+)\]/
|
117
133
|
### check for multiline record
|
118
|
-
## must start with key
|
119
|
-
|
120
|
-
|
121
|
-
|
122
|
-
|
123
|
-
|
124
|
-
more_values.unshift( line.dup ) # add value upfront to array (first value); lets us keep (optional) tags as last entry; fix!! see valuereaderEx v2
|
125
|
-
next
|
126
|
-
else
|
127
|
-
# NB: new record clears/ends multi-line record
|
128
|
-
|
129
|
-
if inside_line # check if we already processed a line? if yes; yield last line
|
134
|
+
## must start with key e.g. [guiness]
|
135
|
+
## for now only supports key with letter a-z (no digits/numbers or underscore or dots)
|
136
|
+
|
137
|
+
if values.length > 0 # check if we already processed a record? if yes; yield last record (before reset)
|
138
|
+
attribs, more_values = find_key_n_title( values )
|
139
|
+
attribs = attribs.merge( @more_attribs ) # e.g. merge country_id and other defaults if present
|
130
140
|
yield( attribs, more_values )
|
131
|
-
|
132
|
-
|
141
|
+
values = []
|
142
|
+
end
|
143
|
+
|
144
|
+
inside_record = true
|
145
|
+
blank_counter = 0
|
146
|
+
|
147
|
+
# NB: every additional line is one value e.g. city:wien, etc.
|
148
|
+
# allows you to use any chars
|
149
|
+
logger.debug " multi-line record w/ key »#{$1}«"
|
150
|
+
|
151
|
+
values = [$1.dup] # add key as first value in ary
|
152
|
+
elsif inside_record && blank_counter == 0 && line =~ /\/{2}/ # check address line (must contain //)
|
153
|
+
values += [line.dup] # assume single value column (no need to escape commas)
|
154
|
+
elsif inside_record && blank_counter == 0 && line =~ /^[a-z][a-z0-9.]*[a-z0-9]:/ # check key: value pair
|
155
|
+
values += [line.dup] # assume single value column (no need to escape commas)
|
156
|
+
else
|
157
|
+
if inside_record && blank_counter == 0 # continue adding more values
|
158
|
+
values += find_values( line )
|
159
|
+
else # assume single-line (stand-alone / classic csv) record
|
160
|
+
if values.length > 0
|
161
|
+
attribs, more_values = find_key_n_title( values )
|
162
|
+
attribs = attribs.merge( @more_attribs ) # e.g. merge country_id and other defaults if present
|
163
|
+
yield( attribs, more_values )
|
164
|
+
values = []
|
165
|
+
end
|
166
|
+
inside_record = false
|
167
|
+
blank_counter = 0
|
168
|
+
values = find_values( line )
|
133
169
|
end
|
134
|
-
inside_line = true
|
135
170
|
end
|
136
171
|
|
172
|
+
end # each lines
|
137
173
|
|
138
|
-
|
139
|
-
|
140
|
-
|
141
|
-
|
142
|
-
|
143
|
-
|
144
|
-
## restore escaped commas (before split)
|
145
|
-
line = line.gsub( '♣', ',' )
|
174
|
+
# do NOT forget to yield last line (if present/processed)
|
175
|
+
if values.length > 0
|
176
|
+
attribs, more_values = find_key_n_title( values )
|
177
|
+
attribs = attribs.merge( @more_attribs ) # e.g. merge country_id and other defaults if present
|
178
|
+
yield( attribs, more_values )
|
179
|
+
end
|
146
180
|
|
147
|
-
|
181
|
+
end # method each_line
|
148
182
|
|
149
|
-
values = line.split( '›' )
|
150
|
-
|
151
|
-
# pass 1) remove leading and trailing whitespace for values
|
152
183
|
|
153
|
-
|
184
|
+
### todo:
|
185
|
+
## move to helper for reuse a la find_key_n_title ???
|
186
|
+
## use different/better name ?? e.g. find_values_in_line or split_line_into_values ??
|
187
|
+
def find_values( line )
|
188
|
+
## note returns an array of values (strings)
|
154
189
|
|
155
|
-
|
156
|
-
|
157
|
-
|
158
|
-
values = values.select do |value|
|
159
|
-
if value =~ /^#/ ## start with # treat it as a comment column; e.g. remove it
|
160
|
-
logger.debug " removing column with value »#{value}«"
|
161
|
-
false
|
162
|
-
else
|
163
|
-
true
|
164
|
-
end
|
165
|
-
end
|
190
|
+
meta_comma = '«KOMMA»'
|
191
|
+
meta_separator = '« »'
|
166
192
|
|
167
|
-
|
193
|
+
# guard escaped commas
|
194
|
+
# e.g. convert \, to «KOMMA»
|
195
|
+
line = line.gsub( '\,', meta_comma )
|
168
196
|
|
169
|
-
|
197
|
+
# note: use generic separator (allow us to configure separator)
|
198
|
+
# e.g « »
|
199
|
+
line = line.gsub( ',', meta_separator )
|
170
200
|
|
171
|
-
|
201
|
+
# restore escaped commas (before split)
|
202
|
+
line = line.gsub( meta_comma, ',' )
|
172
203
|
|
173
|
-
|
204
|
+
logger.debug "line: |»#{line}«|"
|
174
205
|
|
175
|
-
|
176
|
-
if inside_line
|
177
|
-
yield( attribs, more_values )
|
178
|
-
end
|
206
|
+
values = line.split( meta_separator )
|
179
207
|
|
208
|
+
# pass 1) remove leading and trailing whitespace for values
|
180
209
|
|
181
|
-
|
210
|
+
values = values.map { |value| value.strip }
|
211
|
+
|
212
|
+
|
213
|
+
##### todo/fix:
|
214
|
+
# !!!REMOVE!!!
|
215
|
+
# remove support of comment column? (NB: must NOT include commas)
|
216
|
+
# pass 2) remove comment columns
|
217
|
+
#
|
218
|
+
# todo/fix: check if still possible ?? - add an example here how it looks like/works
|
219
|
+
|
220
|
+
values = values.select do |value|
|
221
|
+
if value =~ /^#/ ## start with # treat it as a comment column; e.g. remove it
|
222
|
+
logger.info " removing column with value »#{value}«"
|
223
|
+
false
|
224
|
+
else
|
225
|
+
true
|
226
|
+
end
|
227
|
+
end
|
228
|
+
|
229
|
+
logger.debug " values: |»#{values.join('« »')}«|"
|
230
|
+
values
|
231
|
+
end
|
182
232
|
|
183
233
|
|
184
234
|
end # class ValuesReader
|
data/lib/textutils/version.rb
CHANGED
data/test/test_title_helper.rb
CHANGED
@@ -8,6 +8,7 @@
|
|
8
8
|
|
9
9
|
require 'helper'
|
10
10
|
|
11
|
+
|
11
12
|
class TestTitleHelper < MiniTest::Unit::TestCase
|
12
13
|
|
13
14
|
def test_title_to_key
|
@@ -15,7 +16,9 @@ class TestTitleHelper < MiniTest::Unit::TestCase
|
|
15
16
|
txt_io = [
|
16
17
|
[ 'São Paulo', 'saopaulo' ],
|
17
18
|
[ 'São Gonçalo', 'saogoncalo' ],
|
18
|
-
[ 'Výčepní', 'vycepni' ]
|
19
|
+
[ 'Výčepní', 'vycepni' ],
|
20
|
+
[ 'Bock ‹Damm›', 'bockdamm' ],
|
21
|
+
[ '‹Estrella› ‹Damm› Inedit', 'estrelladamminedit' ]
|
19
22
|
]
|
20
23
|
|
21
24
|
txt_io.each do |txt|
|
@@ -0,0 +1,311 @@
|
|
1
|
+
# encoding: utf-8
|
2
|
+
|
3
|
+
###
|
4
|
+
# to run use
|
5
|
+
# ruby -I ./lib -I ./test test/test_helper.rb
|
6
|
+
# or better
|
7
|
+
# rake test
|
8
|
+
|
9
|
+
require 'helper'
|
10
|
+
|
11
|
+
class TestValuesReader < MiniTest::Unit::TestCase
|
12
|
+
|
13
|
+
def test_escape_comma
|
14
|
+
# note: double-escaped comma e.g. \\, becomes literal \,
|
15
|
+
txt =<<EOS
|
16
|
+
[fuller]
|
17
|
+
Fuller\\, Smith & Turner, 1845
|
18
|
+
The Griffin Brewery // Chiswick Lane South // London, W4 2QB
|
19
|
+
brands: Fuller's
|
20
|
+
|
21
|
+
fuller, Fuller\\, Smith & Turner, 1845, The Griffin Brewery // Chiswick Lane South // London\\, W4 2QB
|
22
|
+
EOS
|
23
|
+
|
24
|
+
pp txt
|
25
|
+
|
26
|
+
reader = ValuesReader.new( txt )
|
27
|
+
|
28
|
+
i = 0
|
29
|
+
reader.each_line do |attribs, values|
|
30
|
+
i += 1
|
31
|
+
|
32
|
+
puts "attribs:"
|
33
|
+
pp attribs
|
34
|
+
puts "values:"
|
35
|
+
pp values
|
36
|
+
|
37
|
+
if i == 1
|
38
|
+
assert_equal attribs[:key], 'fuller'
|
39
|
+
assert_equal attribs[:title], 'Fuller, Smith & Turner'
|
40
|
+
assert_equal attribs[:grade], nil
|
41
|
+
assert_equal attribs[:synonyms], nil
|
42
|
+
|
43
|
+
assert_equal values[0], '1845'
|
44
|
+
assert_equal values[1], 'The Griffin Brewery // Chiswick Lane South // London, W4 2QB'
|
45
|
+
assert_equal values[2], "brands: Fuller's"
|
46
|
+
elsif i == 2
|
47
|
+
assert_equal attribs[:key], 'fuller'
|
48
|
+
assert_equal attribs[:title], 'Fuller, Smith & Turner'
|
49
|
+
assert_equal attribs[:grade], nil
|
50
|
+
assert_equal attribs[:synonyms], nil
|
51
|
+
|
52
|
+
assert_equal values[0], '1845'
|
53
|
+
assert_equal values[1], 'The Griffin Brewery // Chiswick Lane South // London, W4 2QB'
|
54
|
+
else
|
55
|
+
assert_equal true, false # should not get here
|
56
|
+
end
|
57
|
+
end
|
58
|
+
end # test_escape_comma
|
59
|
+
|
60
|
+
|
61
|
+
def test_mixed
|
62
|
+
txt =<<EOS
|
63
|
+
##########
|
64
|
+
# Wien Umbgebung
|
65
|
+
|
66
|
+
[schwechat]
|
67
|
+
Brauerei Schwechat (Brau Union) **, 1796
|
68
|
+
www.schwechater.at
|
69
|
+
2320 Schwechat // Mautner Markhof-Straße 11
|
70
|
+
brands: Schwechater
|
71
|
+
brau_union # Part of Brau Union
|
72
|
+
|
73
|
+
|
74
|
+
#############
|
75
|
+
# Waldviertel
|
76
|
+
|
77
|
+
zwettler, Zwettler Brauerei|Privatbrauerei Zwettl **, 1709, www.zwettler.at, 3910 Zwettl // Syrnauer Straße 22-25
|
78
|
+
weitra, Weitra Bräu Bierwerkstatt|Brauerei Weitra *, 1321, www.bierwerkstatt.at, 3970 Weitra // Sparkasseplatz 160, zwettler # Part of Zwettler
|
79
|
+
|
80
|
+
#############
|
81
|
+
# Weinviertel
|
82
|
+
|
83
|
+
[hubertus]
|
84
|
+
Hubertus Bräu *, 1454
|
85
|
+
www.hubertus.at
|
86
|
+
2136 Laa/Thaya // Hubertusgasse 1
|
87
|
+
brands: Hubertus
|
88
|
+
|
89
|
+
egger, Privatbrauerei Fritz Egger **, 1978, www.egger-bier.at, 3105 Unterradlberg // Tiroler Straße 18
|
90
|
+
EOS
|
91
|
+
|
92
|
+
reader = ValuesReader.new( txt )
|
93
|
+
|
94
|
+
i = 0
|
95
|
+
reader.each_line do |attribs, values|
|
96
|
+
i += 1
|
97
|
+
|
98
|
+
puts "attribs:"
|
99
|
+
pp attribs
|
100
|
+
puts "values:"
|
101
|
+
pp values
|
102
|
+
|
103
|
+
if i == 1
|
104
|
+
assert_equal attribs[:key], 'schwechat'
|
105
|
+
assert_equal attribs[:title], 'Brauerei Schwechat (Brau Union)'
|
106
|
+
assert_equal attribs[:grade], 2
|
107
|
+
assert_equal attribs[:synonyms], nil
|
108
|
+
|
109
|
+
assert_equal values[0], '1796'
|
110
|
+
assert_equal values[1], 'www.schwechater.at'
|
111
|
+
assert_equal values[2], '2320 Schwechat // Mautner Markhof-Straße 11'
|
112
|
+
assert_equal values[3], 'brands: Schwechater'
|
113
|
+
assert_equal values[-1], 'brau_union'
|
114
|
+
elsif i == 2
|
115
|
+
assert_equal attribs[:key], 'zwettler'
|
116
|
+
assert_equal attribs[:title], 'Zwettler Brauerei'
|
117
|
+
assert_equal attribs[:grade], 2
|
118
|
+
assert_equal attribs[:synonyms], 'Privatbrauerei Zwettl'
|
119
|
+
|
120
|
+
assert_equal values[0], '1709'
|
121
|
+
assert_equal values[1], 'www.zwettler.at'
|
122
|
+
assert_equal values[2], '3910 Zwettl // Syrnauer Straße 22-25'
|
123
|
+
elsif i == 3
|
124
|
+
assert_equal attribs[:key], 'weitra'
|
125
|
+
assert_equal attribs[:title], 'Weitra Bräu Bierwerkstatt'
|
126
|
+
assert_equal attribs[:grade], 3
|
127
|
+
assert_equal attribs[:synonyms], 'Brauerei Weitra'
|
128
|
+
|
129
|
+
assert_equal values[0], '1321'
|
130
|
+
assert_equal values[1], 'www.bierwerkstatt.at'
|
131
|
+
assert_equal values[2], '3970 Weitra // Sparkasseplatz 160'
|
132
|
+
assert_equal values[-1], 'zwettler'
|
133
|
+
elsif i == 4
|
134
|
+
assert_equal attribs[:key], 'hubertus'
|
135
|
+
assert_equal attribs[:title], 'Hubertus Bräu'
|
136
|
+
assert_equal attribs[:grade], 3
|
137
|
+
assert_equal attribs[:synonyms], nil
|
138
|
+
|
139
|
+
assert_equal values[0], '1454'
|
140
|
+
assert_equal values[1], 'www.hubertus.at'
|
141
|
+
assert_equal values[2], '2136 Laa/Thaya // Hubertusgasse 1'
|
142
|
+
assert_equal values[3], 'brands: Hubertus'
|
143
|
+
elsif i == 5
|
144
|
+
assert_equal attribs[:key], 'egger'
|
145
|
+
assert_equal attribs[:title], 'Privatbrauerei Fritz Egger'
|
146
|
+
assert_equal attribs[:grade], 2
|
147
|
+
assert_equal attribs[:synonyms], nil
|
148
|
+
|
149
|
+
assert_equal values[0], '1978'
|
150
|
+
assert_equal values[1], 'www.egger-bier.at'
|
151
|
+
assert_equal values[2], '3105 Unterradlberg // Tiroler Straße 18'
|
152
|
+
else
|
153
|
+
assert_equal true, false # should not get here
|
154
|
+
end
|
155
|
+
end
|
156
|
+
end # test_mixed
|
157
|
+
|
158
|
+
|
159
|
+
def test_multi_line_records
|
160
|
+
txt =<<EOS
|
161
|
+
##########
|
162
|
+
# Wien Umbgebung
|
163
|
+
|
164
|
+
[schwechat]
|
165
|
+
Brauerei Schwechat (Brau Union) **, 1796
|
166
|
+
www.schwechater.at
|
167
|
+
2320 Schwechat // Mautner Markhof-Straße 11
|
168
|
+
brands: Schwechater
|
169
|
+
brau_union # Part of Brau Union
|
170
|
+
|
171
|
+
|
172
|
+
#############
|
173
|
+
# Waldviertel
|
174
|
+
|
175
|
+
[zwettler]
|
176
|
+
Zwettler Brauerei|Privatbrauerei Zwettl **, 1709
|
177
|
+
www.zwettler.at
|
178
|
+
3910 Zwettl // Syrnauer Straße 22-25
|
179
|
+
brands: Zwettler
|
180
|
+
EOS
|
181
|
+
|
182
|
+
reader = ValuesReader.new( txt )
|
183
|
+
|
184
|
+
i = 0
|
185
|
+
reader.each_line do |attribs, values|
|
186
|
+
i += 1
|
187
|
+
|
188
|
+
puts "attribs:"
|
189
|
+
pp attribs
|
190
|
+
puts "values:"
|
191
|
+
pp values
|
192
|
+
|
193
|
+
if i == 1
|
194
|
+
assert_equal attribs[:key], 'schwechat'
|
195
|
+
assert_equal attribs[:title], 'Brauerei Schwechat (Brau Union)'
|
196
|
+
assert_equal attribs[:grade], 2
|
197
|
+
assert_equal attribs[:synonyms], nil
|
198
|
+
|
199
|
+
assert_equal values[0], '1796'
|
200
|
+
assert_equal values[1], 'www.schwechater.at'
|
201
|
+
assert_equal values[2], '2320 Schwechat // Mautner Markhof-Straße 11'
|
202
|
+
assert_equal values[3], 'brands: Schwechater'
|
203
|
+
assert_equal values[-1], 'brau_union'
|
204
|
+
elsif i == 2
|
205
|
+
assert_equal attribs[:key], 'zwettler'
|
206
|
+
assert_equal attribs[:title], 'Zwettler Brauerei'
|
207
|
+
assert_equal attribs[:grade], 2
|
208
|
+
assert_equal attribs[:synonyms], 'Privatbrauerei Zwettl'
|
209
|
+
|
210
|
+
assert_equal values[0], '1709'
|
211
|
+
assert_equal values[1], 'www.zwettler.at'
|
212
|
+
assert_equal values[2], '3910 Zwettl // Syrnauer Straße 22-25'
|
213
|
+
assert_equal values[3], 'brands: Zwettler'
|
214
|
+
else
|
215
|
+
assert_equal true, false # should not get here
|
216
|
+
end
|
217
|
+
end
|
218
|
+
end # test_multi_line_records
|
219
|
+
|
220
|
+
|
221
|
+
def test_classic_csv_records
|
222
|
+
|
223
|
+
txt =<<EOS
|
224
|
+
arsenal, Arsenal|Arsenal FC|FC Arsenal, ARS, city:london
|
225
|
+
manunited, Manchester United|Man Utd|Manchester U., MUN, city:manchester
|
226
|
+
liverpool, Liverpool|Liverpool FC|FC Liverpool, LIV, city:liverpool
|
227
|
+
EOS
|
228
|
+
|
229
|
+
reader = ValuesReader.new( txt )
|
230
|
+
|
231
|
+
i = 0
|
232
|
+
reader.each_line do |attribs, values|
|
233
|
+
i += 1
|
234
|
+
|
235
|
+
puts "attribs:"
|
236
|
+
pp attribs
|
237
|
+
puts "values:"
|
238
|
+
pp values
|
239
|
+
|
240
|
+
if i == 1
|
241
|
+
assert_equal attribs[:key], 'arsenal'
|
242
|
+
assert_equal attribs[:title], 'Arsenal'
|
243
|
+
assert_equal attribs[:synonyms], 'Arsenal FC|FC Arsenal'
|
244
|
+
|
245
|
+
assert_equal values[0], 'ARS'
|
246
|
+
assert_equal values[1], 'city:london'
|
247
|
+
elsif i == 2
|
248
|
+
elsif i == 3
|
249
|
+
else
|
250
|
+
assert_equal true, false # should not get here
|
251
|
+
end
|
252
|
+
end
|
253
|
+
end # test_classic_csv_records
|
254
|
+
|
255
|
+
|
256
|
+
def test_autogen_keys
|
257
|
+
txt =<<EOS
|
258
|
+
Ottakringer Helles, 5.2 %, 11.8°
|
259
|
+
Ottakringer Gold Fassl Spezial, 5.6 %, 12.7°
|
260
|
+
Ottakringer (Gold Fassl) Pils, 4.6 %, 11.2°
|
261
|
+
Ottakringer (Gold Fassl) Pur {Bio}, 5.2 %, 11.8°, bio
|
262
|
+
EOS
|
263
|
+
|
264
|
+
reader = ValuesReader.new( txt )
|
265
|
+
|
266
|
+
i = 0
|
267
|
+
reader.each_line do |attribs, values|
|
268
|
+
i += 1
|
269
|
+
|
270
|
+
puts "attribs:"
|
271
|
+
pp attribs
|
272
|
+
puts "values:"
|
273
|
+
pp values
|
274
|
+
|
275
|
+
if i == 1
|
276
|
+
assert_equal attribs[:key], 'ottakringerhelles'
|
277
|
+
assert_equal attribs[:title], 'Ottakringer Helles'
|
278
|
+
assert_equal attribs[:synonyms], nil
|
279
|
+
|
280
|
+
assert_equal values[0], '5.2 %'
|
281
|
+
assert_equal values[1], '11.8°'
|
282
|
+
elsif i == 2
|
283
|
+
assert_equal attribs[:key], 'ottakringergoldfasslspezial'
|
284
|
+
assert_equal attribs[:title], 'Ottakringer Gold Fassl Spezial'
|
285
|
+
assert_equal attribs[:synonyms], nil
|
286
|
+
|
287
|
+
assert_equal values[0], '5.6 %'
|
288
|
+
assert_equal values[1], '12.7°'
|
289
|
+
elsif i == 3
|
290
|
+
assert_equal attribs[:key], 'ottakringerpils'
|
291
|
+
assert_equal attribs[:title], 'Ottakringer (Gold Fassl) Pils'
|
292
|
+
assert_equal attribs[:synonyms], nil
|
293
|
+
|
294
|
+
assert_equal values[0], '4.6 %'
|
295
|
+
assert_equal values[1], '11.2°'
|
296
|
+
elsif i == 4
|
297
|
+
assert_equal attribs[:key], 'ottakringerpur'
|
298
|
+
assert_equal attribs[:title], 'Ottakringer (Gold Fassl) Pur {Bio}'
|
299
|
+
assert_equal attribs[:synonyms], nil
|
300
|
+
|
301
|
+
assert_equal values[0], '5.2 %'
|
302
|
+
assert_equal values[1], '11.8°'
|
303
|
+
assert_equal values[-1], 'bio'
|
304
|
+
else
|
305
|
+
assert_equal true, false # should not get here
|
306
|
+
end
|
307
|
+
end
|
308
|
+
end # test_autogen_keys
|
309
|
+
|
310
|
+
|
311
|
+
end # class TestValuesReader
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: textutils
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.
|
4
|
+
version: 0.8.0
|
5
5
|
prerelease:
|
6
6
|
platform: ruby
|
7
7
|
authors:
|
@@ -9,11 +9,11 @@ authors:
|
|
9
9
|
autorequire:
|
10
10
|
bindir: bin
|
11
11
|
cert_chain: []
|
12
|
-
date:
|
12
|
+
date: 2014-01-17 00:00:00.000000000 Z
|
13
13
|
dependencies:
|
14
14
|
- !ruby/object:Gem::Dependency
|
15
15
|
name: logutils
|
16
|
-
requirement: &
|
16
|
+
requirement: &3086976 !ruby/object:Gem::Requirement
|
17
17
|
none: false
|
18
18
|
requirements:
|
19
19
|
- - ~>
|
@@ -21,10 +21,10 @@ dependencies:
|
|
21
21
|
version: '0.5'
|
22
22
|
type: :runtime
|
23
23
|
prerelease: false
|
24
|
-
version_requirements: *
|
24
|
+
version_requirements: *3086976
|
25
25
|
- !ruby/object:Gem::Dependency
|
26
26
|
name: rdoc
|
27
|
-
requirement: &
|
27
|
+
requirement: &3085764 !ruby/object:Gem::Requirement
|
28
28
|
none: false
|
29
29
|
requirements:
|
30
30
|
- - ~>
|
@@ -32,10 +32,10 @@ dependencies:
|
|
32
32
|
version: '4.0'
|
33
33
|
type: :development
|
34
34
|
prerelease: false
|
35
|
-
version_requirements: *
|
35
|
+
version_requirements: *3085764
|
36
36
|
- !ruby/object:Gem::Dependency
|
37
37
|
name: hoe
|
38
|
-
requirement: &
|
38
|
+
requirement: &3082920 !ruby/object:Gem::Requirement
|
39
39
|
none: false
|
40
40
|
requirements:
|
41
41
|
- - ~>
|
@@ -43,17 +43,19 @@ dependencies:
|
|
43
43
|
version: '3.7'
|
44
44
|
type: :development
|
45
45
|
prerelease: false
|
46
|
-
version_requirements: *
|
46
|
+
version_requirements: *3082920
|
47
47
|
description: textutils - Text Filters, Helpers, Readers and More
|
48
48
|
email: ruby-talk@ruby-lang.org
|
49
49
|
executables: []
|
50
50
|
extensions: []
|
51
51
|
extra_rdoc_files:
|
52
|
+
- History.md
|
52
53
|
- Manifest.txt
|
54
|
+
- README.md
|
53
55
|
files:
|
54
|
-
- History.
|
56
|
+
- History.md
|
55
57
|
- Manifest.txt
|
56
|
-
- README.
|
58
|
+
- README.md
|
57
59
|
- Rakefile
|
58
60
|
- lib/textutils.rb
|
59
61
|
- lib/textutils/classifier.rb
|
@@ -82,6 +84,7 @@ files:
|
|
82
84
|
- test/test_hypertext_helper.rb
|
83
85
|
- test/test_title_helper.rb
|
84
86
|
- test/test_unicode_helper.rb
|
87
|
+
- test/test_values_reader.rb
|
85
88
|
- .gemtest
|
86
89
|
homepage: https://github.com/rubylibs/textutils
|
87
90
|
licenses:
|
@@ -89,7 +92,7 @@ licenses:
|
|
89
92
|
post_install_message:
|
90
93
|
rdoc_options:
|
91
94
|
- --main
|
92
|
-
- README.
|
95
|
+
- README.md
|
93
96
|
require_paths:
|
94
97
|
- lib
|
95
98
|
required_ruby_version: !ruby/object:Gem::Requirement
|
@@ -114,3 +117,4 @@ test_files:
|
|
114
117
|
- test/test_hypertext_helper.rb
|
115
118
|
- test/test_title_helper.rb
|
116
119
|
- test/test_unicode_helper.rb
|
120
|
+
- test/test_values_reader.rb
|