brand2csv 0.1.3 → 0.1.4

Sign up to get free protection for your applications and to get access to all the features.
@@ -1,3 +1,7 @@
1
+ === 0.1.4 22.05.2013
2
+
3
+ * Better handling of address lines like '90 route de Frontenex', 'Via San Salvatore, 2'
4
+
1
5
  === 0.1.3 22.05.2013
2
6
 
3
7
  * Updated Manifest.txt to include bin/brand2csv
@@ -42,6 +42,8 @@ module Brand2csv
42
42
  "tm_lbl_app_date", # Hinterlegungsdatum
43
43
  ]
44
44
 
45
+ MaxZeilen = 5
46
+
45
47
  attr_accessor :marke
46
48
 
47
49
  def initialize(timespan)
@@ -188,27 +190,44 @@ module Brand2csv
188
190
  @lastResponse = response
189
191
  end
190
192
 
191
- def parseAddress(nummer, inhaber)
192
- zeile_1, zeile_2, zeile_3, zeile_4, zeile_5 = inhaber.split(LineSplit)
193
+ def parseAddress(nummer, zeilen)
193
194
  ort = nil
194
195
  plz = nil
195
- if m = AddressRegexp.match(zeile_2)
196
- zeile_2 = nil
197
- plz = m[1]; ort = m[2]
198
- elsif m = AddressRegexp.match(zeile_3)
199
- zeile_3 = nil
200
- plz = m[1]; ort = m[2]
201
- elsif m = AddressRegexp.match(zeile_4)
202
- zeile_4 = nil
203
- plz = m[1]; ort = m[2]
204
- elsif m = AddressRegexp.match(zeile_5)
205
- zeile_5 = nil
206
- plz = m[1]; ort = m[2]
207
- else
208
- puts "Achtung! Konnte Marke #{nummer} mit Inhaber #{inhaber} nicht parsen" if $VERBOSE
196
+
197
+ # Search for plz/address
198
+ 1.upto(zeilen.length-1).each {
199
+ |cnt|
200
+ if m = AddressRegexp.match(zeilen[cnt])
201
+ zeilen[cnt+1] = nil
202
+ plz = m[1]; ort = m[2]
203
+ cnt.upto(MaxZeilen-1).each{ |cnt2| zeilen[cnt2] = nil }
204
+ break
205
+ end
206
+ }
207
+ unless plz
208
+ puts "Achtung! Konnte Marke #{nummer} mit Inhaber #{zeilen.inpsect} nicht parsen" if $VERBOSE
209
209
  return nil, nil, nil, nil, nil, nil, nil, nil
210
210
  end
211
- return zeile_1, zeile_2, zeile_3, zeile_4, zeile_5, plz, ort
211
+ # search for lines with only digits
212
+ found = false
213
+ 1.upto(zeilen.length-1).each {
214
+ |cnt|
215
+ break if zeilen[cnt] == nil
216
+ if /^\d*$/.match(zeilen[cnt])
217
+ found = true
218
+ if zeilen[cnt+1] == nil
219
+ found = 'before'
220
+ zeilen[cnt-1] += LineSplit + zeilen[cnt]
221
+ zeilen.delete_at(cnt)
222
+ else
223
+ found = 'after'
224
+ zeilen[cnt] += LineSplit + zeilen[cnt+1]
225
+ zeilen.delete_at(cnt+1)
226
+ end
227
+ end
228
+ }
229
+ puts "found #{found}: #{zeilen.inspect}" if found and $VERBOSE
230
+ return zeilen[0], zeilen[1], zeilen[2], zeilen[3], zeilen[4], plz, ort
212
231
  end
213
232
 
214
233
  def fetchDetails(nummer) # takes a long time!
@@ -223,7 +242,7 @@ module Brand2csv
223
242
  writeResponse("mechanize/detail_#{nummer}.html", content)
224
243
  doc = Nokogiri::Slop(content)
225
244
  end
226
- puts "Bitte um Geduld. Hole Adressdetails für Marke #{nummer}. (#{@counterDetails} von #{@errors.size})"
245
+ puts "Bitte um Geduld. Holte Adressdetails für Marke #{nummer}. (#{@counterDetails} von #{@errors.size})"
227
246
  path_name = "//html/body/form/div/div/fieldset/div/table/tbody/tr/td"
228
247
  counter = 0
229
248
  doc.xpath(path_name).each{
@@ -232,15 +251,15 @@ module Brand2csv
232
251
  counter += 1
233
252
  next unless /^inhaber/i.match(td.text)
234
253
  zeilen = []
235
- doc.xpath(path_name)[counter].children.each{ |child| zeilen << child.text.gsub(LineSplit,'. ') unless child.text.length == 0 } # avoid adding <br>
254
+ doc.xpath(path_name)[counter].children.each{ |child| zeilen << child.text unless child.text.length == 0 } # avoid adding <br>
236
255
  if info = @errors[nummer]
237
- info.inhaber = zeilen.join(LineSplit)
238
- info.zeile_1, info.zeile_2, info.zeile_3, info.zeile_4, zeile_5, info.plz, info.ort = parseAddress(nummer, info.inhaber)
256
+ info.inhaber = zeilen.join(" ")
257
+ info.zeile_1, info.zeile_2, info.zeile_3, info.zeile_4, zeile_5, info.plz, info.ort = parseAddress(nummer, zeilen)
239
258
  @results << info
240
259
  else
241
260
  bezeichnung = doc.xpath(path_name)[15]
242
- inhaber = zeilen.join(LineSplit)
243
- zeile_1, zeile_2, zeile_3, zeile_4, zeile_5, plz, ort = parseAddress(nummer, inhaber)
261
+ inhaber = zeilen.join(" ")
262
+ zeile_1, zeile_2, zeile_3, zeile_4, zeile_5, plz, ort = parseAddress(nummer, zeilen)
244
263
  hinterlegungsdatum = doc.xpath(path_name)[7]
245
264
  marke = Marke.new(bezeichnung, nummer, inhaber, DefaultCountry, hinterlegungsdatum, zeile_1, zeile_2, zeile_3, zeile_4, zeile_5, plz, ort )
246
265
  @results << marke
@@ -256,7 +275,7 @@ module Brand2csv
256
275
  end
257
276
  nrFailures = 0
258
277
  counter += 1
259
- puts "fetchresult. Counter #{counter} already #{@results.size} Datensätze für die Zeitspanne '#{@timespan}'"
278
+ puts "fetchresult. Counter #{counter} already #{@results.size} Datensätze für die Zeitspanne '#{@timespan}'" if $VERBOSE
260
279
  path_name = "//html/body/form/div/div/fieldset/table/tbody/tr/td/table/tr/td"
261
280
  hasNext = false
262
281
  doc.xpath(path_name).each{
@@ -277,7 +296,7 @@ module Brand2csv
277
296
  if bezeichnung.length == 0
278
297
  bezeichnung = elem.children[1].children[0].children[0].children[0].attribute('src').to_s
279
298
  end
280
- zeile_1, zeile_2, zeile_3, zeile_4, zeile_5, plz, ort = parseAddress(nummer, inhaber)
299
+ zeile_1, zeile_2, zeile_3, zeile_4, zeile_5, plz, ort = parseAddress(nummer, inhaber.split(LineSplit))
281
300
  if zeile_1
282
301
  @results << Marke.new(bezeichnung, elem.elements[2].text, elem.elements[3].text, land, elem.elements[5].text,
283
302
  zeile_1, zeile_2, zeile_3, zeile_4, zeile_5, plz, ort )
@@ -1,3 +1,3 @@
1
1
  module Brand2csv
2
- VERSION = "0.1.3"
2
+ VERSION = "0.1.4"
3
3
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: brand2csv
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.3
4
+ version: 0.1.4
5
5
  prerelease:
6
6
  platform: ruby
7
7
  authors: