brand2csv 0.1.5 → 0.1.6

Sign up to get free protection for your applications and to get access to all the features.
data/.gemtest ADDED
File without changes
data/.gitignore CHANGED
@@ -1,3 +1,4 @@
1
1
  *~
2
2
  *#
3
3
  ausgabe.csv
4
+ mechanize/
data/History.txt CHANGED
@@ -1,3 +1,10 @@
1
+ === 0.1.6 27.05.2013
2
+
3
+ * Added support for parsing results up to 10'000 hits
4
+ * Added rspec
5
+ * Added (undocumented) second parameter to limit the search by trademark name
6
+ * Added .travis.yml to enable running tests via http://about.travis-ci.org/docs/
7
+
1
8
  === 0.1.5 23.05.2013
2
9
 
3
10
  * Run under Ruby 1.8.7
data/bin/brand2csv CHANGED
@@ -37,13 +37,13 @@ rescue OptionParser::MissingArgument,
37
37
  end
38
38
 
39
39
 
40
- unless args.size == 1
40
+ unless args.size >= 1
41
41
  puts help
42
42
  exit 1
43
43
  end
44
44
 
45
45
  begin
46
- Brand2csv::run(args[0])
46
+ Brand2csv::run(args[0], args[1])
47
47
  rescue Interrupt
48
48
  puts "Unterbrochen. Breche mit Fehler ab"
49
49
  exit 1
data/lib/brand2csv.rb CHANGED
@@ -27,6 +27,11 @@ module Brand2csv
27
27
  ]
28
28
  Base_uri = 'https://www.swissreg.ch'
29
29
  Start_uri = "#{Base_uri}/srclient/faces/jsp/start.jsp"
30
+ Sr1 = "#{Base_uri}/srclient/faces/jsp/trademark/sr1.jsp"
31
+ Sr2 = "#{Base_uri}/srclient/faces/jsp/trademark/sr2.jsp"
32
+ Sr3 = "#{Base_uri}/srclient/faces/jsp/trademark/sr3.jsp"
33
+ Sr30 = "#{Base_uri}/srclient/faces/jsp/trademark/sr30.jsp"
34
+ Sr300 = "#{Base_uri}/srclient/faces/jsp/trademark/sr300.jsp"
30
35
  AddressRegexp = /^(\d\d\d\d)\W*(.*)/
31
36
  LineSplit = ', '
32
37
  DefaultCountry = 'Schweiz'
@@ -77,20 +82,21 @@ module Brand2csv
77
82
 
78
83
 
79
84
  MaxZeilen = 5
85
+ HitsPerPage = 250
86
+ LogDir = 'mechanize'
80
87
 
81
- attr_accessor :marke
88
+ attr_accessor :marke, :results, :timespan
82
89
 
83
- def initialize(timespan)
90
+ def initialize(timespan, marke = nil)
84
91
  @timespan = timespan
85
- @marke = nil
92
+ @marke = marke
86
93
  @number = nil
87
- @hitsPerPage = 100
88
94
 
89
95
  @agent = Mechanize.new { |agent|
90
96
  agent.user_agent = 'Mozilla/5.0 (X11; Linux x86_64; rv:16.0) Gecko/20100101 Firefox/16.0'
91
97
  agent.verify_mode = OpenSSL::SSL::VERIFY_NONE
92
- FileUtils.makedirs 'mechanize' if $VERBOSE
93
- agent.log = Logger.new("mechanize/mechanize.log") if $VERBOSE
98
+ FileUtils.makedirs(LogDir) if $VERBOSE or defined?(RSpec)
99
+ agent.log = Logger.new("#{LogDir}/mechanize.log") if $VERBOSE
94
100
  }
95
101
  @results = []
96
102
  @errors = Hash.new
@@ -109,7 +115,6 @@ module Brand2csv
109
115
  # @marke = "*WEIH*"
110
116
  @timespan = nil
111
117
  end
112
- @marke = 'asp*'
113
118
  end
114
119
 
115
120
  def writeResponse(filename)
@@ -141,6 +146,8 @@ module Brand2csv
141
146
  }
142
147
  end
143
148
 
149
+ UseClick = false
150
+
144
151
  def parse_swissreg(timespan = @timespan, # sollte 377 Treffer ergeben, für 01.06.2007-10.06.2007, 559271 wurde in diesem Zeitraum registriert
145
152
  marke = @marke,
146
153
  nummer =@number) # nummer = "559271" ergibt genau einen treffer
@@ -150,7 +157,7 @@ module Brand2csv
150
157
  # HTTP status code is also strange at redirection.
151
158
  @agent.get Start_uri # get a cookie for the session
152
159
  content = @agent.get_file Start_uri
153
- writeResponse('mechanize/start.jsp')
160
+ writeResponse("#{LogDir}/start.jsp")
154
161
  # get only view state
155
162
  @state = view_state(content)
156
163
  data = [
@@ -160,9 +167,13 @@ module Brand2csv
160
167
  ["id_swissreg:_idcl", "id_swissreg_sub_nav_ipiNavigation_item0"],
161
168
  ["javax.faces.ViewState", @state],
162
169
  ]
163
-
164
- content = @agent.post(Start_uri, data)
165
- writeResponse('mechanize/start2.jsp')
170
+ if UseClick
171
+ Swissreg::setAllInputValue(@agent.page.forms.first, data)
172
+ @agent.page.forms.first.submit
173
+ else
174
+ @agent.post(Start_uri, data)
175
+ end
176
+ writeResponse("#{LogDir}/start2.jsp")
166
177
  # Navigation with mechanize like this fails and returns to the home page
167
178
  # @agent.page.link_with(:id => "id_swissreg_sub_nav_ipiNavigation_item0").click
168
179
 
@@ -174,20 +185,34 @@ module Brand2csv
174
185
  ["javax.faces.ViewState", @state],
175
186
  ]
176
187
  # sr1 ist die einfache suche, sr3 die erweiterte Suche
177
- @path = "/srclient/faces/jsp/trademark/sr3.jsp"
178
- response = @agent.post(Base_uri + @path, data)
179
- writeResponse('mechanize/sr3.jsp')
188
+ if UseClick
189
+ Swissreg::setAllInputValue(@agent.page.forms.first, data)
190
+ @agent.page.forms.first.submit
191
+ else
192
+ @agent.post(Sr3, data)
193
+ end
194
+ writeResponse("#{LogDir}/sr3.jsp")
180
195
 
181
196
  # Fill out form values
182
- @agent.page.form('id_swissreg').checkboxes.each{ |box|
183
- TMChoiceFields.index(box.value) ? box.check : box.uncheck
184
- box.check if $VERBOSE
185
- # select all publication reasons
186
- box.check if /id_ckbTMPubReason/.match(box.name)
187
- # select all publication states
188
- box.check if /id_ckbTMState/.match(box.name)
189
- }
190
- if $VERBOSE # and false # fill all details for marke 567120
197
+ selectedPublicationStates = ['1', '3']
198
+ @agent.page.form('id_swissreg').checkboxes.each{
199
+ |box|
200
+ TMChoiceFields.index(box.value) ? box.check : box.uncheck
201
+ # box.check if $VERBOSE
202
+ # select all publication reasons
203
+ box.check if /id_ckbTMPubReason/.match(box.name)
204
+ # select all publication states or accept default states
205
+ # box.check if /id_ckbTMState/.match(box.name)
206
+ if /id_ckbTMState/.match(box.name)
207
+ if selectedPublicationStates.index(box.value)
208
+ puts "Select id_ckbTMState #{box.value}" if $VERBOSE
209
+ box.check
210
+ else
211
+ box.uncheck
212
+ end
213
+ end
214
+ }
215
+ if $VERBOSE and false # fill all details for marke 567120
191
216
  # Felder, welche nie bei der Antwort auftauchen
192
217
  @agent.page.form('id_swissreg').field(:name => 'id_swissreg:mainContent:id_txf_licensee') { |x| x.value = 'BBB Inc*' }
193
218
  @agent.page.form('id_swissreg').field(:name => 'id_swissreg:mainContent:id_txf_expiryDate') { |x| x.value = timespan }
@@ -203,13 +228,13 @@ module Brand2csv
203
228
  end
204
229
 
205
230
  # Feld, welches im Resultat angezeigt wird
206
- @agent.page.form('id_swissreg').field(:name => 'id_swissreg:mainContent:id_txf_tm_text') { |x| x.value = "asp*" }
231
+ @agent.page.form('id_swissreg').field(:name => 'id_swissreg:mainContent:id_txf_tm_text') { |x| x.value = @marke}
207
232
 
208
233
  # Felder, welches nie bei der Antwort auftaucht. Ein Versuch .gsub('.', '%2E') schlug ebenfalls fehl!
209
234
  @agent.page.form('id_swissreg').field(:name => 'id_swissreg:mainContent:id_txf_appDate') { |x| x.value = timespan}
210
235
 
211
236
  # Feld, welches ebenfalls berücksichtigt wird
212
- @agent.page.form('id_swissreg').field(:name => 'id_swissreg:mainContent:id_cbxHitsPerPage') { |x| x.value = @hitsPerPage }
237
+ @agent.page.form('id_swissreg').field(:name => 'id_swissreg:mainContent:id_cbxHitsPerPage') { |x| x.value = HitsPerPage }
213
238
  @agent.page.form('id_swissreg').field(:name => 'autoScroll') { |x| x.value = '0,0' }
214
239
 
215
240
  if $VERBOSE
@@ -218,165 +243,190 @@ module Brand2csv
218
243
  @agent.page.form('id_swissreg').checkboxes.each{ |box| puts "#{box.name} checked? #{box.checked}"}
219
244
  end
220
245
 
221
- @agent.page.form('id_swissreg').click_button(@agent.page.form('id_swissreg').button_with(:value => "suchen"))
246
+ @criteria = [
247
+ ["autoScroll", "0,829"],
248
+ ["id_swissreg:_link_hidden_", ""],
249
+ ["id_swissreg:mainContent:id_ckbTMState", "1"], # "Hängige Gesuche 1
250
+ # ["id_swissreg:mainContent:id_ckbTMState", "2"], # "Gelöschte Gesuche 2
251
+ ["id_swissreg:mainContent:id_ckbTMState", "3"], # aktive Marken 3
252
+ # ["id_swissreg:mainContent:id_ckbTMState", "4"], # gelöschte Marken 4
253
+ ["id_swissreg:mainContent:id_cbxCountry", "_ALL"], # Auswahl Länder _ALL
254
+ # ["id_swissreg:mainContent:id_txf_tm_no", ""], # Marken Nr
255
+ ["id_swissreg:mainContent:id_txf_tm_no", nummer],# Marken Nr
256
+ ["id_swissreg:mainContent:id_txf_app_no", ""], # Gesuch Nr.
257
+ ["id_swissreg:mainContent:id_txf_tm_text", marke],
258
+ ["id_swissreg:mainContent:id_txf_applicant", ""], # Inhaber/in
259
+ ["id_swissreg:mainContent:id_txf_agent", ""], # Vertreter/in
260
+ ["id_swissreg:mainContent:id_txf_licensee", ""], # Lizenznehmer
261
+ ["id_swissreg:mainContent:id_txf_nizza_class", ""], # Nizza Klassifikation Nr.
262
+ # ["id_swissreg:mainContent:id_txf_appDate", timespan], # Hinterlegungsdatum
263
+ ["id_swissreg:mainContent:id_txf_appDate", timespan] ,
264
+ ["id_swissreg:mainContent:id_txf_expiryDate", ""], # Ablauf Schutzfrist
265
+ # Markenart: Individualmarke 1 Kollektivmarke 2 Garantiemarke 3
266
+ ["id_swissreg:mainContent:id_cbxTMTypeGrp", "_ALL"], # Markenart
267
+ ["id_swissreg:mainContent:id_cbxTMForm", "_ALL"], # Markentyp
268
+ ["id_swissreg:mainContent:id_cbxTMColorClaim", "_ALL"], # Farbanspruch
269
+ ["id_swissreg:mainContent:id_txf_pub_date", ""], # Publikationsdatum
270
+
271
+ # info zu Publikationsgrund id_swissreg:mainContent:id_ckbTMPubReason
272
+ ["id_swissreg:mainContent:id_ckbTMPubReason", "1"], #Neueintragungen
273
+ ["id_swissreg:mainContent:id_ckbTMPubReason", "2"], #Berichtigungen
274
+ ["id_swissreg:mainContent:id_ckbTMPubReason", "3"], #Verlängerungen
275
+ ["id_swissreg:mainContent:id_ckbTMPubReason", "4"], #Löschungen
276
+ ["id_swissreg:mainContent:id_ckbTMPubReason", "5"], #Inhaberänderungen
277
+ ["id_swissreg:mainContent:id_ckbTMPubReason", "6"], #Vertreteränderungen
278
+ ["id_swissreg:mainContent:id_ckbTMPubReason", "7"], #Lizenzänderungen
279
+ ["id_swissreg:mainContent:id_ckbTMPubReason", "8"], #Weitere Registeränderungen
280
+ # ["id_swissreg:mainContent:id_ckbTMEmptyHits", "0"], # Leere Trefferliste anzeigen
281
+
282
+ # "id_swissreg:mainContent:id_cbxFormatChoice" 2 = Publikationsansicht 1 = Registeransicht
283
+ ["id_swissreg:mainContent:id_cbxFormatChoice", "1"],
284
+ ["id_swissreg:mainContent:id_cbxHitsPerPage", HitsPerPage], # Treffer pro Seite
285
+ ]
286
+ TMChoiceFields.each{ | field2display| @criteria << ["id_swissreg:mainContent:id_ckbTMChoice", field2display] }
287
+ # id_swissreg:mainContent:id_ckbTMChoice tm_lbl_tm_text
288
+ puts "Marke ist #{marke}" if marke # Wortlaut der Marke
289
+ puts "Hinterlegungsdatum ist #{timespan}" if $VERBOSE and timespan
290
+ puts "nummer ist #{timespan}" if nummer
291
+ @criteria << ["id_swissreg:mainContent:sub_fieldset:id_submit", "suchen"]
292
+ @criteria << ["id_swissreg_SUBMIT", "1"]
293
+ @criteria << ["id_swissreg:_idcl", ""]
294
+ @criteria << ["id_swissreg:_link_hidden_", ""]
295
+ @criteria << ["javax.faces.ViewState", @state]
296
+
297
+ if true # UseClick
298
+ # Swissreg::setAllInputValue(@agent.page.forms.first, @criteria)
299
+ # setPublicationStates(@agent.page.form('id_swissreg'))
300
+ @agent.page.form('id_swissreg').click_button(@agent.page.form('id_swissreg').button_with(:value => "suchen"))
301
+ else # use post
302
+ writeResponse("#{LogDir}/vor_post_sr3.jsp")
303
+ @agent.post(Sr3, @criteria)
304
+ writeResponse("#{LogDir}/erweiterte_suche.html")
305
+ @agent.page.form('id_swissreg').click_button(@agent.page.form('id_swissreg').button_with(:value => "suchen"))
306
+ end
222
307
  # Hier sollten eigentlich alle Felder auftauchen, wie
223
- # Marke=asp*; Land (Inhaber/in)=Schweiz; Markenart=Alle; Markentyp=Alle; Farbanspruch=Alle; Publikationsgrund= Neueintragungen, Berichtigungen, Verlängerungen, Löschungen, Inhaberänderungen, Vertreteränderungen, Lizenzänderungen, Weitere Registeränderungen; Status= hängige Gesuche, aktive Marken
224
- writeResponse('mechanize/result.jsp')
308
+ # Marke=asp*; Land (Inhaber/in)=Schweiz; Markenart=Alle; Markentyp=Alle; Farbanspruch=Alle; Publikationsgrund= Neueintragungen, Berichtigungen, Verlängerungen, Löschungen, Inhaberänderungen, Vertreteränderungen, Lizenzänderungen, Weitere Registeränderungen; Status= hängige Gesuche, aktive Marken
309
+ writeResponse("#{LogDir}/resultate.jsp")
225
310
  end
226
311
 
227
- def parseAddress(nummer, zeilen)
312
+ # the number is only passed to facilitate debugging
313
+ # lines are the address lines
314
+ def Swissreg::parseAddress(number, lines)
228
315
  ort = nil
229
316
  plz = nil
230
317
 
231
318
  # Search for plz/address
232
- 1.upto(zeilen.length-1).each {
319
+ 1.upto(lines.length-1).each {
233
320
  |cnt|
234
- if m = AddressRegexp.match(zeilen[cnt])
235
- zeilen[cnt+1] = nil
321
+ if m = AddressRegexp.match(lines[cnt])
322
+ lines[cnt+1] = nil
236
323
  plz = m[1]; ort = m[2]
237
- cnt.upto(MaxZeilen-1).each{ |cnt2| zeilen[cnt2] = nil }
324
+ cnt.upto(MaxZeilen-1).each{ |cnt2| lines[cnt2] = nil }
238
325
  break
239
326
  end
240
327
  }
241
328
  unless plz
242
- puts "Achtung! Konnte Marke #{nummer} mit Inhaber #{zeilen.inspect} nicht parsen" if $VERBOSE
329
+ puts "Achtung! Konnte Marke #{number} mit Inhaber #{lines.inspect} nicht parsen" if $VERBOSE
243
330
  return nil, nil, nil, nil, nil, nil, nil, nil
244
331
  end
245
332
  # search for lines with only digits
246
333
  found = false
247
- 1.upto(zeilen.length-1).each {
334
+ 1.upto(lines.length-1).each {
248
335
  |cnt|
249
- break if zeilen[cnt] == nil
250
- if /^\d*$/.match(zeilen[cnt])
336
+ break if lines[cnt] == nil
337
+ if /^\d*$/.match(lines[cnt])
251
338
  found = true
252
- if zeilen[cnt+1] == nil
339
+ if lines[cnt+1] == nil
253
340
  found = 'before'
254
- zeilen[cnt-1] += LineSplit + zeilen[cnt]
255
- zeilen.delete_at(cnt)
341
+ lines[cnt-1] += LineSplit + lines[cnt]
342
+ lines.delete_at(cnt)
256
343
  else
257
344
  found = 'after'
258
- zeilen[cnt] += LineSplit + zeilen[cnt+1]
259
- zeilen.delete_at(cnt+1)
345
+ lines[cnt] += LineSplit + lines[cnt+1]
346
+ lines.delete_at(cnt+1)
260
347
  end
261
348
  end
262
349
  }
263
- puts "found #{found}: #{zeilen.inspect}" if found and $VERBOSE
264
- return zeilen[0], zeilen[1], zeilen[2], zeilen[3], zeilen[4], plz, ort
350
+ puts "found #{found}: #{lines.inspect}" if found and $VERBOSE
351
+ return lines[0], lines[1], lines[2], lines[3], lines[4], plz, ort
265
352
  end
266
353
 
267
- def fetchDetails(nummer) # takes a long time!
268
- @counterDetails += 1
269
- filename = "mechanize/detail_#{nummer}.html"
270
- if File.exists?(filename)
271
- doc = Nokogiri::Slop(File.open(filename))
272
- else
273
- url = "https://www.swissreg.ch/srclient/faces/jsp/trademark/sr300.jsp?language=de&section=tm&id=#{nummer}"
274
- pp "Opening #{url}" if $VERBOSE
275
- content = @agent.get_file url
276
- writeResponse("mechanize/detail_#{nummer}.html")
277
- doc = Nokogiri::Slop(content)
278
- end
279
- puts "Bitte um Geduld. Holte Adressdetails für Marke #{nummer}. (#{@counterDetails} von #{@errors.size})"
280
- path_name = "//html/body/form/div/div/fieldset/div/table/tbody/tr/td"
281
- counter = 0
282
- doc.xpath(path_name).each{
283
- |td|
284
- pp "#{counter}: #{td.text}" if $VERBOSE
285
- counter += 1
286
- next unless /^inhaber/i.match(td.text)
287
- zeilen = []
288
- doc.xpath(path_name)[counter].children.each{ |child| zeilen << child.text unless child.text.length == 0 } # avoid adding <br>
289
- if info = @errors[nummer]
290
- info.inhaber = zeilen.join(" ")
291
- info.zeile_1, info.zeile_2, info.zeile_3, info.zeile_4, zeile_5, info.plz, info.ort = parseAddress(nummer, zeilen)
292
- @results << info
293
- else
294
- bezeichnung = doc.xpath(path_name)[15]
295
- inhaber = zeilen.join(" ")
296
- zeile_1, zeile_2, zeile_3, zeile_4, zeile_5, plz, ort = parseAddress(nummer, zeilen)
297
- hinterlegungsdatum = doc.xpath(path_name)[7]
298
- marke = Marke.new(bezeichnung, nummer, inhaber, DefaultCountry, hinterlegungsdatum, zeile_1, zeile_2, zeile_3, zeile_4, zeile_5, plz, ort )
299
- @results << marke
300
- end
301
- }
354
+ def Swissreg::getInputValuesFromPage(body) # body of HTML page
355
+ contentData = []
356
+ body.search('input').each{ |input|
357
+ # puts "name: #{input.attribute('name')} value #{input.attribute('value')}"
358
+ contentData << [ input.attribute('name').to_s, input.attribute('value').to_s ]
359
+ }
360
+ contentData
361
+ end
362
+
363
+ # return value of an array of POST values
364
+ def Swissreg::inputValue(values, key)
365
+ values.each{ |val|
366
+ return val[1] if key.eql?(val[0])
367
+ }
368
+ return nil
369
+ end
370
+
371
+ # set value for a key of an array of POST values
372
+ def Swissreg::setInputValue(values, key, newValue)
373
+ values.each{ |val|
374
+ if key.eql?(val[0])
375
+ val[1] = newValue
376
+ return
377
+ end
378
+ }
379
+ return
380
+ end
381
+
382
+ def Swissreg::setAllInputValue(form, values)
383
+ values.each{ |newValue|
384
+ # puts "x: 0 #{ newValue[0].to_s} 1 #{newValue[1].to_s}"
385
+ form.field(:name => newValue[0].to_s) { |elem|
386
+ next if elem == nil # puts "Cannot set #{newValue[0].to_s}"
387
+ elem.value = newValue[1].to_s
388
+ }
389
+ }
302
390
  end
303
391
 
304
- def fetchresult(filename = nil, counter = 1)
305
- if filename
306
- doc = Nokogiri::Slop(File.open(filename))
307
- else
308
- body = @agent.page.body
309
- body.force_encoding('utf-8')
310
- doc = Nokogiri::Slop(body)
311
- end
312
- nrFailures = 0
313
- counter += 1
314
- puts "fetchresult. Counter #{counter} already #{@results.size} Datensätze für die Zeitspanne '#{@timespan}'" if $VERBOSE
315
- path_name = "//html/body/form/div/div/fieldset/table/tbody/tr/td/table/tr/td"
316
- hasNext = false
317
- doc.xpath(path_name).each{
318
- |elem|
319
- if /scroll_1idx#{counter}/.match(elem.to_s)
320
- hasNext = true
321
- break
322
- end
392
+ def Swissreg::getMarkenInfoFromDetail(doc)
393
+ marke = nil
394
+ number = 'invalid'
395
+ bezeichnung = nil
396
+ inhaber = nil
397
+ hinterlegungsdatum = nil
398
+ zeilen = []
399
+ doc.xpath("//html/body/form/div/div/fieldset/div/table/tbody/tr").each{
400
+ |x|
401
+ if x.children.first.text.eql?('Marke')
402
+ if x.children[1].text.index('Markenabbildung')
403
+ # we must fetch the link to the image
404
+ bezeichnung = x.children[1].elements.first.attribute('href').text
405
+ else # we got a trademark
406
+ bezeichnung = x.children[1].text
407
+ end
408
+ end
409
+ if x.children.first.text.eql?('Inhaber/in')
410
+ inhaber = />(.*)<\/td/.match(x.children[1].to_s)[1].gsub('<br>',LineSplit)
411
+ x.children[1].children.each{ |child| zeilen << child.text unless child.text.length == 0 } # avoid adding <br>
412
+ end
413
+ hinterlegungsdatum = x.children[1].text if x.children.first.text.eql?('Hinterlegungsdatum')
414
+ number = x.children[1].text if x.children.first.text.eql?('Gesuch Nr.')
323
415
  }
324
- path_name = "//html/body/form/div/div/fieldset/table/tbody/tr/td/table/tbody/tr"
325
- doc.xpath(path_name).each{
326
- |elem|
327
- bezeichnung = elem.elements[1].text
328
- land = elem.elements[4].text
329
- next unless /#{DefaultCountry}/i.match(land)
330
- inhaber = elem.elements[3].text
331
- nummer = elem.elements[2].text
332
- if bezeichnung.length == 0
333
- bezeichnung = elem.children[1].children[0].children[0].children[0].attribute('src').to_s
334
- end
335
- zeile_1, zeile_2, zeile_3, zeile_4, zeile_5, plz, ort = parseAddress(nummer, inhaber.split(LineSplit))
336
- if zeile_1
337
- @results << Marke.new(bezeichnung, elem.elements[2].text, elem.elements[3].text, land, elem.elements[5].text,
338
- zeile_1, zeile_2, zeile_3, zeile_4, zeile_5, plz, ort )
339
- else
340
- nrFailures += 1
341
- @errors[nummer] = Marke.new(bezeichnung, elem.elements[2].text, elem.elements[3].text, land, elem.elements[5].text,
342
- zeile_1, zeile_2, zeile_3, zeile_4, zeile_5, plz, ort )
343
- end
344
- } if doc.xpath(path_name)
345
- if hasNext
346
- @path = "/srclient/faces/jsp/trademark/sr30.jsp"
347
- puts "Calling sub #{counter} with #{@path}" if $VERBOSE
348
- data = [
349
- ["autoScroll", "0,0"],
350
- ["id_swissreg:mainContent:id_sub_options_result:sub_fieldset:id_cbxHitsPerPage", @hitsPerPage],
351
- # ["id_swissreg:mainContent:vivian", "TRADEMARK REGISTER SEARCH TIMES: QUERY=[20] SELECT=[823] SERVER=[846] DELEGATE=[861] (HITS=[96])"],
352
- ["id_swissreg_SUBMIT", "1"],
353
- ["id_swissreg:_idcl", "id_swissreg:mainContent:scroll_1idx#{counter}"],
354
- ["id_swissreg:mainContent:scroll_1", "idx#{counter}"],
355
- ["tmMainId", ""],
356
- ["id_swissreg:_link_hidden_ "],
357
- ["javax.faces.ViewState", @state],
358
- ]
359
- TMChoiceFields.each{ | field2display| data << ["id_swissreg:mainContent:id_sub_options_result:id_ckbTMChoice", field2display] }
360
- response = @agent.post(Base_uri + @path, data)
361
- writeResponse("mechanize/resultate_#{counter}.html")
362
- checkErrors(response.body)
363
- fetchresult(nil, counter)
364
- else
365
- puts "Es gab #{nrFailures} Fehler beim Lesen von #{filename}" if $VERBOSE
366
- puts "Fand #{@results.size} Datensätze für die Zeitspanne '#{@timespan}'. Von #{@errors.size} muss die Adresse noch geholt werden."
367
- end
416
+ zeile_1, zeile_2, zeile_3, zeile_4, zeile_5, plz, ort = Swissreg::parseAddress(number, zeilen)
417
+ marke = Marke.new(bezeichnung, number, inhaber, DefaultCountry, hinterlegungsdatum, zeile_1, zeile_2, zeile_3, zeile_4, zeile_5, plz, ort )
368
418
  end
369
-
370
- def emitCsv(filename='ausgabe.csv')
371
- return if @results.size == 0
419
+
420
+ def Swissreg::emitCsv(results, filename='ausgabe.csv')
421
+ return if results == nil or results.size == 0
372
422
  if /^1\.8/.match(RUBY_VERSION)
373
423
  ausgabe = File.open(filename, 'w+')
374
424
  # Write header
375
425
  s=''
376
- @results[0].members.each { |member| s += member + ';' }
426
+ results[0].members.each { |member| s += member + ';' }
377
427
  ausgabe.puts s.chop
378
428
  # write all line
379
- @results.each{
429
+ results.each{
380
430
  |result|
381
431
  s = ''
382
432
  result.members.each{ |member|
@@ -391,28 +441,155 @@ module Brand2csv
391
441
  ausgabe.puts s.chop
392
442
  }
393
443
  else
394
- CSV.open(filename, 'w', :headers=>@results[0].members,
444
+ CSV.open(filename, 'w', :headers=>results[0].members,
395
445
  :write_headers => true,
396
446
  :col_sep => ';'
397
- ) do |csv| @results.each{ |x| csv << x }
447
+ ) do |csv| results.each{ |x| csv << x }
398
448
  end
399
449
  end
400
450
  end
451
+
452
+ class Swissreg::Vereinfachte
453
+ attr_reader :links2details, :trademark_search_id, :inputData, :firstHit, :nrHits, :nrSubPages, :pageNr
454
+ HitRegexpDE = /Seite (\d*) von ([\d']*) - Treffer ([\d']*)-([\d']*) von ([\d']*)/
455
+ Vivian = 'id_swissreg:mainContent:vivian'
456
+
457
+ # Parse a HTML page from swissreg sr3.jsp
458
+ # There we find info like "Seite 1 von 26 - Treffer 1-250 von 6'349" and upto 250 links to details
459
+ def initialize(doc)
460
+ @inputData = []
461
+ m = HitRegexpDE.match(doc.text)
462
+ @pageNr = m[1].sub("'", '').to_i
463
+ @nrSubPages = m[2].sub("'", '').to_i
464
+ @firstHit = m[3].sub("'", '').to_i
465
+ @nrHits = m[5].sub("'", '').to_i
466
+ @trademark_search_id = Swissreg::inputValue(Swissreg::getInputValuesFromPage(doc), Vivian)
467
+ @links2details = []
468
+ doc.search('input').each{ |input|
469
+ # puts "name: #{input.attribute('name')} value #{input.attribute('value')}" if $VERBOSE
470
+ @inputData << [ input.attribute('name').to_s, input.attribute('value').to_s ]
471
+ }
472
+
473
+ @state = Swissreg::inputValue(Swissreg::getInputValuesFromPage(doc), 'javax.faces.ViewState')
474
+ doc.search('a').each{
475
+ |link|
476
+ if m = /d_swissreg:mainContent:data:(\d*):tm_no_detail:id_detail/i.match(link.attribute('id'))
477
+ # puts "XXX #{link.attribute('onclick').to_s} href: #{link.attribute('href').to_s} value #{link.attribute('value').to_s}" if $VERBOSE
478
+ m = /'tmMainId','(\d*)'/.match(link.attribute('onclick').to_s)
479
+ tmMainId = m[1].to_i
480
+ @links2details << tmMainId
481
+ end
482
+ }
483
+ end
484
+
485
+ def getPostDataForDetail(position, id)
486
+ [
487
+ [ "autoScroll", "0,0"],
488
+ [ "id_swissreg:mainContent:sub_options_result:sub_fieldset:cbxHitsPerPage", "#{HitsPerPage}"],
489
+ [ "id_swissreg:mainContent:vivian", @trademark_search_id],
490
+ [ "id_swissreg_SUBMIT", "1"],
491
+ [ "id_swissreg:_idcl", "id_swissreg:mainContent:data:#{position}:tm_no_detail:id_detail", ""],
492
+ [ "id_swissreg:mainContent:scroll_1", ""],
493
+ [ "tmMainId", "#{id}"],
494
+ [ "id_swissreg:_link_hidden_ "],
495
+ [ "javax.faces.ViewState", @state]
496
+ ]
497
+ end
498
+
499
+ def getPostDataForSubpage(pageNr)
500
+ [
501
+ [ "autoScroll", "0,0"],
502
+ [ "id_swissreg:mainContent:sub_options_result:sub_fieldset:cbxHitsPerPage", "#{HitsPerPage}"],
503
+ [ "id_swissreg:mainContent:vivian", @trademark_search_id],
504
+ [ "id_swissreg_SUBMIT", "1"],
505
+ [ "id_swissreg:_idcl", "id_swissreg:mainContent:scroll_1idx#{pageNr}"],
506
+ [ "id_swissreg:mainContent:scroll_1", "idx#{pageNr}"],
507
+ [ "tmMainId", ""],
508
+ [ "id_swissreg:_link_hidden_ "],
509
+ [ "javax.faces.ViewState", @state]
510
+ ]
511
+ end
401
512
 
402
- def fetchMissingDetails
403
- @errors.each{
404
- |markennummer, info|
405
- fetchDetails(markennummer)
513
+ end
514
+
515
+ def getAllHits(filename = nil, pageNr = 1)
516
+ if filename && File.exists?(filename)
517
+ doc = Nokogiri::Slop(File.open(filename))
518
+ else
519
+ body = @agent.page.body
520
+ body.force_encoding('utf-8')
521
+ doc = Nokogiri::Slop(body)
522
+ filename = "#{LogDir}/vereinfachte_#{pageNr}.html"
523
+ writeResponse(filename)
524
+ end
525
+
526
+ einfach = Swissreg::Vereinfachte.new(doc)
527
+ puts "#{Time.now.strftime("%H:%M:%S")} status: fetch #{pageNr} of #{einfach.nrSubPages}"
528
+ subPage2Fetch = pageNr + 1
529
+ data2 = einfach.getPostDataForSubpage(subPage2Fetch).clone
530
+ if (HitsPerPage < einfach.nrHits - einfach.firstHit)
531
+ itemsToFetch = HitsPerPage
532
+ else
533
+ itemsToFetch = einfach.nrHits - einfach.firstHit
534
+ end
535
+ 0.upto(itemsToFetch-1) {
536
+ |position|
537
+ id = einfach.links2details[position]
538
+ nextId = einfach.firstHit.to_i - 1 + position.to_i
539
+ data3 = einfach.getPostDataForDetail(nextId, id)
540
+ Swissreg::setAllInputValue(@agent.page.forms.first, data3)
541
+ @agent.page.forms.first.submit
542
+ filename = "#{LogDir}/vereinfachte_detail_#{einfach.firstHit + position}.html"
543
+ writeResponse(filename)
544
+ matchResult = @agent.page.search('h1').text
545
+ unless /Detailansicht zu (Gesuch|Marke)/.match(matchResult)
546
+ puts matchResult
547
+ puts "Attention did not find 'Detailansicht' in #{filename}. Someting went wrong!"
548
+ break
549
+ end
550
+ @results << Swissreg::getMarkenInfoFromDetail(Nokogiri::Slop(@agent.page.body))
551
+ @agent.back
406
552
  }
553
+ filename = "#{LogDir}/vereinfachte_#{pageNr}_back.html"
554
+ writeResponse(filename)
555
+ if pageNr < (einfach.nrSubPages-1)
556
+ puts "Fetching page #{subPage2Fetch} of #{einfach.nrSubPages}" if $VERBOSE
557
+ Swissreg::setAllInputValue(@agent.page.forms.first, data2)
558
+ @agent.page.forms.first.submit
559
+ getAllHits(nil, subPage2Fetch)
560
+ @agent.back
561
+ end
562
+
407
563
  end
564
+
565
+ def fetchresult(filename = "#{LogDir}/fetch_1.html", counter = 1)
566
+ if filename && File.exists?(filename)
567
+ doc = Nokogiri::Slop(File.open(filename))
568
+ else
569
+ body = @agent.page.body
570
+ body.force_encoding('utf-8')
571
+ doc = Nokogiri::Slop(body)
572
+ writeResponse(filename)
573
+ end
574
+
575
+ if /Vereinfachte Trefferliste anzeigen/i.match(doc.text)
576
+ form = @agent.page.forms.first
577
+ button = form.button_with(:value => /Vereinfachte/i)
578
+ # submit the form using that button
579
+ @agent.submit(form, button)
580
+ filename = "#{LogDir}/vereinfacht.html"
581
+ writeResponse(filename)
582
+ end
583
+ getAllHits(filename, counter)
584
+ end
585
+
408
586
  end # class Swissreg
409
587
 
410
- def Brand2csv::run(timespan)
411
- session = Swissreg.new(timespan)
588
+ def Brand2csv::run(timespan, marke = 'a*')
589
+ session = Swissreg.new(timespan, marke)
412
590
  session.parse_swissreg
413
591
  session.fetchresult
414
- session.fetchMissingDetails
415
- session.emitCsv("#{timespan}.csv")
592
+ Swissreg::emitCsv(session.results, "#{timespan}.csv")
416
593
  end
417
594
 
418
595
  end # module Brand2csv
@@ -1,3 +1,3 @@
1
1
  module Brand2csv
2
- VERSION = "0.1.5"
2
+ VERSION = "0.1.6"
3
3
  end
@@ -1,3 +1,14 @@
1
+ * Work on 2013.05.27
2
+
3
+ * Should be able to fetch up to 10'000 hits.
4
+ ** Problems: Seems to hang silently after a few thousand hits
5
+ ** Cannot limit search to only "Hängige Gesuche" and "Aktive Marken"
6
+
7
+ * Work on 2013.05.26
8
+
9
+ * Added first rake tests to speed up work for 10'000 hits
10
+ * Added a second parameter to limit the search by trademark name, because swissreg does not honour the timespan that is passed
11
+
1
12
  * Work on 2013.05.22
2
13
 
3
14
  ** Use timespan as filename
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: brand2csv
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.5
4
+ version: 0.1.6
5
5
  prerelease:
6
6
  platform: ruby
7
7
  authors:
@@ -9,7 +9,7 @@ authors:
9
9
  autorequire:
10
10
  bindir: bin
11
11
  cert_chain: []
12
- date: 2013-05-23 00:00:00.000000000 Z
12
+ date: 2013-05-27 00:00:00.000000000 Z
13
13
  dependencies:
14
14
  - !ruby/object:Gem::Dependency
15
15
  name: mechanize
@@ -124,6 +124,7 @@ files:
124
124
  - spike.rb
125
125
  - spike_mechanize_swissreg.rb
126
126
  - spike_watir.rb
127
+ - .gemtest
127
128
  homepage: https://github.com/zdavatz/brand2csv
128
129
  licenses: []
129
130
  post_install_message: