brand2csv 0.1.5 → 0.1.6
Sign up to get free protection for your applications and to get access to all the features.
- data/.gemtest +0 -0
- data/.gitignore +1 -0
- data/History.txt +7 -0
- data/bin/brand2csv +2 -2
- data/lib/brand2csv.rb +334 -157
- data/lib/brand2csv/version.rb +1 -1
- data/protocol.2013.05.21.textile +11 -0
- metadata +3 -2
data/.gemtest
ADDED
File without changes
|
data/.gitignore
CHANGED
data/History.txt
CHANGED
@@ -1,3 +1,10 @@
|
|
1
|
+
=== 0.1.6 27.05.2013
|
2
|
+
|
3
|
+
* Added support for parsing results up to 10'000 hits
|
4
|
+
* Added rspec
|
5
|
+
* Added (undocumented) second parameter to limit the search according to the trademark name
|
6
|
+
* Added .travis.yml to enable running tests via http://about.travis-ci.org/docs/
|
7
|
+
|
1
8
|
=== 0.1.5 23.05.2013
|
2
9
|
|
3
10
|
* Run under Ruby 1.8.7
|
data/bin/brand2csv
CHANGED
@@ -37,13 +37,13 @@ rescue OptionParser::MissingArgument,
|
|
37
37
|
end
|
38
38
|
|
39
39
|
|
40
|
-
unless args.size
|
40
|
+
unless args.size >= 1
|
41
41
|
puts help
|
42
42
|
exit 1
|
43
43
|
end
|
44
44
|
|
45
45
|
begin
|
46
|
-
Brand2csv::run(args[0])
|
46
|
+
Brand2csv::run(args[0], args[1])
|
47
47
|
rescue Interrupt
|
48
48
|
puts "Unterbrochen. Breche mit Fehler ab"
|
49
49
|
exit 1
|
data/lib/brand2csv.rb
CHANGED
@@ -27,6 +27,11 @@ module Brand2csv
|
|
27
27
|
]
|
28
28
|
Base_uri = 'https://www.swissreg.ch'
|
29
29
|
Start_uri = "#{Base_uri}/srclient/faces/jsp/start.jsp"
|
30
|
+
Sr1 = "#{Base_uri}/srclient/faces/jsp/trademark/sr1.jsp"
|
31
|
+
Sr2 = "#{Base_uri}/srclient/faces/jsp/trademark/sr2.jsp"
|
32
|
+
Sr3 = "#{Base_uri}/srclient/faces/jsp/trademark/sr3.jsp"
|
33
|
+
Sr30 = "#{Base_uri}/srclient/faces/jsp/trademark/sr30.jsp"
|
34
|
+
Sr300 = "#{Base_uri}/srclient/faces/jsp/trademark/sr300.jsp"
|
30
35
|
AddressRegexp = /^(\d\d\d\d)\W*(.*)/
|
31
36
|
LineSplit = ', '
|
32
37
|
DefaultCountry = 'Schweiz'
|
@@ -77,20 +82,21 @@ module Brand2csv
|
|
77
82
|
|
78
83
|
|
79
84
|
MaxZeilen = 5
|
85
|
+
HitsPerPage = 250
|
86
|
+
LogDir = 'mechanize'
|
80
87
|
|
81
|
-
attr_accessor :marke
|
88
|
+
attr_accessor :marke, :results, :timespan
|
82
89
|
|
83
|
-
def initialize(timespan)
|
90
|
+
def initialize(timespan, marke = nil)
|
84
91
|
@timespan = timespan
|
85
|
-
@marke =
|
92
|
+
@marke = marke
|
86
93
|
@number = nil
|
87
|
-
@hitsPerPage = 100
|
88
94
|
|
89
95
|
@agent = Mechanize.new { |agent|
|
90
96
|
agent.user_agent = 'Mozilla/5.0 (X11; Linux x86_64; rv:16.0) Gecko/20100101 Firefox/16.0'
|
91
97
|
agent.verify_mode = OpenSSL::SSL::VERIFY_NONE
|
92
|
-
FileUtils.makedirs
|
93
|
-
agent.log = Logger.new("
|
98
|
+
FileUtils.makedirs(LogDir) if $VERBOSE or defined?(RSpec)
|
99
|
+
agent.log = Logger.new("#{LogDir}/mechanize.log") if $VERBOSE
|
94
100
|
}
|
95
101
|
@results = []
|
96
102
|
@errors = Hash.new
|
@@ -109,7 +115,6 @@ module Brand2csv
|
|
109
115
|
# @marke = "*WEIH*"
|
110
116
|
@timespan = nil
|
111
117
|
end
|
112
|
-
@marke = 'asp*'
|
113
118
|
end
|
114
119
|
|
115
120
|
def writeResponse(filename)
|
@@ -141,6 +146,8 @@ module Brand2csv
|
|
141
146
|
}
|
142
147
|
end
|
143
148
|
|
149
|
+
UseClick = false
|
150
|
+
|
144
151
|
def parse_swissreg(timespan = @timespan, # sollte 377 Treffer ergeben, für 01.06.2007-10.06.2007, 559271 wurde in diesem Zeitraum registriert
|
145
152
|
marke = @marke,
|
146
153
|
nummer =@number) # nummer = "559271" ergibt genau einen treffer
|
@@ -150,7 +157,7 @@ module Brand2csv
|
|
150
157
|
# HTTP status code is also strange at redirection.
|
151
158
|
@agent.get Start_uri # get a cookie for the session
|
152
159
|
content = @agent.get_file Start_uri
|
153
|
-
writeResponse(
|
160
|
+
writeResponse("#{LogDir}/start.jsp")
|
154
161
|
# get only view state
|
155
162
|
@state = view_state(content)
|
156
163
|
data = [
|
@@ -160,9 +167,13 @@ module Brand2csv
|
|
160
167
|
["id_swissreg:_idcl", "id_swissreg_sub_nav_ipiNavigation_item0"],
|
161
168
|
["javax.faces.ViewState", @state],
|
162
169
|
]
|
163
|
-
|
164
|
-
|
165
|
-
|
170
|
+
if UseClick
|
171
|
+
Swissreg::setAllInputValue(@agent.page.forms.first, data)
|
172
|
+
@agent.page.forms.first.submit
|
173
|
+
else
|
174
|
+
@agent.post(Start_uri, data)
|
175
|
+
end
|
176
|
+
writeResponse("#{LogDir}/start2.jsp")
|
166
177
|
# Navigation with mechanize like this fails and returns to the home page
|
167
178
|
# @agent.page.link_with(:id => "id_swissreg_sub_nav_ipiNavigation_item0").click
|
168
179
|
|
@@ -174,20 +185,34 @@ module Brand2csv
|
|
174
185
|
["javax.faces.ViewState", @state],
|
175
186
|
]
|
176
187
|
# sr1 ist die einfache suche, sr3 die erweiterte Suche
|
177
|
-
|
178
|
-
|
179
|
-
|
188
|
+
if UseClick
|
189
|
+
Swissreg::setAllInputValue(@agent.page.forms.first, data)
|
190
|
+
@agent.page.forms.first.submit
|
191
|
+
else
|
192
|
+
@agent.post(Sr3, data)
|
193
|
+
end
|
194
|
+
writeResponse("#{LogDir}/sr3.jsp")
|
180
195
|
|
181
196
|
# Fill out form values
|
182
|
-
|
183
|
-
|
184
|
-
|
185
|
-
|
186
|
-
|
187
|
-
|
188
|
-
|
189
|
-
|
190
|
-
|
197
|
+
selectedPublicationStates = ['1', '3']
|
198
|
+
@agent.page.form('id_swissreg').checkboxes.each{
|
199
|
+
|box|
|
200
|
+
TMChoiceFields.index(box.value) ? box.check : box.uncheck
|
201
|
+
# box.check if $VERBOSE
|
202
|
+
# select all publication reasons
|
203
|
+
box.check if /id_ckbTMPubReason/.match(box.name)
|
204
|
+
# select all publication states or accept default states
|
205
|
+
# box.check if /id_ckbTMState/.match(box.name)
|
206
|
+
if /id_ckbTMState/.match(box.name)
|
207
|
+
if selectedPublicationStates.index(box.value)
|
208
|
+
puts "Select id_ckbTMState #{box.value}" if $VERBOSE
|
209
|
+
box.check
|
210
|
+
else
|
211
|
+
box.uncheck
|
212
|
+
end
|
213
|
+
end
|
214
|
+
}
|
215
|
+
if $VERBOSE and false # fill all details for marke 567120
|
191
216
|
# Felder, welche nie bei der Antwort auftauchen
|
192
217
|
@agent.page.form('id_swissreg').field(:name => 'id_swissreg:mainContent:id_txf_licensee') { |x| x.value = 'BBB Inc*' }
|
193
218
|
@agent.page.form('id_swissreg').field(:name => 'id_swissreg:mainContent:id_txf_expiryDate') { |x| x.value = timespan }
|
@@ -203,13 +228,13 @@ module Brand2csv
|
|
203
228
|
end
|
204
229
|
|
205
230
|
# Feld, welches im Resultat angezeigt wird
|
206
|
-
@agent.page.form('id_swissreg').field(:name => 'id_swissreg:mainContent:id_txf_tm_text') { |x| x.value =
|
231
|
+
@agent.page.form('id_swissreg').field(:name => 'id_swissreg:mainContent:id_txf_tm_text') { |x| x.value = @marke}
|
207
232
|
|
208
233
|
# Felder, welches nie bei der Antwort auftaucht. Ein Versuch .gsub('.', '%2E') schlug ebenfalls fehl!
|
209
234
|
@agent.page.form('id_swissreg').field(:name => 'id_swissreg:mainContent:id_txf_appDate') { |x| x.value = timespan}
|
210
235
|
|
211
236
|
# Feld, welches ebenfalls berücksichtigt wird
|
212
|
-
@agent.page.form('id_swissreg').field(:name => 'id_swissreg:mainContent:id_cbxHitsPerPage') { |x| x.value =
|
237
|
+
@agent.page.form('id_swissreg').field(:name => 'id_swissreg:mainContent:id_cbxHitsPerPage') { |x| x.value = HitsPerPage }
|
213
238
|
@agent.page.form('id_swissreg').field(:name => 'autoScroll') { |x| x.value = '0,0' }
|
214
239
|
|
215
240
|
if $VERBOSE
|
@@ -218,165 +243,190 @@ module Brand2csv
|
|
218
243
|
@agent.page.form('id_swissreg').checkboxes.each{ |box| puts "#{box.name} checked? #{box.checked}"}
|
219
244
|
end
|
220
245
|
|
221
|
-
|
246
|
+
@criteria = [
|
247
|
+
["autoScroll", "0,829"],
|
248
|
+
["id_swissreg:_link_hidden_", ""],
|
249
|
+
["id_swissreg:mainContent:id_ckbTMState", "1"], # "Hängige Gesuche 1
|
250
|
+
# ["id_swissreg:mainContent:id_ckbTMState", "2"], # "Gelöschte Gesuche 2
|
251
|
+
["id_swissreg:mainContent:id_ckbTMState", "3"], # aktive Marken 3
|
252
|
+
# ["id_swissreg:mainContent:id_ckbTMState", "4"], # gelöschte Marken 4
|
253
|
+
["id_swissreg:mainContent:id_cbxCountry", "_ALL"], # Auswahl Länder _ALL
|
254
|
+
# ["id_swissreg:mainContent:id_txf_tm_no", ""], # Marken Nr
|
255
|
+
["id_swissreg:mainContent:id_txf_tm_no", nummer],# Marken Nr
|
256
|
+
["id_swissreg:mainContent:id_txf_app_no", ""], # Gesuch Nr.
|
257
|
+
["id_swissreg:mainContent:id_txf_tm_text", marke],
|
258
|
+
["id_swissreg:mainContent:id_txf_applicant", ""], # Inhaber/in
|
259
|
+
["id_swissreg:mainContent:id_txf_agent", ""], # Vertreter/in
|
260
|
+
["id_swissreg:mainContent:id_txf_licensee", ""], # Lizenznehmer
|
261
|
+
["id_swissreg:mainContent:id_txf_nizza_class", ""], # Nizza Klassifikation Nr.
|
262
|
+
# ["id_swissreg:mainContent:id_txf_appDate", timespan], # Hinterlegungsdatum
|
263
|
+
["id_swissreg:mainContent:id_txf_appDate", timespan] ,
|
264
|
+
["id_swissreg:mainContent:id_txf_expiryDate", ""], # Ablauf Schutzfrist
|
265
|
+
# Markenart: Individualmarke 1 Kollektivmarke 2 Garantiemarke 3
|
266
|
+
["id_swissreg:mainContent:id_cbxTMTypeGrp", "_ALL"], # Markenart
|
267
|
+
["id_swissreg:mainContent:id_cbxTMForm", "_ALL"], # Markentyp
|
268
|
+
["id_swissreg:mainContent:id_cbxTMColorClaim", "_ALL"], # Farbanspruch
|
269
|
+
["id_swissreg:mainContent:id_txf_pub_date", ""], # Publikationsdatum
|
270
|
+
|
271
|
+
# info zu Publikationsgrund id_swissreg:mainContent:id_ckbTMPubReason
|
272
|
+
["id_swissreg:mainContent:id_ckbTMPubReason", "1"], #Neueintragungen
|
273
|
+
["id_swissreg:mainContent:id_ckbTMPubReason", "2"], #Berichtigungen
|
274
|
+
["id_swissreg:mainContent:id_ckbTMPubReason", "3"], #Verlängerungen
|
275
|
+
["id_swissreg:mainContent:id_ckbTMPubReason", "4"], #Löschungen
|
276
|
+
["id_swissreg:mainContent:id_ckbTMPubReason", "5"], #Inhaberänderungen
|
277
|
+
["id_swissreg:mainContent:id_ckbTMPubReason", "6"], #Vertreteränderungen
|
278
|
+
["id_swissreg:mainContent:id_ckbTMPubReason", "7"], #Lizenzänderungen
|
279
|
+
["id_swissreg:mainContent:id_ckbTMPubReason", "8"], #Weitere Registeränderungen
|
280
|
+
# ["id_swissreg:mainContent:id_ckbTMEmptyHits", "0"], # Leere Trefferliste anzeigen
|
281
|
+
|
282
|
+
# "id_swissreg:mainContent:id_cbxFormatChoice" 2 = Publikationsansicht 1 = Registeransicht
|
283
|
+
["id_swissreg:mainContent:id_cbxFormatChoice", "1"],
|
284
|
+
["id_swissreg:mainContent:id_cbxHitsPerPage", HitsPerPage], # Treffer pro Seite
|
285
|
+
]
|
286
|
+
TMChoiceFields.each{ | field2display| @criteria << ["id_swissreg:mainContent:id_ckbTMChoice", field2display] }
|
287
|
+
# id_swissreg:mainContent:id_ckbTMChoice tm_lbl_tm_text
|
288
|
+
puts "Marke ist #{marke}" if marke # Wortlaut der Marke
|
289
|
+
puts "Hinterlegungsdatum ist #{timespan}" if $VERBOSE and timespan
|
290
|
+
puts "nummer ist #{timespan}" if nummer
|
291
|
+
@criteria << ["id_swissreg:mainContent:sub_fieldset:id_submit", "suchen"]
|
292
|
+
@criteria << ["id_swissreg_SUBMIT", "1"]
|
293
|
+
@criteria << ["id_swissreg:_idcl", ""]
|
294
|
+
@criteria << ["id_swissreg:_link_hidden_", ""]
|
295
|
+
@criteria << ["javax.faces.ViewState", @state]
|
296
|
+
|
297
|
+
if true # UseClick
|
298
|
+
# Swissreg::setAllInputValue(@agent.page.forms.first, @criteria)
|
299
|
+
# setPublicationStates(@agent.page.form('id_swissreg'))
|
300
|
+
@agent.page.form('id_swissreg').click_button(@agent.page.form('id_swissreg').button_with(:value => "suchen"))
|
301
|
+
else # use post
|
302
|
+
writeResponse("#{LogDir}/vor_post_sr3.jsp")
|
303
|
+
@agent.post(Sr3, @criteria)
|
304
|
+
writeResponse("#{LogDir}/erweiterte_suche.html")
|
305
|
+
@agent.page.form('id_swissreg').click_button(@agent.page.form('id_swissreg').button_with(:value => "suchen"))
|
306
|
+
end
|
222
307
|
# Hier sollten eigentlich alle Felder auftauchen, wie
|
223
|
-
# Marke=asp*; Land (Inhaber/in)=Schweiz; Markenart=Alle; Markentyp=Alle; Farbanspruch=Alle; Publikationsgrund= Neueintragungen, Berichtigungen, Verlängerungen, Löschungen, Inhaberänderungen, Vertreteränderungen, Lizenzänderungen, Weitere Registeränderungen; Status= hängige Gesuche, aktive Marken
|
224
|
-
writeResponse(
|
308
|
+
# Marke=asp*; Land (Inhaber/in)=Schweiz; Markenart=Alle; Markentyp=Alle; Farbanspruch=Alle; Publikationsgrund= Neueintragungen, Berichtigungen, Verlängerungen, Löschungen, Inhaberänderungen, Vertreteränderungen, Lizenzänderungen, Weitere Registeränderungen; Status= hängige Gesuche, aktive Marken
|
309
|
+
writeResponse("#{LogDir}/resultate.jsp")
|
225
310
|
end
|
226
311
|
|
227
|
-
|
312
|
+
# the number is only passed to facilitate debugging
|
313
|
+
# lines are the address lines
|
314
|
+
def Swissreg::parseAddress(number, lines)
|
228
315
|
ort = nil
|
229
316
|
plz = nil
|
230
317
|
|
231
318
|
# Search for plz/address
|
232
|
-
1.upto(
|
319
|
+
1.upto(lines.length-1).each {
|
233
320
|
|cnt|
|
234
|
-
if m = AddressRegexp.match(
|
235
|
-
|
321
|
+
if m = AddressRegexp.match(lines[cnt])
|
322
|
+
lines[cnt+1] = nil
|
236
323
|
plz = m[1]; ort = m[2]
|
237
|
-
cnt.upto(MaxZeilen-1).each{ |cnt2|
|
324
|
+
cnt.upto(MaxZeilen-1).each{ |cnt2| lines[cnt2] = nil }
|
238
325
|
break
|
239
326
|
end
|
240
327
|
}
|
241
328
|
unless plz
|
242
|
-
puts "Achtung! Konnte Marke #{
|
329
|
+
puts "Achtung! Konnte Marke #{number} mit Inhaber #{lines.inspect} nicht parsen" if $VERBOSE
|
243
330
|
return nil, nil, nil, nil, nil, nil, nil, nil
|
244
331
|
end
|
245
332
|
# search for lines with only digits
|
246
333
|
found = false
|
247
|
-
1.upto(
|
334
|
+
1.upto(lines.length-1).each {
|
248
335
|
|cnt|
|
249
|
-
break if
|
250
|
-
if /^\d*$/.match(
|
336
|
+
break if lines[cnt] == nil
|
337
|
+
if /^\d*$/.match(lines[cnt])
|
251
338
|
found = true
|
252
|
-
if
|
339
|
+
if lines[cnt+1] == nil
|
253
340
|
found = 'before'
|
254
|
-
|
255
|
-
|
341
|
+
lines[cnt-1] += LineSplit + lines[cnt]
|
342
|
+
lines.delete_at(cnt)
|
256
343
|
else
|
257
344
|
found = 'after'
|
258
|
-
|
259
|
-
|
345
|
+
lines[cnt] += LineSplit + lines[cnt+1]
|
346
|
+
lines.delete_at(cnt+1)
|
260
347
|
end
|
261
348
|
end
|
262
349
|
}
|
263
|
-
puts "found #{found}: #{
|
264
|
-
return
|
350
|
+
puts "found #{found}: #{lines.inspect}" if found and $VERBOSE
|
351
|
+
return lines[0], lines[1], lines[2], lines[3], lines[4], plz, ort
|
265
352
|
end
|
266
353
|
|
267
|
-
def
|
268
|
-
|
269
|
-
|
270
|
-
|
271
|
-
|
272
|
-
|
273
|
-
|
274
|
-
|
275
|
-
|
276
|
-
|
277
|
-
|
278
|
-
|
279
|
-
|
280
|
-
|
281
|
-
|
282
|
-
|
283
|
-
|
284
|
-
|
285
|
-
|
286
|
-
|
287
|
-
|
288
|
-
|
289
|
-
|
290
|
-
|
291
|
-
|
292
|
-
|
293
|
-
|
294
|
-
|
295
|
-
|
296
|
-
|
297
|
-
|
298
|
-
|
299
|
-
|
300
|
-
|
301
|
-
|
354
|
+
# Collects the name and value of every <input> element of a parsed page.
#
# body:: parsed HTML page; only needs to respond to search('input')
#
# Returns an array of [name, value] string pairs, one per <input> element,
# in the order in which search returned them. Missing attributes become "".
def Swissreg::getInputValuesFromPage(body) # body of HTML page
  body.search('input').map do |node|
    [node.attribute('name').to_s, node.attribute('value').to_s]
  end
end
|
362
|
+
|
363
|
+
# return value of an array of POST values
|
364
|
+
# Looks up a key in an array of POST values.
#
# values:: array of [name, value] pairs
# key::    name to look for (compared with eql?)
#
# Returns the value of the first pair whose name matches, or nil if no pair
# matches.
def Swissreg::inputValue(values, key)
  hit = values.find { |pair| key.eql?(pair[0]) }
  hit ? hit[1] : nil
end
|
370
|
+
|
371
|
+
# set value for a key of an array of POST values
|
372
|
+
# Overwrites, in place, the value of the first pair whose name matches key.
#
# values::   array of [name, value] pairs; the matching pair is mutated
# key::      name to look for (compared with eql?)
# newValue:: value stored into the matching pair
#
# Nothing happens when no pair matches. Always returns nil.
def Swissreg::setInputValue(values, key, newValue)
  target = values.find { |pair| key.eql?(pair[0]) }
  target[1] = newValue if target
  nil
end
|
381
|
+
|
382
|
+
# Copies a list of POST values into the fields of a form.
#
# form::   form object; field(:name => ...) is called on it with a block
# values:: array of [name, value] entries; both are converted with to_s
#
# Entries for which the form yields nil (no such field) are skipped.
# Returns values.
def Swissreg::setAllInputValue(form, values)
  values.each do |entry|
    field_name = entry[0].to_s
    form.field(:name => field_name) do |elem|
      # the value is only converted and assigned when the form has that field
      elem.value = entry[1].to_s unless elem == nil
    end
  end
end
|
303
391
|
|
304
|
-
def
|
305
|
-
|
306
|
-
|
307
|
-
|
308
|
-
|
309
|
-
|
310
|
-
|
311
|
-
|
312
|
-
|
313
|
-
|
314
|
-
|
315
|
-
|
316
|
-
|
317
|
-
|
318
|
-
|
319
|
-
|
320
|
-
|
321
|
-
|
322
|
-
|
392
|
+
def Swissreg::getMarkenInfoFromDetail(doc)
|
393
|
+
marke = nil
|
394
|
+
number = 'invalid'
|
395
|
+
bezeichnung = nil
|
396
|
+
inhaber = nil
|
397
|
+
hinterlegungsdatum = nil
|
398
|
+
zeilen = []
|
399
|
+
doc.xpath("//html/body/form/div/div/fieldset/div/table/tbody/tr").each{
|
400
|
+
|x|
|
401
|
+
if x.children.first.text.eql?('Marke')
|
402
|
+
if x.children[1].text.index('Markenabbildung')
|
403
|
+
# we must fetch the link to the image
|
404
|
+
bezeichnung = x.children[1].elements.first.attribute('href').text
|
405
|
+
else # we got a trademark
|
406
|
+
bezeichnung = x.children[1].text
|
407
|
+
end
|
408
|
+
end
|
409
|
+
if x.children.first.text.eql?('Inhaber/in')
|
410
|
+
inhaber = />(.*)<\/td/.match(x.children[1].to_s)[1].gsub('<br>',LineSplit)
|
411
|
+
x.children[1].children.each{ |child| zeilen << child.text unless child.text.length == 0 } # avoid adding <br>
|
412
|
+
end
|
413
|
+
hinterlegungsdatum = x.children[1].text if x.children.first.text.eql?('Hinterlegungsdatum')
|
414
|
+
number = x.children[1].text if x.children.first.text.eql?('Gesuch Nr.')
|
323
415
|
}
|
324
|
-
|
325
|
-
|
326
|
-
|elem|
|
327
|
-
bezeichnung = elem.elements[1].text
|
328
|
-
land = elem.elements[4].text
|
329
|
-
next unless /#{DefaultCountry}/i.match(land)
|
330
|
-
inhaber = elem.elements[3].text
|
331
|
-
nummer = elem.elements[2].text
|
332
|
-
if bezeichnung.length == 0
|
333
|
-
bezeichnung = elem.children[1].children[0].children[0].children[0].attribute('src').to_s
|
334
|
-
end
|
335
|
-
zeile_1, zeile_2, zeile_3, zeile_4, zeile_5, plz, ort = parseAddress(nummer, inhaber.split(LineSplit))
|
336
|
-
if zeile_1
|
337
|
-
@results << Marke.new(bezeichnung, elem.elements[2].text, elem.elements[3].text, land, elem.elements[5].text,
|
338
|
-
zeile_1, zeile_2, zeile_3, zeile_4, zeile_5, plz, ort )
|
339
|
-
else
|
340
|
-
nrFailures += 1
|
341
|
-
@errors[nummer] = Marke.new(bezeichnung, elem.elements[2].text, elem.elements[3].text, land, elem.elements[5].text,
|
342
|
-
zeile_1, zeile_2, zeile_3, zeile_4, zeile_5, plz, ort )
|
343
|
-
end
|
344
|
-
} if doc.xpath(path_name)
|
345
|
-
if hasNext
|
346
|
-
@path = "/srclient/faces/jsp/trademark/sr30.jsp"
|
347
|
-
puts "Calling sub #{counter} with #{@path}" if $VERBOSE
|
348
|
-
data = [
|
349
|
-
["autoScroll", "0,0"],
|
350
|
-
["id_swissreg:mainContent:id_sub_options_result:sub_fieldset:id_cbxHitsPerPage", @hitsPerPage],
|
351
|
-
# ["id_swissreg:mainContent:vivian", "TRADEMARK REGISTER SEARCH TIMES: QUERY=[20] SELECT=[823] SERVER=[846] DELEGATE=[861] (HITS=[96])"],
|
352
|
-
["id_swissreg_SUBMIT", "1"],
|
353
|
-
["id_swissreg:_idcl", "id_swissreg:mainContent:scroll_1idx#{counter}"],
|
354
|
-
["id_swissreg:mainContent:scroll_1", "idx#{counter}"],
|
355
|
-
["tmMainId", ""],
|
356
|
-
["id_swissreg:_link_hidden_ "],
|
357
|
-
["javax.faces.ViewState", @state],
|
358
|
-
]
|
359
|
-
TMChoiceFields.each{ | field2display| data << ["id_swissreg:mainContent:id_sub_options_result:id_ckbTMChoice", field2display] }
|
360
|
-
response = @agent.post(Base_uri + @path, data)
|
361
|
-
writeResponse("mechanize/resultate_#{counter}.html")
|
362
|
-
checkErrors(response.body)
|
363
|
-
fetchresult(nil, counter)
|
364
|
-
else
|
365
|
-
puts "Es gab #{nrFailures} Fehler beim Lesen von #{filename}" if $VERBOSE
|
366
|
-
puts "Fand #{@results.size} Datensätze für die Zeitspanne '#{@timespan}'. Von #{@errors.size} muss die Adresse noch geholt werden."
|
367
|
-
end
|
416
|
+
zeile_1, zeile_2, zeile_3, zeile_4, zeile_5, plz, ort = Swissreg::parseAddress(number, zeilen)
|
417
|
+
marke = Marke.new(bezeichnung, number, inhaber, DefaultCountry, hinterlegungsdatum, zeile_1, zeile_2, zeile_3, zeile_4, zeile_5, plz, ort )
|
368
418
|
end
|
369
|
-
|
370
|
-
def emitCsv(filename='ausgabe.csv')
|
371
|
-
return if
|
419
|
+
|
420
|
+
def Swissreg::emitCsv(results, filename='ausgabe.csv')
|
421
|
+
return if results == nil or results.size == 0
|
372
422
|
if /^1\.8/.match(RUBY_VERSION)
|
373
423
|
ausgabe = File.open(filename, 'w+')
|
374
424
|
# Write header
|
375
425
|
s=''
|
376
|
-
|
426
|
+
results[0].members.each { |member| s += member + ';' }
|
377
427
|
ausgabe.puts s.chop
|
378
428
|
# write all line
|
379
|
-
|
429
|
+
results.each{
|
380
430
|
|result|
|
381
431
|
s = ''
|
382
432
|
result.members.each{ |member|
|
@@ -391,28 +441,155 @@ module Brand2csv
|
|
391
441
|
ausgabe.puts s.chop
|
392
442
|
}
|
393
443
|
else
|
394
|
-
CSV.open(filename, 'w', :headers
|
444
|
+
CSV.open(filename, 'w', :headers=>results[0].members,
|
395
445
|
:write_headers => true,
|
396
446
|
:col_sep => ';'
|
397
|
-
) do |csv|
|
447
|
+
) do |csv| results.each{ |x| csv << x }
|
398
448
|
end
|
399
449
|
end
|
400
450
|
end
|
451
|
+
|
452
|
+
# One page of the simplified ("vereinfachte") hit list, parsed into the numbers
# and form values that are needed to page through the result and to request
# the detail view of each hit.
class Swissreg::Vereinfachte
  attr_reader :links2details, :trademark_search_id, :inputData, :firstHit, :nrHits, :nrSubPages, :pageNr
  # German hit summary, e.g. "Seite 1 von 26 - Treffer 1-250 von 6'349".
  # Captures: page number, number of pages, first hit, last hit, total hits.
  HitRegexpDE = /Seite (\d*) von ([\d']*) - Treffer ([\d']*)-([\d']*) von ([\d']*)/
  # Name of the input whose value is kept as trademark_search_id
  Vivian = 'id_swissreg:mainContent:vivian'

  # Parse a HTML page from swissreg sr3.jsp
  # There we find info like "Seite 1 von 26 - Treffer 1-250 von 6'349" and upto 250 links to details
  #
  # doc:: parsed page; must respond to text and search
  #
  # Raises ArgumentError when the hit summary is missing or when a detail
  # link carries no tmMainId.
  def initialize(doc)
    summary = HitRegexpDE.match(doc.text)
    unless summary
      raise ArgumentError, "Could not find the hit summary ('Seite .. von .. - Treffer ..') in the page"
    end
    @pageNr     = count_from(summary[1])
    @nrSubPages = count_from(summary[2])
    @firstHit   = count_from(summary[3])
    @nrHits     = count_from(summary[5])

    # Read all <input> elements once and take both hidden values from that list
    @inputData = Swissreg::getInputValuesFromPage(doc)
    @trademark_search_id = Swissreg::inputValue(@inputData, Vivian)
    @state = Swissreg::inputValue(@inputData, 'javax.faces.ViewState')

    # The tmMainId of each hit is taken from the onclick handler of its detail link
    @links2details = []
    doc.search('a').each do |link|
      next unless /d_swissreg:mainContent:data:(\d*):tm_no_detail:id_detail/i.match(link.attribute('id'))
      id_match = /'tmMainId','(\d*)'/.match(link.attribute('onclick').to_s)
      unless id_match
        raise ArgumentError, "Detail link #{link.attribute('id')} has no tmMainId in its onclick handler"
      end
      @links2details << id_match[1].to_i
    end
  end

  # POST values which request the detail view of one hit.
  #
  # position:: index used in the id of the detail link (data:<position>:...)
  # id::       tmMainId of the hit
  def getPostDataForDetail(position, id)
    [
      [ "autoScroll", "0,0"],
      [ "id_swissreg:mainContent:sub_options_result:sub_fieldset:cbxHitsPerPage", "#{HitsPerPage}"],
      [ "id_swissreg:mainContent:vivian", @trademark_search_id],
      [ "id_swissreg_SUBMIT", "1"],
      [ "id_swissreg:_idcl", "id_swissreg:mainContent:data:#{position}:tm_no_detail:id_detail", ""],
      [ "id_swissreg:mainContent:scroll_1", ""],
      [ "tmMainId", "#{id}"],
      # NOTE(review): trailing blank in the name and no value - kept as is, confirm whether intended
      [ "id_swissreg:_link_hidden_ "],
      [ "javax.faces.ViewState", @state]
    ]
  end

  # POST values which request another sub page of the hit list.
  #
  # pageNr:: number of the sub page, sent as "idx<pageNr>"
  def getPostDataForSubpage(pageNr)
    [
      [ "autoScroll", "0,0"],
      [ "id_swissreg:mainContent:sub_options_result:sub_fieldset:cbxHitsPerPage", "#{HitsPerPage}"],
      [ "id_swissreg:mainContent:vivian", @trademark_search_id],
      [ "id_swissreg_SUBMIT", "1"],
      [ "id_swissreg:_idcl", "id_swissreg:mainContent:scroll_1idx#{pageNr}"],
      [ "id_swissreg:mainContent:scroll_1", "idx#{pageNr}"],
      [ "tmMainId", ""],
      # NOTE(review): trailing blank in the name and no value - kept as is, confirm whether intended
      [ "id_swissreg:_link_hidden_ "],
      [ "javax.faces.ViewState", @state]
    ]
  end

  private

  # Converts a count like "6'349" into an Integer. Every apostrophe is
  # dropped, so "1'234'567" becomes 1234567 and not 1234.
  def count_from(text)
    text.delete("'").to_i
  end
end
|
514
|
+
|
515
|
+
# Fetches the detail view of every hit on the current page of the simplified
# hit list, appends one entry per hit to @results and then continues
# recursively with the next sub page.
#
# filename:: saved HTML page to parse instead of the current page of @agent;
#            ignored when nil or when the file does not exist
# pageNr::   number of the sub page which is being processed (first page is 1)
def getAllHits(filename = nil, pageNr = 1)
  if filename && File.exist?(filename)
    # block form, so the file handle is closed after parsing
    doc = File.open(filename) { |file| Nokogiri::Slop(file) }
  else
    body = @agent.page.body
    body.force_encoding('utf-8')
    doc = Nokogiri::Slop(body)
    filename = "#{LogDir}/vereinfachte_#{pageNr}.html"
    writeResponse(filename)
  end

  einfach = Swissreg::Vereinfachte.new(doc)
  puts "#{Time.now.strftime("%H:%M:%S")} status: fetch #{pageNr} of #{einfach.nrSubPages}"
  subPage2Fetch = pageNr + 1
  data2 = einfach.getPostDataForSubpage(subPage2Fetch).clone

  # firstHit is 1-based ("Treffer 1-250 von 6'349"), therefore the hits from
  # firstHit up to nrHits are nrHits - firstHit + 1 items. Never ask for more
  # items than there are detail links on the page (e.g. an empty hit list).
  remainingHits = einfach.nrHits - einfach.firstHit + 1
  itemsToFetch = [HitsPerPage, remainingHits, einfach.links2details.size].min

  0.upto(itemsToFetch-1) {
    |position|
    id = einfach.links2details[position]
    nextId = einfach.firstHit.to_i - 1 + position.to_i
    data3 = einfach.getPostDataForDetail(nextId, id)
    Swissreg::setAllInputValue(@agent.page.forms.first, data3)
    @agent.page.forms.first.submit
    filename = "#{LogDir}/vereinfachte_detail_#{einfach.firstHit + position}.html"
    writeResponse(filename)
    matchResult = @agent.page.search('h1').text
    unless /Detailansicht zu (Gesuch|Marke)/.match(matchResult)
      puts matchResult
      puts "Attention did not find 'Detailansicht' in #{filename}. Someting went wrong!"
      break
    end
    @results << Swissreg::getMarkenInfoFromDetail(Nokogiri::Slop(@agent.page.body))
    # return to the hit list before the next detail is requested
    @agent.back
  }
  filename = "#{LogDir}/vereinfachte_#{pageNr}_back.html"
  writeResponse(filename)

  # Continue while this is not the last sub page. Comparing with
  # nrSubPages-1 would stop one page early and never request the last one.
  if pageNr < einfach.nrSubPages
    puts "Fetching page #{subPage2Fetch} of #{einfach.nrSubPages}" if $VERBOSE
    Swissreg::setAllInputValue(@agent.page.forms.first, data2)
    @agent.page.forms.first.submit
    getAllHits(nil, subPage2Fetch)
    @agent.back
  end

end
|
564
|
+
|
565
|
+
# Starts collecting the hits of a search. If the page offers the button
# "Vereinfachte Trefferliste anzeigen" the form is submitted with it first,
# then getAllHits is called.
#
# filename:: saved HTML page to parse instead of the current page of @agent;
#            when the file does not exist the current page is parsed and
#            passed to writeResponse under this name
# counter::  page number handed over to getAllHits
def fetchresult(filename = "#{LogDir}/fetch_1.html", counter = 1)
  if filename && File.exist?(filename)
    # block form, so the file handle is closed after parsing
    doc = File.open(filename) { |file| Nokogiri::Slop(file) }
  else
    body = @agent.page.body
    body.force_encoding('utf-8')
    doc = Nokogiri::Slop(body)
    writeResponse(filename)
  end

  if /Vereinfachte Trefferliste anzeigen/i.match(doc.text)
    form = @agent.page.forms.first
    button = form.button_with(:value => /Vereinfachte/i)
    # submit the form using that button
    @agent.submit(form, button)
    filename = "#{LogDir}/vereinfacht.html"
    writeResponse(filename)
  end
  getAllHits(filename, counter)
end
|
585
|
+
|
408
586
|
end # class Swissreg
|
409
587
|
|
410
|
-
def Brand2csv::run(timespan)
|
411
|
-
session = Swissreg.new(timespan)
|
588
|
+
def Brand2csv::run(timespan, marke = 'a*')
|
589
|
+
session = Swissreg.new(timespan, marke)
|
412
590
|
session.parse_swissreg
|
413
591
|
session.fetchresult
|
414
|
-
session.
|
415
|
-
session.emitCsv("#{timespan}.csv")
|
592
|
+
Swissreg::emitCsv(session.results, "#{timespan}.csv")
|
416
593
|
end
|
417
594
|
|
418
595
|
end # module Brand2csv
|
data/lib/brand2csv/version.rb
CHANGED
data/protocol.2013.05.21.textile
CHANGED
@@ -1,3 +1,14 @@
|
|
1
|
+
* Work on 2013.05.27
|
2
|
+
|
3
|
+
* Should be able to fetch up to 10'000 hits.
|
4
|
+
** Problems: Seems to hang silently after a few thousand hits
|
5
|
+
** Cannot limit search to only "Hängige Gesuche" and "Aktive Marken"
|
6
|
+
|
7
|
+
* Work on 2013.05.26
|
8
|
+
|
9
|
+
* Added first rake tests to speed up work for 10'000 hits
|
10
|
+
* Added second parameter to limit according to trademark name as passing a timespan is not honoured by swissreg
|
11
|
+
|
1
12
|
* Work on 2013.05.22
|
2
13
|
|
3
14
|
** Use timespan as filename
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: brand2csv
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.1.
|
4
|
+
version: 0.1.6
|
5
5
|
prerelease:
|
6
6
|
platform: ruby
|
7
7
|
authors:
|
@@ -9,7 +9,7 @@ authors:
|
|
9
9
|
autorequire:
|
10
10
|
bindir: bin
|
11
11
|
cert_chain: []
|
12
|
-
date: 2013-05-
|
12
|
+
date: 2013-05-27 00:00:00.000000000 Z
|
13
13
|
dependencies:
|
14
14
|
- !ruby/object:Gem::Dependency
|
15
15
|
name: mechanize
|
@@ -124,6 +124,7 @@ files:
|
|
124
124
|
- spike.rb
|
125
125
|
- spike_mechanize_swissreg.rb
|
126
126
|
- spike_watir.rb
|
127
|
+
- .gemtest
|
127
128
|
homepage: https://github.com/zdavatz/brand2csv
|
128
129
|
licenses: []
|
129
130
|
post_install_message:
|