brand2csv 0.1.5 → 0.1.6
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/.gemtest +0 -0
- data/.gitignore +1 -0
- data/History.txt +7 -0
- data/bin/brand2csv +2 -2
- data/lib/brand2csv.rb +334 -157
- data/lib/brand2csv/version.rb +1 -1
- data/protocol.2013.05.21.textile +11 -0
- metadata +3 -2
data/.gemtest
ADDED
File without changes
|
data/.gitignore
CHANGED
data/History.txt
CHANGED
@@ -1,3 +1,10 @@
|
|
1
|
+
=== 0.1.6 27.05.2013
|
2
|
+
|
3
|
+
* Added support for parsing results up to 10'000 hits
|
4
|
+
* Added rspec
|
5
|
+
* Added (undocumented) second parameter to limit according to the trademark name
|
6
|
+
* Added .travis.yml to enable running tests via http://about.travis-ci.org/docs/
|
7
|
+
|
1
8
|
=== 0.1.5 23.05.2013
|
2
9
|
|
3
10
|
* Run under Ruby 1.8.7
|
data/bin/brand2csv
CHANGED
@@ -37,13 +37,13 @@ rescue OptionParser::MissingArgument,
|
|
37
37
|
end
|
38
38
|
|
39
39
|
|
40
|
-
unless args.size
|
40
|
+
unless args.size >= 1
|
41
41
|
puts help
|
42
42
|
exit 1
|
43
43
|
end
|
44
44
|
|
45
45
|
begin
|
46
|
-
Brand2csv::run(args[0])
|
46
|
+
Brand2csv::run(args[0], args[1])
|
47
47
|
rescue Interrupt
|
48
48
|
puts "Unterbrochen. Breche mit Fehler ab"
|
49
49
|
exit 1
|
data/lib/brand2csv.rb
CHANGED
@@ -27,6 +27,11 @@ module Brand2csv
|
|
27
27
|
]
|
28
28
|
Base_uri = 'https://www.swissreg.ch'
|
29
29
|
Start_uri = "#{Base_uri}/srclient/faces/jsp/start.jsp"
|
30
|
+
Sr1 = "#{Base_uri}/srclient/faces/jsp/trademark/sr1.jsp"
|
31
|
+
Sr2 = "#{Base_uri}/srclient/faces/jsp/trademark/sr2.jsp"
|
32
|
+
Sr3 = "#{Base_uri}/srclient/faces/jsp/trademark/sr3.jsp"
|
33
|
+
Sr30 = "#{Base_uri}/srclient/faces/jsp/trademark/sr30.jsp"
|
34
|
+
Sr300 = "#{Base_uri}/srclient/faces/jsp/trademark/sr300.jsp"
|
30
35
|
AddressRegexp = /^(\d\d\d\d)\W*(.*)/
|
31
36
|
LineSplit = ', '
|
32
37
|
DefaultCountry = 'Schweiz'
|
@@ -77,20 +82,21 @@ module Brand2csv
|
|
77
82
|
|
78
83
|
|
79
84
|
MaxZeilen = 5
|
85
|
+
HitsPerPage = 250
|
86
|
+
LogDir = 'mechanize'
|
80
87
|
|
81
|
-
attr_accessor :marke
|
88
|
+
attr_accessor :marke, :results, :timespan
|
82
89
|
|
83
|
-
def initialize(timespan)
|
90
|
+
def initialize(timespan, marke = nil)
|
84
91
|
@timespan = timespan
|
85
|
-
@marke =
|
92
|
+
@marke = marke
|
86
93
|
@number = nil
|
87
|
-
@hitsPerPage = 100
|
88
94
|
|
89
95
|
@agent = Mechanize.new { |agent|
|
90
96
|
agent.user_agent = 'Mozilla/5.0 (X11; Linux x86_64; rv:16.0) Gecko/20100101 Firefox/16.0'
|
91
97
|
agent.verify_mode = OpenSSL::SSL::VERIFY_NONE
|
92
|
-
FileUtils.makedirs
|
93
|
-
agent.log = Logger.new("
|
98
|
+
FileUtils.makedirs(LogDir) if $VERBOSE or defined?(RSpec)
|
99
|
+
agent.log = Logger.new("#{LogDir}/mechanize.log") if $VERBOSE
|
94
100
|
}
|
95
101
|
@results = []
|
96
102
|
@errors = Hash.new
|
@@ -109,7 +115,6 @@ module Brand2csv
|
|
109
115
|
# @marke = "*WEIH*"
|
110
116
|
@timespan = nil
|
111
117
|
end
|
112
|
-
@marke = 'asp*'
|
113
118
|
end
|
114
119
|
|
115
120
|
def writeResponse(filename)
|
@@ -141,6 +146,8 @@ module Brand2csv
|
|
141
146
|
}
|
142
147
|
end
|
143
148
|
|
149
|
+
UseClick = false
|
150
|
+
|
144
151
|
def parse_swissreg(timespan = @timespan, # sollte 377 Treffer ergeben, für 01.06.2007-10.06.2007, 559271 wurde in diesem Zeitraum registriert
|
145
152
|
marke = @marke,
|
146
153
|
nummer =@number) # nummer = "559271" ergibt genau einen treffer
|
@@ -150,7 +157,7 @@ module Brand2csv
|
|
150
157
|
# HTTP status code is also strange at redirection.
|
151
158
|
@agent.get Start_uri # get a cookie for the session
|
152
159
|
content = @agent.get_file Start_uri
|
153
|
-
writeResponse(
|
160
|
+
writeResponse("#{LogDir}/start.jsp")
|
154
161
|
# get only view state
|
155
162
|
@state = view_state(content)
|
156
163
|
data = [
|
@@ -160,9 +167,13 @@ module Brand2csv
|
|
160
167
|
["id_swissreg:_idcl", "id_swissreg_sub_nav_ipiNavigation_item0"],
|
161
168
|
["javax.faces.ViewState", @state],
|
162
169
|
]
|
163
|
-
|
164
|
-
|
165
|
-
|
170
|
+
if UseClick
|
171
|
+
Swissreg::setAllInputValue(@agent.page.forms.first, data)
|
172
|
+
@agent.page.forms.first.submit
|
173
|
+
else
|
174
|
+
@agent.post(Start_uri, data)
|
175
|
+
end
|
176
|
+
writeResponse("#{LogDir}/start2.jsp")
|
166
177
|
# Navigation with mechanize like this fails and returns to the home page
|
167
178
|
# @agent.page.link_with(:id => "id_swissreg_sub_nav_ipiNavigation_item0").click
|
168
179
|
|
@@ -174,20 +185,34 @@ module Brand2csv
|
|
174
185
|
["javax.faces.ViewState", @state],
|
175
186
|
]
|
176
187
|
# sr1 ist die einfache suche, sr3 die erweiterte Suche
|
177
|
-
|
178
|
-
|
179
|
-
|
188
|
+
if UseClick
|
189
|
+
Swissreg::setAllInputValue(@agent.page.forms.first, data)
|
190
|
+
@agent.page.forms.first.submit
|
191
|
+
else
|
192
|
+
@agent.post(Sr3, data)
|
193
|
+
end
|
194
|
+
writeResponse("#{LogDir}/sr3.jsp")
|
180
195
|
|
181
196
|
# Fill out form values
|
182
|
-
|
183
|
-
|
184
|
-
|
185
|
-
|
186
|
-
|
187
|
-
|
188
|
-
|
189
|
-
|
190
|
-
|
197
|
+
selectedPublicationStates = ['1', '3']
|
198
|
+
@agent.page.form('id_swissreg').checkboxes.each{
|
199
|
+
|box|
|
200
|
+
TMChoiceFields.index(box.value) ? box.check : box.uncheck
|
201
|
+
# box.check if $VERBOSE
|
202
|
+
# select all publication reasons
|
203
|
+
box.check if /id_ckbTMPubReason/.match(box.name)
|
204
|
+
# select all publication states or accept default states
|
205
|
+
# box.check if /id_ckbTMState/.match(box.name)
|
206
|
+
if /id_ckbTMState/.match(box.name)
|
207
|
+
if selectedPublicationStates.index(box.value)
|
208
|
+
puts "Select id_ckbTMState #{box.value}" if $VERBOSE
|
209
|
+
box.check
|
210
|
+
else
|
211
|
+
box.uncheck
|
212
|
+
end
|
213
|
+
end
|
214
|
+
}
|
215
|
+
if $VERBOSE and false # fill all details for marke 567120
|
191
216
|
# Felder, welche nie bei der Antwort auftauchen
|
192
217
|
@agent.page.form('id_swissreg').field(:name => 'id_swissreg:mainContent:id_txf_licensee') { |x| x.value = 'BBB Inc*' }
|
193
218
|
@agent.page.form('id_swissreg').field(:name => 'id_swissreg:mainContent:id_txf_expiryDate') { |x| x.value = timespan }
|
@@ -203,13 +228,13 @@ module Brand2csv
|
|
203
228
|
end
|
204
229
|
|
205
230
|
# Feld, welches im Resultat angezeigt wird
|
206
|
-
@agent.page.form('id_swissreg').field(:name => 'id_swissreg:mainContent:id_txf_tm_text') { |x| x.value =
|
231
|
+
@agent.page.form('id_swissreg').field(:name => 'id_swissreg:mainContent:id_txf_tm_text') { |x| x.value = @marke}
|
207
232
|
|
208
233
|
# Felder, welches nie bei der Antwort auftaucht. Ein Versuch .gsub('.', '%2E') schlug ebenfalls fehl!
|
209
234
|
@agent.page.form('id_swissreg').field(:name => 'id_swissreg:mainContent:id_txf_appDate') { |x| x.value = timespan}
|
210
235
|
|
211
236
|
# Feld, welches ebenfalls berücksichtigt wird
|
212
|
-
@agent.page.form('id_swissreg').field(:name => 'id_swissreg:mainContent:id_cbxHitsPerPage') { |x| x.value =
|
237
|
+
@agent.page.form('id_swissreg').field(:name => 'id_swissreg:mainContent:id_cbxHitsPerPage') { |x| x.value = HitsPerPage }
|
213
238
|
@agent.page.form('id_swissreg').field(:name => 'autoScroll') { |x| x.value = '0,0' }
|
214
239
|
|
215
240
|
if $VERBOSE
|
@@ -218,165 +243,190 @@ module Brand2csv
|
|
218
243
|
@agent.page.form('id_swissreg').checkboxes.each{ |box| puts "#{box.name} checked? #{box.checked}"}
|
219
244
|
end
|
220
245
|
|
221
|
-
|
246
|
+
@criteria = [
|
247
|
+
["autoScroll", "0,829"],
|
248
|
+
["id_swissreg:_link_hidden_", ""],
|
249
|
+
["id_swissreg:mainContent:id_ckbTMState", "1"], # "Hängige Gesuche 1
|
250
|
+
# ["id_swissreg:mainContent:id_ckbTMState", "2"], # "Gelöschte Gesuche 2
|
251
|
+
["id_swissreg:mainContent:id_ckbTMState", "3"], # aktive Marken 3
|
252
|
+
# ["id_swissreg:mainContent:id_ckbTMState", "4"], # gelöschte Marken 4
|
253
|
+
["id_swissreg:mainContent:id_cbxCountry", "_ALL"], # Auswahl Länder _ALL
|
254
|
+
# ["id_swissreg:mainContent:id_txf_tm_no", ""], # Marken Nr
|
255
|
+
["id_swissreg:mainContent:id_txf_tm_no", nummer],# Marken Nr
|
256
|
+
["id_swissreg:mainContent:id_txf_app_no", ""], # Gesuch Nr.
|
257
|
+
["id_swissreg:mainContent:id_txf_tm_text", marke],
|
258
|
+
["id_swissreg:mainContent:id_txf_applicant", ""], # Inhaber/in
|
259
|
+
["id_swissreg:mainContent:id_txf_agent", ""], # Vertreter/in
|
260
|
+
["id_swissreg:mainContent:id_txf_licensee", ""], # Lizenznehmer
|
261
|
+
["id_swissreg:mainContent:id_txf_nizza_class", ""], # Nizza Klassifikation Nr.
|
262
|
+
# ["id_swissreg:mainContent:id_txf_appDate", timespan], # Hinterlegungsdatum
|
263
|
+
["id_swissreg:mainContent:id_txf_appDate", timespan] ,
|
264
|
+
["id_swissreg:mainContent:id_txf_expiryDate", ""], # Ablauf Schutzfrist
|
265
|
+
# Markenart: Individualmarke 1 Kollektivmarke 2 Garantiemarke 3
|
266
|
+
["id_swissreg:mainContent:id_cbxTMTypeGrp", "_ALL"], # Markenart
|
267
|
+
["id_swissreg:mainContent:id_cbxTMForm", "_ALL"], # Markentyp
|
268
|
+
["id_swissreg:mainContent:id_cbxTMColorClaim", "_ALL"], # Farbanspruch
|
269
|
+
["id_swissreg:mainContent:id_txf_pub_date", ""], # Publikationsdatum
|
270
|
+
|
271
|
+
# info zu Publikationsgrund id_swissreg:mainContent:id_ckbTMPubReason
|
272
|
+
["id_swissreg:mainContent:id_ckbTMPubReason", "1"], #Neueintragungen
|
273
|
+
["id_swissreg:mainContent:id_ckbTMPubReason", "2"], #Berichtigungen
|
274
|
+
["id_swissreg:mainContent:id_ckbTMPubReason", "3"], #Verlängerungen
|
275
|
+
["id_swissreg:mainContent:id_ckbTMPubReason", "4"], #Löschungen
|
276
|
+
["id_swissreg:mainContent:id_ckbTMPubReason", "5"], #Inhaberänderungen
|
277
|
+
["id_swissreg:mainContent:id_ckbTMPubReason", "6"], #Vertreteränderungen
|
278
|
+
["id_swissreg:mainContent:id_ckbTMPubReason", "7"], #Lizenzänderungen
|
279
|
+
["id_swissreg:mainContent:id_ckbTMPubReason", "8"], #Weitere Registeränderungen
|
280
|
+
# ["id_swissreg:mainContent:id_ckbTMEmptyHits", "0"], # Leere Trefferliste anzeigen
|
281
|
+
|
282
|
+
# "id_swissreg:mainContent:id_cbxFormatChoice" 2 = Publikationsansicht 1 = Registeransicht
|
283
|
+
["id_swissreg:mainContent:id_cbxFormatChoice", "1"],
|
284
|
+
["id_swissreg:mainContent:id_cbxHitsPerPage", HitsPerPage], # Treffer pro Seite
|
285
|
+
]
|
286
|
+
TMChoiceFields.each{ | field2display| @criteria << ["id_swissreg:mainContent:id_ckbTMChoice", field2display] }
|
287
|
+
# id_swissreg:mainContent:id_ckbTMChoice tm_lbl_tm_text
|
288
|
+
puts "Marke ist #{marke}" if marke # Wortlaut der Marke
|
289
|
+
puts "Hinterlegungsdatum ist #{timespan}" if $VERBOSE and timespan
|
290
|
+
puts "nummer ist #{timespan}" if nummer
|
291
|
+
@criteria << ["id_swissreg:mainContent:sub_fieldset:id_submit", "suchen"]
|
292
|
+
@criteria << ["id_swissreg_SUBMIT", "1"]
|
293
|
+
@criteria << ["id_swissreg:_idcl", ""]
|
294
|
+
@criteria << ["id_swissreg:_link_hidden_", ""]
|
295
|
+
@criteria << ["javax.faces.ViewState", @state]
|
296
|
+
|
297
|
+
if true # UseClick
|
298
|
+
# Swissreg::setAllInputValue(@agent.page.forms.first, @criteria)
|
299
|
+
# setPublicationStates(@agent.page.form('id_swissreg'))
|
300
|
+
@agent.page.form('id_swissreg').click_button(@agent.page.form('id_swissreg').button_with(:value => "suchen"))
|
301
|
+
else # use post
|
302
|
+
writeResponse("#{LogDir}/vor_post_sr3.jsp")
|
303
|
+
@agent.post(Sr3, @criteria)
|
304
|
+
writeResponse("#{LogDir}/erweiterte_suche.html")
|
305
|
+
@agent.page.form('id_swissreg').click_button(@agent.page.form('id_swissreg').button_with(:value => "suchen"))
|
306
|
+
end
|
222
307
|
# Hier sollten eigentlich alle Felder auftauchen, wie
|
223
|
-
# Marke=asp*; Land (Inhaber/in)=Schweiz; Markenart=Alle; Markentyp=Alle; Farbanspruch=Alle; Publikationsgrund= Neueintragungen, Berichtigungen, Verlängerungen, Löschungen, Inhaberänderungen, Vertreteränderungen, Lizenzänderungen, Weitere Registeränderungen; Status= hängige Gesuche, aktive Marken
|
224
|
-
writeResponse(
|
308
|
+
# Marke=asp*; Land (Inhaber/in)=Schweiz; Markenart=Alle; Markentyp=Alle; Farbanspruch=Alle; Publikationsgrund= Neueintragungen, Berichtigungen, Verlängerungen, Löschungen, Inhaberänderungen, Vertreteränderungen, Lizenzänderungen, Weitere Registeränderungen; Status= hängige Gesuche, aktive Marken
|
309
|
+
writeResponse("#{LogDir}/resultate.jsp")
|
225
310
|
end
|
226
311
|
|
227
|
-
|
312
|
+
# the number is only passed to facilitate debugging
|
313
|
+
# lines are the address lines
|
314
|
+
def Swissreg::parseAddress(number, lines)
|
228
315
|
ort = nil
|
229
316
|
plz = nil
|
230
317
|
|
231
318
|
# Search for plz/address
|
232
|
-
1.upto(
|
319
|
+
1.upto(lines.length-1).each {
|
233
320
|
|cnt|
|
234
|
-
if m = AddressRegexp.match(
|
235
|
-
|
321
|
+
if m = AddressRegexp.match(lines[cnt])
|
322
|
+
lines[cnt+1] = nil
|
236
323
|
plz = m[1]; ort = m[2]
|
237
|
-
cnt.upto(MaxZeilen-1).each{ |cnt2|
|
324
|
+
cnt.upto(MaxZeilen-1).each{ |cnt2| lines[cnt2] = nil }
|
238
325
|
break
|
239
326
|
end
|
240
327
|
}
|
241
328
|
unless plz
|
242
|
-
puts "Achtung! Konnte Marke #{
|
329
|
+
puts "Achtung! Konnte Marke #{number} mit Inhaber #{lines.inspect} nicht parsen" if $VERBOSE
|
243
330
|
return nil, nil, nil, nil, nil, nil, nil, nil
|
244
331
|
end
|
245
332
|
# search for lines with only digits
|
246
333
|
found = false
|
247
|
-
1.upto(
|
334
|
+
1.upto(lines.length-1).each {
|
248
335
|
|cnt|
|
249
|
-
break if
|
250
|
-
if /^\d*$/.match(
|
336
|
+
break if lines[cnt] == nil
|
337
|
+
if /^\d*$/.match(lines[cnt])
|
251
338
|
found = true
|
252
|
-
if
|
339
|
+
if lines[cnt+1] == nil
|
253
340
|
found = 'before'
|
254
|
-
|
255
|
-
|
341
|
+
lines[cnt-1] += LineSplit + lines[cnt]
|
342
|
+
lines.delete_at(cnt)
|
256
343
|
else
|
257
344
|
found = 'after'
|
258
|
-
|
259
|
-
|
345
|
+
lines[cnt] += LineSplit + lines[cnt+1]
|
346
|
+
lines.delete_at(cnt+1)
|
260
347
|
end
|
261
348
|
end
|
262
349
|
}
|
263
|
-
puts "found #{found}: #{
|
264
|
-
return
|
350
|
+
puts "found #{found}: #{lines.inspect}" if found and $VERBOSE
|
351
|
+
return lines[0], lines[1], lines[2], lines[3], lines[4], plz, ort
|
265
352
|
end
|
266
353
|
|
267
|
-
def
|
268
|
-
|
269
|
-
|
270
|
-
|
271
|
-
|
272
|
-
|
273
|
-
|
274
|
-
|
275
|
-
|
276
|
-
|
277
|
-
|
278
|
-
|
279
|
-
|
280
|
-
|
281
|
-
|
282
|
-
|
283
|
-
|
284
|
-
|
285
|
-
|
286
|
-
|
287
|
-
|
288
|
-
|
289
|
-
|
290
|
-
|
291
|
-
|
292
|
-
|
293
|
-
|
294
|
-
|
295
|
-
|
296
|
-
|
297
|
-
|
298
|
-
|
299
|
-
|
300
|
-
|
301
|
-
|
354
|
+
def Swissreg::getInputValuesFromPage(body) # body of HTML page
|
355
|
+
contentData = []
|
356
|
+
body.search('input').each{ |input|
|
357
|
+
# puts "name: #{input.attribute('name')} value #{input.attribute('value')}"
|
358
|
+
contentData << [ input.attribute('name').to_s, input.attribute('value').to_s ]
|
359
|
+
}
|
360
|
+
contentData
|
361
|
+
end
|
362
|
+
|
363
|
+
# return value of an array of POST values
|
364
|
+
def Swissreg::inputValue(values, key)
|
365
|
+
values.each{ |val|
|
366
|
+
return val[1] if key.eql?(val[0])
|
367
|
+
}
|
368
|
+
return nil
|
369
|
+
end
|
370
|
+
|
371
|
+
# set value for a key of an array of POST values
|
372
|
+
def Swissreg::setInputValue(values, key, newValue)
|
373
|
+
values.each{ |val|
|
374
|
+
if key.eql?(val[0])
|
375
|
+
val[1] = newValue
|
376
|
+
return
|
377
|
+
end
|
378
|
+
}
|
379
|
+
return
|
380
|
+
end
|
381
|
+
|
382
|
+
def Swissreg::setAllInputValue(form, values)
|
383
|
+
values.each{ |newValue|
|
384
|
+
# puts "x: 0 #{ newValue[0].to_s} 1 #{newValue[1].to_s}"
|
385
|
+
form.field(:name => newValue[0].to_s) { |elem|
|
386
|
+
next if elem == nil # puts "Cannot set #{newValue[0].to_s}"
|
387
|
+
elem.value = newValue[1].to_s
|
388
|
+
}
|
389
|
+
}
|
302
390
|
end
|
303
391
|
|
304
|
-
def
|
305
|
-
|
306
|
-
|
307
|
-
|
308
|
-
|
309
|
-
|
310
|
-
|
311
|
-
|
312
|
-
|
313
|
-
|
314
|
-
|
315
|
-
|
316
|
-
|
317
|
-
|
318
|
-
|
319
|
-
|
320
|
-
|
321
|
-
|
322
|
-
|
392
|
+
def Swissreg::getMarkenInfoFromDetail(doc)
|
393
|
+
marke = nil
|
394
|
+
number = 'invalid'
|
395
|
+
bezeichnung = nil
|
396
|
+
inhaber = nil
|
397
|
+
hinterlegungsdatum = nil
|
398
|
+
zeilen = []
|
399
|
+
doc.xpath("//html/body/form/div/div/fieldset/div/table/tbody/tr").each{
|
400
|
+
|x|
|
401
|
+
if x.children.first.text.eql?('Marke')
|
402
|
+
if x.children[1].text.index('Markenabbildung')
|
403
|
+
# we must fetch the link to the image
|
404
|
+
bezeichnung = x.children[1].elements.first.attribute('href').text
|
405
|
+
else # we got a trademark
|
406
|
+
bezeichnung = x.children[1].text
|
407
|
+
end
|
408
|
+
end
|
409
|
+
if x.children.first.text.eql?('Inhaber/in')
|
410
|
+
inhaber = />(.*)<\/td/.match(x.children[1].to_s)[1].gsub('<br>',LineSplit)
|
411
|
+
x.children[1].children.each{ |child| zeilen << child.text unless child.text.length == 0 } # avoid adding <br>
|
412
|
+
end
|
413
|
+
hinterlegungsdatum = x.children[1].text if x.children.first.text.eql?('Hinterlegungsdatum')
|
414
|
+
number = x.children[1].text if x.children.first.text.eql?('Gesuch Nr.')
|
323
415
|
}
|
324
|
-
|
325
|
-
|
326
|
-
|elem|
|
327
|
-
bezeichnung = elem.elements[1].text
|
328
|
-
land = elem.elements[4].text
|
329
|
-
next unless /#{DefaultCountry}/i.match(land)
|
330
|
-
inhaber = elem.elements[3].text
|
331
|
-
nummer = elem.elements[2].text
|
332
|
-
if bezeichnung.length == 0
|
333
|
-
bezeichnung = elem.children[1].children[0].children[0].children[0].attribute('src').to_s
|
334
|
-
end
|
335
|
-
zeile_1, zeile_2, zeile_3, zeile_4, zeile_5, plz, ort = parseAddress(nummer, inhaber.split(LineSplit))
|
336
|
-
if zeile_1
|
337
|
-
@results << Marke.new(bezeichnung, elem.elements[2].text, elem.elements[3].text, land, elem.elements[5].text,
|
338
|
-
zeile_1, zeile_2, zeile_3, zeile_4, zeile_5, plz, ort )
|
339
|
-
else
|
340
|
-
nrFailures += 1
|
341
|
-
@errors[nummer] = Marke.new(bezeichnung, elem.elements[2].text, elem.elements[3].text, land, elem.elements[5].text,
|
342
|
-
zeile_1, zeile_2, zeile_3, zeile_4, zeile_5, plz, ort )
|
343
|
-
end
|
344
|
-
} if doc.xpath(path_name)
|
345
|
-
if hasNext
|
346
|
-
@path = "/srclient/faces/jsp/trademark/sr30.jsp"
|
347
|
-
puts "Calling sub #{counter} with #{@path}" if $VERBOSE
|
348
|
-
data = [
|
349
|
-
["autoScroll", "0,0"],
|
350
|
-
["id_swissreg:mainContent:id_sub_options_result:sub_fieldset:id_cbxHitsPerPage", @hitsPerPage],
|
351
|
-
# ["id_swissreg:mainContent:vivian", "TRADEMARK REGISTER SEARCH TIMES: QUERY=[20] SELECT=[823] SERVER=[846] DELEGATE=[861] (HITS=[96])"],
|
352
|
-
["id_swissreg_SUBMIT", "1"],
|
353
|
-
["id_swissreg:_idcl", "id_swissreg:mainContent:scroll_1idx#{counter}"],
|
354
|
-
["id_swissreg:mainContent:scroll_1", "idx#{counter}"],
|
355
|
-
["tmMainId", ""],
|
356
|
-
["id_swissreg:_link_hidden_ "],
|
357
|
-
["javax.faces.ViewState", @state],
|
358
|
-
]
|
359
|
-
TMChoiceFields.each{ | field2display| data << ["id_swissreg:mainContent:id_sub_options_result:id_ckbTMChoice", field2display] }
|
360
|
-
response = @agent.post(Base_uri + @path, data)
|
361
|
-
writeResponse("mechanize/resultate_#{counter}.html")
|
362
|
-
checkErrors(response.body)
|
363
|
-
fetchresult(nil, counter)
|
364
|
-
else
|
365
|
-
puts "Es gab #{nrFailures} Fehler beim Lesen von #{filename}" if $VERBOSE
|
366
|
-
puts "Fand #{@results.size} Datensätze für die Zeitspanne '#{@timespan}'. Von #{@errors.size} muss die Adresse noch geholt werden."
|
367
|
-
end
|
416
|
+
zeile_1, zeile_2, zeile_3, zeile_4, zeile_5, plz, ort = Swissreg::parseAddress(number, zeilen)
|
417
|
+
marke = Marke.new(bezeichnung, number, inhaber, DefaultCountry, hinterlegungsdatum, zeile_1, zeile_2, zeile_3, zeile_4, zeile_5, plz, ort )
|
368
418
|
end
|
369
|
-
|
370
|
-
def emitCsv(filename='ausgabe.csv')
|
371
|
-
return if
|
419
|
+
|
420
|
+
def Swissreg::emitCsv(results, filename='ausgabe.csv')
|
421
|
+
return if results == nil or results.size == 0
|
372
422
|
if /^1\.8/.match(RUBY_VERSION)
|
373
423
|
ausgabe = File.open(filename, 'w+')
|
374
424
|
# Write header
|
375
425
|
s=''
|
376
|
-
|
426
|
+
results[0].members.each { |member| s += member + ';' }
|
377
427
|
ausgabe.puts s.chop
|
378
428
|
# write all line
|
379
|
-
|
429
|
+
results.each{
|
380
430
|
|result|
|
381
431
|
s = ''
|
382
432
|
result.members.each{ |member|
|
@@ -391,28 +441,155 @@ module Brand2csv
|
|
391
441
|
ausgabe.puts s.chop
|
392
442
|
}
|
393
443
|
else
|
394
|
-
CSV.open(filename, 'w', :headers
|
444
|
+
CSV.open(filename, 'w', :headers=>results[0].members,
|
395
445
|
:write_headers => true,
|
396
446
|
:col_sep => ';'
|
397
|
-
) do |csv|
|
447
|
+
) do |csv| results.each{ |x| csv << x }
|
398
448
|
end
|
399
449
|
end
|
400
450
|
end
|
451
|
+
|
452
|
+
class Swissreg::Vereinfachte
|
453
|
+
attr_reader :links2details, :trademark_search_id, :inputData, :firstHit, :nrHits, :nrSubPages, :pageNr
|
454
|
+
HitRegexpDE = /Seite (\d*) von ([\d']*) - Treffer ([\d']*)-([\d']*) von ([\d']*)/
|
455
|
+
Vivian = 'id_swissreg:mainContent:vivian'
|
456
|
+
|
457
|
+
# Parse a HTML page from swissreg sr3.jsp
|
458
|
+
# There we find info like "Seite 1 von 26 - Treffer 1-250 von 6'349" and upto 250 links to details
|
459
|
+
def initialize(doc)
|
460
|
+
@inputData = []
|
461
|
+
m = HitRegexpDE.match(doc.text)
|
462
|
+
@pageNr = m[1].sub("'", '').to_i
|
463
|
+
@nrSubPages = m[2].sub("'", '').to_i
|
464
|
+
@firstHit = m[3].sub("'", '').to_i
|
465
|
+
@nrHits = m[5].sub("'", '').to_i
|
466
|
+
@trademark_search_id = Swissreg::inputValue(Swissreg::getInputValuesFromPage(doc), Vivian)
|
467
|
+
@links2details = []
|
468
|
+
doc.search('input').each{ |input|
|
469
|
+
# puts "name: #{input.attribute('name')} value #{input.attribute('value')}" if $VERBOSE
|
470
|
+
@inputData << [ input.attribute('name').to_s, input.attribute('value').to_s ]
|
471
|
+
}
|
472
|
+
|
473
|
+
@state = Swissreg::inputValue(Swissreg::getInputValuesFromPage(doc), 'javax.faces.ViewState')
|
474
|
+
doc.search('a').each{
|
475
|
+
|link|
|
476
|
+
if m = /d_swissreg:mainContent:data:(\d*):tm_no_detail:id_detail/i.match(link.attribute('id'))
|
477
|
+
# puts "XXX #{link.attribute('onclick').to_s} href: #{link.attribute('href').to_s} value #{link.attribute('value').to_s}" if $VERBOSE
|
478
|
+
m = /'tmMainId','(\d*)'/.match(link.attribute('onclick').to_s)
|
479
|
+
tmMainId = m[1].to_i
|
480
|
+
@links2details << tmMainId
|
481
|
+
end
|
482
|
+
}
|
483
|
+
end
|
484
|
+
|
485
|
+
def getPostDataForDetail(position, id)
|
486
|
+
[
|
487
|
+
[ "autoScroll", "0,0"],
|
488
|
+
[ "id_swissreg:mainContent:sub_options_result:sub_fieldset:cbxHitsPerPage", "#{HitsPerPage}"],
|
489
|
+
[ "id_swissreg:mainContent:vivian", @trademark_search_id],
|
490
|
+
[ "id_swissreg_SUBMIT", "1"],
|
491
|
+
[ "id_swissreg:_idcl", "id_swissreg:mainContent:data:#{position}:tm_no_detail:id_detail", ""],
|
492
|
+
[ "id_swissreg:mainContent:scroll_1", ""],
|
493
|
+
[ "tmMainId", "#{id}"],
|
494
|
+
[ "id_swissreg:_link_hidden_ "],
|
495
|
+
[ "javax.faces.ViewState", @state]
|
496
|
+
]
|
497
|
+
end
|
498
|
+
|
499
|
+
def getPostDataForSubpage(pageNr)
|
500
|
+
[
|
501
|
+
[ "autoScroll", "0,0"],
|
502
|
+
[ "id_swissreg:mainContent:sub_options_result:sub_fieldset:cbxHitsPerPage", "#{HitsPerPage}"],
|
503
|
+
[ "id_swissreg:mainContent:vivian", @trademark_search_id],
|
504
|
+
[ "id_swissreg_SUBMIT", "1"],
|
505
|
+
[ "id_swissreg:_idcl", "id_swissreg:mainContent:scroll_1idx#{pageNr}"],
|
506
|
+
[ "id_swissreg:mainContent:scroll_1", "idx#{pageNr}"],
|
507
|
+
[ "tmMainId", ""],
|
508
|
+
[ "id_swissreg:_link_hidden_ "],
|
509
|
+
[ "javax.faces.ViewState", @state]
|
510
|
+
]
|
511
|
+
end
|
401
512
|
|
402
|
-
|
403
|
-
|
404
|
-
|
405
|
-
|
513
|
+
end
|
514
|
+
|
515
|
+
def getAllHits(filename = nil, pageNr = 1)
|
516
|
+
if filename && File.exists?(filename)
|
517
|
+
doc = Nokogiri::Slop(File.open(filename))
|
518
|
+
else
|
519
|
+
body = @agent.page.body
|
520
|
+
body.force_encoding('utf-8')
|
521
|
+
doc = Nokogiri::Slop(body)
|
522
|
+
filename = "#{LogDir}/vereinfachte_#{pageNr}.html"
|
523
|
+
writeResponse(filename)
|
524
|
+
end
|
525
|
+
|
526
|
+
einfach = Swissreg::Vereinfachte.new(doc)
|
527
|
+
puts "#{Time.now.strftime("%H:%M:%S")} status: fetch #{pageNr} of #{einfach.nrSubPages}"
|
528
|
+
subPage2Fetch = pageNr + 1
|
529
|
+
data2 = einfach.getPostDataForSubpage(subPage2Fetch).clone
|
530
|
+
if (HitsPerPage < einfach.nrHits - einfach.firstHit)
|
531
|
+
itemsToFetch = HitsPerPage
|
532
|
+
else
|
533
|
+
itemsToFetch = einfach.nrHits - einfach.firstHit
|
534
|
+
end
|
535
|
+
0.upto(itemsToFetch-1) {
|
536
|
+
|position|
|
537
|
+
id = einfach.links2details[position]
|
538
|
+
nextId = einfach.firstHit.to_i - 1 + position.to_i
|
539
|
+
data3 = einfach.getPostDataForDetail(nextId, id)
|
540
|
+
Swissreg::setAllInputValue(@agent.page.forms.first, data3)
|
541
|
+
@agent.page.forms.first.submit
|
542
|
+
filename = "#{LogDir}/vereinfachte_detail_#{einfach.firstHit + position}.html"
|
543
|
+
writeResponse(filename)
|
544
|
+
matchResult = @agent.page.search('h1').text
|
545
|
+
unless /Detailansicht zu (Gesuch|Marke)/.match(matchResult)
|
546
|
+
puts matchResult
|
547
|
+
puts "Attention did not find 'Detailansicht' in #{filename}. Someting went wrong!"
|
548
|
+
break
|
549
|
+
end
|
550
|
+
@results << Swissreg::getMarkenInfoFromDetail(Nokogiri::Slop(@agent.page.body))
|
551
|
+
@agent.back
|
406
552
|
}
|
553
|
+
filename = "#{LogDir}/vereinfachte_#{pageNr}_back.html"
|
554
|
+
writeResponse(filename)
|
555
|
+
if pageNr < (einfach.nrSubPages-1)
|
556
|
+
puts "Fetching page #{subPage2Fetch} of #{einfach.nrSubPages}" if $VERBOSE
|
557
|
+
Swissreg::setAllInputValue(@agent.page.forms.first, data2)
|
558
|
+
@agent.page.forms.first.submit
|
559
|
+
getAllHits(nil, subPage2Fetch)
|
560
|
+
@agent.back
|
561
|
+
end
|
562
|
+
|
407
563
|
end
|
564
|
+
|
565
|
+
def fetchresult(filename = "#{LogDir}/fetch_1.html", counter = 1)
|
566
|
+
if filename && File.exists?(filename)
|
567
|
+
doc = Nokogiri::Slop(File.open(filename))
|
568
|
+
else
|
569
|
+
body = @agent.page.body
|
570
|
+
body.force_encoding('utf-8')
|
571
|
+
doc = Nokogiri::Slop(body)
|
572
|
+
writeResponse(filename)
|
573
|
+
end
|
574
|
+
|
575
|
+
if /Vereinfachte Trefferliste anzeigen/i.match(doc.text)
|
576
|
+
form = @agent.page.forms.first
|
577
|
+
button = form.button_with(:value => /Vereinfachte/i)
|
578
|
+
# submit the form using that button
|
579
|
+
@agent.submit(form, button)
|
580
|
+
filename = "#{LogDir}/vereinfacht.html"
|
581
|
+
writeResponse(filename)
|
582
|
+
end
|
583
|
+
getAllHits(filename, counter)
|
584
|
+
end
|
585
|
+
|
408
586
|
end # class Swissreg
|
409
587
|
|
410
|
-
def Brand2csv::run(timespan)
|
411
|
-
session = Swissreg.new(timespan)
|
588
|
+
def Brand2csv::run(timespan, marke = 'a*')
|
589
|
+
session = Swissreg.new(timespan, marke)
|
412
590
|
session.parse_swissreg
|
413
591
|
session.fetchresult
|
414
|
-
session.
|
415
|
-
session.emitCsv("#{timespan}.csv")
|
592
|
+
Swissreg::emitCsv(session.results, "#{timespan}.csv")
|
416
593
|
end
|
417
594
|
|
418
595
|
end # module Brand2csv
|
data/lib/brand2csv/version.rb
CHANGED
data/protocol.2013.05.21.textile
CHANGED
@@ -1,3 +1,14 @@
|
|
1
|
+
* Work on 2013.05.27
|
2
|
+
|
3
|
+
* Should be able to fetch up to 10'000 hits.
|
4
|
+
** Problems: Seems to hang silently after a few thousand hits
|
5
|
+
** Cannot limit search to only "Hängige Gesuche" and "Aktive Marken"
|
6
|
+
|
7
|
+
* Work on 2013.05.26
|
8
|
+
|
9
|
+
* Added first rake tests to speed up work for 10'000 hits
|
10
|
+
* Added second parameter to limit according to trademark name as passing a timespan is not honoured by swissreg
|
11
|
+
|
1
12
|
* Work on 2013.05.22
|
2
13
|
|
3
14
|
** Use timespan as filename
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: brand2csv
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.1.
|
4
|
+
version: 0.1.6
|
5
5
|
prerelease:
|
6
6
|
platform: ruby
|
7
7
|
authors:
|
@@ -9,7 +9,7 @@ authors:
|
|
9
9
|
autorequire:
|
10
10
|
bindir: bin
|
11
11
|
cert_chain: []
|
12
|
-
date: 2013-05-
|
12
|
+
date: 2013-05-27 00:00:00.000000000 Z
|
13
13
|
dependencies:
|
14
14
|
- !ruby/object:Gem::Dependency
|
15
15
|
name: mechanize
|
@@ -124,6 +124,7 @@ files:
|
|
124
124
|
- spike.rb
|
125
125
|
- spike_mechanize_swissreg.rb
|
126
126
|
- spike_watir.rb
|
127
|
+
- .gemtest
|
127
128
|
homepage: https://github.com/zdavatz/brand2csv
|
128
129
|
licenses: []
|
129
130
|
post_install_message:
|