brand2csv 0.1.1
Sign up to get free protection for your applications and to get access to all the features.
- data/.gitignore +3 -0
- data/.rspec +1 -0
- data/Gemfile +19 -0
- data/Gemfile.lock +73 -0
- data/History.txt +12 -0
- data/LICENCE.txt +515 -0
- data/LICENSE +675 -0
- data/Manifest.txt +19 -0
- data/README.md +6 -0
- data/Rakefile +23 -0
- data/lib/brand2csv.rb +341 -0
- data/lib/brand2csv/version.rb +3 -0
- data/protocol.2013.05.12.textile +56 -0
- data/protocol.2013.05.15.textile +49 -0
- data/protocol.2013.05.21.textile +24 -0
- data/resultat_1.html +697 -0
- data/spike.rb +491 -0
- data/spike_mechanize_swissreg.rb +312 -0
- data/spike_watir.rb +58 -0
- metadata +151 -0
data/Manifest.txt
ADDED
@@ -0,0 +1,19 @@
|
|
1
|
+
.gitignore
|
2
|
+
.rspec
|
3
|
+
Gemfile
|
4
|
+
Gemfile.lock
|
5
|
+
History.txt
|
6
|
+
LICENCE.txt
|
7
|
+
LICENSE
|
8
|
+
Manifest.txt
|
9
|
+
README.md
|
10
|
+
Rakefile
|
11
|
+
lib/brand2csv.rb
|
12
|
+
lib/brand2csv/version.rb
|
13
|
+
protocol.2013.05.12.textile
|
14
|
+
protocol.2013.05.15.textile
|
15
|
+
protocol.2013.05.21.textile
|
16
|
+
resultat_1.html
|
17
|
+
spike.rb
|
18
|
+
spike_mechanize_swissreg.rb
|
19
|
+
spike_watir.rb
|
data/README.md
ADDED
data/Rakefile
ADDED
@@ -0,0 +1,23 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
# encoding: utf-8
|
3
|
+
|
4
|
+
require 'rubygems'
|
5
|
+
require 'hoe'
|
6
|
+
|
7
|
+
# Gem specification for brand2csv, declared through Hoe.
Hoe.spec 'brand2csv' do
  self.author = 'Niklaus Giger, Zeno R.R. Davatz' # gem.authors
  self.email = 'yasaka@ywesee.com, zdavatz@ywesee.com'
  # Two-line description; the lines are joined with a newline.
  self.description = [
    'brand2csv creates csv files for swiss brand registered in a specific time period.',
    'The csv contains the brand, link to image (if present), link to the detailinfo at swissreg.ch, name and address of owner (Inhaber)',
  ].join("\n")
  self.summary = 'brand2csv creates csv files for swiss brands.'
  self.urls = ['https://github.com/zdavatz/brand2csv'] # gem.homepage

  # gem.add_runtime_dependency
  extra_deps.push(['mechanize', '>= 2.6'])

  # gem.add_development_dependency
  [
    ['rspec'],
    ['webmock'],
    ['hoe', '>= 3.4'],
    ['rdoc'],
  ].each { |dependency| extra_dev_deps.push(dependency) }
end
|
data/lib/brand2csv.rb
ADDED
@@ -0,0 +1,341 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
# encoding: utf-8
|
3
|
+
|
4
|
+
require "brand2csv/version"
|
5
|
+
require 'mechanize'
|
6
|
+
require 'prettyprint'
|
7
|
+
require 'optparse'
|
8
|
+
require 'csv'
|
9
|
+
|
10
|
+
module Brand2csv
|
11
|
+
|
12
|
+
# One trademark record as it is written to the CSV file.
#
# Fields: name (word mark or image link), markennummer (trademark number),
# inhaber (raw owner text), land (owner country), hinterlegungsdatum (filing date),
# zeile_1..zeile_5 (owner address lines), plz (postal code), ort (city).
#
# The struct is assigned directly to the constant instead of subclassing an
# anonymous Struct, which avoids a superfluous intermediate class.
Marke = Struct.new(:name, :markennummer, :inhaber, :land, :hinterlegungsdatum,
                   :zeile_1, :zeile_2, :zeile_3, :zeile_4, :zeile_5, :plz, :ort)
|
14
|
+
|
15
|
+
class Swissreg
|
16
|
+
|
17
|
+
# Message fragments seen in swissreg responses; checkErrors aborts the run when
# a response body contains any of them. The list is frozen because it is shared
# by every session.
BekannteFehler = [
  'Das Datum ist ung', # full message ends in "ültig"; NOTE(review): presumably cut before the umlaut to sidestep encoding issues
  'Erweiterte Suche',
  'Vereinfachte Trefferliste anzeigen',
  'Es wurden keine Daten gefunden.',
  'Die Suchkriterien sind teilweise unzul', # full message ends in "ässig"
  'Geben Sie mindestens ein Suchkriterium ein',
  'Die Suche wurde abgebrochen, da die maximale Suchzeit von 60 Sekunden',
].freeze
Base_uri = 'https://www.swissreg.ch'
Start_uri = "#{Base_uri}/srclient/faces/jsp/start.jsp"
# Matches an address line starting with four digits (postal code) followed by the city.
# NOTE(review): \W* also consumes non-ASCII letters, so a city beginning with an
# umlaut would lose its first character - confirm before relying on it.
AddressRegexp = /^(\d\d\d\d)\W*(.*)/
# Separator between the address lines inside the owner column.
LineSplit = ', '
DefaultCountry = 'Schweiz'
# Columns to display, sent as "id_swissreg:mainContent:id_ckbTMChoice"
TMChoiceFields = [
  "tm_lbl_tm_text", # trademark
  # "tm_lbl_state"], # status
  # "tm_lbl_nizza_class"], # Nice classification no.
  # "tm_lbl_no"], # disabled="disabled"], # number
  "tm_lbl_applicant", # owner
  "tm_lbl_country", # country (of the owner)
  # "tm_lbl_agent", # representative
  # "tm_lbl_licensee"], # licensee
  "tm_lbl_app_date", # filing date
].freeze
|
44
|
+
|
45
|
+
attr_accessor :marke
|
46
|
+
|
47
|
+
# Creates a scraping session for swissreg.ch.
#
# timespan - value for the filing-date ("Hinterlegungsdatum") search field; it is
#            stored unchanged and used as the default argument of parse_swissreg.
#            Other comments in this project use the form "01.06.2007-10.06.2007".
def initialize(timespan)
  @timespan = timespan
  @agent = Mechanize.new do |agent|
    # agent.user_agent_alias = 'Mac Safari'
    agent.user_agent = 'Mozilla/5.0 (X11; Linux x86_64; rv:16.0) Gecko/20100101 Firefox/16.0'
    # agent.redirection_limit = 5
    # NOTE(review): certificate verification is switched off, so the TLS peer is
    # not authenticated. The project notes state the site could only be opened
    # this way; re-check whether that still holds.
    agent.verify_mode = OpenSSL::SSL::VERIFY_NONE
  end
  @results = []        # records that are ready to be written to the CSV
  @errors = {}         # trademark number => record whose address could not be parsed
  @lastResponse = nil
  @lastDetail = nil
  @counterDetails = 0

  # Observed behaviour of the "Marke" (trademark text) search field:
  #   'zzzyyzzzzyzzyz*' => error message "Es wurden keine Daten gefunden"
  #   nil               => error message "Geben Sie mindestens ein Suchkriterium ein"
  #   'asp*'            => 138 records are fetched
  #   'a*'              => 25,490 hits, of which the site displays 10000 randomly
  #                        chosen ones and asks to narrow the search
  #   From 501 hits on, a simplified hit list is displayed.
  # NOTE(review): the pattern is hard-coded, so every search is limited to
  # trademarks matching 'asp*' - looks like a leftover from experimenting.
  @marke = 'asp*'
  # @marke = "*WEIH*"
  @number = nil        # '500000' was used while experimenting
  @hitsPerPage = 100
end
|
73
|
+
|
74
|
+
# Dumps a response body to +filename+, but only while running under RSpec
# (detected through the RSpec constant). Outside of RSpec nothing is written;
# with $VERBOSE set a note is printed instead.
#
# filename - path of the file to (over)write
# body     - content written with puts (a trailing newline is added if missing)
def writeResponse(filename, body)
  if defined?(RSpec)
    # Block form closes the handle even when writing raises.
    File.open(filename, 'w+') { |ausgabe| ausgabe.puts body }
  else
    puts "Skipping writing #{filename}" if $VERBOSE
  end
end
|
83
|
+
|
84
|
+
# Extracts the JSF view state from an HTML page.
#
# response - page source as a String
#
# Returns the content of the first value="..." attribute that follows
# "javax.faces.ViewState" on the same line, or "" when none is found.
def view_state(response)
  # Work on a copy: force_encoding changes the receiver in place, which would
  # alter the caller's string and raise on a frozen one.
  text = response.dup.force_encoding('utf-8')
  match = /javax\.faces\.ViewState.*?value="([^"]+)"/u.match(text)
  match ? match[1] : ""
end
|
91
|
+
|
92
|
+
# Scans a response body for the message fragments listed in BekannteFehler.
# On the first fragment found, a message is printed and the process exits
# with status 2.
#
# body - response body; converted with to_s before searching
def checkErrors(body)
  text = body.to_s
  BekannteFehler.each do |errMsg|
    next unless text.include?(errMsg)

    puts "Tut mir leid. Suche wurde mit Fehlermeldung <#{errMsg}> abgebrochen."
    exit 2
  end
end
|
101
|
+
|
102
|
+
# Opens a session on swissreg.ch, navigates to the extended trademark search
# (sr3.jsp) and submits the search form. The response of the search is checked
# with checkErrors and kept in @lastResponse for fetchresult.
#
# timespan - filing-date criterion; for 01.06.2007-10.06.2007 the original author
#            expected 377 hits (trademark 559271 was registered in that period)
# marke    - trademark text criterion
# nummer   - trademark number criterion; "559271" yields exactly one hit
def parse_swissreg(timespan = @timespan, marke = @marke, nummer = @number)
  @agent.get Start_uri # get a cookie for the session
  content = @agent.get_file Start_uri
  FileUtils.makedirs 'mechanize'
  writeResponse('mechanize/main.html', content)
  @state = view_state(content)

  # Form data for following one of the navigation links; only the link id differs.
  navigation = lambda do |link_id|
    [
      ["autoScroll", "0,0"],
      ["id_swissreg:_link_hidden_", ""],
      ["id_swissreg_SUBMIT", "1"],
      ["id_swissreg:_idcl", link_id],
      ["javax.faces.ViewState", @state],
    ]
  end

  content = @agent.post(Start_uri, navigation.call("id_swissreg_sub_nav_ipiNavigation_item0"))
  writeResponse('mechanize/einfache_suche.html', content.body)

  # sr1 is the simple search, sr3 the extended search
  @path = "/srclient/faces/jsp/trademark/sr3.jsp"
  response = @agent.post(Base_uri + @path, navigation.call("id_swissreg_sub_nav_ipiNavigation_item0_item3"))
  writeResponse('mechanize/erweiterte_suche.html', response.body)

  # Everything works up to here
  @criteria = [
    ["autoScroll", "0,829"],
    ["id_swissreg:_link_hidden_", ""],
    ["id_swissreg:mainContent:id_ckbTMState", "1"], # pending applications 1
    # ["id_swissreg:mainContent:id_ckbTMState", "2"], # deleted applications 2
    ["id_swissreg:mainContent:id_ckbTMState", "3"], # active trademarks 3
    # ["id_swissreg:mainContent:id_ckbTMState", "4"], # deleted trademarks 4
    ["id_swissreg:mainContent:id_cbxCountry", "_ALL"], # country selection _ALL
    ["id_swissreg:mainContent:id_txf_tm_no", nummer], # trademark no.
    ["id_swissreg:mainContent:id_txf_app_no", ""], # application no.
    ["id_swissreg:mainContent:id_txf_tm_text", marke], # wording of the trademark
    ["id_swissreg:mainContent:id_txf_applicant", ""], # owner
    ["id_swissreg:mainContent:id_txf_agent", ""], # representative
    ["id_swissreg:mainContent:id_txf_licensee", ""], # licensee
    ["id_swissreg:mainContent:id_txf_nizza_class", ""], # Nice classification no.
    ["id_swissreg:mainContent:id_txf_appDate", timespan], # filing date
    ["id_swissreg:mainContent:id_txf_expiryDate", ""], # expiry of the protection period
    # Kind of trademark: individual 1, collective 2, guarantee 3
    ["id_swissreg:mainContent:id_cbxTMTypeGrp", "_ALL"], # kind of trademark
    ["id_swissreg:mainContent:id_cbxTMForm", "_ALL"], # trademark type
    ["id_swissreg:mainContent:id_cbxTMColorClaim", "_ALL"], # colour claim
    ["id_swissreg:mainContent:id_txf_pub_date", ""], # publication date

    # Publication reasons (id_swissreg:mainContent:id_ckbTMPubReason):
    # 1 new registrations, 2 corrections, 3 renewals, 4 cancellations,
    # 5 owner changes, 6 representative changes, 7 licence changes,
    # 8 other register changes
    *("1".."8").map { |grund| ["id_swissreg:mainContent:id_ckbTMPubReason", grund] },
    # ["id_swissreg:mainContent:id_ckbTMEmptyHits", "0"], # show empty hit list

    # "id_swissreg:mainContent:id_cbxFormatChoice" 2 = publication view, 1 = register view
    ["id_swissreg:mainContent:id_cbxFormatChoice", "1"],
    ["id_swissreg:mainContent:id_cbxHitsPerPage", @hitsPerPage], # hits per page
  ]
  TMChoiceFields.each { |field2display| @criteria << ["id_swissreg:mainContent:id_ckbTMChoice", field2display] }

  puts "Marke ist #{marke}" if marke
  puts "Hinterlegungsdatum ist #{timespan}" if $VERBOSE && timespan
  # Fixed: this message used to print the timespan instead of the number.
  puts "nummer ist #{nummer}" if nummer

  @criteria << ["id_swissreg:mainContent:sub_fieldset:id_submit", "suchen"]
  @criteria << ["id_swissreg_SUBMIT", "1"]
  @criteria << ["id_swissreg:_idcl", ""]
  @criteria << ["id_swissreg:_link_hidden_", ""]
  @criteria << ["javax.faces.ViewState", @state]

  @path = "/srclient/faces/jsp/trademark/sr3.jsp"
  response = @agent.post(Base_uri + @path, @criteria)
  writeResponse('mechanize/resultate_1.html', response.body)
  checkErrors(response.body)
  @lastResponse = response
end
|
190
|
+
|
191
|
+
# Splits the owner text into address lines and extracts postal code and city.
#
# nummer  - trademark number, only used in the diagnostic message
# inhaber - owner text whose lines are separated by LineSplit; only the first
#           five lines are considered
#
# Lines 2 to 5 are searched, in order, for the first one matching AddressRegexp
# (four leading digits). That line is replaced by nil and its two captures
# become plz and ort.
#
# Returns [zeile_1, zeile_2, zeile_3, zeile_4, zeile_5, plz, ort]. When no line
# matches, all seven entries are nil (callers test zeile_1 to detect this).
def parseAddress(nummer, inhaber)
  zeilen = inhaber.to_s.split(LineSplit)[0, 5]
  zeilen << nil while zeilen.size < 5 # pad so the result always has seven entries

  (1..4).each do |idx|
    treffer = zeilen[idx] && AddressRegexp.match(zeilen[idx])
    next unless treffer

    zeilen[idx] = nil
    return zeilen + [treffer[1], treffer[2]]
  end

  puts "Achtung! Konnte Marke #{nummer} mit Inhaber #{inhaber} nicht parsen" if $VERBOSE
  Array.new(7) # same arity as the success case (was eight nils before)
end
|
213
|
+
|
214
|
+
# Loads the detail page of one trademark and extracts the owner address from it.
# Takes a long time, as one request per trademark is needed.
#
# nummer - trademark number; used in the detail URL, the cache file name and as
#          key into @errors
#
# A page already present as mechanize/detail_<nummer>.html is read from disk
# instead of being requested again. For every table cell whose text starts with
# "inhaber", the following cell is taken as the owner: its non-empty child texts
# become the address lines. If @errors holds a record for the number, that record
# is completed and appended to @results; otherwise a new record is built.
def fetchDetails(nummer) # takes a long time!
  @counterDetails += 1
  filename = "mechanize/detail_#{nummer}.html"
  if File.exist?(filename) # File.exists? is gone in current Ruby versions
    doc = File.open(filename) { |cached| Nokogiri::Slop(cached) } # block form closes the handle
  else
    # "&section" had been mangled into a section sign in the URL
    url = "https://www.swissreg.ch/srclient/faces/jsp/trademark/sr300.jsp?language=de&section=tm&id=#{nummer}"
    pp "Opening #{url}" if $VERBOSE
    content = @agent.get_file url
    writeResponse(filename, content)
    doc = Nokogiri::Slop(content)
  end
  puts "Bitte um Geduld. Hole Adressdetails für Marke #{nummer}. (#{@counterDetails} von #{@errors.size})"

  path_name = "//html/body/form/div/div/fieldset/div/table/tbody/tr/td"
  zellen = doc.xpath(path_name) # evaluated once instead of once per cell
  zellen.each_with_index do |td, idx|
    pp "#{idx}: #{td.text}" if $VERBOSE
    next unless /^inhaber/i.match(td.text)

    wert = zellen[idx + 1] # the cell following the label holds the owner
    next unless wert       # label was the last cell: nothing to read

    zeilen = []
    wert.children.each do |child|
      # empty texts come from <br>; the separator inside a line is replaced so
      # that joining with LineSplit stays unambiguous
      zeilen << child.text.gsub(LineSplit, '. ') unless child.text.length == 0
    end

    if info = @errors[nummer]
      info.inhaber = zeilen.join(LineSplit)
      # Fixed: zeile_5 was assigned to a local variable and never stored in info.
      info.zeile_1, info.zeile_2, info.zeile_3, info.zeile_4, info.zeile_5, info.plz, info.ort =
        parseAddress(nummer, info.inhaber)
      @results << info
    else
      # NOTE(review): cells 15 and 7 are stored as nodes, not as their text - confirm this is intended.
      bezeichnung = zellen[15]
      inhaber = zeilen.join(LineSplit)
      zeile_1, zeile_2, zeile_3, zeile_4, zeile_5, plz, ort = parseAddress(nummer, inhaber)
      hinterlegungsdatum = zellen[7]
      marke = Marke.new(bezeichnung, nummer, inhaber, DefaultCountry, hinterlegungsdatum,
                        zeile_1, zeile_2, zeile_3, zeile_4, zeile_5, plz, ort)
      @results << marke
    end
  end
end
|
250
|
+
|
251
|
+
# Reads one page of the hit list and recurses into the following pages.
#
# filename - optional HTML file to parse instead of @lastResponse
# counter  - number of the page being read; it is incremented first and then
#            used to look for the "scroll_1idx<counter>" link of the next page
#
# Only rows whose country column matches DefaultCountry are kept. Rows whose
# address parseAddress could split go to @results; the others are stored in
# @errors (keyed by trademark number) for fetchMissingDetails.
def fetchresult(filename = nil, counter = 1)
  doc = if filename
          File.open(filename) { |datei| Nokogiri::Slop(datei) } # block form closes the handle
        else
          Nokogiri::Slop(@lastResponse.body)
        end
  nrFailures = 0
  counter += 1
  puts "fetchresult. Counter #{counter} already #{@results.size} Datensätze für die Zeitspanne '#{@timespan}'"

  # A link to the next page exists if some pager cell mentions scroll_1idx<counter>.
  pager_path = "//html/body/form/div/div/fieldset/table/tbody/tr/td/table/tr/td"
  hasNext = doc.xpath(pager_path).any? { |elem| /scroll_1idx#{counter}/.match(elem.to_s) }

  rows_path = "//html/body/form/div/div/fieldset/table/tbody/tr/td/table/tbody/tr"
  doc.xpath(rows_path).each do |elem|
    zellen = elem.elements
    bezeichnung = zellen[1].text
    land = zellen[4].text
    next unless /#{DefaultCountry}/i.match(land)

    inhaber = zellen[3].text
    nummer = zellen[2].text
    if bezeichnung.length == 0
      # no word mark: fall back to the src attribute of the nested image
      bezeichnung = elem.children[1].children[0].children[0].children[0].attribute('src').to_s
    end
    zeile_1, zeile_2, zeile_3, zeile_4, zeile_5, plz, ort = parseAddress(nummer, inhaber)
    marke = Marke.new(bezeichnung, nummer, inhaber, land, zellen[5].text,
                      zeile_1, zeile_2, zeile_3, zeile_4, zeile_5, plz, ort)
    if zeile_1
      @results << marke
    else
      nrFailures += 1
      @errors[nummer] = marke
    end
  end

  if hasNext
    @path = "/srclient/faces/jsp/trademark/sr30.jsp"
    puts "Calling sub #{counter} with #{@path}" if $VERBOSE
    data = [
      ["autoScroll", "0,0"],
      ["id_swissreg:mainContent:id_sub_options_result:sub_fieldset:id_cbxHitsPerPage", @hitsPerPage],
      # ["id_swissreg:mainContent:vivian", "TRADEMARK REGISTER SEARCH TIMES: QUERY=[20] SELECT=[823] SERVER=[846] DELEGATE=[861] (HITS=[96])"],
      ["id_swissreg_SUBMIT", "1"],
      ["id_swissreg:_idcl", "id_swissreg:mainContent:scroll_1idx#{counter}"],
      ["id_swissreg:mainContent:scroll_1", "idx#{counter}"],
      ["tmMainId", ""],
      # Fixed: the field name carried a trailing blank and had no value.
      ["id_swissreg:_link_hidden_", ""],
      ["javax.faces.ViewState", @state],
    ]
    TMChoiceFields.each do |field2display|
      data << ["id_swissreg:mainContent:id_sub_options_result:id_ckbTMChoice", field2display]
    end
    response = @agent.post(Base_uri + @path, data)
    writeResponse("mechanize/resultate_#{counter}.html", response.body)
    checkErrors(response.body)
    @lastResponse = response
    fetchresult(nil, counter)
  else
    puts "Es gab #{nrFailures} Fehler beim Lesen von #{filename}" if $VERBOSE
    puts "Fand #{@results.size} Datensätze für die Zeitspanne '#{@timespan}'. Von #{@errors.size} muss die Adresse noch geholt werden."
  end
end
|
315
|
+
|
316
|
+
# Writes all collected records to a CSV file with a header row.
#
# filename - path of the CSV file (default 'ausgabe.csv')
#
# Nothing is written when there are no results. The header is taken from the
# members of the first record.
def emitCsv(filename='ausgabe.csv')
  return if @results.empty?

  # Options must be passed as keywords: a positional hash raises ArgumentError
  # on Ruby 3.
  CSV.open(filename, 'w', :headers => @results[0].members, :write_headers => true) do |csv|
    @results.each { |x| csv << x.to_a }
  end
  puts "Speicherte #{@results.size} gefunden Datensätze für die Zeitspanne '#{@timespan}' in #{filename}"
end
|
324
|
+
|
325
|
+
# Calls fetchDetails for every trademark number recorded in @errors.
def fetchMissingDetails
  @errors.each_key { |markennummer| fetchDetails(markennummer) }
end
|
331
|
+
end # class Swissreg
|
332
|
+
|
333
|
+
# Runs a complete export for +timespan+: search, read all result pages,
# fetch the detail pages for unparsed addresses and write the CSV file.
# Returns whatever emitCsv returns.
def self.run(timespan)
  Swissreg.new(timespan).tap do |session|
    session.parse_swissreg
    session.fetchresult
    session.fetchMissingDetails
  end.emitCsv
end
|
340
|
+
|
341
|
+
end # module Brand2csv
|
@@ -0,0 +1,56 @@
|
|
1
|
+
h3. started brand2csv (12 May 2013)
|
2
|
+
|
3
|
+
* Added minimal files to create a Ruby gem
|
4
|
+
* Started a spike.rb to fetch some elements from swissreg.ch via mechanize
|
5
|
+
* To get familiar with mechanize used the google example
|
6
|
+
Had to replace @page.form_with(:name => 'f')@ by @page.form_with(:name => 'gbqf')@
|
7
|
+
|
8
|
+
* www.swissreg.ch must be opened with agent.verify_mode = OpenSSL::SSL::VERIFY_NONE
|
9
|
+
|
10
|
+
* Examples of a link to details for a brand record are
|
11
|
+
bc. https://www.swissreg.ch/srclient/de/tm/61082/2011
|
12
|
+
https://www.swissreg.ch/srclient/de/tm/61082/2011
|
13
|
+
https://www.swissreg.ch/srclient/faces/jsp/trademark/sr300.jsp?language=en&section=tm&id=61082/2011
|
14
|
+
|
15
|
+
* Links
|
16
|
+
** Marken Suchen https://www.swissreg.ch/srclient/faces/jsp/start.jsp
|
17
|
+
** Erweiterte Suchen https://www.swissreg.ch/srclient/faces/jsp/trademark/sr1.jsp
|
18
|
+
** Resultate der Detailsuche unter https://www.swissreg.ch/srclient/faces/jsp/trademark/sr3.jsp
|
19
|
+
|
20
|
+
Wasted some time to discover that swissreg.rb does not use mechanize, but URI and hpricot to fetch the patent registration.
|
21
|
+
|
22
|
+
With watir the following few lines sufficed to fetch a detail
|
23
|
+
|
24
|
+
bc. Swiss_reg_URL = 'https://www.swissreg.ch'
|
25
|
+
client = Selenium::WebDriver::Remote::Http::Default.new
|
26
|
+
browser = Watir::Browser.new :firefox
|
27
|
+
browser.goto Swiss_reg_URL
|
28
|
+
browser.link(:id, "id_swissreg_sub_nav_ipiNavigation_item0").click
|
29
|
+
browser.link(:id, "id_swissreg_sub_nav_ipiNavigation_item0_item3").click
|
30
|
+
browser.text_field(:id, "id_swissreg:mainContent:id_txf_appDate").set("1.10.2011-5.10.2011")
|
31
|
+
browser.button(:value,"suchen").click
|
32
|
+
browser.link(:id, "id_swissreg:mainContent:data:2:tm_no_detail:id_detail").click# puts browser.text
|
33
|
+
|
34
|
+
Was not able to create a spike using either mechanize or uri/hpricot to fetch the details.
|
35
|
+
|
36
|
+
* Thoughts about the CLI interface to csv
|
37
|
+
|
38
|
+
bc. brand2csv --help
|
39
|
+
Usage: brand2csv 1.10.2011-5.10.2011 [name_of_brand]
|
40
|
+
Fetches brand records from swissreg for the given date range into results.csv.
|
41
|
+
Each result contains the following fields
|
42
|
+
- date of registration
|
43
|
+
- brandname
|
44
|
+
- owner of brand
|
45
|
+
-- name
|
46
|
+
-- addressline1
|
47
|
+
-- addressline2 (optional)
|
48
|
+
-- zip code
|
49
|
+
-- city
|
50
|
+
Only owners inside Switzerland will be returned.
|
51
|
+
|
52
|
+
* Would this be a good extension?
|
53
|
+
Accumulate all given results (+ temporary result like info_line_1..x) into a sqlite database.
|
54
|
+
Would allow an easy sql manipulation of data for filtering/sorting addresses, etc.
|
55
|
+
|
56
|
+
|