brand2csv 0.1.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/.gitignore +3 -0
- data/.rspec +1 -0
- data/Gemfile +19 -0
- data/Gemfile.lock +73 -0
- data/History.txt +12 -0
- data/LICENCE.txt +515 -0
- data/LICENSE +675 -0
- data/Manifest.txt +19 -0
- data/README.md +6 -0
- data/Rakefile +23 -0
- data/lib/brand2csv.rb +341 -0
- data/lib/brand2csv/version.rb +3 -0
- data/protocol.2013.05.12.textile +56 -0
- data/protocol.2013.05.15.textile +49 -0
- data/protocol.2013.05.21.textile +24 -0
- data/resultat_1.html +697 -0
- data/spike.rb +491 -0
- data/spike_mechanize_swissreg.rb +312 -0
- data/spike_watir.rb +58 -0
- metadata +151 -0
data/Manifest.txt
ADDED
@@ -0,0 +1,19 @@
|
|
1
|
+
.gitignore
|
2
|
+
.rspec
|
3
|
+
Gemfile
|
4
|
+
Gemfile.lock
|
5
|
+
History.txt
|
6
|
+
LICENCE.txt
|
7
|
+
LICENSE
|
8
|
+
Manifest.txt
|
9
|
+
README.md
|
10
|
+
Rakefile
|
11
|
+
lib/brand2csv.rb
|
12
|
+
lib/brand2csv/version.rb
|
13
|
+
protocol.2013.05.12.textile
|
14
|
+
protocol.2013.05.15.textile
|
15
|
+
protocol.2013.05.21.textile
|
16
|
+
resultat_1.html
|
17
|
+
spike.rb
|
18
|
+
spike_mechanize_swissreg.rb
|
19
|
+
spike_watir.rb
|
data/README.md
ADDED
data/Rakefile
ADDED
@@ -0,0 +1,23 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
# encoding: utf-8
|
3
|
+
|
4
|
+
require 'rubygems'
|
5
|
+
require 'hoe'
|
6
|
+
|
7
|
+
# Gem specification for brand2csv, declared through the Hoe DSL.
Hoe.spec('brand2csv') do
  # gem.authors
  self.author = 'Niklaus Giger, Zeno R.R. Davatz'
  self.email = 'yasaka@ywesee.com, zdavatz@ywesee.com'
  self.summary = 'brand2csv creates csv files for swiss brands.'
  self.description = "brand2csv creates csv files for swiss brand registered in a specific time period.
The csv contains the brand, link to image (if present), link to the detailinfo at swissreg.ch, name and address of owner (Inhaber)"
  # gem.homepage
  self.urls = ['https://github.com/zdavatz/brand2csv']

  # gem.add_runtime_dependency
  extra_deps << ['mechanize', '>= 2.6']

  # gem.add_development_dependency (same order as before)
  [['rspec'], ['webmock'], ['hoe', '>= 3.4'], ['rdoc']].each do |abhaengigkeit|
    extra_dev_deps << abhaengigkeit
  end
end
|
data/lib/brand2csv.rb
ADDED
@@ -0,0 +1,341 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
# encoding: utf-8
|
3
|
+
|
4
|
+
require "brand2csv/version"
|
5
|
+
require 'mechanize'
|
6
|
+
require 'prettyprint'
|
7
|
+
require 'optparse'
|
8
|
+
require 'csv'
|
9
|
+
|
10
|
+
module Brand2csv
|
11
|
+
|
12
|
+
# One trademark ("Marke") record as collected from swissreg.ch and written to the CSV.
#
# name::               wording of the trademark, or the image source when there is no wording
# markennummer::       trademark number
# inhaber::            owner, as one string whose parts are separated by ", "
# land::               country of the owner
# hinterlegungsdatum:: filing date
# zeile_1..zeile_5::   the parts of +inhaber+ that are not the postal code/town
# plz, ort::           postal code and town extracted from +inhaber+
#
# Assigned directly instead of `class Marke < Struct.new(...)`: subclassing creates an
# extra anonymous class and raises "superclass mismatch" when the file is loaded twice.
Marke = Struct.new(:name, :markennummer, :inhaber, :land, :hinterlegungsdatum,
                   :zeile_1, :zeile_2, :zeile_3, :zeile_4, :zeile_5, :plz, :ort)
|
14
|
+
|
15
|
+
class Swissreg
|
16
|
+
|
17
|
+
# Message fragments that make checkErrors abort the run when they appear in a response.
# (Original note: further errors that were seen.)
# Two entries stop right before an umlaut; the rest of the message is given in the comment.
BekannteFehler = [
  'Das Datum ist ung', # ...ültig
  'Erweiterte Suche',
  'Vereinfachte Trefferliste anzeigen',
  'Es wurden keine Daten gefunden.',
  'Die Suchkriterien sind teilweise unzul', # ...ässig
  'Geben Sie mindestens ein Suchkriterium ein',
  'Die Suche wurde abgebrochen, da die maximale Suchzeit von 60 Sekunden',
].freeze
Base_uri = 'https://www.swissreg.ch'.freeze
Start_uri = "#{Base_uri}/srclient/faces/jsp/start.jsp".freeze
# Four digits at the start of a line (postal code), then the rest of the line (town).
AddressRegexp = /^(\d\d\d\d)\W*(.*)/
# Separator between the parts of an owner string.
LineSplit = ', '.freeze
DefaultCountry = 'Schweiz'.freeze
# Displayed columns, submitted as "id_swissreg:mainContent:id_ckbTMChoice"
TMChoiceFields = [
  "tm_lbl_tm_text", # trademark
  # "tm_lbl_state"], # status
  # "tm_lbl_nizza_class"], # Nice classification no.
  # "tm_lbl_no"], # disabled="disabled"], # number
  "tm_lbl_applicant", # owner
  "tm_lbl_country", # country (of the owner)
  # "tm_lbl_agent", # representative
  # "tm_lbl_licensee"], # licensee
  "tm_lbl_app_date", # filing date
].freeze
|
44
|
+
|
45
|
+
attr_accessor :marke
|
46
|
+
|
47
|
+
# Sets up a scraping session: an HTTP agent plus empty result/error collections.
#
# timespan:: stored in @timespan; parse_swissreg submits it as the filing-date criterion
#
# Observations recorded while trying different search values for @marke / @number:
#   'zzzyyzzzzyzzyz*' => error message "Es wurden keine Daten gefunden"
#   'asp*'            => 138 records are fetched
#   'a*'              => 25,490 hits found, 10000 randomly chosen ones are shown and the
#                        site asks to narrow the search
#   nil               => error message "Geben Sie mindestens ein Suchkriterium ein"
#   From 501 hits on a simplified hit list is displayed.
#   Other values tried: @marke = "*WEIH*", @number = '500000'
def initialize(timespan)
  @timespan = timespan
  @agent = Mechanize.new do |agent|
    # agent.user_agent_alias = 'Mac Safari'
    agent.user_agent = 'Mozilla/5.0 (X11; Linux x86_64; rv:16.0) Gecko/20100101 Firefox/16.0'
    # agent.redirection_limit = 5
    # SECURITY: certificate verification is disabled here, so the TLS connection is
    # not authenticated. Kept unchanged; review before using outside of experiments.
    agent.verify_mode = OpenSSL::SSL::VERIFY_NONE
  end
  @results = []        # Marke records ready for the CSV
  @errors = {}         # markennummer => Marke whose address could not be parsed yet
  @lastResponse = nil
  @lastDetail = nil
  @counterDetails = 0
  # Only the final values are kept; earlier trial assignments were dead stores.
  @marke = 'asp*'
  @number = nil
  @hitsPerPage = 100
end
|
73
|
+
|
74
|
+
# Dumps a response body into +filename+, but only while running under RSpec.
# Outside of RSpec nothing is written (a note is printed when $VERBOSE is set).
#
# filename:: path of the file to (over)write
# body::     content to write; +puts+ appends a newline if it is missing
#
# Returns nil.
def writeResponse(filename, body)
  unless defined?(RSpec)
    puts "Skipping writing #{filename}" if $VERBOSE
    return
  end
  # Block form closes the handle even when writing raises.
  File.open(filename, 'w+') { |ausgabe| ausgabe.puts body }
end
|
83
|
+
|
84
|
+
# Extracts the JSF view state token from an HTML page.
#
# response:: HTML as a String
#
# Returns the content of the first value="..." attribute that follows
# "javax.faces.ViewState" on the same line, or "" when there is none.
def view_state(response)
  # Work on a copy: force_encoding would otherwise re-tag the caller's string
  # and raise on frozen strings.
  html = response.dup.force_encoding('utf-8')
  treffer = /javax\.faces\.ViewState.*?value="([^"]+)"/u.match(html)
  treffer ? treffer[1] : ""
end
|
91
|
+
|
92
|
+
# Scans +body+ for every entry of BekannteFehler. On the first hit a message is
# printed and the process exits with status 2.
def checkErrors(body)
  text = body.to_s
  BekannteFehler.each do |fehler|
    next unless text.index(fehler)
    puts "Tut mir leid. Suche wurde mit Fehlermeldung <#{fehler}> abgebrochen."
    exit 2
  end
end
|
101
|
+
|
102
|
+
# Opens a session on swissreg.ch, navigates to the extended trademark search and
# submits the search form. The response is kept in @lastResponse for fetchresult.
#
# timespan:: filing-date criterion (original note: 01.06.2007-10.06.2007 should yield
#            377 hits; 559271 was registered in that period)
# marke::    wording of the trademark to search for
# nummer::   trademark number (original note: "559271" yields exactly one hit)
#
# Returns the response of the search request. checkErrors exits the process when the
# response contains one of the known error messages.
def parse_swissreg(timespan = @timespan, marke = @marke, nummer = @number)
  @agent.get Start_uri # get a cookie for the session
  content = @agent.get_file Start_uri
  FileUtils.makedirs 'mechanize'
  writeResponse('mechanize/main.html', content)
  @state = view_state(content)

  # Both navigation requests send the same form and differ only in the clicked link.
  navigation = lambda do |ziel|
    [
      ["autoScroll", "0,0"],
      ["id_swissreg:_link_hidden_", ""],
      ["id_swissreg_SUBMIT", "1"],
      ["id_swissreg:_idcl", ziel],
      ["javax.faces.ViewState", @state],
    ]
  end

  content = @agent.post(Start_uri, navigation.call("id_swissreg_sub_nav_ipiNavigation_item0"))
  writeResponse('mechanize/einfache_suche.html', content.body)

  # sr1 is the simple search, sr3 the extended search
  @path = "/srclient/faces/jsp/trademark/sr3.jsp"
  response = @agent.post(Base_uri + @path, navigation.call("id_swissreg_sub_nav_ipiNavigation_item0_item3"))
  writeResponse('mechanize/erweiterte_suche.html', response.body)
  # Everything works up to here
  @criteria = [
    ["autoScroll", "0,829"],
    ["id_swissreg:_link_hidden_", ""],
    ["id_swissreg:mainContent:id_ckbTMState", "1"], # pending applications 1
    # ["id_swissreg:mainContent:id_ckbTMState", "2"], # deleted applications 2
    ["id_swissreg:mainContent:id_ckbTMState", "3"], # active trademarks 3
    # ["id_swissreg:mainContent:id_ckbTMState", "4"], # deleted trademarks 4
    ["id_swissreg:mainContent:id_cbxCountry", "_ALL"], # country selection _ALL
    ["id_swissreg:mainContent:id_txf_tm_no", nummer], # trademark no.
    ["id_swissreg:mainContent:id_txf_app_no", ""], # application no.
    ["id_swissreg:mainContent:id_txf_tm_text", marke], # wording of the trademark
    ["id_swissreg:mainContent:id_txf_applicant", ""], # owner
    ["id_swissreg:mainContent:id_txf_agent", ""], # representative
    ["id_swissreg:mainContent:id_txf_licensee", ""], # licensee
    ["id_swissreg:mainContent:id_txf_nizza_class", ""], # Nice classification no.
    ["id_swissreg:mainContent:id_txf_appDate", timespan], # filing date
    ["id_swissreg:mainContent:id_txf_expiryDate", ""], # expiry of protection
    # trademark kind: individual 1, collective 2, guarantee 3
    ["id_swissreg:mainContent:id_cbxTMTypeGrp", "_ALL"], # trademark kind
    ["id_swissreg:mainContent:id_cbxTMForm", "_ALL"], # trademark type
    ["id_swissreg:mainContent:id_cbxTMColorClaim", "_ALL"], # colour claim
    ["id_swissreg:mainContent:id_txf_pub_date", ""], # publication date

    # reason for publication, id_swissreg:mainContent:id_ckbTMPubReason
    ["id_swissreg:mainContent:id_ckbTMPubReason", "1"], # new registrations
    ["id_swissreg:mainContent:id_ckbTMPubReason", "2"], # corrections
    ["id_swissreg:mainContent:id_ckbTMPubReason", "3"], # renewals
    ["id_swissreg:mainContent:id_ckbTMPubReason", "4"], # cancellations
    ["id_swissreg:mainContent:id_ckbTMPubReason", "5"], # changes of owner
    ["id_swissreg:mainContent:id_ckbTMPubReason", "6"], # changes of representative
    ["id_swissreg:mainContent:id_ckbTMPubReason", "7"], # changes of licence
    ["id_swissreg:mainContent:id_ckbTMPubReason", "8"], # other register changes
    # ["id_swissreg:mainContent:id_ckbTMEmptyHits", "0"], # show empty hit list

    # "id_swissreg:mainContent:id_cbxFormatChoice" 2 = publication view, 1 = register view
    ["id_swissreg:mainContent:id_cbxFormatChoice", "1"],
    ["id_swissreg:mainContent:id_cbxHitsPerPage", @hitsPerPage], # hits per page
  ]
  TMChoiceFields.each { |field2display| @criteria << ["id_swissreg:mainContent:id_ckbTMChoice", field2display] }
  puts "Marke ist #{marke}" if marke
  puts "Hinterlegungsdatum ist #{timespan}" if $VERBOSE && timespan
  # Fixed: this line used to print the timespan instead of the number.
  puts "nummer ist #{nummer}" if nummer
  @criteria << ["id_swissreg:mainContent:sub_fieldset:id_submit", "suchen"]
  @criteria << ["id_swissreg_SUBMIT", "1"]
  @criteria << ["id_swissreg:_idcl", ""]
  @criteria << ["id_swissreg:_link_hidden_", ""]
  @criteria << ["javax.faces.ViewState", @state]

  response = @agent.post(Base_uri + @path, @criteria)
  writeResponse('mechanize/resultate_1.html', response.body)
  checkErrors(response.body)
  @lastResponse = response
end
|
190
|
+
|
191
|
+
# Splits an owner string into address lines and extracts postal code and town.
#
# nummer::  trademark number, used only in the warning message
# inhaber:: owner string whose parts are separated by LineSplit
#
# The first of parts 2 to 5 that matches AddressRegexp is taken as "postal code town";
# that part is returned as nil and its two captures become plz and ort. Parts beyond
# the fifth are ignored.
#
# Returns [zeile_1, zeile_2, zeile_3, zeile_4, zeile_5, plz, ort]. When no part matches,
# all seven values are nil (a warning is printed if $VERBOSE is set).
def parseAddress(nummer, inhaber)
  teile = inhaber.split(LineSplit)
  zeilen = Array.new(5) { |position| teile[position] }

  treffer = nil
  # zeile_1 is never considered as the postal code line. Regexp#match(nil) is nil.
  fundstelle = (1..4).find { |position| treffer = AddressRegexp.match(zeilen[position]) }

  unless fundstelle
    puts "Achtung! Konnte Marke #{nummer} mit Inhaber #{inhaber} nicht parsen" if $VERBOSE
    return [nil] * 7 # same arity as the success case
  end

  zeilen[fundstelle] = nil
  zeilen + [treffer[1], treffer[2]]
end
|
213
|
+
|
214
|
+
# Loads the detail page of one trademark and extracts the owner's address from it.
# Takes a long time when the page has to be downloaded.
#
# nummer:: trademark number; also used for the cache file name mechanize/detail_<nummer>.html
#
# The page is read from the cache file when it exists, otherwise fetched with the agent.
# For each table cell whose text starts with "inhaber" (case-insensitive) the following
# cell is taken as the address. If @errors holds a record for +nummer+ it is completed
# and appended to @results, otherwise a new Marke is built and appended.
def fetchDetails(nummer)
  @counterDetails += 1
  filename = "mechanize/detail_#{nummer}.html"
  if File.exist?(filename) # File.exists? was removed in Ruby 3.2
    doc = File.open(filename) { |datei| Nokogiri::Slop(datei) }
  else
    # Fixed: the query string had been garbled to "language=de§ion=tm".
    url = "#{Base_uri}/srclient/faces/jsp/trademark/sr300.jsp?language=de&section=tm&id=#{nummer}"
    pp "Opening #{url}" if $VERBOSE
    content = @agent.get_file url
    writeResponse(filename, content)
    doc = Nokogiri::Slop(content)
  end
  puts "Bitte um Geduld. Hole Adressdetails für Marke #{nummer}. (#{@counterDetails} von #{@errors.size})"
  path_name = "//html/body/form/div/div/fieldset/div/table/tbody/tr/td"
  zellen = doc.xpath(path_name)
  zellen.each_with_index do |td, position|
    pp "#{position}: #{td.text}" if $VERBOSE
    next unless /^inhaber/i.match(td.text)
    adresszelle = zellen[position + 1]
    next unless adresszelle # label was the last cell: nothing to read
    zeilen = []
    # children with empty text are skipped (avoids adding <br>)
    adresszelle.children.each { |child| zeilen << child.text.gsub(LineSplit, '. ') unless child.text.length == 0 }
    if info = @errors[nummer]
      info.inhaber = zeilen.join(LineSplit)
      # Fixed: zeile_5 used to be assigned to a local variable and was lost.
      info.zeile_1, info.zeile_2, info.zeile_3, info.zeile_4, info.zeile_5, info.plz, info.ort = parseAddress(nummer, info.inhaber)
      @results << info
    else
      # NOTE(review): these two are node objects, not their text - confirm this is intended.
      bezeichnung = zellen[15]
      hinterlegungsdatum = zellen[7]
      inhaber = zeilen.join(LineSplit)
      zeile_1, zeile_2, zeile_3, zeile_4, zeile_5, plz, ort = parseAddress(nummer, inhaber)
      @results << Marke.new(bezeichnung, nummer, inhaber, DefaultCountry, hinterlegungsdatum, zeile_1, zeile_2, zeile_3, zeile_4, zeile_5, plz, ort)
    end
  end
end
|
250
|
+
|
251
|
+
# Reads one page of the hit list and follows the pagination recursively.
#
# filename:: HTML file to parse instead of @lastResponse (optional)
# counter::  number of the page being parsed; the link to page counter + 1 is looked for
#
# Rows whose country does not match DefaultCountry are skipped. Rows whose owner can be
# parsed by parseAddress go to @results, the others to @errors (keyed by trademark number)
# so that fetchMissingDetails can complete them later.
def fetchresult(filename = nil, counter = 1)
  doc = if filename
          File.open(filename) { |datei| Nokogiri::Slop(datei) } # handle is closed again
        else
          Nokogiri::Slop(@lastResponse.body)
        end
  nrFailures = 0
  counter += 1
  puts "fetchresult. Counter #{counter} already #{@results.size} Datensätze für die Zeitspanne '#{@timespan}'"

  # Is there a link to the next page?
  path_name = "//html/body/form/div/div/fieldset/table/tbody/tr/td/table/tr/td"
  hasNext = doc.xpath(path_name).any? { |elem| /scroll_1idx#{counter}/.match(elem.to_s) }

  path_name = "//html/body/form/div/div/fieldset/table/tbody/tr/td/table/tbody/tr"
  doc.xpath(path_name).each do |elem|
    bezeichnung = elem.elements[1].text
    land = elem.elements[4].text
    next unless /#{DefaultCountry}/i.match(land)
    inhaber = elem.elements[3].text
    nummer = elem.elements[2].text
    if bezeichnung.length == 0
      # no wording: use the src attribute found below the row instead
      bezeichnung = elem.children[1].children[0].children[0].children[0].attribute('src').to_s
    end
    zeile_1, zeile_2, zeile_3, zeile_4, zeile_5, plz, ort = parseAddress(nummer, inhaber)
    marke = Marke.new(bezeichnung, nummer, inhaber, land, elem.elements[5].text,
                      zeile_1, zeile_2, zeile_3, zeile_4, zeile_5, plz, ort)
    if zeile_1
      @results << marke
    else
      nrFailures += 1
      @errors[nummer] = marke
    end
  end

  if hasNext
    @path = "/srclient/faces/jsp/trademark/sr30.jsp"
    puts "Calling sub #{counter} with #{@path}" if $VERBOSE
    data = [
      ["autoScroll", "0,0"],
      ["id_swissreg:mainContent:id_sub_options_result:sub_fieldset:id_cbxHitsPerPage", @hitsPerPage],
      # ["id_swissreg:mainContent:vivian", "TRADEMARK REGISTER SEARCH TIMES: QUERY=[20] SELECT=[823] SERVER=[846] DELEGATE=[861] (HITS=[96])"],
      ["id_swissreg_SUBMIT", "1"],
      ["id_swissreg:_idcl", "id_swissreg:mainContent:scroll_1idx#{counter}"],
      ["id_swissreg:mainContent:scroll_1", "idx#{counter}"],
      ["tmMainId", ""],
      # Fixed: this pair had a trailing space in the field name and no value.
      ["id_swissreg:_link_hidden_", ""],
      ["javax.faces.ViewState", @state],
    ]
    TMChoiceFields.each { |field2display| data << ["id_swissreg:mainContent:id_sub_options_result:id_ckbTMChoice", field2display] }
    response = @agent.post(Base_uri + @path, data)
    writeResponse("mechanize/resultate_#{counter}.html", response.body)
    checkErrors(response.body)
    @lastResponse = response
    fetchresult(nil, counter)
  else
    puts "Es gab #{nrFailures} Fehler beim Lesen von #{filename}" if $VERBOSE
    puts "Fand #{@results.size} Datensätze für die Zeitspanne '#{@timespan}'. Von #{@errors.size} muss die Adresse noch geholt werden."
  end
end
|
315
|
+
|
316
|
+
# Writes all collected records in @results to a CSV file with a header row.
#
# filename:: path of the CSV file (default 'ausgabe.csv')
#
# Nothing is written when @results is empty. The header row is taken from the
# members of the first record.
def emitCsv(filename='ausgabe.csv')
  return if @results.empty?
  # Options are passed as keywords: a positional Hash raises ArgumentError on Ruby 3.
  CSV.open(filename, 'w', :headers => @results[0].members, :write_headers => true) do |csv|
    @results.each { |eintrag| csv << eintrag.to_a }
  end
  puts "Speicherte #{@results.size} gefunden Datensätze für die Zeitspanne '#{@timespan}' in #{filename}"
end
|
324
|
+
|
325
|
+
# Calls fetchDetails for every trademark number stored in @errors.
def fetchMissingDetails
  @errors.each_key { |nr| fetchDetails(nr) }
end
|
331
|
+
end # class Swissreg
|
332
|
+
|
333
|
+
# Entry point: searches swissreg.ch for +timespan+, collects the hits, completes
# the addresses that could not be parsed and writes the CSV file.
def self.run(timespan)
  swissreg = Swissreg.new(timespan)
  swissreg.parse_swissreg
  swissreg.fetchresult
  swissreg.fetchMissingDetails
  swissreg.emitCsv
end
|
340
|
+
|
341
|
+
end # module Brand2csv
|
@@ -0,0 +1,56 @@
|
|
1
|
+
h3. started brand2csv (12 May 2013)
|
2
|
+
|
3
|
+
* Added minimal files to create a Ruby gem
|
4
|
+
* Started a spike.rb to fetch some elements from swissreg.ch via mechanize
|
5
|
+
* To get familiar with mechanize used the google example
|
6
|
+
Had to replace @page.form_with(:name => 'f')@ by @page.form_with(:name => 'gbqf')@
|
7
|
+
|
8
|
+
* www.swissreg.ch must be opened with agent.verify_mode = OpenSSL::SSL::VERIFY_NONE
|
9
|
+
|
10
|
+
* Examples of a link to details for a brand record are
|
11
|
+
bc. https://www.swissreg.ch/srclient/de/tm/61082/2011
|
12
|
+
https://www.swissreg.ch/srclient/de/tm/61082/2011
|
13
|
+
https://www.swissreg.ch/srclient/faces/jsp/trademark/sr300.jsp?language=en&section=tm&id=61082/2011
|
14
|
+
|
15
|
+
* Links
|
16
|
+
** Marken Suchen https://www.swissreg.ch/srclient/faces/jsp/start.jsp
|
17
|
+
** Erweiterte Suche https://www.swissreg.ch/srclient/faces/jsp/trademark/sr1.jsp
|
18
|
+
** Resultate der Detailsuche unter https://www.swissreg.ch/srclient/faces/jsp/trademark/sr3.jsp
|
19
|
+
|
20
|
+
Wasted some time to discover that swissreg.rb does not use mechanize, but URI and hpricot to fetch the patent registration.
|
21
|
+
|
22
|
+
With watir the following few lines sufficed to fetch a detail
|
23
|
+
|
24
|
+
bc. Swiss_reg_URL = 'https://www.swissreg.ch'
|
25
|
+
client = Selenium::WebDriver::Remote::Http::Default.new
|
26
|
+
browser = Watir::Browser.new :firefox
|
27
|
+
browser.goto Swiss_reg_URL
|
28
|
+
browser.link(:id, "id_swissreg_sub_nav_ipiNavigation_item0").click
|
29
|
+
browser.link(:id, "id_swissreg_sub_nav_ipiNavigation_item0_item3").click
|
30
|
+
browser.text_field(:id, "id_swissreg:mainContent:id_txf_appDate").set("1.10.2011-5.10.2011")
|
31
|
+
browser.button(:value,"suchen").click
|
32
|
+
browser.link(:id, "id_swissreg:mainContent:data:2:tm_no_detail:id_detail").click# puts browser.text
|
33
|
+
|
34
|
+
Was not able to create a spike using either mechanize or uri/hpricot to fetch the details.
|
35
|
+
|
36
|
+
* Thoughts about the CLI interface to csv
|
37
|
+
|
38
|
+
bc. brand2csv --help
|
39
|
+
Usage: brand2csv 1.10.2011-5.10.2011 [name_of_brand]
|
40
|
+
Fetches brand records from swissreg for the given date range into results.csv.
|
41
|
+
Each result contains the following fields
|
42
|
+
- date of registration
|
43
|
+
- brandname
|
44
|
+
- owner of brand
|
45
|
+
-- name
|
46
|
+
-- addressline1
|
47
|
+
-- addressline2 (optional)
|
48
|
+
-- zip code
|
49
|
+
-- city
|
50
|
+
Only owners inside Switzerland will be returned.
|
51
|
+
|
52
|
+
* Would this be a good extension?
|
53
|
+
Accumulate all given results (+ temporary result like info_line_1..x) into a sqlite database.
|
54
|
+
Would allow an easy sql manipulation of data for filtering/sorting addresses, etc.
|
55
|
+
|
56
|
+
|