brand2csv 0.3.0 → 0.3.1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- metadata +117 -93
- data/.gemtest +0 -0
- data/.gitignore +0 -10
- data/.rspec +0 -1
- data/.travis.yml +0 -14
- data/Gemfile +0 -14
- data/Gemfile.lock +0 -76
- data/History.txt +0 -111
- data/LICENCE.txt +0 -515
- data/Manifest.txt +0 -54
- data/README.md +0 -27
- data/Rakefile +0 -25
- data/bin/brand2csv +0 -100
- data/lib/brand2csv.rb +0 -590
- data/lib/brand2csv/version.rb +0 -3
- data/logs/aspen_08_08_1986.html +0 -598
- data/logs/post.rohdaten.httpfox +0 -1
- data/logs/post.rohdaten.mechanize +0 -1
- data/logs/protocol_swissreg.log +0 -86
- data/logs/result_01.10.2005.jsp +0 -598
- data/logs/sr1.jsp +0 -449
- data/logs/sr3.jsp +0 -598
- data/logs/start.jsp +0 -350
- data/logs/start2.jsp +0 -434
- data/protocol.2013.05.12.textile +0 -56
- data/protocol.2013.05.15.textile +0 -49
- data/protocol.2013.05.21.textile +0 -84
- data/spec/brand2csv_spec.rb +0 -62
- data/spec/csv_spec.rb +0 -27
- data/spec/data/aspectra/detail_00001_P-480296.html +0 -531
- data/spec/data/aspectra/detail_00002_P-482236.html +0 -531
- data/spec/data/aspectra/detail_00003_641074.html +0 -539
- data/spec/data/aspectra/first_results.html +0 -600
- data/spec/data/einfache_suche.html +0 -434
- data/spec/data/erweiterte_suche.html +0 -446
- data/spec/data/main.html +0 -350
- data/spec/data/result_short.html +0 -606
- data/spec/data/resultate_1.html +0 -446
- data/spec/data/resultate_2.html +0 -446
- data/spec/data/urner_wildheu/detail_00001_57862.2013.html +0 -516
- data/spec/data/urner_wildheu/first_results.html +0 -598
- data/spec/data/vereinfachte_1.html +0 -847
- data/spec/data/vereinfachte_detail_33.html +0 -516
- data/spec/detail_spec.rb +0 -28
- data/spec/short_spec.rb +0 -55
- data/spec/simple_search.rb +0 -43
- data/spec/spec_helper.rb +0 -34
- data/spec/support/core_ext/kernel.rb +0 -26
- data/spec/support/server_mock_helper.rb +0 -142
- data/spec/swissreg_spec.rb +0 -44
- data/spec/trademark_numbers_spec.rb +0 -21
- data/spec/utilities_spec.rb +0 -83
- data/spike.rb +0 -491
- data/spike_mechanize_swissreg.rb +0 -312
- data/spike_watir.rb +0 -58
- data/swissreg.rb +0 -75
data/Manifest.txt
DELETED
@@ -1,54 +0,0 @@
|
|
1
|
-
.gitignore
|
2
|
-
.rspec
|
3
|
-
.travis.yml
|
4
|
-
Gemfile
|
5
|
-
Gemfile.lock
|
6
|
-
History.txt
|
7
|
-
LICENCE.txt
|
8
|
-
Manifest.txt
|
9
|
-
README.md
|
10
|
-
Rakefile
|
11
|
-
bin/brand2csv
|
12
|
-
lib/brand2csv.rb
|
13
|
-
lib/brand2csv/version.rb
|
14
|
-
logs/aspen_08_08_1986.html
|
15
|
-
logs/post.rohdaten.httpfox
|
16
|
-
logs/post.rohdaten.mechanize
|
17
|
-
logs/protocol_swissreg.log
|
18
|
-
logs/result_01.10.2005.jsp
|
19
|
-
logs/sr1.jsp
|
20
|
-
logs/sr3.jsp
|
21
|
-
logs/start.jsp
|
22
|
-
logs/start2.jsp
|
23
|
-
protocol.2013.05.12.textile
|
24
|
-
protocol.2013.05.15.textile
|
25
|
-
protocol.2013.05.21.textile
|
26
|
-
spec/brand2csv_spec.rb
|
27
|
-
spec/csv_spec.rb
|
28
|
-
spec/data/aspectra/detail_00001_P-480296.html
|
29
|
-
spec/data/aspectra/detail_00002_P-482236.html
|
30
|
-
spec/data/aspectra/detail_00003_641074.html
|
31
|
-
spec/data/aspectra/first_results.html
|
32
|
-
spec/data/einfache_suche.html
|
33
|
-
spec/data/erweiterte_suche.html
|
34
|
-
spec/data/main.html
|
35
|
-
spec/data/result_short.html
|
36
|
-
spec/data/resultate_1.html
|
37
|
-
spec/data/resultate_2.html
|
38
|
-
spec/data/urner_wildheu/detail_00001_57862.2013.html
|
39
|
-
spec/data/urner_wildheu/first_results.html
|
40
|
-
spec/data/vereinfachte_1.html
|
41
|
-
spec/data/vereinfachte_detail_33.html
|
42
|
-
spec/detail_spec.rb
|
43
|
-
spec/short_spec.rb
|
44
|
-
spec/simple_search.rb
|
45
|
-
spec/spec_helper.rb
|
46
|
-
spec/support/core_ext/kernel.rb
|
47
|
-
spec/support/server_mock_helper.rb
|
48
|
-
spec/swissreg_spec.rb
|
49
|
-
spec/trademark_numbers_spec.rb
|
50
|
-
spec/utilities_spec.rb
|
51
|
-
spike.rb
|
52
|
-
spike_mechanize_swissreg.rb
|
53
|
-
spike_watir.rb
|
54
|
-
swissreg.rb
|
data/README.md
DELETED
@@ -1,27 +0,0 @@
|
|
1
|
-
# brand2csv
|
2
|
-
|
3
|
-
[![Build Status](https://secure.travis-ci.org/zdavatz/brand2csv.png)](http://travis-ci.org/zdavatz/brand2csv)
|
4
|
-
|
5
|
-
brand2csv uses swissreg.ch to get addresses.
|
6
|
-
|
7
|
-
## Usage
|
8
|
-
```
|
9
|
-
brand2csv 01.01.2013 "b*"
|
10
|
-
brand2csv 1.10.2005-31.10.2005
|
11
|
-
```
|
12
|
-
## Help
|
13
|
-
```
|
14
|
-
~> brand2csv --help
|
15
|
-
/usr/local/bin/brand2csv ver.0.1.9
|
16
|
-
Usage:
|
17
|
-
brand2csv timespan
|
18
|
-
Find all brands registered in switzerland during the given timespan.
|
19
|
-
The following examples valid timespan periods:
|
20
|
-
brand2csv 01.01.2013 "b*" #will search for all brand starting with "b"
|
21
|
-
brand2csv 1.10.2005-31.10.2005 #this will work as well from version 0.1.9
|
22
|
-
The results are stored in the file <date_selected>.csv.
|
23
|
-
The trademark name is either a real brand name or a link to an image.
|
24
|
-
```
|
25
|
-
## Travis
|
26
|
-
You can find Travis builds here:
|
27
|
-
* https://travis-ci.org/zdavatz/brand2csv
|
data/Rakefile
DELETED
@@ -1,25 +0,0 @@
|
|
1
|
-
#!/usr/bin/env ruby
|
2
|
-
# encoding: utf-8
|
3
|
-
|
4
|
-
require 'rubygems'
|
5
|
-
require 'hoe'
|
6
|
-
|
7
|
-
# Gem specification for brand2csv, declared through Hoe.
Hoe.spec "brand2csv" do
  self.author  = "Niklaus Giger, Yasuhiro Asaka, Zeno R.R. Davatz" # gem.authors
  self.email   = "ngiger@ywesee.com, yasaka@ywesee.com, zdavatz@ywesee.com"
  self.summary = "brand2csv creates csv files for swiss brands."
  # The description spans two lines, joined by a newline.
  self.description = [
    "brand2csv creates csv files for swiss brand registered in a specific time period.",
    "The csv contains the brand, link to image (if present), link to the detailinfo at swissreg.ch, name and address of owner (Inhaber)"
  ].join("\n")
  self.urls = ["https://github.com/zdavatz/brand2csv"] # gem.homepage

  license "GPLv3.0"

  # gem.add_runtime_dependency
  self.extra_deps << ['mechanize', '>= 2.6']

  # gem.add_development_dependency
  [['rspec'], ['webmock'], ['hoe', '>= 3.4'], ['rdoc']].each do |dependency|
    self.extra_dev_deps << dependency
  end
end
|
data/bin/brand2csv
DELETED
@@ -1,100 +0,0 @@
|
|
1
|
-
#!/usr/bin/env ruby
|
2
|
-
|
3
|
-
require 'pathname'
|
4
|
-
root = Pathname.new(__FILE__).realpath.parent.parent
|
5
|
-
$:.unshift root.join('lib') if $0 == __FILE__
|
6
|
-
|
7
|
-
require 'optparse'
|
8
|
-
require "date"
|
9
|
-
require 'brand2csv'
|
10
|
-
|
11
|
-
# Builds the usage text printed for -h/--help and on invalid invocations.
#
# Returns a String of ten newline-terminated lines; the first line carries
# the program name ($0) and Brand2csv::VERSION.
def help
  [
    "#{$0} ver.#{Brand2csv::VERSION}",
    'Usage:',
    "#{File.basename(__FILE__)} timespan",
    'Find all brands registered in switzerland during the given timespan.',
    'The following examples valid timespan periods:',
    'brand2csv 01.01.2013 "b*" #will search for all brand starting with "b"',
    'brand2csv 1.10.2005-31.10.2005 #this will work as well from version 0.1.9',
    'The results are stored in the file <date_selected>.csv.',
    'The trademark name is either a real brand name or a link to an image.',
    '--swiss_only Fetch only trademarks from swiss owner'
  ].join("\n") + "\n"
end
|
25
|
-
|
26
|
-
# Validates a timespan given as "DD.MM.YYYY" or "DD.MM.YYYY-DD.MM.YYYY".
#
# Every character other than digits, dots and dashes is removed from +arg+
# before it is split on "-" into the individual dates.
#
# Returns a two element array [valid, message]:
# * [true, nil] when every date is accepted by Date.parse,
# * [false, "Did you mean <corrected timespan> ?"] when each rejected date
#   could be replaced by the last day of its month,
# * [false, "Timespan is invalid"] when no correction can be offered or the
#   argument contains no date at all.
def validates_timespan(arg)
  dates = arg.to_s.gsub(/[^\d.-]/, '').split('-')
  # Without this guard an argument such as "abc" leaves nothing to check and
  # would be reported as a valid timespan.
  return [false, 'Timespan is invalid'] if dates.empty?

  valid = true
  parts = dates.map do |date|
    next date if parseable_date?(date)

    valid = false
    suggestion = suggest_end_of_month(date)
    # One date without any sensible correction invalidates the whole timespan.
    return [false, 'Timespan is invalid'] unless suggestion

    suggestion
  end
  valid ? [true, nil] : [false, "Did you mean #{parts.join('-')} ?"]
end

# True when Date.parse accepts +text+.
def parseable_date?(text)
  Date.parse(text)
  true
rescue ArgumentError
  false
end

# Proposes the last day of the month named by a "DD.MM.YYYY" string.
# Returns the corrected date as "DD.MM.YYYY", or nil when month/year do not
# form a date or the day part is not the element that had to be replaced.
def suggest_end_of_month(text)
  elms = text.split('.')
  cand = Date.new(elms[2].to_i, elms[1].to_i, -1).strftime('%d.%m.%Y')
  elms[0] == (elms - cand.split('.')).first ? cand : nil
rescue ArgumentError
  nil
end
|
64
|
-
|
65
|
-
# Command line entry point: parse options, validate the timespan argument
# and hand over to Brand2csv::run.
opts = {}
parser = OptionParser.new
parser.on('--swiss_only') { |_v| opts[:swiss_only] = true }
parser.on_tail('-h', '--help') do
  puts help
  exit
end

args = ARGV.dup
begin
  parser.parse!(args)
rescue OptionParser::MissingArgument, OptionParser::InvalidArgument, OptionParser::InvalidOption
  puts help
  exit 1
end

# At least the timespan must be given.
if args.size < 1
  puts help
  exit 1
end

valid, message = validates_timespan(args[0])
unless valid
  puts message
  exit 1
end

begin
  # args[1] is the optional trademark pattern; it is nil when not given.
  Brand2csv::run(args[0], args[1], opts[:swiss_only])
rescue Interrupt
  puts "Unterbrochen. Breche mit Fehler ab"
  exit 1
end

puts "#{__FILE__} completed successfully" if $VERBOSE
|
data/lib/brand2csv.rb
DELETED
@@ -1,590 +0,0 @@
|
|
1
|
-
#!/usr/bin/env ruby
|
2
|
-
# encoding: utf-8
|
3
|
-
require 'rubygems' if /^1\.8/.match(RUBY_VERSION)
|
4
|
-
require "brand2csv/version"
|
5
|
-
require 'mechanize'
|
6
|
-
require 'prettyprint'
|
7
|
-
require 'optparse'
|
8
|
-
require 'csv'
|
9
|
-
require 'logger'
|
10
|
-
|
11
|
-
module Brand2csv
|
12
|
-
|
13
|
-
|
14
|
-
# One trademark record; the member order is also the column order of the
# CSV written by Swissreg::emitCsv.
class Marke < Struct.new(
  :name, :markennummer, :inhaber, :land, :hatVertreter, :hinterlegungsdatum,
  :zeile_1, :zeile_2, :zeile_3, :zeile_4, :zeile_5,
  :plz, :ort
)
end
|
16
|
-
|
17
|
-
class Swissreg
|
18
|
-
|
19
|
-
# Weitere gesehene Fehler
|
20
|
-
BekannteFehler =
|
21
|
-
['Das Datum ist ung', # ültig'
|
22
|
-
'500 Internal Server Error',
|
23
|
-
'Vereinfachte Trefferliste anzeigen',
|
24
|
-
'Es wurden keine Daten gefunden.',
|
25
|
-
'Die Suchkriterien sind teilweise unzul', # ässig',
|
26
|
-
'Geben Sie mindestens ein Suchkriterium ein',
|
27
|
-
'Die Suche wurde abgebrochen, da die maximale Suchzeit von 60 Sekunden',
|
28
|
-
'Erweiterte Suche',
|
29
|
-
]
|
30
|
-
Base_uri = 'https://www.swissreg.ch'
|
31
|
-
Start_uri = "#{Base_uri}/srclient/faces/jsp/start.jsp"
|
32
|
-
Sr1 = "#{Base_uri}/srclient/faces/jsp/trademark/sr1.jsp"
|
33
|
-
Sr2 = "#{Base_uri}/srclient/faces/jsp/trademark/sr2.jsp"
|
34
|
-
Sr3 = "#{Base_uri}/srclient/faces/jsp/trademark/sr3.jsp"
|
35
|
-
Sr30 = "#{Base_uri}/srclient/faces/jsp/trademark/sr30.jsp"
|
36
|
-
Sr300 = "#{Base_uri}/srclient/faces/jsp/trademark/sr300.jsp"
|
37
|
-
DetailRegexp = /d_swissreg:mainContent:data:(\d*):tm_no_detail:id_detail/i
|
38
|
-
AddressRegexp = /^(\d\d\d\d)\W*(.*)/
|
39
|
-
LineSplit = ', '
|
40
|
-
DefaultCountry = 'Schweiz'
|
41
|
-
# Angezeigte Spalten "id_swissreg:mainContent:id_ckbTMChoice"
|
42
|
-
TMChoiceFields = [
|
43
|
-
"tm_lbl_tm_text", # Marke
|
44
|
-
# "tm_lbl_state"], # Status
|
45
|
-
# "tm_lbl_nizza_class"], # Nizza Klassifikation Nr.
|
46
|
-
# "tm_lbl_no"], # disabled="disabled"], # Nummer
|
47
|
-
"tm_lbl_applicant", # Inhaber/in
|
48
|
-
"tm_lbl_country", # Land (Inhaber/in)
|
49
|
-
"tm_lbl_agent", # Vertreter/in
|
50
|
-
# "tm_lbl_licensee"], # Lizenznehmer/in
|
51
|
-
"tm_lbl_app_date", # Hinterlegungsdatum
|
52
|
-
]
|
53
|
-
# Alle Felder mit sprechenden Namen
|
54
|
-
# ["id_swissreg:mainContent:id_txf_tm_no", nummer],# Marken Nr
|
55
|
-
# ["id_swissreg:mainContent:id_txf_app_no", ""], # Gesuch Nr.
|
56
|
-
# ["id_swissreg:mainContent:id_txf_tm_text", marke],
|
57
|
-
# ["id_swissreg:mainContent:id_txf_applicant", ""], # Inhaber/in
|
58
|
-
# ["id_swissreg:mainContent:id_cbxCountry", "_ALL"], # Auswahl Länder _ALL
|
59
|
-
# ["id_swissreg:mainContent:id_txf_agent", ""], # Vertreter/in
|
60
|
-
# ["id_swissreg:mainContent:id_txf_licensee", ""], # Lizenznehmer
|
61
|
-
# ["id_swissreg:mainContent:id_txf_nizza_class", ""], # Nizza Klassifikation Nr.
|
62
|
-
# # ["id_swissreg:mainContent:id_txf_appDate", timespan], # Hinterlegungsdatum
|
63
|
-
# ["id_swissreg:mainContent:id_txf_appDate", "%s" % timespan] ,
|
64
|
-
# ["id_swissreg:mainContent:id_txf_expiryDate", ""], # Ablauf Schutzfrist
|
65
|
-
# Markenart: Individualmarke 1 Kollektivmarke 2 Garantiemarke 3
|
66
|
-
# ["id_swissreg:mainContent:id_cbxTMTypeGrp", "_ALL"], # Markenart
|
67
|
-
# ["id_swissreg:mainContent:id_cbxTMForm", "_ALL"], # Markentyp
|
68
|
-
# ["id_swissreg:mainContent:id_cbxTMColorClaim", "_ALL"], # Farbanspruch
|
69
|
-
# ["id_swissreg:mainContent:id_txf_pub_date", ""], # Publikationsdatum
|
70
|
-
|
71
|
-
# info zu Publikationsgrund id_swissreg:mainContent:id_ckbTMPubReason
|
72
|
-
# ["id_swissreg:mainContent:id_ckbTMPubReason", "1"], #Neueintragungen
|
73
|
-
# ["id_swissreg:mainContent:id_ckbTMPubReason", "2"], #Berichtigungen
|
74
|
-
# ["id_swissreg:mainContent:id_ckbTMPubReason", "3"], #Verlängerungen
|
75
|
-
# ["id_swissreg:mainContent:id_ckbTMPubReason", "4"], #Löschungen
|
76
|
-
# ["id_swissreg:mainContent:id_ckbTMPubReason", "5"], #Inhaberänderungen
|
77
|
-
# ["id_swissreg:mainContent:id_ckbTMPubReason", "6"], #Vertreteränderungen
|
78
|
-
# ["id_swissreg:mainContent:id_ckbTMPubReason", "7"], #Lizenzänderungen
|
79
|
-
# ["id_swissreg:mainContent:id_ckbTMPubReason", "8"], #Weitere Registeränderungen
|
80
|
-
# ["id_swissreg:mainContent:id_ckbTMEmptyHits", "0"], # Leere Trefferliste anzeigen
|
81
|
-
# ["id_swissreg:mainContent:id_ckbTMState", "1"], # "Hängige Gesuche 1
|
82
|
-
# # ["id_swissreg:mainContent:id_ckbTMState", "2"], # "Gelöschte Gesuche 2
|
83
|
-
# ["id_swissreg:mainContent:id_ckbTMState", "3"], # aktive Marken 3
|
84
|
-
# # ["id_swissreg:mainContent:id_ckbTMState", "4"], # gelöschte Marken 4
|
85
|
-
|
86
|
-
|
87
|
-
MaxZeilen = 5
|
88
|
-
HitsPerPage = 250
|
89
|
-
LogDir = 'log'
|
90
|
-
|
91
|
-
attr_accessor :marke, :results, :timespan
|
92
|
-
|
93
|
-
# Stores the search criteria and resets all per-session state.
#
# timespan::   value later posted as the application date criterion
# marke::      value later posted as the trademark text criterion
# swiss_only:: when truthy the country criterion becomes 'CH' instead of '_ALL'
def initialize(timespan, marke = nil, swiss_only=false)
  @timespan, @marke, @swiss_only = timespan, marke, swiss_only
  @number = @lastDetail = nil
  @results = []
  @all_trademark_numbers = []
  @errors = Hash.new
  @counterDetails = 0
end
|
104
|
-
|
105
|
-
# Dumps the body of the agent's current page into +filename+.
#
# Nothing is written unless RSpec is loaded or $VERBOSE is set. The block
# form of File.open guarantees the handle is closed even when reading the
# page body or writing fails.
def writeResponse(filename)
  return unless defined?(RSpec) || $VERBOSE

  File.open(filename, 'w+') { |ausgabe| ausgabe.puts @agent.page.body }
end
|
114
|
-
|
115
|
-
# Scans +body+ for every message listed in BekannteFehler.
#
# With exitIfFailure (the default) the first message found is reported and
# the process exits with status 2; otherwise each message found is only
# reported as an info line.
def checkErrors(body, exitIfFailure = true)
  text = body.to_s
  BekannteFehler.each do |errMsg|
    next unless text.index(errMsg)

    unless exitIfFailure
      puts "Info: Suche meldet <#{errMsg}> "
      next
    end
    puts "Tut mir leid. Suche wurde mit Fehlermeldung <#{errMsg}> abgebrochen."
    exit 2
  end
end
|
128
|
-
|
129
|
-
UseClick = false
|
130
|
-
|
131
|
-
# Initialize a session with swissreg and save the cookie as @state.
#
# Creates a new Mechanize agent, loads Start_uri, follows the fourth link of
# that page and remembers the "javax.faces.ViewState" form value in @state.
# On Mechanize::ResponseCodeError a message is printed and the process exits
# with status 3.
# NOTE(review): certificate verification is switched off (VERIFY_NONE).
def init_swissreg
  @agent = Mechanize.new do |agent|
    agent.user_agent = 'Mozilla/5.0 (X11; Linux x86_64; rv:16.0) Gecko/20100101 Firefox/16.0'
    agent.verify_mode = OpenSSL::SSL::VERIFY_NONE
    FileUtils.makedirs(LogDir) if $VERBOSE || defined?(RSpec)
    agent.log = Logger.new("#{LogDir}/mechanize.log") if $VERBOSE
  end
  @agent.get_file Start_uri # 'https://www.swissreg.ch/srclient/faces/jsp/start.jsp'
  writeResponse("#{LogDir}/session_expired.html")
  checkErrors(@agent.page.body, false)
  @agent.page.links[3].click
  writeResponse("#{LogDir}/homepage.html")
  @state = @agent.page.form["javax.faces.ViewState"]
rescue Net::HTTPInternalServerError, Mechanize::ResponseCodeError
  puts "Net::HTTPInternalServerError oder Mechanize::ResponseCodeError gesehen.\n #{Base_uri} hat wahrscheinlich Probleme"
  exit 3
end
|
151
|
-
|
152
|
-
# Runs the extended trademark search on swissreg.
#
# Opens a fresh session (init_swissreg), submits the first form of the page
# twice to navigate to the simple and then the extended trademark search and
# finally posts the search criteria to Sr3. Every response is handed to
# writeResponse; the result page is checked with checkErrors in non-fatal
# mode.
#
# timespan:: posted as application date (id_txf_appDate). According to the
#            original note 01.06.2007-10.06.2007 should give 377 hits, and
#            559271 was registered in that period.
# marke::    posted as trademark text (id_txf_tm_text)
# nummer::   kept for interface compatibility; it is not part of the posted
#            data (original note: "559271" gives exactly one hit)
#
# A Timeout::Error while posting is retried at most three times and then
# re-raised, instead of looping forever against an unresponsive server.
def parse_swissreg(timespan = @timespan,
                   marke = @marke,
                   nummer = @number)
  init_swissreg
  @agent.page.form['id_swissreg:_idcl'] = 'id_swissreg_sub_nav_ipiNavigation_item0'
  @agent.page.forms.first.submit
  writeResponse("#{LogDir}/trademark_simple.html")
  @agent.page.form['id_swissreg:_idcl'] = 'id_swissreg_sub_nav_ipiNavigation_item0_item3'
  @agent.page.forms.first.submit
  writeResponse("#{LogDir}/trademark_extended.html")

  data = [
    ["autoScroll", "0,829"],
    ["id_swissreg:_link_hidden_", ""],
    ["id_swissreg:mainContent:id_ckbTMState", "1"], # pending applications
    ["id_swissreg:mainContent:id_ckbTMState", "3"], # active trademarks
    ["id_swissreg:mainContent:id_txf_tm_no", ""], # trademark number
    ["id_swissreg:mainContent:id_txf_app_no", ""], # application number
    ["id_swissreg:mainContent:id_txf_tm_text", "#{marke}"],
    ["id_swissreg:mainContent:id_txf_applicant", ""], # owner
    ["id_swissreg:mainContent:id_cbxCountry", @swiss_only ? 'CH' : '_ALL'],
    ["id_swissreg:mainContent:id_txf_agent", ""], # representative
    ["id_swissreg:mainContent:id_txf_licensee", ""], # licensee
    ["id_swissreg:mainContent:id_txf_nizza_class", ""], # Nice classification number
    ["id_swissreg:mainContent:id_txf_appDate", "#{timespan}"],
    ["id_swissreg:mainContent:id_txf_expiryDate", ""], # end of protection period
    ["id_swissreg:mainContent:id_cbxTMTypeGrp", "_ALL"], # kind of trademark
    ["id_swissreg:mainContent:id_cbxTMForm", "_ALL"], # trademark type
    ["id_swissreg:mainContent:id_cbxTMColorClaim", "_ALL"], # colour claim
    ["id_swissreg:mainContent:id_txf_pub_date", ""], # publication date
  ]
  # All eight publication reasons are selected.
  data += ('1'..'8').map { |reason| ["id_swissreg:mainContent:id_ckbTMPubReason", reason] }
  data << ["id_swissreg:mainContent:id_cbxFormatChoice", "1"]
  # Displayed columns, shared with the TMChoiceFields constant.
  data += TMChoiceFields.map { |field| ["id_swissreg:mainContent:id_ckbTMChoice", field] }
  data += [
    ["id_swissreg:mainContent:id_cbxHitsPerPage", HitsPerPage], # hits per page
    ["id_swissreg:mainContent:sub_fieldset:id_submit", "suchen"],
    ["id_swissreg_SUBMIT", "1"],
    ["id_swissreg:_idcl", ""],
    ["id_swissreg:_link_hidden_", ""],
    ["javax.faces.ViewState", @state],
  ]

  timeouts = 0
  begin
    @agent.post(Sr3, data)
  rescue Timeout::Error
    timeouts += 1
    puts "Timeout!"
    retry if timeouts <= 3
    raise
  end
  writeResponse("#{LogDir}/first_results.html")
  checkErrors(@agent.page.body, false)
end
|
227
|
-
|
228
|
-
# Splits the owner text of a trademark into address lines, postal code and
# town.
#
# number::  only used in the debug message
# inhaber:: owner text whose lines are separated by LineSplit; HTML entities
#           are unescaped first. May be nil.
#
# Returns seven values: zeile_1 .. zeile_5, plz, ort. The first line that
# matches AddressRegexp (searched from the second line on) supplies plz and
# ort; that line and the slots after it up to MaxZeilen are blanked. A line
# consisting only of digits is merged with a neighbouring line. When no
# postal code is found all seven values are nil (the original returned eight
# nils here, which did not match the success path).
def self.parseAddress(number, inhaber)
  ort = nil
  plz = nil
  lines = nil
  if inhaber
    lines = CGI.unescapeHTML(inhaber).split(LineSplit)
    # Search for plz/address
    1.upto(lines.length - 1) do |cnt|
      m = AddressRegexp.match(lines[cnt])
      next unless m

      lines[cnt + 1] = nil
      plz = m[1]
      ort = m[2]
      cnt.upto(MaxZeilen - 1) { |cnt2| lines[cnt2] = nil }
      break
    end
  end
  unless plz
    puts "Achtung! Konnte Marke #{number} mit Inhaber #{lines.inspect} nicht parsen" if $VERBOSE
    return nil, nil, nil, nil, nil, nil, nil
  end

  # search for lines with only digits
  found = false
  1.upto(lines.length - 1) do |cnt|
    break if lines[cnt].nil? # reached the blanked postal code line
    next unless /^\d*$/.match(lines[cnt])

    if lines[cnt + 1].nil?
      # last address line: append the digits to the previous line
      found = 'before'
      lines[cnt - 1] += LineSplit + lines[cnt]
      lines.delete_at(cnt)
    else
      # otherwise pull the following line up behind the digits
      found = 'after'
      lines[cnt] += LineSplit + lines[cnt + 1]
      lines.delete_at(cnt + 1)
    end
  end
  puts "found #{found}: #{lines.inspect}" if found && $VERBOSE
  return lines[0], lines[1], lines[2], lines[3], lines[4], plz, ort
end
|
271
|
-
|
272
|
-
def Swissreg::getInputValuesFromPage(body) # body of HTML page
|
273
|
-
contentData = []
|
274
|
-
body.search('input').each{ |input|
|
275
|
-
# puts "name: #{input.attribute('name')} value #{input.attribute('value')}"
|
276
|
-
contentData << [ input.attribute('name').to_s, input.attribute('value').to_s ]
|
277
|
-
}
|
278
|
-
contentData
|
279
|
-
end
|
280
|
-
|
281
|
-
# Looks up +key+ in an array of [name, value] POST pairs and returns the
# value of the first pair with that name, or nil when there is none.
def self.inputValue(values, key)
  pair = values.find { |val| key.eql?(val[0]) }
  pair ? pair[1] : nil
end
|
288
|
-
|
289
|
-
# Replaces the value of the first [name, value] POST pair named +key+ with
# +newValue+, modifying +values+ in place. Always returns nil.
def self.setInputValue(values, key, newValue)
  target = values.find { |val| key.eql?(val[0]) }
  target[1] = newValue if target
  nil
end
|
299
|
-
|
300
|
-
# Copies every [name, value] pair of +values+ into the field of +form+ with
# that name. Pairs for which the form yields no field are skipped.
def self.setAllInputValue(form, values)
  values.each do |newValue|
    field_name = newValue[0].to_s
    form.field(:name => field_name) do |elem|
      elem.value = newValue[1].to_s unless elem == nil
    end
  end
end
|
309
|
-
|
310
|
-
# Builds a Marke from a parsed trademark detail page.
#
# Each table row is read as a label cell followed by a value cell. The rows
# labelled 'Marke', 'Inhaber/in', 'Vertreter/in', 'Hinterlegungsdatum' and
# 'Gesuch Nr.' are evaluated; for 'Marke' the link target is taken instead
# of the text when the cell mentions 'Markenabbildung'.
#
# Returns a Marke whose country is always DefaultCountry. A page without a
# usable owner row yields a Marke with nil owner and address fields instead
# of raising NoMethodError.
def self.getMarkenInfoFromDetail(doc)
  number = 'invalid'
  bezeichnung = nil
  inhaber = nil
  hinterlegungsdatum = nil
  hatVertreter = 'Nein'
  doc.xpath("//html/body/form/div/div/fieldset/div/table/tbody/tr").each do |row|
    label_cell = row.children.first
    value_cell = row.children[1]
    next unless label_cell && value_cell

    case label_cell.text
    when 'Marke'
      if value_cell.text.index('Markenabbildung')
        # we must fetch the link to the image
        bezeichnung = value_cell.elements.first.attribute('href').text
      else # we got a trademark
        bezeichnung = value_cell.text
      end
    when 'Inhaber/in'
      # keep the line breaks of the cell as LineSplit separators
      m = />(.*)<\/td/.match(value_cell.to_s)
      inhaber = m[1].gsub('<br>', LineSplit) if m
    when 'Vertreter/in'
      hatVertreter = 'Ja' if value_cell.text.length > 0
    when 'Hinterlegungsdatum'
      hinterlegungsdatum = value_cell.text
    when 'Gesuch Nr.'
      number = value_cell.text
    end
  end
  zeile_1, zeile_2, zeile_3, zeile_4, zeile_5, plz, ort = Swissreg::parseAddress(number, inhaber)
  # Catch cases where Inhaber has several postal addresses
  inhaber = inhaber.split(', , ')[0] if inhaber
  Marke.new(bezeichnung, number, inhaber, DefaultCountry, hatVertreter, hinterlegungsdatum,
            zeile_1, zeile_2, zeile_3, zeile_4, zeile_5, plz, ort)
end
|
342
|
-
|
343
|
-
# Fetches and parses the detail page of one trademark and appends the
# resulting Marke to @results. Takes a long time!
#
# A copy of the page already stored under LogDir is used instead of the
# network. The session is re-initialised on every 90th call (original note:
# swissreg seems to slow down artificially after 100 hits).
#
# A failing download is retried up to three times, each time after a pause
# of 60 seconds and a new session; after that Interrupt is raised.
#
# Fixes compared with the original: the rescue list named a String (which
# makes Ruby raise TypeError as soon as any exception reaches it), the retry
# message referenced an undefined variable, the URL parameter "&section" had
# been garbled into "§ion", File.exists? no longer exists in current Ruby,
# and rescuing Exception also swallowed Ctrl-C.
def fetchDetails(nummer) # takes a long time!
  @counterDetails += 1
  init_swissreg if @counterDetails % 90 == 0
  filename = "#{LogDir}/detail_#{sprintf('%05d', @counterDetails)}_#{nummer.gsub('/','.')}.html"
  if File.exist?(filename)
    doc = File.open(filename) { |io| Nokogiri::Slop(io) }
  else
    url = "#{Sr300}?language=de&section=tm&id=#{nummer}"
    pp "#{Time.now.strftime("%H:%M:%S")}: Opening #{filename}" if $VERBOSE
    $stdout.flush
    nrRetries = 0
    begin
      @agent.get_file url
      body = @agent.page.body
    rescue StandardError => e
      nrRetries += 1
      puts e.backtrace
      if nrRetries <= 3
        puts "get_file did not work reinit session and retry for #{nummer}. nrRetries #{nrRetries}/3. e #{e}"
        sleep 60 # Sleep a minute to let network recover
        init_swissreg
        retry
      else
        puts "get_file did not work reinit session raise Interrupt"
        raise Interrupt
      end
    end
    body.force_encoding('utf-8') unless /^1\.8/.match(RUBY_VERSION)
    doc = Nokogiri::Slop(body)
    writeResponse(filename)
  end
  marke = Swissreg::getMarkenInfoFromDetail(doc)
  @results << marke
end
|
377
|
-
|
378
|
-
# Writes +results+ (an array of Struct records such as Marke) to +filename+
# as a CSV file with ';' as column separator and the member names of the
# first record as header line.
#
# Nothing is written when +results+ is nil or empty. Members are read with
# Struct#[] instead of eval, and the Ruby 1.8 branch closes its file even
# when writing fails.
def self.emitCsv(results, filename='ausgabe.csv')
  return if results.nil? || results.size == 0

  members = results[0].members
  if /^1\.8/.match(RUBY_VERSION)
    File.open(filename, 'w+') do |ausgabe|
      # Write header
      ausgabe.puts members.join(';')
      # write all lines
      results.each do |result|
        cells = members.map do |member|
          value = result[member]
          next '' unless value

          text = value.to_s
          # quote values that contain the column separator
          text.index(';') ? "\"#{text}\"" : text
        end
        ausgabe.puts cells.join(';')
      end
    end
  else
    CSV.open(filename, 'w', :headers => members, :write_headers => true, :col_sep => ';') do |csv|
      results.each { |result| csv << result.to_a }
    end
  end
end
|
411
|
-
|
412
|
-
# Returns the text of every link in +doc+ whose id matches DetailRegexp.
# The text is read from the first child of the link's first child.
def self.getTrademarkNumbers(doc)
  detail_links = doc.search('a').select { |link| DetailRegexp.match(link.attribute('id')) }
  detail_links.map { |link| link.children.first.children.first.content }
end
|
422
|
-
|
423
|
-
# Parsed view of a simplified hit list page (sr3.jsp) of swissreg.
#
# Such a page contains a summary like
# "Seite 1 von 26 - Treffer 1-250 von 6'349" and up to 250 links to details.
class Swissreg::Vereinfachte
  attr_reader :links2details, :trademark_search_id, :inputData, :firstHit, :nrHits, :nrSubPages, :pageNr
  HitRegexpDE = /Seite (\d*) von ([\d']*) - Treffer ([\d']*)-([\d']*) von ([\d']*)/
  Vivian = 'id_swissreg:mainContent:vivian'

  # Reads paging information, all input values and the ids behind the
  # detail links from +doc+.
  #
  # The counters stay 0 when the page has no hit summary. The input values
  # are collected once and reused for the search id and the view state.
  # Detail links whose onclick handler carries no tmMainId are skipped
  # instead of raising NoMethodError.
  def initialize(doc)
    @pageNr = @nrSubPages = @firstHit = @nrHits = 0
    summary = HitRegexpDE.match(doc.text)
    if summary
      # the apostrophe is used as thousands separator
      counts = summary.captures.map { |n| n.delete("'").to_i }
      @pageNr, @nrSubPages, @firstHit, _last_hit, @nrHits = counts
    end

    @inputData = Swissreg::getInputValuesFromPage(doc)
    @trademark_search_id = Swissreg::inputValue(@inputData, Vivian)
    @state = Swissreg::inputValue(@inputData, 'javax.faces.ViewState')

    @links2details = []
    doc.search('a').each do |link|
      next unless DetailRegexp.match(link.attribute('id'))

      id_match = /'tmMainId','(\d*)'/.match(link.attribute('onclick').to_s)
      next unless id_match

      @links2details << id_match[1].to_i
    end
  end

  # POST data that opens the detail page of the hit at +position+ with the
  # trademark id +id+.
  # NOTE(review): the "_idcl" entry has three elements and the
  # "_link_hidden_ " entry has a trailing space and no value; both are kept
  # exactly as they were - confirm against the form before changing them.
  def getPostDataForDetail(position, id)
    [
      [ "autoScroll", "0,0"],
      [ "id_swissreg:mainContent:sub_options_result:sub_fieldset:cbxHitsPerPage", "#{HitsPerPage}"],
      [ "id_swissreg:mainContent:vivian", @trademark_search_id],
      [ "id_swissreg_SUBMIT", "1"],
      [ "id_swissreg:_idcl", "id_swissreg:mainContent:data:#{position}:tm_no_detail:id_detail", ""],
      [ "id_swissreg:mainContent:scroll_1", ""],
      [ "tmMainId", "#{id}"],
      [ "id_swissreg:_link_hidden_ "],
      [ "javax.faces.ViewState", @state]
    ]
  end

  # POST data that scrolls the hit list to sub page +pageNr+.
  # NOTE(review): the "_link_hidden_ " entry has a trailing space and no
  # value; kept exactly as it was.
  def getPostDataForSubpage(pageNr)
    [
      [ "autoScroll", "0,0"],
      [ "id_swissreg:mainContent:sub_options_result:sub_fieldset:cbxHitsPerPage", "#{HitsPerPage}"],
      [ "id_swissreg:mainContent:vivian", @trademark_search_id],
      [ "id_swissreg_SUBMIT", "1"],
      [ "id_swissreg:_idcl", "id_swissreg:mainContent:scroll_1idx#{pageNr}"],
      [ "id_swissreg:mainContent:scroll_1", "idx#{pageNr}"],
      [ "tmMainId", ""],
      [ "id_swissreg:_link_hidden_ "],
      [ "javax.faces.ViewState", @state]
    ]
  end

end
|
492
|
-
|
493
|
-
# Collects the trademark numbers of the current hit list page and of all
# following sub pages into @all_trademark_numbers, which is also returned.
#
# filename:: path of a stored hit list page to parse instead of the agent's
#            current page; anything that is not an existing path is ignored
# pageNr::   number of the page being processed; drives the log file names
#            and the sub page requested next
#
# When the last button of the current form is the "simple view" button it is
# submitted first. As long as pageNr is below the number of sub pages the
# form is filled with the paging data, submitted, and the method calls
# itself for the next page.
#
# Fixes compared with the original: File.exists? no longer exists in current
# Ruby, the stored file is closed after parsing, a non-path +filename+ no
# longer reaches File, and the never-used itemsToFetch computation is gone.
def getAllHits(filename = nil, pageNr = 1)
  is_path = filename.is_a?(String) || filename.respond_to?(:to_path)
  if is_path && File.exist?(filename)
    doc = File.open(filename) { |io| Nokogiri::Slop(io) }
  else
    form = @agent.page.form
    btn = form.buttons.last
    if btn && btn.name == "id_swissreg:mainContent:id_show_simple_view_hitlist"
      body = @agent.submit(form, btn).body
    else
      body = @agent.page.body
    end
    body.force_encoding('utf-8') unless /^1\.8/.match(RUBY_VERSION)
    doc = Nokogiri::Slop(body)
    writeResponse("#{LogDir}/vereinfachte_#{pageNr}.html")
  end

  einfach = Swissreg::Vereinfachte.new(doc)
  puts "#{Time.now.strftime("%H:%M:%S")} status: getAllHits for #{pageNr} of #{einfach.nrSubPages} pages" if $VERBOSE
  subPage2Fetch = pageNr + 1
  @all_trademark_numbers += Swissreg::getTrademarkNumbers(doc)

  writeResponse("#{LogDir}/vereinfachte_#{pageNr}_back.html")
  if pageNr < einfach.nrSubPages
    Swissreg::setAllInputValue(@agent.page.forms.first, einfach.getPostDataForSubpage(subPage2Fetch))
    @agent.page.forms.first.submit
    getAllHits(nil, subPage2Fetch)
  end
  @all_trademark_numbers
end
|
530
|
-
|
531
|
-
# Walks through the search result: switches to the simplified hit list when
# the page offers it, collects all trademark numbers with getAllHits and
# fetches the details of each of them with fetchDetails.
#
# filename:: stored result page to parse instead of the agent's current page
# counter::  page number handed on to getAllHits
#
# Fetching the details of one trademark is retried up to three times, each
# time after a pause of 60 seconds and a new session; after that Interrupt
# is raised.
#
# Fixes compared with the original: getAllHits no longer receives the parsed
# document in place of a file name, File.exists? no longer exists in current
# Ruby, the stored file is closed after parsing, and only StandardError is
# retried, so an Interrupt (Ctrl-C, or fetchDetails giving up) propagates to
# the caller instead of triggering further retries.
def fetchresult(filename = "#{LogDir}/fetch_1.html", counter = 1)
  if filename && File.exist?(filename)
    doc = File.open(filename) { |io| Nokogiri::Slop(io) }
  else
    body = @agent.page.body
    body.force_encoding('utf-8') unless /^1\.8/.match(RUBY_VERSION)
    doc = Nokogiri::Slop(body)
    writeResponse(filename)
  end

  if /Vereinfachte Trefferliste anzeigen/i.match(doc.text)
    form = @agent.page.forms.first
    button = form.button_with(:value => /Vereinfachte/i)
    # submit the form using that button
    @agent.submit(form, button)
    filename = "#{LogDir}/vereinfacht.html"
    writeResponse(filename)
  end
  # getAllHits works on the agent's current page
  getAllHits(nil, counter)
  puts"getAllHits: returned #{@all_trademark_numbers ? @all_trademark_numbers.size : 0} hits "
  unless @all_trademark_numbers
    puts "Could not find any trademarks in #{filename}"
    return
  end

  @all_trademark_numbers.each do |nr|
    nrRetries = 0
    begin
      fetchDetails(nr)
    rescue StandardError => e
      nrRetries += 1
      puts e.backtrace
      if nrRetries <= 3
        puts "fetchDetails did not work reinit session and retry for #{nr}. nrRetries #{nrRetries}/3. e #{e}"
        sleep 60 # Sleep a minute to let network recover
        init_swissreg
        retry
      else
        puts "fetchDetails did not work reinit session raise Interrupt"
        raise Interrupt
      end
    end
  end
end
|
576
|
-
end # class Swissreg
|
577
|
-
|
578
|
-
# Searches swissreg for the given timespan and trademark pattern and stores
# whatever was collected in "<timespan>.csv".
#
# An Interrupt or Net::HTTP::Persistent::Error during the search is reported
# and the results gathered so far are still written.
#
# Returns the array of collected results.
def self.run(timespan, marke = 'a*', swiss_only = false)
  session = Swissreg.new(timespan, marke, swiss_only)
  begin
    session.parse_swissreg
    session.fetchresult
  rescue Interrupt, Net::HTTP::Persistent::Error
    puts "Unterbrochen. Vesuche #{session.results.size} Resultate zu speichern"
  end
  csv_name = "#{timespan}.csv"
  Swissreg::emitCsv(session.results, csv_name)
  session.results
end
|
589
|
-
|
590
|
-
end # module Brand2csv
|