brand2csv 0.3.1 → 0.3.2

Sign up to get free protection for your applications and to get access to all the features.
Files changed (56) hide show
  1. checksums.yaml +4 -4
  2. data/.gitignore +14 -0
  3. data/.rspec +1 -0
  4. data/.travis.yml +26 -0
  5. data/Gemfile +4 -0
  6. data/History.txt +121 -0
  7. data/LICENCE.txt +515 -0
  8. data/Manifest.txt +54 -0
  9. data/README.md +27 -0
  10. data/Rakefile +18 -0
  11. data/bin/brand2csv +100 -0
  12. data/brand2csv.gemspec +44 -0
  13. data/lib/brand2csv.rb +594 -0
  14. data/lib/brand2csv/version.rb +3 -0
  15. data/logs/aspen_08_08_1986.html +598 -0
  16. data/logs/post.rohdaten.httpfox +1 -0
  17. data/logs/post.rohdaten.mechanize +1 -0
  18. data/logs/protocol_swissreg.log +86 -0
  19. data/logs/result_01.10.2005.jsp +598 -0
  20. data/logs/sr1.jsp +449 -0
  21. data/logs/sr3.jsp +598 -0
  22. data/logs/start.jsp +350 -0
  23. data/logs/start2.jsp +434 -0
  24. data/protocol.2013.05.12.textile +56 -0
  25. data/protocol.2013.05.15.textile +49 -0
  26. data/protocol.2013.05.21.textile +84 -0
  27. data/spec/brand2csv_spec.rb +62 -0
  28. data/spec/csv_spec.rb +57 -0
  29. data/spec/data/aspectra/detail_00001_P-480296.html +531 -0
  30. data/spec/data/aspectra/detail_00002_P-482236.html +531 -0
  31. data/spec/data/aspectra/detail_00003_641074.html +539 -0
  32. data/spec/data/aspectra/first_results.html +600 -0
  33. data/spec/data/einfache_suche.html +434 -0
  34. data/spec/data/erweiterte_suche.html +446 -0
  35. data/spec/data/main.html +350 -0
  36. data/spec/data/result_short.html +606 -0
  37. data/spec/data/resultate_1.html +446 -0
  38. data/spec/data/resultate_2.html +446 -0
  39. data/spec/data/urner_wildheu/detail_00001_57862.2013.html +516 -0
  40. data/spec/data/urner_wildheu/first_results.html +598 -0
  41. data/spec/data/vereinfachte_1.html +847 -0
  42. data/spec/data/vereinfachte_detail_33.html +516 -0
  43. data/spec/detail_spec.rb +28 -0
  44. data/spec/short_spec.rb +55 -0
  45. data/spec/simple_search.rb +43 -0
  46. data/spec/spec_helper.rb +34 -0
  47. data/spec/support/core_ext/kernel.rb +26 -0
  48. data/spec/support/server_mock_helper.rb +143 -0
  49. data/spec/swissreg_spec.rb +45 -0
  50. data/spec/trademark_numbers_spec.rb +21 -0
  51. data/spec/utilities_spec.rb +83 -0
  52. data/spike.rb +491 -0
  53. data/spike_mechanize_swissreg.rb +312 -0
  54. data/spike_watir.rb +58 -0
  55. data/swissreg.rb +75 -0
  56. metadata +86 -7
@@ -0,0 +1,54 @@
1
+ .gitignore
2
+ .rspec
3
+ .travis.yml
4
+ Gemfile
5
+ Gemfile.lock
6
+ History.txt
7
+ LICENCE.txt
8
+ Manifest.txt
9
+ README.md
10
+ Rakefile
11
+ bin/brand2csv
12
+ lib/brand2csv.rb
13
+ lib/brand2csv/version.rb
14
+ logs/aspen_08_08_1986.html
15
+ logs/post.rohdaten.httpfox
16
+ logs/post.rohdaten.mechanize
17
+ logs/protocol_swissreg.log
18
+ logs/result_01.10.2005.jsp
19
+ logs/sr1.jsp
20
+ logs/sr3.jsp
21
+ logs/start.jsp
22
+ logs/start2.jsp
23
+ protocol.2013.05.12.textile
24
+ protocol.2013.05.15.textile
25
+ protocol.2013.05.21.textile
26
+ spec/brand2csv_spec.rb
27
+ spec/csv_spec.rb
28
+ spec/data/aspectra/detail_00001_P-480296.html
29
+ spec/data/aspectra/detail_00002_P-482236.html
30
+ spec/data/aspectra/detail_00003_641074.html
31
+ spec/data/aspectra/first_results.html
32
+ spec/data/einfache_suche.html
33
+ spec/data/erweiterte_suche.html
34
+ spec/data/main.html
35
+ spec/data/result_short.html
36
+ spec/data/resultate_1.html
37
+ spec/data/resultate_2.html
38
+ spec/data/urner_wildheu/detail_00001_57862.2013.html
39
+ spec/data/urner_wildheu/first_results.html
40
+ spec/data/vereinfachte_1.html
41
+ spec/data/vereinfachte_detail_33.html
42
+ spec/detail_spec.rb
43
+ spec/short_spec.rb
44
+ spec/simple_search.rb
45
+ spec/spec_helper.rb
46
+ spec/support/core_ext/kernel.rb
47
+ spec/support/server_mock_helper.rb
48
+ spec/swissreg_spec.rb
49
+ spec/trademark_numbers_spec.rb
50
+ spec/utilities_spec.rb
51
+ spike.rb
52
+ spike_mechanize_swissreg.rb
53
+ spike_watir.rb
54
+ swissreg.rb
@@ -0,0 +1,27 @@
1
+ # brand2csv
2
+
3
+ [![Build Status](https://secure.travis-ci.org/zdavatz/brand2csv.png)](http://travis-ci.org/zdavatz/brand2csv)
4
+
5
+ brand2csv uses swissreg.ch to look up Swiss trademarks and the addresses of their owners, and stores them in a CSV file.
6
+
7
+ ## Usage
8
+ ```
9
+ brand2csv 01.01.2013 "b*"
10
+ brand2csv 1.10.2005-31.10.2005
11
+ ```
12
+ ## Help
13
+ ```
14
+ ~> brand2csv --help
15
+ /usr/local/bin/brand2csv ver.0.1.9
16
+ Usage:
17
+ brand2csv timespan
18
+ Find all brands registered in switzerland during the given timespan.
19
+ The following examples valid timespan periods:
20
+ brand2csv 01.01.2013 "b*" #will search for all brand starting with "b"
21
+ brand2csv 1.10.2005-31.10.2005 #this will work as well from version 0.1.9
22
+ The results are stored in the file <date_selected>.csv.
23
+ The trademark name is either a real brand name or a link to an image.
24
+ ```
25
+ ## Travis
26
+ You can find Travis builds here:
27
+ * https://travis-ci.org/zdavatz/brand2csv
@@ -0,0 +1,18 @@
1
+ #!/usr/bin/env ruby
2
+ # encoding: utf-8
3
+
4
+ require "bundler/gem_tasks"
5
+ require "rspec/core/rake_task"
6
+ require 'rake/testtask'
7
+
8
# Defines the :spec task that runs the RSpec suite.
RSpec::Core::RakeTask.new(:spec)

# :build (from bundler/gem_tasks) is a prerequisite, so it has already run
# when :gem is reached; no explicit re-invocation is needed.
desc 'Offer a gem task like hoe'
task :gem => :build

# Remove previously built gems before running the specs.
task :spec => :clean

require 'rake/clean'
CLEAN.include FileList['pkg/*.gem']
@@ -0,0 +1,100 @@
1
+ #!/usr/bin/env ruby
2
+
3
+ require 'pathname'
4
+ root = Pathname.new(__FILE__).realpath.parent.parent
5
+ $:.unshift root.join('lib') if $0 == __FILE__
6
+
7
+ require 'optparse'
8
+ require "date"
9
+ require 'brand2csv'
10
+
11
# Build the usage text shown for -h/--help and for invalid invocations.
#
# The text interpolates the invoked program name ($0), the gem version
# (Brand2csv::VERSION) and the basename of this script.
#
# @return [String] the multi-line help text
def help
  <<EOS
#{$0} ver.#{Brand2csv::VERSION}
Usage:
#{File.basename(__FILE__)} timespan
Find all brands registered in Switzerland during the given timespan.
The following examples are valid timespan periods:
brand2csv 01.01.2013 "b*" #will search for all brands starting with "b"
brand2csv 1.10.2005-31.10.2005 #this will work as well from version 0.1.9
The results are stored in the file <date_selected>.csv.
The trademark name is either a real brand name or a link to an image.
--swiss_only Fetch only trademarks from Swiss owners
EOS
end
25
+
26
# Validate a timespan argument given as "dd.mm.yyyy" or
# "dd.mm.yyyy-dd.mm.yyyy".
#
# Every character that is not a digit, dot or dash is stripped before the
# check. If a date cannot be parsed, the last day of the given month/year is
# offered as a correction, provided the day is the only part that differs.
#
# @param arg [String] the raw timespan argument
# @return [Array(Boolean, String|nil)] +[true, nil]+ when every date is
#   valid; otherwise +[false, message]+ where message is either
#   "Did you mean <corrected timespan> ?" or "Timespan is invalid"
def validates_timespan(arg)
  dates = arg.gsub(/[^\d\.-]/, '').split("-")
  # Nothing date-like survived the filtering (e.g. "abc" or "-"). Without
  # this guard the loop below never runs and the input would pass as valid.
  return [false, "Timespan is invalid"] if dates.empty?

  valid = true
  timespan = ""
  catch(:error) do
    dates.each_with_index do |d, i|
      sep = (dates.length > 1 && i != 0) ? "-" : ""
      begin
        Date.parse(d)
        timespan << sep + d
      rescue ArgumentError
        valid = false
        elms = d.split(".")
        begin
          # Day -1 selects the last day of the given month.
          cand = Date.new(elms[2].to_i, elms[1].to_i, -1).strftime("%d.%m.%Y")
        rescue ArgumentError
          cand = nil
        end
        # Only suggest the candidate when the day is the first differing part.
        unless cand && elms[0] == (elms - cand.split(".")).first
          timespan = "" # unknown
          throw :error
        end
        timespan << sep + cand
      end
    end
  end

  message = nil
  unless valid
    message = timespan.empty? ? "Timespan is invalid" : "Did you mean #{timespan} ?"
  end
  [valid, message]
end
64
+
65
# Command-line entry point: parse the options, validate the timespan
# argument and run the search.
opts = {}
parser = OptionParser.new
parser.on('--swiss_only') { |_flag| opts[:swiss_only] = true }
parser.on_tail('-h', '--help') do
  puts help
  exit
end

args = ARGV.dup
begin
  parser.parse!(args)
rescue OptionParser::MissingArgument, OptionParser::InvalidArgument, OptionParser::InvalidOption
  puts help
  exit 1
end

# The timespan is mandatory.
if args.empty?
  puts help
  exit 1
end

valid, message = validates_timespan(args[0])
unless valid
  puts message
  exit 1
end

begin
  # args[1] is the optional brand pattern and is nil when not given.
  Brand2csv::run(args[0], args[1], opts[:swiss_only])
rescue Interrupt
  puts "Unterbrochen. Breche mit Fehler ab"
  exit 1
end

puts "#{__FILE__} completed successfully" if $VERBOSE
@@ -0,0 +1,44 @@
1
+ # coding: utf-8
2
+ lib = File.expand_path('../lib', __FILE__)
3
+ $LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
4
+ require 'brand2csv/version'
5
+
6
# Gem specification for brand2csv. The packaged files are taken from
# `git ls-files`; executables and test files are derived from that list.
Gem::Specification.new do |spec|
  spec.name        = "brand2csv"
  spec.version     = Brand2csv::VERSION
  spec.summary     = 'brand2csv creates csv files for swiss brands'
  spec.description = "brand2csv creates csv files for swiss brand registered in a specific time period.\n" \
                     "The csv contains the brand, link to image (if present), link to the detailinfo at swissreg.ch, name and address of owner (Inhaber)"
  # One entry per person instead of a single comma-joined string.
  spec.authors     = ['Niklaus Giger', 'Yasuhiro Asaka', 'Zeno R.R. Davatz']
  spec.email       = ['yasaka@ywesee.com', 'zdavatz@ywesee.com', 'ngiger@ywesee.com']
  spec.platform    = Gem::Platform::RUBY
  spec.license     = 'GPLv3'
  spec.homepage    = 'https://github.com/zdavatz/brand2csv'
  spec.files       = `git ls-files -z`.split("\x0")
  spec.executables = spec.files.grep(%r{^bin/}) { |f| File.basename(f) }
  spec.test_files  = spec.files.grep(%r{^(test|spec|features)/})
  spec.require_paths = ["lib"]

  # Runtime dependencies
  spec.add_runtime_dependency 'mechanize', '>= 2.6'
  spec.add_runtime_dependency 'json'
  spec.add_runtime_dependency 'nokogiri'

  # Development dependencies
  spec.add_development_dependency 'watir'
  spec.add_development_dependency 'watir-webdriver'
  spec.add_development_dependency 'webmock'
  spec.add_development_dependency 'rake'
  spec.add_development_dependency 'rdoc'
  spec.add_development_dependency 'rspec'

  # The debugger gem depends on the major Ruby version.
  if RUBY_VERSION.match(/^1/)
    spec.add_development_dependency 'pry-debugger'
  else
    spec.add_development_dependency 'pry-byebug'
    spec.add_development_dependency 'pry-doc'
  end
end
44
+
@@ -0,0 +1,594 @@
1
+ #!/usr/bin/env ruby
2
+ # encoding: utf-8
3
+ require 'rubygems' if /^1\.8/.match(RUBY_VERSION)
4
+ require "brand2csv/version"
5
+ require 'mechanize'
6
+ require 'prettyprint'
7
+ require 'optparse'
8
+ require 'csv'
9
+ require 'logger'
10
+
11
+ module Brand2csv
12
+
13
+
14
# One trademark record as it is written to the CSV file: trademark name (or
# link to its image), trademark number, owner, country, whether a
# representative exists ('Ja'/'Nein'), filing date, up to five address lines,
# postal code and town.
# Assigned directly instead of subclassing an anonymous Struct, which would
# leave a superfluous intermediate class in the ancestor chain.
Marke = Struct.new(:name, :markennummer, :inhaber, :land, :hatVertreter, :hinterlegungsdatum,
                   :zeile_1, :zeile_2, :zeile_3, :zeile_4, :zeile_5, :plz, :ort)
16
+
17
+ class Swissreg
18
+
19
# Messages that have been seen in swissreg responses. The arrays below are
# frozen so they cannot be modified by accident.
BekannteFehler =
  ['Das Datum ist ung', # ültig'
   '500 Internal Server Error',
   'Vereinfachte Trefferliste anzeigen',
   'Es wurden keine Daten gefunden.',
   'Die Suchkriterien sind teilweise unzul', # ässig',
   'Geben Sie mindestens ein Suchkriterium ein',
   'Die Suche wurde abgebrochen, da die maximale Suchzeit von 60 Sekunden',
   'Erweiterte Suche',
  ].freeze
Base_uri = 'https://www.swissreg.ch'
Start_uri = "#{Base_uri}/srclient/faces/jsp/start.jsp"
Sr1 = "#{Base_uri}/srclient/faces/jsp/trademark/sr1.jsp"
Sr2 = "#{Base_uri}/srclient/faces/jsp/trademark/sr2.jsp"
Sr3 = "#{Base_uri}/srclient/faces/jsp/trademark/sr3.jsp"
Sr30 = "#{Base_uri}/srclient/faces/jsp/trademark/sr30.jsp"
Sr300 = "#{Base_uri}/srclient/faces/jsp/trademark/sr300.jsp"
# Matches the id attribute of a link to a detail page in a result list.
DetailRegexp = /d_swissreg:mainContent:data:(\d*):tm_no_detail:id_detail/i
# Matches an address line starting with a four-digit postal code; group 1 is
# the postal code, group 2 the rest of the line.
AddressRegexp = /^(\d\d\d\d)\W*(.*)/
LineSplit = ', '
DefaultCountry = 'Schweiz'
# Displayed columns "id_swissreg:mainContent:id_ckbTMChoice"
TMChoiceFields = [
  "tm_lbl_tm_text", # trademark
  # "tm_lbl_state"], # status
  # "tm_lbl_nizza_class"], # Nice classification no.
  # "tm_lbl_no"], # disabled="disabled"], # number
  "tm_lbl_applicant", # owner
  "tm_lbl_country", # country (of the owner)
  "tm_lbl_agent", # representative
  # "tm_lbl_licensee"], # licensee
  "tm_lbl_app_date", # filing date
].freeze
# All fields with descriptive names
# ["id_swissreg:mainContent:id_txf_tm_no", nummer],# trademark no.
# ["id_swissreg:mainContent:id_txf_app_no", ""], # application no.
# ["id_swissreg:mainContent:id_txf_tm_text", marke],
# ["id_swissreg:mainContent:id_txf_applicant", ""], # owner
# ["id_swissreg:mainContent:id_cbxCountry", "_ALL"], # country selection _ALL
# ["id_swissreg:mainContent:id_txf_agent", ""], # representative
# ["id_swissreg:mainContent:id_txf_licensee", ""], # licensee
# ["id_swissreg:mainContent:id_txf_nizza_class", ""], # Nice classification no.
# # ["id_swissreg:mainContent:id_txf_appDate", timespan], # filing date
# ["id_swissreg:mainContent:id_txf_appDate", "%s" % timespan] ,
# ["id_swissreg:mainContent:id_txf_expiryDate", ""], # end of protection period
# Kind of trademark: individual mark 1, collective mark 2, guarantee mark 3
# ["id_swissreg:mainContent:id_cbxTMTypeGrp", "_ALL"], # kind of trademark
# ["id_swissreg:mainContent:id_cbxTMForm", "_ALL"], # trademark type
# ["id_swissreg:mainContent:id_cbxTMColorClaim", "_ALL"], # colour claim
# ["id_swissreg:mainContent:id_txf_pub_date", ""], # publication date

# Information on the publication reason id_swissreg:mainContent:id_ckbTMPubReason
# ["id_swissreg:mainContent:id_ckbTMPubReason", "1"], # new registrations
# ["id_swissreg:mainContent:id_ckbTMPubReason", "2"], # corrections
# ["id_swissreg:mainContent:id_ckbTMPubReason", "3"], # renewals
# ["id_swissreg:mainContent:id_ckbTMPubReason", "4"], # cancellations
# ["id_swissreg:mainContent:id_ckbTMPubReason", "5"], # changes of owner
# ["id_swissreg:mainContent:id_ckbTMPubReason", "6"], # changes of representative
# ["id_swissreg:mainContent:id_ckbTMPubReason", "7"], # changes of licence
# ["id_swissreg:mainContent:id_ckbTMPubReason", "8"], # other register changes
# ["id_swissreg:mainContent:id_ckbTMEmptyHits", "0"], # show empty hit list
# ["id_swissreg:mainContent:id_ckbTMState", "1"], # pending applications 1
# # ["id_swissreg:mainContent:id_ckbTMState", "2"], # cancelled applications 2
# ["id_swissreg:mainContent:id_ckbTMState", "3"], # active trademarks 3
# # ["id_swissreg:mainContent:id_ckbTMState", "4"], # cancelled trademarks 4

# Number of address lines (zeile_1..zeile_5) kept per owner.
MaxZeilen = 5
# Hits requested per result page.
HitsPerPage = 250
# Directory that receives the saved responses and the mechanize log.
LogDir = 'log'
90
+
91
+ attr_accessor :marke, :results, :timespan
92
+
93
# Set up the state for one search session.
#
# @param timespan [String] filing date or date range to search for
# @param marke [String, nil] optional trademark text pattern
# @param swiss_only [Boolean] restrict the search to owners in Switzerland
def initialize(timespan, marke = nil, swiss_only=false)
  # search criteria
  @timespan, @marke, @swiss_only = timespan, marke, swiss_only
  @number = nil
  # collected data
  @results = []
  @all_trademark_numbers = []
  @errors = {}
  # bookkeeping for the detail requests
  @lastDetail = nil
  @counterDetails = 0
end
104
+
105
# Save the body of the page currently held by @agent to +filename+.
#
# Nothing is written unless the code runs under RSpec or in verbose mode.
#
# @param filename [String] path of the file to (over)write
def writeResponse(filename)
  return unless defined?(RSpec) or $VERBOSE
  # The block form closes the file even if writing raises.
  File.open(filename, 'w+') { |ausgabe| ausgabe.puts @agent.page.body }
end
114
+
115
# Look for each entry of BekannteFehler in a response body.
#
# @param body [#to_s] response body to inspect
# @param exitIfFailure [Boolean] when true the first message found is
#   reported and the process exits with status 2; when false every message
#   found is only reported
def checkErrors(body, exitIfFailure = true)
  BekannteFehler.each do |errMsg|
    next unless body.to_s.index(errMsg)
    unless exitIfFailure
      puts "Info: Suche meldet <#{errMsg}> "
      next
    end
    puts "Tut mir leid. Suche wurde mit Fehlermeldung <#{errMsg}> abgebrochen."
    exit 2
  end
end
128
+
129
+ UseClick = false
130
+
131
# Initialize a session with swissreg: create a fresh Mechanize agent, open
# the start page, follow its fourth link and remember the JSF view state of
# the resulting page in @state.
#
# Exits the process with status 3 when the server answers with an HTTP error
# status.
def init_swissreg
  # The log directory is needed for the saved responses and the mechanize log.
  FileUtils.makedirs(LogDir) if $VERBOSE or defined?(RSpec)
  @agent = Mechanize.new do |agent|
    agent.user_agent = 'Mozilla/5.0 (X11; Linux x86_64; rv:16.0) Gecko/20100101 Firefox/16.0'
    # NOTE(review): certificate verification is switched off here; confirm
    # that this is still required for swissreg.ch.
    agent.verify_mode = OpenSSL::SSL::VERIFY_NONE
    agent.log = Logger.new("#{LogDir}/mechanize.log") if $VERBOSE
  end
  @agent.get_file Start_uri # 'https://www.swissreg.ch/srclient/faces/jsp/start.jsp'
  writeResponse("#{LogDir}/session_expired.html")
  checkErrors(@agent.page.body, false)
  @agent.page.links[3].click
  writeResponse("#{LogDir}/homepage.html")
  @state = @agent.page.form["javax.faces.ViewState"]
rescue Mechanize::ResponseCodeError
  # Net::HTTPInternalServerError is an HTTP response class, not an
  # exception, so listing it in the rescue clause could never match.
  puts "Net::HTTPInternalServerError oder Mechanize::ResponseCodeError gesehen.\n #{Base_uri} hat wahrscheinlich Probleme"
  exit 3
end
151
+
152
# Open a new swissreg session, navigate to the extended trademark search and
# post the search form. The first result page is afterwards available as
# @agent.page.
#
# @param timespan [String] filing date or range, e.g. 01.06.2007-10.06.2007
#   (that range should give 377 hits; 559271 was registered in this period)
# @param marke [String, nil] trademark text pattern
# @param nummer [String, nil] trademark number; "559271" gives exactly one hit
# @raise [Interrupt] when posting the search timed out more than three times
def parse_swissreg(timespan = @timespan,
                   marke = @marke,
                   nummer = @number)

  init_swissreg
  # Navigate: trademark search -> extended search.
  @agent.page.form['id_swissreg:_idcl'] = 'id_swissreg_sub_nav_ipiNavigation_item0'
  @agent.page.forms.first.submit
  writeResponse("#{LogDir}/trademark_simple.html")
  @agent.page.form['id_swissreg:_idcl'] = 'id_swissreg_sub_nav_ipiNavigation_item0_item3'
  @agent.page.forms.first.submit
  writeResponse("#{LogDir}/trademark_extended.html")

  data = [
    ["autoScroll", "0,829"],
    ["id_swissreg:_link_hidden_", ""],
    ["id_swissreg:mainContent:id_ckbTMState", "1"], # pending applications 1
    ["id_swissreg:mainContent:id_ckbTMState", "3"], # active trademarks 3
    ["id_swissreg:mainContent:id_txf_tm_no", "#{nummer}"], # trademark no. (empty when nummer is nil)
    ["id_swissreg:mainContent:id_txf_app_no", ""], # application no.
    ["id_swissreg:mainContent:id_txf_tm_text", "#{marke}"],
    ["id_swissreg:mainContent:id_txf_applicant", ""], # owner
    ["id_swissreg:mainContent:id_cbxCountry", @swiss_only ? 'CH' : '_ALL'],
    ["id_swissreg:mainContent:id_txf_agent", ""], # representative
    ["id_swissreg:mainContent:id_txf_licensee", ""], # licensee
    ["id_swissreg:mainContent:id_txf_nizza_class", ""], # Nice classification no.
    ["id_swissreg:mainContent:id_txf_appDate", "#{timespan}"],
    ["id_swissreg:mainContent:id_txf_expiryDate", ""], # end of protection period
    ["id_swissreg:mainContent:id_cbxTMTypeGrp", "_ALL"], # kind of trademark
    ["id_swissreg:mainContent:id_cbxTMForm", "_ALL"], # trademark type
    ["id_swissreg:mainContent:id_cbxTMColorClaim", "_ALL"], # colour claim
    ["id_swissreg:mainContent:id_txf_pub_date", ""], # publication date
    ["id_swissreg:mainContent:id_ckbTMPubReason", '1'],
    ["id_swissreg:mainContent:id_ckbTMPubReason", '2'],
    ["id_swissreg:mainContent:id_ckbTMPubReason", '3'],
    ["id_swissreg:mainContent:id_ckbTMPubReason", '4'],
    ["id_swissreg:mainContent:id_ckbTMPubReason", '5'],
    ["id_swissreg:mainContent:id_ckbTMPubReason", '6'],
    ["id_swissreg:mainContent:id_ckbTMPubReason", '7'],
    ["id_swissreg:mainContent:id_ckbTMPubReason", '8'],
    ["id_swissreg:mainContent:id_cbxFormatChoice", "1"],
    ["id_swissreg:mainContent:id_ckbTMChoice", "tm_lbl_tm_text"],
    ["id_swissreg:mainContent:id_ckbTMChoice", "tm_lbl_applicant"],
    ["id_swissreg:mainContent:id_ckbTMChoice", "tm_lbl_country"],
    ["id_swissreg:mainContent:id_ckbTMChoice", "tm_lbl_agent"],
    ["id_swissreg:mainContent:id_ckbTMChoice", "tm_lbl_app_date"],
    ["id_swissreg:mainContent:id_cbxHitsPerPage", HitsPerPage], # hits per page
    ["id_swissreg:mainContent:sub_fieldset:id_submit", "suchen"],
    ["id_swissreg_SUBMIT", "1"],
    ["id_swissreg:_idcl", ""],
    ["id_swissreg:_link_hidden_", ""],
    ["javax.faces.ViewState", @state],
  ]
  nrRetries = 0
  begin
    @agent.post(Sr3, data)
  rescue Timeout::Error
    nrRetries += 1
    puts "Timeout!"
    # Give up after three retries instead of looping forever; Interrupt is
    # what the detail fetching raises after exhausting its retries, too.
    raise Interrupt if nrRetries > 3
    retry
  end
  writeResponse("#{LogDir}/first_results.html")
  checkErrors(@agent.page.body, false)
end
227
+
228
# Split an owner string into address lines, postal code and town.
#
# The owner text is HTML-unescaped and split at LineSplit. The first line
# from index 1 on that matches AddressRegexp supplies postal code and town;
# that line and the following ones are blanked. Afterwards a remaining line
# made up of digits only is merged with its neighbour.
#
# @param number the trademark number; only used in the debug output
# @param inhaber [String, nil] owner text with LineSplit between the lines
# @return [Array] seven values (five address lines, postal code, town), or
#   eight nils when no postal code was found
def self.parseAddress(number, inhaber)
  plz = ort = lines = nil
  if inhaber
    lines = CGI.unescapeHTML(inhaber).split(LineSplit)
    # Search for plz/address
    (1...lines.length).each do |idx|
      hit = AddressRegexp.match(lines[idx])
      next unless hit
      lines[idx + 1] = nil
      plz = hit[1]
      ort = hit[2]
      (idx..MaxZeilen - 1).each { |pos| lines[pos] = nil }
      break
    end
  end
  if plz.nil?
    puts "Achtung! Konnte Marke #{number} mit Inhaber #{lines.inspect} nicht parsen" if $VERBOSE
    return nil, nil, nil, nil, nil, nil, nil, nil
  end
  # search for lines with only digits
  found = false
  (1...lines.length).each do |idx|
    break if lines[idx].nil?
    next unless /^\d*$/.match(lines[idx])
    if lines[idx + 1].nil?
      # last remaining line: append it to the line before
      found = 'before'
      lines[idx - 1] += LineSplit + lines[idx]
      lines.delete_at(idx)
    else
      # otherwise join it with the line that follows
      found = 'after'
      lines[idx] += LineSplit + lines[idx + 1]
      lines.delete_at(idx + 1)
    end
  end
  puts "found #{found}: #{lines.inspect}" if found and $VERBOSE
  return lines[0], lines[1], lines[2], lines[3], lines[4], plz, ort
end
271
+
272
# Collect the name/value pair of every <input> element of a page.
#
# @param body parsed HTML page responding to +search+
# @return [Array<Array(String, String)>] one [name, value] pair per input;
#   a missing attribute yields an empty string
def self.getInputValuesFromPage(body) # body of HTML page
  body.search('input').map do |field|
    [field.attribute('name').to_s, field.attribute('value').to_s]
  end
end
280
+
281
# Look up a key in an array of POST [name, value] pairs.
#
# @param values [Array<Array>] the pairs to search
# @param key [String] the name to look for
# @return the value of the first pair whose name equals +key+, or nil
def self.inputValue(values, key)
  hit = values.find { |pair| key.eql?(pair[0]) }
  hit && hit[1]
end
288
+
289
# Replace the value of the first pair named +key+ in an array of POST
# [name, value] pairs. The array is modified in place; nothing happens when
# the key is absent.
#
# @param values [Array<Array>] the pairs to update
# @param key [String] the name to look for
# @param newValue the value to store
# @return [nil]
def self.setInputValue(values, key, newValue)
  target = values.find { |pair| key.eql?(pair[0]) }
  target[1] = newValue if target
  nil
end
299
+
300
# Copy an array of [name, value] pairs into the fields of a form. Pairs
# without a matching form field are skipped.
#
# @param form object whose +field(:name => ...)+ yields the matching field
#   (or nil)
# @param values [Array<Array>] the pairs to apply
def self.setAllInputValue(form, values)
  values.each do |pair|
    form.field(:name => pair[0].to_s) do |elem|
      elem.value = pair[1].to_s unless elem == nil
    end
  end
end
309
+
310
# Build a Marke from a parsed swissreg detail page.
#
# The rows of the detail table are scanned for the labels 'Marke',
# 'Inhaber/in', 'Vertreter/in', 'Hinterlegungsdatum' and 'Gesuch Nr.'.
# The country is always set to DefaultCountry.
#
# @param doc parsed detail page responding to +xpath+
# @return [Marke] the record for this trademark
def self.getMarkenInfoFromDetail(doc)
  number = 'invalid'
  hatVertreter = 'Nein'
  bezeichnung = inhaber = hinterlegungsdatum = nil
  doc.xpath("//html/body/form/div/div/fieldset/div/table/tbody/tr").each do |row|
    cells = row.children
    case cells.first.text
    when 'Marke'
      content = cells[1]
      if content.text.index('Markenabbildung')
        # figurative mark: keep the link to the image instead of a name
        bezeichnung = content.elements.first.attribute('href').text
      else # we got a trademark
        bezeichnung = content.text
      end
    when 'Inhaber/in'
      # the address lines are separated by <br> in the cell markup
      inhaber = />(.*)<\/td/.match(cells[1].to_s)[1].gsub('<br>',LineSplit)
    when 'Vertreter/in'
      hatVertreter = 'Ja' if cells[1].text.length > 0
    when 'Hinterlegungsdatum'
      hinterlegungsdatum = cells[1].text
    when 'Gesuch Nr.'
      number = cells[1].text
    end
  end
  zeile_1, zeile_2, zeile_3, zeile_4, zeile_5, plz, ort = Swissreg::parseAddress(number, inhaber)
  inhaber = inhaber.split(', , ')[0] # Catch cases where Inhaber has several postal addresses
  Marke.new(bezeichnung, number, inhaber, DefaultCountry, hatVertreter, hinterlegungsdatum,
            zeile_1, zeile_2, zeile_3, zeile_4, zeile_5, plz, ort)
end
342
+
343
# Fetch the detail page of one trademark, parse it and append the resulting
# Marke to @results. A previously saved copy in LogDir is used when present.
# Takes a long time!
#
# @param nummer [String] trademark number as shown in the result list
# @raise [Interrupt] when the page could not be fetched after three retries
def fetchDetails(nummer)
  @counterDetails += 1
  init_swissreg if @counterDetails % 90 == 0 # it seems that swissreg is artificially slowing down serving request after 100 hits
  filename = "#{LogDir}/detail_#{sprintf('%05d', @counterDetails)}_#{nummer.gsub('/','.')}.html"
  if File.exist?(filename) # File.exists? is gone in Ruby 3.2
    doc = File.open(filename) { |io| Nokogiri::Slop(io) }
  else
    url = "#{Sr300}?language=de&section=tm&id=#{nummer}"
    pp "#{Time.now.strftime("%H:%M:%S")}: Opening #{filename}" if $VERBOSE
    $stdout.flush
    nrRetries = 0
    begin
      @agent.get_file url
      body = @agent.page.body
    rescue StandardError => e
      # A String in the rescue list raises TypeError as soon as any
      # exception occurs, and rescuing Exception would also swallow
      # Ctrl-C; StandardError covers the network errors.
      nrRetries += 1
      puts e.backtrace
      if nrRetries <= 3
        puts "get_file did not work reinit session and retry for #{nummer}. nrRetries #{nrRetries}/3. e #{e}"
        sleep 60 # Sleep a minute to let network recover
        init_swissreg
        retry
      else
        puts "get_file did not work reinit session raise Interrupt"
        raise Interrupt
      end
    end
    body.force_encoding('utf-8') unless /^1\.8/.match(RUBY_VERSION)
    doc = Nokogiri::Slop(body)
    writeResponse(filename)
  end
  marke = Swissreg::getMarkenInfoFromDetail(doc)
  @results << marke
end
377
+
378
# Write the results to a semicolon-separated CSV file with a header line.
# Only the first result of each owner (+inhaber+) is written.
#
# @param results [Array<Struct>, nil] records responding to +inhaber+,
#   +members+ and +to_a+ (e.g. Marke)
# @param filename [String] path of the CSV file to (over)write
# @return [nil] when there is nothing to write
def self.emitCsv(results, filename='ausgabe.csv')
  return if results.nil? || results.size == 0
  # keep the first record seen per owner
  all_inhaber = {}
  results.each { |result| all_inhaber[result.inhaber] ||= result }
  if /^1\.8/.match(RUBY_VERSION)
    # Ruby 1.8 path: build the lines by hand.
    File.open(filename, 'w+') do |ausgabe|
      # Write header
      ausgabe.puts results[0].members.join(';')
      # write all line
      all_inhaber.values.each do |result|
        fields = result.members.map do |member|
          value = result[member] # direct member access instead of eval
          next '' unless value
          value = value.to_s
          value.index(';') ? "\"#{value}\"" : value
        end
        ausgabe.puts fields.join(';')
      end
    end
  else
    CSV.open(filename, 'w', :headers => results[0].members,
                            :write_headers => true,
                            :col_sep => ';') do |csv|
      # hand CSV a plain array rather than relying on Struct duck-typing
      all_inhaber.values.each { |row| csv << row.to_a }
    end
  end
end
415
+
416
# Extract the trademark numbers from a result list page.
#
# Every link whose id matches DetailRegexp contributes the content of its
# first grandchild node.
#
# @param doc parsed result page responding to +search+
# @return [Array] the trademark numbers in page order
def self.getTrademarkNumbers(doc)
  detail_links = doc.search('a').select { |link| DetailRegexp.match(link.attribute('id')) }
  detail_links.map { |link| link.children.first.children.first.content }
end
426
+
427
# Parsed view of one page of the simplified swissreg hit list (sr3.jsp):
# paging information, the form input values and the ids of the detail links.
class Swissreg::Vereinfachte
  attr_reader :links2details, :trademark_search_id, :inputData, :firstHit, :nrHits, :nrSubPages, :pageNr
  HitRegexpDE = /Seite (\d*) von ([\d']*) - Treffer ([\d']*)-([\d']*) von ([\d']*)/
  Vivian = 'id_swissreg:mainContent:vivian'

  # Parse a HTML page from swissreg sr3.jsp
  # There we find info like "Seite 1 von 26 - Treffer 1-250 von 6'349" and upto 250 links to details
  #
  # @param doc parsed page responding to +text+ and +search+
  def initialize(doc)
    @pageNr = @nrSubPages = @firstHit = @nrHits = 0
    paging = HitRegexpDE.match(doc.text)
    if paging
      # numbers use ' as thousands separator, e.g. 6'349
      counts = paging.captures.map { |txt| txt.delete("'").to_i }
      @pageNr, @nrSubPages, @firstHit, _lastHit, @nrHits = counts
    end

    # all form inputs of the page as [name, value] pairs
    @inputData = Swissreg::getInputValuesFromPage(doc)
    @trademark_search_id = Swissreg::inputValue(@inputData, Vivian)
    @state = Swissreg::inputValue(@inputData, 'javax.faces.ViewState')

    # the tmMainId of each detail link is taken from its onclick handler
    @links2details = []
    doc.search('a').each do |link|
      next unless DetailRegexp.match(link.attribute('id'))
      handler = /'tmMainId','(\d*)'/.match(link.attribute('onclick').to_s)
      @links2details << handler[1].to_i
    end
  end

  # POST data that opens the detail of the hit at +position+ with main id +id+.
  def getPostDataForDetail(position, id)
    buildPostData(
      [ "id_swissreg:_idcl", "id_swissreg:mainContent:data:#{position}:tm_no_detail:id_detail", ""],
      "",
      "#{id}"
    )
  end

  # POST data that switches the hit list to sub page +pageNr+.
  def getPostDataForSubpage(pageNr)
    buildPostData(
      [ "id_swissreg:_idcl", "id_swissreg:mainContent:scroll_1idx#{pageNr}"],
      "idx#{pageNr}",
      ""
    )
  end

  private

  # Shared layout of both POST bodies; only the _idcl entry, the scroll
  # value and the tmMainId differ.
  def buildPostData(idclEntry, scroll, tmMainId)
    [
      [ "autoScroll", "0,0"],
      [ "id_swissreg:mainContent:sub_options_result:sub_fieldset:cbxHitsPerPage", "#{HitsPerPage}"],
      [ "id_swissreg:mainContent:vivian", @trademark_search_id],
      [ "id_swissreg_SUBMIT", "1"],
      idclEntry,
      [ "id_swissreg:mainContent:scroll_1", scroll],
      [ "tmMainId", tmMainId],
      [ "id_swissreg:_link_hidden_ "],
      [ "javax.faces.ViewState", @state]
    ]
  end

end
496
+
497
# Collect the trademark numbers of the current result page and, recursively,
# of all following sub pages into @all_trademark_numbers.
#
# @param filename [String, nil] saved page to parse instead of the live
#   page; anything that is not the path of an existing file is ignored
# @param pageNr [Integer] number of the page being processed
# @return [Array] all trademark numbers collected so far
def getAllHits(filename = nil, pageNr = 1)
  # Only a String can be a path; other objects (e.g. a parsed document)
  # would make File.exist? raise. File.exists? is gone in Ruby 3.2.
  if filename.is_a?(String) && File.exist?(filename)
    doc = File.open(filename) { |io| Nokogiri::Slop(io) }
  else
    form = @agent.page.form
    btn = form.buttons.last
    if btn && btn.name == "id_swissreg:mainContent:id_show_simple_view_hitlist"
      # switch to the simplified hit list first
      res = @agent.submit(form, btn)
      body = res.body
    else
      body = @agent.page.body
    end
    body.force_encoding('utf-8') unless /^1\.8/.match(RUBY_VERSION)
    doc = Nokogiri::Slop(body)
    filename = "#{LogDir}/vereinfachte_#{pageNr}.html"
    writeResponse(filename)
  end
  einfach = Swissreg::Vereinfachte.new(doc)
  puts "#{Time.now.strftime("%H:%M:%S")} status: getAllHits for #{pageNr} of #{einfach.nrSubPages} pages" if $VERBOSE
  subPage2Fetch = pageNr + 1
  data2 = einfach.getPostDataForSubpage(subPage2Fetch)
  @all_trademark_numbers += Swissreg::getTrademarkNumbers(doc)

  filename = "#{LogDir}/vereinfachte_#{pageNr}_back.html"
  writeResponse(filename)
  if pageNr < (einfach.nrSubPages)
    # request the next sub page and continue there
    Swissreg::setAllInputValue(@agent.page.forms.first, data2)
    @agent.page.forms.first.submit
    getAllHits(nil, subPage2Fetch)
  end
  @all_trademark_numbers
end
534
+
535
# Process the result of a search: switch to the simplified hit list when it
# is offered, collect all trademark numbers and fetch the detail page of
# each of them.
#
# @param filename [String] saved first result page; parsed instead of the
#   live page when the file exists
# @param counter [Integer] page number handed to getAllHits
# @raise [Interrupt] when a detail page failed more than three times
def fetchresult(filename = "#{LogDir}/fetch_1.html", counter = 1)
  if filename && File.exist?(filename) # File.exists? is gone in Ruby 3.2
    doc = File.open(filename) { |io| Nokogiri::Slop(io) }
  else
    body = @agent.page.body
    body.force_encoding('utf-8') unless /^1\.8/.match(RUBY_VERSION)
    doc = Nokogiri::Slop(body)
    writeResponse(filename)
  end

  if /Vereinfachte Trefferliste anzeigen/i.match(doc.text)
    form = @agent.page.forms.first
    button = form.button_with(:value => /Vereinfachte/i)
    # submit the form using that button
    @agent.submit(form, button)
    filename = "#{LogDir}/vereinfacht.html"
    writeResponse(filename)
  end
  # getAllHits expects a file name, not a parsed document; nil makes it
  # read the page currently held by the agent.
  getAllHits(nil, counter)
  puts"getAllHits: returned #{@all_trademark_numbers ? @all_trademark_numbers.size : 0} hits "
  if @all_trademark_numbers.nil? || @all_trademark_numbers.empty?
    puts "Could not find any trademarks in #{filename}"
    return
  end
  @all_trademark_numbers.each do |nr|
    nrRetries = 0
    begin
      fetchDetails(nr)
    rescue StandardError => e
      # StandardError includes SocketError but, unlike Exception, lets
      # Interrupt (Ctrl-C or exhausted retries) pass through to the caller.
      nrRetries += 1
      puts e.backtrace
      if nrRetries <= 3
        puts "fetchDetails did not work reinit session and retry for #{nr}. nrRetries #{nrRetries}/3. e #{e}"
        sleep 60 # Sleep a minute to let network recover
        init_swissreg
        retry
      else
        puts "fetchDetails did not work reinit session raise Interrupt"
        raise Interrupt
      end
    end
  end
end
580
+ end # class Swissreg
581
+
582
# Run one complete search and store the results in "<timespan>.csv".
#
# When the search is interrupted, the results collected so far are still
# written.
#
# @param timespan [String] filing date or date range to search for
# @param marke [String, nil] trademark text pattern
# @param swiss_only [Boolean] restrict the search to owners in Switzerland
# @return [Array] the collected results
def self.run(timespan, marke = 'a*', swiss_only = false)
  session = Swissreg.new(timespan, marke, swiss_only)
  begin
    session.parse_swissreg
    session.fetchresult
  rescue Interrupt, Net::HTTP::Persistent::Error
    puts "Unterbrochen. Versuche #{session.results.size} Resultate zu speichern"
  end
  Swissreg::emitCsv(session.results, "#{timespan}.csv")
  session.results
end
593
+
594
+ end # module Brand2csv