brand2csv 0.3.1 → 0.3.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (56) hide show
  1. checksums.yaml +4 -4
  2. data/.gitignore +14 -0
  3. data/.rspec +1 -0
  4. data/.travis.yml +26 -0
  5. data/Gemfile +4 -0
  6. data/History.txt +121 -0
  7. data/LICENCE.txt +515 -0
  8. data/Manifest.txt +54 -0
  9. data/README.md +27 -0
  10. data/Rakefile +18 -0
  11. data/bin/brand2csv +100 -0
  12. data/brand2csv.gemspec +44 -0
  13. data/lib/brand2csv.rb +594 -0
  14. data/lib/brand2csv/version.rb +3 -0
  15. data/logs/aspen_08_08_1986.html +598 -0
  16. data/logs/post.rohdaten.httpfox +1 -0
  17. data/logs/post.rohdaten.mechanize +1 -0
  18. data/logs/protocol_swissreg.log +86 -0
  19. data/logs/result_01.10.2005.jsp +598 -0
  20. data/logs/sr1.jsp +449 -0
  21. data/logs/sr3.jsp +598 -0
  22. data/logs/start.jsp +350 -0
  23. data/logs/start2.jsp +434 -0
  24. data/protocol.2013.05.12.textile +56 -0
  25. data/protocol.2013.05.15.textile +49 -0
  26. data/protocol.2013.05.21.textile +84 -0
  27. data/spec/brand2csv_spec.rb +62 -0
  28. data/spec/csv_spec.rb +57 -0
  29. data/spec/data/aspectra/detail_00001_P-480296.html +531 -0
  30. data/spec/data/aspectra/detail_00002_P-482236.html +531 -0
  31. data/spec/data/aspectra/detail_00003_641074.html +539 -0
  32. data/spec/data/aspectra/first_results.html +600 -0
  33. data/spec/data/einfache_suche.html +434 -0
  34. data/spec/data/erweiterte_suche.html +446 -0
  35. data/spec/data/main.html +350 -0
  36. data/spec/data/result_short.html +606 -0
  37. data/spec/data/resultate_1.html +446 -0
  38. data/spec/data/resultate_2.html +446 -0
  39. data/spec/data/urner_wildheu/detail_00001_57862.2013.html +516 -0
  40. data/spec/data/urner_wildheu/first_results.html +598 -0
  41. data/spec/data/vereinfachte_1.html +847 -0
  42. data/spec/data/vereinfachte_detail_33.html +516 -0
  43. data/spec/detail_spec.rb +28 -0
  44. data/spec/short_spec.rb +55 -0
  45. data/spec/simple_search.rb +43 -0
  46. data/spec/spec_helper.rb +34 -0
  47. data/spec/support/core_ext/kernel.rb +26 -0
  48. data/spec/support/server_mock_helper.rb +143 -0
  49. data/spec/swissreg_spec.rb +45 -0
  50. data/spec/trademark_numbers_spec.rb +21 -0
  51. data/spec/utilities_spec.rb +83 -0
  52. data/spike.rb +491 -0
  53. data/spike_mechanize_swissreg.rb +312 -0
  54. data/spike_watir.rb +58 -0
  55. data/swissreg.rb +75 -0
  56. metadata +86 -7
@@ -0,0 +1,54 @@
1
+ .gitignore
2
+ .rspec
3
+ .travis.yml
4
+ Gemfile
5
+ Gemfile.lock
6
+ History.txt
7
+ LICENCE.txt
8
+ Manifest.txt
9
+ README.md
10
+ Rakefile
11
+ bin/brand2csv
12
+ lib/brand2csv.rb
13
+ lib/brand2csv/version.rb
14
+ logs/aspen_08_08_1986.html
15
+ logs/post.rohdaten.httpfox
16
+ logs/post.rohdaten.mechanize
17
+ logs/protocol_swissreg.log
18
+ logs/result_01.10.2005.jsp
19
+ logs/sr1.jsp
20
+ logs/sr3.jsp
21
+ logs/start.jsp
22
+ logs/start2.jsp
23
+ protocol.2013.05.12.textile
24
+ protocol.2013.05.15.textile
25
+ protocol.2013.05.21.textile
26
+ spec/brand2csv_spec.rb
27
+ spec/csv_spec.rb
28
+ spec/data/aspectra/detail_00001_P-480296.html
29
+ spec/data/aspectra/detail_00002_P-482236.html
30
+ spec/data/aspectra/detail_00003_641074.html
31
+ spec/data/aspectra/first_results.html
32
+ spec/data/einfache_suche.html
33
+ spec/data/erweiterte_suche.html
34
+ spec/data/main.html
35
+ spec/data/result_short.html
36
+ spec/data/resultate_1.html
37
+ spec/data/resultate_2.html
38
+ spec/data/urner_wildheu/detail_00001_57862.2013.html
39
+ spec/data/urner_wildheu/first_results.html
40
+ spec/data/vereinfachte_1.html
41
+ spec/data/vereinfachte_detail_33.html
42
+ spec/detail_spec.rb
43
+ spec/short_spec.rb
44
+ spec/simple_search.rb
45
+ spec/spec_helper.rb
46
+ spec/support/core_ext/kernel.rb
47
+ spec/support/server_mock_helper.rb
48
+ spec/swissreg_spec.rb
49
+ spec/trademark_numbers_spec.rb
50
+ spec/utilities_spec.rb
51
+ spike.rb
52
+ spike_mechanize_swissreg.rb
53
+ spike_watir.rb
54
+ swissreg.rb
@@ -0,0 +1,27 @@
1
+ # brand2csv
2
+
3
+ [![Build Status](https://secure.travis-ci.org/zdavatz/brand2csv.png)](http://travis-ci.org/zdavatz/brand2csv)
4
+
5
+ brand2csv uses swissreg.ch to get addresses.
6
+
7
+ ## Usage
8
+ ```
9
+ brand2csv 01.01.2013 "b*"
10
+ brand2csv 1.10.2005-31.10.2005
11
+ ```
12
+ ## Help
13
+ ```
14
+ ~> brand2csv --help
15
+ /usr/local/bin/brand2csv ver.0.1.9
16
+ Usage:
17
+ brand2csv timespan
18
+ Find all brands registered in switzerland during the given timespan.
19
+ The following examples valid timespan periods:
20
+ brand2csv 01.01.2013 "b*" #will search for all brand starting with "b"
21
+ brand2csv 1.10.2005-31.10.2005 #this will work as well from version 0.1.9
22
+ The results are stored in the file <date_selected>.csv.
23
+ The trademark name is either a real brand name or a link to an image.
24
+ ```
25
+ ## Travis
26
+ You can find Travis builds here:
27
+ * https://travis-ci.org/zdavatz/brand2csv
@@ -0,0 +1,18 @@
1
+ #!/usr/bin/env ruby
2
+ # encoding: utf-8
3
+
4
+ require "bundler/gem_tasks"
5
+ require "rspec/core/rake_task"
6
+ require 'rake/testtask'
7
+
8
+ RSpec::Core::RakeTask.new(:spec)
9
+
10
+ desc 'Offer a gem task like hoe'
11
+ task :gem => :build do
12
+ Rake::Task[:build].invoke
13
+ end
14
+
15
+ task :spec => :clean
16
+
17
+ require 'rake/clean'
18
+ CLEAN.include FileList['pkg/*.gem']
@@ -0,0 +1,100 @@
1
+ #!/usr/bin/env ruby
2
+
3
+ require 'pathname'
4
+ root = Pathname.new(__FILE__).realpath.parent.parent
5
+ $:.unshift root.join('lib') if $0 == __FILE__
6
+
7
+ require 'optparse'
8
+ require "date"
9
+ require 'brand2csv'
10
+
11
+ def help
12
+ <<EOS
13
+ #$0 ver.#{Brand2csv::VERSION}
14
+ Usage:
15
+ #{File.basename(__FILE__)} timespan
16
+ Find all brands registered in switzerland during the given timespan.
17
+ The following examples valid timespan periods:
18
+ brand2csv 01.01.2013 "b*" #will search for all brand starting with "b"
19
+ brand2csv 1.10.2005-31.10.2005 #this will work as well from version 0.1.9
20
+ The results are stored in the file <date_selected>.csv.
21
+ The trademark name is either a real brand name or a link to an image.
22
+ --swiss_only Fetch only trademarks from swiss owner
23
+ EOS
24
+ end
25
+
26
# Suggest a corrected date for a "dd.mm.yyyy" string whose day does not exist.
#
# Returns the last day of the given month formatted as "dd.mm.yyyy" when the
# day is the only part that differs, otherwise nil (also nil when month/year
# cannot form a date at all).
def last_day_suggestion(date_string)
  elms = date_string.split('.')
  cand = Date.new(elms[2].to_i, elms[1].to_i, -1).strftime('%d.%m.%Y')
  elms[0] == (elms - cand.split('.')).first ? cand : nil
rescue ArgumentError
  nil
end

# Validate the timespan given on the command line.
#
# arg is either a single date ("01.01.2013") or a range
# ("1.10.2005-31.10.2005"); every character except digits, '.' and '-'
# is discarded before parsing.
#
# Returns [valid, message]:
# * [true, nil] when every date parses,
# * [false, "Did you mean ... ?"] when an impossible day could be replaced by
#   the last day of its month,
# * [false, "Timespan is invalid"] otherwise, including input that contains
#   no date at all (previously such input was reported as valid).
def validates_timespan(arg)
  dates = arg.to_s.gsub(/[^\d\.-]/, '').split('-')
  return [false, 'Timespan is invalid'] if dates.empty?

  valid = true
  parts = []
  dates.each do |d|
    begin
      Date.parse(d)
      parts << d
    rescue ArgumentError
      valid = false
      suggestion = last_day_suggestion(d)
      # One unrepairable date makes the whole timespan unusable.
      return [false, 'Timespan is invalid'] unless suggestion
      parts << suggestion
    end
  end
  [valid, valid ? nil : "Did you mean #{parts.join('-')} ?"]
end
64
+
65
+ parser = OptionParser.new
66
+ opts = {}
67
+ parser.on('--swiss_only') {|v| opts[:swiss_only] = true }
68
+ parser.on_tail('-h', '--help') { puts help; exit }
69
+
70
+ args = ARGV.dup
71
+ begin
72
+ parser.parse!(args)
73
+ rescue OptionParser::MissingArgument,
74
+ OptionParser::InvalidArgument,
75
+ OptionParser::InvalidOption
76
+ puts help
77
+ exit 1
78
+ end
79
+
80
+ unless args.size >= 1
81
+ puts help
82
+ exit 1
83
+ end
84
+
85
+ unless args.empty?
86
+ valid,message = validates_timespan(args[0])
87
+ unless valid
88
+ puts message
89
+ exit 1
90
+ end
91
+ end
92
+
93
+ begin
94
+ Brand2csv::run(args[0], args[1], opts[:swiss_only])
95
+ rescue Interrupt
96
+ puts "Unterbrochen. Breche mit Fehler ab"
97
+ exit 1
98
+ end
99
+
100
+ puts "#{__FILE__} completed successfully" if $VERBOSE
@@ -0,0 +1,44 @@
1
+ # coding: utf-8
2
+ lib = File.expand_path('../lib', __FILE__)
3
+ $LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
4
+ require 'brand2csv/version'
5
+
6
+ Gem::Specification.new do |spec|
7
+ spec.name = "brand2csv"
8
+ spec.version = Brand2csv::VERSION
9
+ spec.summary = 'brand2csv creates csv files for swiss brands'
10
+ spec.description = "brand2csv creates csv files for swiss brand registered in a specific time period.
11
+ The csv contains the brand, link to image (if present), link to the detailinfo at swissreg.ch, name and address of owner (Inhaber)"
12
+ spec.author = 'Niklaus Giger, Yasuhiro Asaka, Zeno R.R. Davatz'
13
+ spec.email = 'yasaka@ywesee.com, zdavatz@ywesee.com, ngiger@ywesee.com'
14
+ spec.platform = Gem::Platform::RUBY
15
+ spec.license = 'GPLv3'
16
+ spec.homepage = 'https://github.com/zdavatz/brand2csv'
17
+ spec.files = `git ls-files -z`.split("\x0")
18
+ spec.executables = spec.files.grep(%r{^bin/}) { |f| File.basename(f) }
19
+ spec.test_files = spec.files.grep(%r{^(test|spec|features)/})
20
+ spec.require_paths = ["lib"]
21
+
22
+ # gem.add_runtime_dependency
23
+ spec.add_runtime_dependency 'mechanize', '>= 2.6'
24
+ spec.add_runtime_dependency'json'
25
+ spec.add_runtime_dependency'nokogiri'
26
+
27
+ # gem.add_development_dependency
28
+ spec.add_development_dependency 'watir'
29
+ spec.add_development_dependency 'watir-webdriver'
30
+ spec.add_development_dependency 'webmock'
31
+ spec.add_development_dependency 'rake'
32
+ spec.add_development_dependency 'rdoc'
33
+ spec.add_development_dependency 'rspec'
34
+
35
+
36
+
37
+ if RUBY_VERSION.match(/^1/)
38
+ spec.add_development_dependency 'pry-debugger'
39
+ else
40
+ spec.add_development_dependency 'pry-byebug'
41
+ spec.add_development_dependency 'pry-doc'
42
+ end
43
+ end
44
+
@@ -0,0 +1,594 @@
1
+ #!/usr/bin/env ruby
2
+ # encoding: utf-8
3
+ require 'rubygems' if /^1\.8/.match(RUBY_VERSION)
4
+ require "brand2csv/version"
5
+ require 'mechanize'
6
+ require 'prettyprint'
7
+ require 'optparse'
8
+ require 'csv'
9
+ require 'logger'
10
+
11
+ module Brand2csv
12
+
13
+
14
# Value object for one trademark: name (or image link), trademark number,
# owner, country, whether a representative exists, filing date, up to five
# address lines, postal code and town.
# Assigned directly instead of subclassing an anonymous Struct, which would
# leave an extra unnamed class in the ancestor chain.
Marke = Struct.new(:name, :markennummer, :inhaber, :land, :hatVertreter, :hinterlegungsdatum,
                   :zeile_1, :zeile_2, :zeile_3, :zeile_4, :zeile_5, :plz, :ort)
16
+
17
+ class Swissreg
18
+
19
+ # Weitere gesehene Fehler
20
+ BekannteFehler =
21
+ ['Das Datum ist ung', # ültig'
22
+ '500 Internal Server Error',
23
+ 'Vereinfachte Trefferliste anzeigen',
24
+ 'Es wurden keine Daten gefunden.',
25
+ 'Die Suchkriterien sind teilweise unzul', # ässig',
26
+ 'Geben Sie mindestens ein Suchkriterium ein',
27
+ 'Die Suche wurde abgebrochen, da die maximale Suchzeit von 60 Sekunden',
28
+ 'Erweiterte Suche',
29
+ ]
30
+ Base_uri = 'https://www.swissreg.ch'
31
+ Start_uri = "#{Base_uri}/srclient/faces/jsp/start.jsp"
32
+ Sr1 = "#{Base_uri}/srclient/faces/jsp/trademark/sr1.jsp"
33
+ Sr2 = "#{Base_uri}/srclient/faces/jsp/trademark/sr2.jsp"
34
+ Sr3 = "#{Base_uri}/srclient/faces/jsp/trademark/sr3.jsp"
35
+ Sr30 = "#{Base_uri}/srclient/faces/jsp/trademark/sr30.jsp"
36
+ Sr300 = "#{Base_uri}/srclient/faces/jsp/trademark/sr300.jsp"
37
+ DetailRegexp = /d_swissreg:mainContent:data:(\d*):tm_no_detail:id_detail/i
38
+ AddressRegexp = /^(\d\d\d\d)\W*(.*)/
39
+ LineSplit = ', '
40
+ DefaultCountry = 'Schweiz'
41
+ # Angezeigte Spalten "id_swissreg:mainContent:id_ckbTMChoice"
42
+ TMChoiceFields = [
43
+ "tm_lbl_tm_text", # Marke
44
+ # "tm_lbl_state"], # Status
45
+ # "tm_lbl_nizza_class"], # Nizza Klassifikation Nr.
46
+ # "tm_lbl_no"], # disabled="disabled"], # Nummer
47
+ "tm_lbl_applicant", # Inhaber/in
48
+ "tm_lbl_country", # Land (Inhaber/in)
49
+ "tm_lbl_agent", # Vertreter/in
50
+ # "tm_lbl_licensee"], # Lizenznehmer/in
51
+ "tm_lbl_app_date", # Hinterlegungsdatum
52
+ ]
53
+ # Alle Felder mit sprechenden Namen
54
+ # ["id_swissreg:mainContent:id_txf_tm_no", nummer],# Marken Nr
55
+ # ["id_swissreg:mainContent:id_txf_app_no", ""], # Gesuch Nr.
56
+ # ["id_swissreg:mainContent:id_txf_tm_text", marke],
57
+ # ["id_swissreg:mainContent:id_txf_applicant", ""], # Inhaber/in
58
+ # ["id_swissreg:mainContent:id_cbxCountry", "_ALL"], # Auswahl Länder _ALL
59
+ # ["id_swissreg:mainContent:id_txf_agent", ""], # Vertreter/in
60
+ # ["id_swissreg:mainContent:id_txf_licensee", ""], # Lizenznehmer
61
+ # ["id_swissreg:mainContent:id_txf_nizza_class", ""], # Nizza Klassifikation Nr.
62
+ # # ["id_swissreg:mainContent:id_txf_appDate", timespan], # Hinterlegungsdatum
63
+ # ["id_swissreg:mainContent:id_txf_appDate", "%s" % timespan] ,
64
+ # ["id_swissreg:mainContent:id_txf_expiryDate", ""], # Ablauf Schutzfrist
65
+ # Markenart: Individualmarke 1 Kollektivmarke 2 Garantiemarke 3
66
+ # ["id_swissreg:mainContent:id_cbxTMTypeGrp", "_ALL"], # Markenart
67
+ # ["id_swissreg:mainContent:id_cbxTMForm", "_ALL"], # Markentyp
68
+ # ["id_swissreg:mainContent:id_cbxTMColorClaim", "_ALL"], # Farbanspruch
69
+ # ["id_swissreg:mainContent:id_txf_pub_date", ""], # Publikationsdatum
70
+
71
+ # info zu Publikationsgrund id_swissreg:mainContent:id_ckbTMPubReason
72
+ # ["id_swissreg:mainContent:id_ckbTMPubReason", "1"], #Neueintragungen
73
+ # ["id_swissreg:mainContent:id_ckbTMPubReason", "2"], #Berichtigungen
74
+ # ["id_swissreg:mainContent:id_ckbTMPubReason", "3"], #Verlängerungen
75
+ # ["id_swissreg:mainContent:id_ckbTMPubReason", "4"], #Löschungen
76
+ # ["id_swissreg:mainContent:id_ckbTMPubReason", "5"], #Inhaberänderungen
77
+ # ["id_swissreg:mainContent:id_ckbTMPubReason", "6"], #Vertreteränderungen
78
+ # ["id_swissreg:mainContent:id_ckbTMPubReason", "7"], #Lizenzänderungen
79
+ # ["id_swissreg:mainContent:id_ckbTMPubReason", "8"], #Weitere Registeränderungen
80
+ # ["id_swissreg:mainContent:id_ckbTMEmptyHits", "0"], # Leere Trefferliste anzeigen
81
+ # ["id_swissreg:mainContent:id_ckbTMState", "1"], # "Hängige Gesuche 1
82
+ # # ["id_swissreg:mainContent:id_ckbTMState", "2"], # "Gelöschte Gesuche 2
83
+ # ["id_swissreg:mainContent:id_ckbTMState", "3"], # aktive Marken 3
84
+ # # ["id_swissreg:mainContent:id_ckbTMState", "4"], # gelöschte Marken 4
85
+
86
+
87
+ MaxZeilen = 5
88
+ HitsPerPage = 250
89
+ LogDir = 'log'
90
+
91
+ attr_accessor :marke, :results, :timespan
92
+
93
+ def initialize(timespan, marke = nil, swiss_only=false)
94
+ @timespan = timespan
95
+ @marke = marke
96
+ @swiss_only = swiss_only
97
+ @number = nil
98
+ @results = []
99
+ @all_trademark_numbers = []
100
+ @errors = Hash.new
101
+ @lastDetail =nil
102
+ @counterDetails = 0
103
+ end
104
+
105
# Dump the body of the agent's current page to +filename+ for debugging.
#
# Nothing is written unless RSpec is loaded or Ruby runs in verbose mode
# ($VERBOSE). The former "Skipping writing" message sat in a branch that was
# only reached when $VERBOSE was falsy, so it could never be printed and has
# been dropped.
def writeResponse(filename)
  return unless defined?(RSpec) || $VERBOSE

  # Block form closes the handle even if reading the page body raises.
  File.open(filename, 'w+') { |ausgabe| ausgabe.puts @agent.page.body }
end
114
+
115
# Scan a response body for any of the known error texts in BekannteFehler.
#
# With exitIfFailure (the default) the first match prints an apology and
# terminates the process with status 2; otherwise every match is only
# reported as an info line.
def checkErrors(body, exitIfFailure = true)
  text = body.to_s
  BekannteFehler.each do |errMsg|
    next unless text.include?(errMsg)

    unless exitIfFailure
      puts "Info: Suche meldet <#{errMsg}> "
      next
    end
    puts "Tut mir leid. Suche wurde mit Fehlermeldung <#{errMsg}> abgebrochen."
    exit 2
  end
end
128
+
129
+ UseClick = false
130
+
131
# Open a fresh Mechanize session against swissreg: load the start page,
# follow the fourth link on it and remember the JSF view state in @state.
#
# Exits the process with status 3 when one of the rescued classes is raised.
def init_swissreg
  @agent = Mechanize.new do |agent|
    agent.user_agent = 'Mozilla/5.0 (X11; Linux x86_64; rv:16.0) Gecko/20100101 Firefox/16.0'
    # NOTE(review): certificate verification is switched off here.
    agent.verify_mode = OpenSSL::SSL::VERIFY_NONE
    FileUtils.makedirs(LogDir) if $VERBOSE || defined?(RSpec)
    agent.log = Logger.new("#{LogDir}/mechanize.log") if $VERBOSE
  end
  @agent.get_file Start_uri # 'https://www.swissreg.ch/srclient/faces/jsp/start.jsp'
  writeResponse("#{LogDir}/session_expired.html")
  checkErrors(@agent.page.body, false)
  @agent.page.links[3].click
  writeResponse("#{LogDir}/homepage.html")
  @state = @agent.page.form["javax.faces.ViewState"]
rescue Net::HTTPInternalServerError, Mechanize::ResponseCodeError
  puts "Net::HTTPInternalServerError oder Mechanize::ResponseCodeError gesehen.\n #{Base_uri} hat wahrscheinlich Probleme"
  exit 3
end
151
+
152
# Start a session, navigate to the extended trademark search and post the
# search form for the given timespan and trademark text.
#
# timespan - filing date or date range sent as "id_txf_appDate"
#            (source comment: 01.06.2007-10.06.2007 should give 377 hits)
# marke    - trademark text sent as "id_txf_tm_text"
# nummer   - accepted for compatibility; not used by the request
#
# A Timeout::Error while posting is retried at most three times and then
# re-raised (the search used to be retried forever).
def parse_swissreg(timespan = @timespan, marke = @marke, nummer = @number)
  init_swissreg

  # Two JSF navigation clicks: trademark search, then its extended form.
  @agent.page.form['id_swissreg:_idcl'] = 'id_swissreg_sub_nav_ipiNavigation_item0'
  @agent.page.forms.first.submit
  writeResponse("#{LogDir}/trademark_simple.html")

  @agent.page.form['id_swissreg:_idcl'] = 'id_swissreg_sub_nav_ipiNavigation_item0_item3'
  @agent.page.forms.first.submit
  writeResponse("#{LogDir}/trademark_extended.html")

  data = [
    ["autoScroll", "0,829"],
    ["id_swissreg:_link_hidden_", ""],
    ["id_swissreg:mainContent:id_ckbTMState", "1"], # pending applications
    ["id_swissreg:mainContent:id_ckbTMState", "3"], # active trademarks
    ["id_swissreg:mainContent:id_txf_tm_no", ""], # trademark number
    ["id_swissreg:mainContent:id_txf_app_no", ""], # application number
    ["id_swissreg:mainContent:id_txf_tm_text", "#{marke}"],
    ["id_swissreg:mainContent:id_txf_applicant", ""], # owner
    ["id_swissreg:mainContent:id_cbxCountry", @swiss_only ? 'CH' : '_ALL'],
    ["id_swissreg:mainContent:id_txf_agent", ""], # representative
    ["id_swissreg:mainContent:id_txf_licensee", ""], # licensee
    ["id_swissreg:mainContent:id_txf_nizza_class", ""], # Nice classification number
    ["id_swissreg:mainContent:id_txf_appDate", "#{timespan}"],
    ["id_swissreg:mainContent:id_txf_expiryDate", ""], # end of protection period
    ["id_swissreg:mainContent:id_cbxTMTypeGrp", "_ALL"], # trademark kind
    ["id_swissreg:mainContent:id_cbxTMForm", "_ALL"], # trademark type
    ["id_swissreg:mainContent:id_cbxTMColorClaim", "_ALL"], # colour claim
    ["id_swissreg:mainContent:id_txf_pub_date", ""], # publication date
    ["id_swissreg:mainContent:id_ckbTMPubReason", '1'],
    ["id_swissreg:mainContent:id_ckbTMPubReason", '2'],
    ["id_swissreg:mainContent:id_ckbTMPubReason", '3'],
    ["id_swissreg:mainContent:id_ckbTMPubReason", '4'],
    ["id_swissreg:mainContent:id_ckbTMPubReason", '5'],
    ["id_swissreg:mainContent:id_ckbTMPubReason", '6'],
    ["id_swissreg:mainContent:id_ckbTMPubReason", '7'],
    ["id_swissreg:mainContent:id_ckbTMPubReason", '8'],
    ["id_swissreg:mainContent:id_cbxFormatChoice", "1"],
    ["id_swissreg:mainContent:id_ckbTMChoice", "tm_lbl_tm_text"],
    ["id_swissreg:mainContent:id_ckbTMChoice", "tm_lbl_applicant"],
    ["id_swissreg:mainContent:id_ckbTMChoice", "tm_lbl_country"],
    ["id_swissreg:mainContent:id_ckbTMChoice", "tm_lbl_agent"],
    ["id_swissreg:mainContent:id_ckbTMChoice", "tm_lbl_app_date"],
    ["id_swissreg:mainContent:id_cbxHitsPerPage", HitsPerPage], # hits per page
    ["id_swissreg:mainContent:sub_fieldset:id_submit", "suchen"],
    ["id_swissreg_SUBMIT", "1"],
    ["id_swissreg:_idcl", ""],
    ["id_swissreg:_link_hidden_", ""],
    ["javax.faces.ViewState", @state],
  ]

  max_attempts = 3
  attempts = 0
  begin
    @agent.post(Sr3, data)
  rescue Timeout::Error
    attempts += 1
    puts "Timeout!"
    retry if attempts < max_attempts
    raise
  end
  writeResponse("#{LogDir}/first_results.html")
  checkErrors(@agent.page.body, false)
end
227
+
228
# Split an owner (Inhaber) string into address lines, postal code and town.
#
# number  - only used in the diagnostic message (helps debugging)
# inhaber - HTML-escaped owner text whose parts are separated by LineSplit;
#           may be nil
#
# Returns seven values: lines[0]..lines[4], plz, ort. All seven are nil when
# no line after the first one matches AddressRegexp (this failure path used
# to return eight values, one more than callers destructure).
def self.parseAddress(number, inhaber)
  ort = nil
  plz = nil
  lines = nil
  if inhaber
    lines = CGI.unescapeHTML(inhaber).split(LineSplit)
    # Search for the "plz ort" line; the first line is never considered.
    (1...lines.length).each do |cnt|
      m = AddressRegexp.match(lines[cnt])
      next unless m

      lines[cnt + 1] = nil
      plz = m[1]
      ort = m[2]
      # Blank the matched line and everything up to the MaxZeilen-th line.
      cnt.upto(MaxZeilen - 1) { |cnt2| lines[cnt2] = nil }
      break
    end
  end
  unless plz
    puts "Achtung! Konnte Marke #{number} mit Inhaber #{lines.inspect} nicht parsen" if $VERBOSE
    return nil, nil, nil, nil, nil, nil, nil
  end

  # Lines consisting only of digits were split off by LineSplit; glue them
  # back onto the previous line (when last) or the following one.
  found = false
  1.upto(lines.length - 1) do |cnt|
    break if lines[cnt].nil?
    next unless /^\d*$/.match(lines[cnt])

    if lines[cnt + 1].nil?
      found = 'before'
      lines[cnt - 1] += LineSplit + lines[cnt]
      lines.delete_at(cnt)
    else
      found = 'after'
      lines[cnt] += LineSplit + lines[cnt + 1]
      lines.delete_at(cnt + 1)
    end
  end
  puts "found #{found}: #{lines.inspect}" if found && $VERBOSE
  return lines[0], lines[1], lines[2], lines[3], lines[4], plz, ort
end
271
+
272
+ def Swissreg::getInputValuesFromPage(body) # body of HTML page
273
+ contentData = []
274
+ body.search('input').each{ |input|
275
+ # puts "name: #{input.attribute('name')} value #{input.attribute('value')}"
276
+ contentData << [ input.attribute('name').to_s, input.attribute('value').to_s ]
277
+ }
278
+ contentData
279
+ end
280
+
281
# Look up +key+ in an array of [name, value] POST pairs and return the value
# of the first pair whose name is eql? to it, or nil when there is none.
def self.inputValue(values, key)
  hit = values.find { |entry| key.eql?(entry[0]) }
  hit && hit[1]
end
288
+
289
# Overwrite, in place, the value of the first [name, value] pair whose name
# is eql? to +key+. Pairs are left untouched when no name matches.
# Always returns nil.
def self.setInputValue(values, key, newValue)
  target = values.find { |entry| key.eql?(entry[0]) }
  target[1] = newValue if target
  nil
end
299
+
300
# Copy every [name, value] pair of +values+ into the form field of the same
# name. Pairs for which form.field yields nil are skipped silently.
def self.setAllInputValue(form, values)
  values.each do |pair|
    form.field(:name => pair[0].to_s) do |field|
      field.value = pair[1].to_s unless field == nil
    end
  end
end
309
+
310
# Build a Marke from a parsed trademark detail page.
#
# Walks the rows of the detail table; the first child of a row is read as the
# label and the second as its value. Recognised labels: 'Marke', 'Inhaber/in',
# 'Vertreter/in', 'Hinterlegungsdatum' and 'Gesuch Nr.'.
#
# Rows with fewer than two children are skipped, and a page without a usable
# owner row yields a Marke whose inhaber and address fields are nil (both
# cases used to raise NoMethodError).
def self.getMarkenInfoFromDetail(doc)
  number = 'invalid'
  bezeichnung = nil
  inhaber = nil
  hinterlegungsdatum = nil
  hatVertreter = 'Nein'
  doc.xpath("//html/body/form/div/div/fieldset/div/table/tbody/tr").each do |row|
    label_node = row.children[0]
    value_node = row.children[1]
    next unless label_node && value_node

    case label_node.text
    when 'Marke'
      bezeichnung =
        if value_node.text.index('Markenabbildung')
          # Figurative mark: keep the link to the image instead of a name.
          value_node.elements.first.attribute('href').text
        else
          value_node.text
        end
    when 'Inhaber/in'
      # Work on the raw markup so <br> separators can become LineSplit.
      m = />(.*)<\/td/.match(value_node.to_s)
      inhaber = m[1].gsub('<br>', LineSplit) if m
    when 'Vertreter/in'
      hatVertreter = 'Ja' if value_node.text.length > 0
    when 'Hinterlegungsdatum'
      hinterlegungsdatum = value_node.text
    when 'Gesuch Nr.'
      number = value_node.text
    end
  end
  zeile_1, zeile_2, zeile_3, zeile_4, zeile_5, plz, ort = Swissreg::parseAddress(number, inhaber)
  # Catch cases where the owner has several postal addresses: keep the first.
  inhaber = inhaber.split(', , ')[0] if inhaber
  Marke.new(bezeichnung, number, inhaber, DefaultCountry, hatVertreter, hinterlegungsdatum,
            zeile_1, zeile_2, zeile_3, zeile_4, zeile_5, plz, ort)
end
342
+
343
# Fetch (or load from the log directory) the detail page for one trademark
# number, convert it into a Marke and append it to @results. Slow.
#
# The session is re-initialised on every 90th call; the source comment says
# swissreg appears to slow down after about 100 hits.
#
# Failing downloads are retried up to three times, each after a 60 second
# pause and a fresh session; after that Interrupt is raised so that the
# caller can save what was collected so far.
#
# The rescue clause used to list a String, which makes Ruby raise TypeError
# when the clause is evaluated, and Exception, which also swallowed Ctrl-C.
def fetchDetails(nummer)
  @counterDetails += 1
  init_swissreg if @counterDetails % 90 == 0
  filename = "#{LogDir}/detail_#{sprintf('%05d', @counterDetails)}_#{nummer.gsub('/','.')}.html"
  if File.exist?(filename)
    doc = File.open(filename) { |file| Nokogiri::Slop(file) }
  else
    url = "#{Sr300}?language=de&section=tm&id=#{nummer}"
    pp "#{Time.now.strftime("%H:%M:%S")}: Opening #{filename}" if $VERBOSE
    $stdout.flush
    nrRetries = 0
    begin
      @agent.get_file url
      body = @agent.page.body
    rescue StandardError, Timeout::Error => e
      nrRetries += 1
      puts e.backtrace
      if nrRetries <= 3
        puts "get_file did not work reinit session and retry for #{nummer}. nrRetries #{nrRetries}/3. e #{e}"
        sleep 60 # Sleep a minute to let network recover
        init_swissreg
        retry
      else
        puts "get_file did not work reinit session raise Interrupt"
        raise Interrupt
      end
    end
    body.force_encoding('utf-8') unless /^1\.8/.match(RUBY_VERSION)
    doc = Nokogiri::Slop(body)
    writeResponse(filename)
  end
  @results << Swissreg::getMarkenInfoFromDetail(doc)
end
377
+
378
# Write the results as a ';'-separated CSV file with a header row.
#
# Only the first result per owner (result.inhaber) is written. Nothing is
# written when results is nil or empty.
#
# results  - array of Struct-like records (all of the same type)
# filename - output path
#
# On Ruby 1.8 the file is assembled by hand; values containing ';' are wrapped
# in double quotes there. Attribute access no longer goes through eval, and
# both branches use block forms so the file is closed on errors too.
def self.emitCsv(results, filename='ausgabe.csv')
  return if results.nil? || results.size == 0

  all_inhaber = {}
  results.each { |result| all_inhaber[result.inhaber] ||= result }
  members = results[0].members

  if /^1\.8/.match(RUBY_VERSION)
    File.open(filename, 'w+') do |ausgabe|
      ausgabe.puts members.join(';')
      all_inhaber.values.each do |result|
        fields = members.map do |member|
          value = result[member]
          next '' unless value

          value = value.to_s
          value.index(';') ? "\"#{value}\"" : value
        end
        ausgabe.puts fields.join(';')
      end
    end
  else
    CSV.open(filename, 'w', :headers => members, :write_headers => true, :col_sep => ';') do |csv|
      # Hand CSV a plain Array rather than relying on Struct being Enumerable.
      all_inhaber.values.each { |result| csv << result.to_a }
    end
  end
end
415
+
416
# Return the text of every <a> element whose id matches DetailRegexp, read
# from the first child of the link's first child.
def self.getTrademarkNumbers(doc)
  detail_links = doc.search('a').select { |link| DetailRegexp.match(link.attribute('id')) }
  detail_links.map { |link| link.children.first.children.first.content }
end
426
+
427
# One page of the simplified hit list (swissreg sr3.jsp).
#
# Such a page carries a status line like
# "Seite 1 von 26 - Treffer 1-250 von 6'349" and up to 250 links to details.
class Swissreg::Vereinfachte
  attr_reader :links2details, :trademark_search_id, :inputData, :firstHit, :nrHits, :nrSubPages, :pageNr
  HitRegexpDE = /Seite (\d*) von ([\d']*) - Treffer ([\d']*)-([\d']*) von ([\d']*)/
  Vivian = 'id_swissreg:mainContent:vivian'

  # Parse the page: paging counters, all <input> name/value pairs, the search
  # id, the JSF view state and the tmMainId of every detail link.
  #
  # The counters stay 0 when the status line is missing. Detail links whose
  # onclick attribute carries no tmMainId are skipped (this used to raise).
  def initialize(doc)
    @pageNr = @nrSubPages = @firstHit = @nrHits = 0
    m = HitRegexpDE.match(doc.text)
    if m
      # Numbers use ' as thousands separator, e.g. 6'349.
      counts = m.captures.map { |n| n.delete("'").to_i }
      @pageNr, @nrSubPages, @firstHit, _last_hit, @nrHits = counts
    end

    # Scan the inputs once and reuse the pairs for both lookups.
    @inputData = Swissreg::getInputValuesFromPage(doc)
    @trademark_search_id = Swissreg::inputValue(@inputData, Vivian)
    @state = Swissreg::inputValue(@inputData, 'javax.faces.ViewState')

    @links2details = []
    doc.search('a').each do |link|
      next unless DetailRegexp.match(link.attribute('id'))

      id_match = /'tmMainId','(\d*)'/.match(link.attribute('onclick').to_s)
      @links2details << id_match[1].to_i if id_match
    end
  end

  # POST pairs that open the detail view of the hit at +position+ with the
  # given tmMainId.
  # NOTE(review): the "_idcl" entry has a third element and the
  # "_link_hidden_ " entry has a trailing blank and no value; both are kept
  # exactly as they were - confirm against a captured request.
  def getPostDataForDetail(position, id)
    [
      [ "autoScroll", "0,0"],
      [ "id_swissreg:mainContent:sub_options_result:sub_fieldset:cbxHitsPerPage", "#{HitsPerPage}"],
      [ "id_swissreg:mainContent:vivian", @trademark_search_id],
      [ "id_swissreg_SUBMIT", "1"],
      [ "id_swissreg:_idcl", "id_swissreg:mainContent:data:#{position}:tm_no_detail:id_detail", ""],
      [ "id_swissreg:mainContent:scroll_1", ""],
      [ "tmMainId", "#{id}"],
      [ "id_swissreg:_link_hidden_ "],
      [ "javax.faces.ViewState", @state]
    ]
  end

  # POST pairs that scroll the hit list to sub page +pageNr+.
  # NOTE(review): the "_link_hidden_ " entry has a trailing blank and no
  # value; kept exactly as it was.
  def getPostDataForSubpage(pageNr)
    [
      [ "autoScroll", "0,0"],
      [ "id_swissreg:mainContent:sub_options_result:sub_fieldset:cbxHitsPerPage", "#{HitsPerPage}"],
      [ "id_swissreg:mainContent:vivian", @trademark_search_id],
      [ "id_swissreg_SUBMIT", "1"],
      [ "id_swissreg:_idcl", "id_swissreg:mainContent:scroll_1idx#{pageNr}"],
      [ "id_swissreg:mainContent:scroll_1", "idx#{pageNr}"],
      [ "tmMainId", ""],
      [ "id_swissreg:_link_hidden_ "],
      [ "javax.faces.ViewState", @state]
    ]
  end
end
496
+
497
# Collect the trademark numbers of hit-list page +pageNr+ and, recursively,
# of all following pages into @all_trademark_numbers, which is returned.
#
# filename - optional path of a saved hit-list page to parse instead of the
#            agent's current page. Anything that is not a String is ignored,
#            so a parsed document passed here is no longer handed to File.
# pageNr   - number of the page being processed (1-based)
#
# Uses File.exist? because File.exists? is gone in Ruby 3.2 and later.
def getAllHits(filename = nil, pageNr = 1)
  if filename.is_a?(String) && File.exist?(filename)
    doc = File.open(filename) { |file| Nokogiri::Slop(file) }
  else
    form = @agent.page.form
    btn = form.buttons.last
    # Switch to the simplified hit list when the page offers that button.
    if btn && btn.name == "id_swissreg:mainContent:id_show_simple_view_hitlist"
      body = @agent.submit(form, btn).body
    else
      body = @agent.page.body
    end
    body.force_encoding('utf-8') unless /^1\.8/.match(RUBY_VERSION)
    doc = Nokogiri::Slop(body)
    writeResponse("#{LogDir}/vereinfachte_#{pageNr}.html")
  end

  einfach = Swissreg::Vereinfachte.new(doc)
  puts "#{Time.now.strftime("%H:%M:%S")} status: getAllHits for #{pageNr} of #{einfach.nrSubPages} pages" if $VERBOSE
  @all_trademark_numbers += Swissreg::getTrademarkNumbers(doc)

  writeResponse("#{LogDir}/vereinfachte_#{pageNr}_back.html")
  if pageNr < einfach.nrSubPages
    subPage2Fetch = pageNr + 1
    Swissreg::setAllInputValue(@agent.page.forms.first, einfach.getPostDataForSubpage(subPage2Fetch))
    @agent.page.forms.first.submit
    getAllHits(nil, subPage2Fetch)
  end
  @all_trademark_numbers
end
534
+
535
# Process the search result: switch to the simplified hit list if offered,
# collect all trademark numbers and fetch the detail page of each one.
#
# filename - saved result page to parse instead of the agent's current page
# counter  - page number handed to getAllHits
#
# A failing detail fetch is retried up to three times (60 s pause and a new
# session each time); then Interrupt is raised. Only StandardError and
# Timeout::Error are retried, so Ctrl-C is no longer swallowed by the retry
# loop. File.exist? replaces File.exists?, which is gone in Ruby 3.2+.
def fetchresult(filename = "#{LogDir}/fetch_1.html", counter = 1)
  if filename && File.exist?(filename)
    doc = File.open(filename) { |file| Nokogiri::Slop(file) }
  else
    body = @agent.page.body
    body.force_encoding('utf-8') unless /^1\.8/.match(RUBY_VERSION)
    doc = Nokogiri::Slop(body)
    writeResponse(filename)
  end

  if /Vereinfachte Trefferliste anzeigen/i.match(doc.text)
    form = @agent.page.forms.first
    button = form.button_with(:value => /Vereinfachte/i)
    # submit the form using that button
    @agent.submit(form, button)
    filename = "#{LogDir}/vereinfacht.html"
    writeResponse(filename)
  end

  # The first parameter of getAllHits is a file name; the parsed document
  # used to be passed in that position. nil makes it read the agent's page.
  getAllHits(nil, counter)
  puts"getAllHits: returned #{@all_trademark_numbers ? @all_trademark_numbers.size : 0} hits "
  if @all_trademark_numbers
    @all_trademark_numbers.each do |nr|
      nrRetries = 0
      begin
        fetchDetails(nr)
      rescue StandardError, Timeout::Error => e
        nrRetries += 1
        puts e.backtrace
        if nrRetries <= 3
          puts "fetchDetails did not work reinit session and retry for #{nr}. nrRetries #{nrRetries}/3. e #{e}"
          sleep 60 # Sleep a minute to let network recover
          init_swissreg
          retry
        else
          puts "fetchDetails did not work reinit session raise Interrupt"
          raise Interrupt
        end
      end
    end
  else
    puts "Could not find any trademarks in #{filename}"
  end
end
580
+ end # class Swissreg
581
+
582
# Entry point: search swissreg for the timespan, fetch every hit and write
# "<timespan>.csv". Returns the collected results.
#
# When the run is interrupted, the results gathered so far are still written.
def self.run(timespan, marke = 'a*', swiss_only = false)
  swissreg = Swissreg.new(timespan, marke, swiss_only)
  begin
    swissreg.parse_swissreg
    swissreg.fetchresult
  rescue Interrupt, Net::HTTP::Persistent::Error
    puts "Unterbrochen. Vesuche #{swissreg.results.size} Resultate zu speichern"
  end
  csv_name = "#{timespan}.csv"
  Swissreg::emitCsv(swissreg.results, csv_name)
  swissreg.results
end
593
+
594
+ end # module Brand2csv