brand2csv 0.3.0 → 0.3.1

Sign up to get free protection for your applications and to get access to all the features.
Files changed (57) hide show
  1. checksums.yaml +4 -4
  2. metadata +117 -93
  3. data/.gemtest +0 -0
  4. data/.gitignore +0 -10
  5. data/.rspec +0 -1
  6. data/.travis.yml +0 -14
  7. data/Gemfile +0 -14
  8. data/Gemfile.lock +0 -76
  9. data/History.txt +0 -111
  10. data/LICENCE.txt +0 -515
  11. data/Manifest.txt +0 -54
  12. data/README.md +0 -27
  13. data/Rakefile +0 -25
  14. data/bin/brand2csv +0 -100
  15. data/lib/brand2csv.rb +0 -590
  16. data/lib/brand2csv/version.rb +0 -3
  17. data/logs/aspen_08_08_1986.html +0 -598
  18. data/logs/post.rohdaten.httpfox +0 -1
  19. data/logs/post.rohdaten.mechanize +0 -1
  20. data/logs/protocol_swissreg.log +0 -86
  21. data/logs/result_01.10.2005.jsp +0 -598
  22. data/logs/sr1.jsp +0 -449
  23. data/logs/sr3.jsp +0 -598
  24. data/logs/start.jsp +0 -350
  25. data/logs/start2.jsp +0 -434
  26. data/protocol.2013.05.12.textile +0 -56
  27. data/protocol.2013.05.15.textile +0 -49
  28. data/protocol.2013.05.21.textile +0 -84
  29. data/spec/brand2csv_spec.rb +0 -62
  30. data/spec/csv_spec.rb +0 -27
  31. data/spec/data/aspectra/detail_00001_P-480296.html +0 -531
  32. data/spec/data/aspectra/detail_00002_P-482236.html +0 -531
  33. data/spec/data/aspectra/detail_00003_641074.html +0 -539
  34. data/spec/data/aspectra/first_results.html +0 -600
  35. data/spec/data/einfache_suche.html +0 -434
  36. data/spec/data/erweiterte_suche.html +0 -446
  37. data/spec/data/main.html +0 -350
  38. data/spec/data/result_short.html +0 -606
  39. data/spec/data/resultate_1.html +0 -446
  40. data/spec/data/resultate_2.html +0 -446
  41. data/spec/data/urner_wildheu/detail_00001_57862.2013.html +0 -516
  42. data/spec/data/urner_wildheu/first_results.html +0 -598
  43. data/spec/data/vereinfachte_1.html +0 -847
  44. data/spec/data/vereinfachte_detail_33.html +0 -516
  45. data/spec/detail_spec.rb +0 -28
  46. data/spec/short_spec.rb +0 -55
  47. data/spec/simple_search.rb +0 -43
  48. data/spec/spec_helper.rb +0 -34
  49. data/spec/support/core_ext/kernel.rb +0 -26
  50. data/spec/support/server_mock_helper.rb +0 -142
  51. data/spec/swissreg_spec.rb +0 -44
  52. data/spec/trademark_numbers_spec.rb +0 -21
  53. data/spec/utilities_spec.rb +0 -83
  54. data/spike.rb +0 -491
  55. data/spike_mechanize_swissreg.rb +0 -312
  56. data/spike_watir.rb +0 -58
  57. data/swissreg.rb +0 -75
@@ -1,54 +0,0 @@
1
- .gitignore
2
- .rspec
3
- .travis.yml
4
- Gemfile
5
- Gemfile.lock
6
- History.txt
7
- LICENCE.txt
8
- Manifest.txt
9
- README.md
10
- Rakefile
11
- bin/brand2csv
12
- lib/brand2csv.rb
13
- lib/brand2csv/version.rb
14
- logs/aspen_08_08_1986.html
15
- logs/post.rohdaten.httpfox
16
- logs/post.rohdaten.mechanize
17
- logs/protocol_swissreg.log
18
- logs/result_01.10.2005.jsp
19
- logs/sr1.jsp
20
- logs/sr3.jsp
21
- logs/start.jsp
22
- logs/start2.jsp
23
- protocol.2013.05.12.textile
24
- protocol.2013.05.15.textile
25
- protocol.2013.05.21.textile
26
- spec/brand2csv_spec.rb
27
- spec/csv_spec.rb
28
- spec/data/aspectra/detail_00001_P-480296.html
29
- spec/data/aspectra/detail_00002_P-482236.html
30
- spec/data/aspectra/detail_00003_641074.html
31
- spec/data/aspectra/first_results.html
32
- spec/data/einfache_suche.html
33
- spec/data/erweiterte_suche.html
34
- spec/data/main.html
35
- spec/data/result_short.html
36
- spec/data/resultate_1.html
37
- spec/data/resultate_2.html
38
- spec/data/urner_wildheu/detail_00001_57862.2013.html
39
- spec/data/urner_wildheu/first_results.html
40
- spec/data/vereinfachte_1.html
41
- spec/data/vereinfachte_detail_33.html
42
- spec/detail_spec.rb
43
- spec/short_spec.rb
44
- spec/simple_search.rb
45
- spec/spec_helper.rb
46
- spec/support/core_ext/kernel.rb
47
- spec/support/server_mock_helper.rb
48
- spec/swissreg_spec.rb
49
- spec/trademark_numbers_spec.rb
50
- spec/utilities_spec.rb
51
- spike.rb
52
- spike_mechanize_swissreg.rb
53
- spike_watir.rb
54
- swissreg.rb
data/README.md DELETED
@@ -1,27 +0,0 @@
1
- # brand2csv
2
-
3
- [![Build Status](https://secure.travis-ci.org/zdavatz/brand2csv.png)](http://travis-ci.org/zdavatz/brand2csv)
4
-
5
- brand2csv using swissreg.ch to get addresses.
6
-
7
- ## Usage
8
- ```
9
- brand2csv 01.01.2013 "b*"
10
- brand2csv 1.10.2005-31.10.2005
11
- ```
12
- ## Help
13
- ```
14
- ~> brand2csv --help
15
- /usr/local/bin/brand2csv ver.0.1.9
16
- Usage:
17
- brand2csv timespan
18
- Find all brands registered in switzerland during the given timespan.
19
- The following examples valid timespan periods:
20
- brand2csv 01.01.2013 "b*" #will search for all brand starting with "b"
21
- brand2csv 1.10.2005-31.10.2005 #this will work as well from version 0.1.9
22
- The results are stored in the file <date_selected>.csv.
23
- The trademark name is either a real brand name or a link to an image.
24
- ```
25
- ## Travis
26
- You can find Travis builds here:
27
- * https://travis-ci.org/zdavatz/brand2csv
data/Rakefile DELETED
@@ -1,25 +0,0 @@
1
- #!/usr/bin/env ruby
2
- # encoding: utf-8
3
-
4
- require 'rubygems'
5
- require 'hoe'
6
-
7
- Hoe.spec "brand2csv" do
8
- self.author = "Niklaus Giger, Yasuhiro Asaka, Zeno R.R. Davatz" # gem.authors
9
- self.email = "ngiger@ywesee.com, yasaka@ywesee.com, zdavatz@ywesee.com"
10
- self.description = "brand2csv creates csv files for swiss brand registered in a specific time period.
11
- The csv contains the brand, link to image (if present), link to the detailinfo at swissreg.ch, name and address of owner (Inhaber)"
12
- self.summary = "brand2csv creates csv files for swiss brands."
13
- self.urls = ["https://github.com/zdavatz/brand2csv"] # gem.homepage
14
-
15
- license "GPLv3.0"
16
-
17
- # gem.add_runtime_dependency
18
- self.extra_deps << ['mechanize', '>= 2.6']
19
-
20
- # gem.add_development_dependency
21
- self.extra_dev_deps << ['rspec']
22
- self.extra_dev_deps << ['webmock']
23
- self.extra_dev_deps << ['hoe', '>= 3.4']
24
- self.extra_dev_deps << ['rdoc']
25
- end
@@ -1,100 +0,0 @@
1
- #!/usr/bin/env ruby
2
-
3
- require 'pathname'
4
- root = Pathname.new(__FILE__).realpath.parent.parent
5
- $:.unshift root.join('lib') if $0 == __FILE__
6
-
7
- require 'optparse'
8
- require "date"
9
- require 'brand2csv'
10
-
11
- def help
12
- <<EOS
13
- #$0 ver.#{Brand2csv::VERSION}
14
- Usage:
15
- #{File.basename(__FILE__)} timespan
16
- Find all brands registered in switzerland during the given timespan.
17
- The following examples valid timespan periods:
18
- brand2csv 01.01.2013 "b*" #will search for all brand starting with "b"
19
- brand2csv 1.10.2005-31.10.2005 #this will work as well from version 0.1.9
20
- The results are stored in the file <date_selected>.csv.
21
- The trademark name is either a real brand name or a link to an image.
22
- --swiss_only Fetch only trademarks from swiss owner
23
- EOS
24
- end
25
-
26
- def validates_timespan(arg)
27
- valid = true
28
- timespan = ""
29
- dates = arg.gsub(/[^\d\.-]/, '').split("-")
30
- catch (:error) do
31
- dates.each_with_index do |d, i|
32
- sep = (dates.length > 1 && i != 0) ? "-" : ""
33
- begin
34
- Date.parse(d)
35
- timespan << sep + d
36
- rescue ArgumentError
37
- valid = false
38
- elms = d.split(".")
39
- prms = [elms[2], elms[1], -1].map(&:to_i)
40
- begin
41
- cand = Date.new(*prms).strftime("%d.%m.%Y")
42
- if elms[0] == (elms - cand.to_s.split(".")).first
43
- timespan << sep + cand.to_s
44
- else
45
- raise
46
- end
47
- rescue ArgumentError
48
- timespan = "" # unknown
49
- throw :error
50
- end
51
- end
52
- end
53
- end
54
- message = nil
55
- unless valid
56
- if timespan.empty?
57
- message = "Timespan is invalid"
58
- else
59
- message = "Did you mean #{timespan} ?"
60
- end
61
- end
62
- [valid, message]
63
- end
64
-
65
- parser = OptionParser.new
66
- opts = {}
67
- parser.on('--swiss_only') {|v| opts[:swiss_only] = true }
68
- parser.on_tail('-h', '--help') { puts help; exit }
69
-
70
- args = ARGV.dup
71
- begin
72
- parser.parse!(args)
73
- rescue OptionParser::MissingArgument,
74
- OptionParser::InvalidArgument,
75
- OptionParser::InvalidOption
76
- puts help
77
- exit 1
78
- end
79
-
80
- unless args.size >= 1
81
- puts help
82
- exit 1
83
- end
84
-
85
- unless args.empty?
86
- valid,message = validates_timespan(args[0])
87
- unless valid
88
- puts message
89
- exit 1
90
- end
91
- end
92
-
93
- begin
94
- Brand2csv::run(args[0], args[1], opts[:swiss_only])
95
- rescue Interrupt
96
- puts "Unterbrochen. Breche mit Fehler ab"
97
- exit 1
98
- end
99
-
100
- puts "#{__FILE__} completed successfully" if $VERBOSE
@@ -1,590 +0,0 @@
1
- #!/usr/bin/env ruby
2
- # encoding: utf-8
3
- require 'rubygems' if /^1\.8/.match(RUBY_VERSION)
4
- require "brand2csv/version"
5
- require 'mechanize'
6
- require 'prettyprint'
7
- require 'optparse'
8
- require 'csv'
9
- require 'logger'
10
-
11
- module Brand2csv
12
-
13
-
14
- class Marke < Struct.new(:name, :markennummer, :inhaber, :land, :hatVertreter, :hinterlegungsdatum, :zeile_1, :zeile_2, :zeile_3, :zeile_4, :zeile_5, :plz, :ort)
15
- end
16
-
17
- class Swissreg
18
-
19
- # Weitere gesehene Fehler
20
- BekannteFehler =
21
- ['Das Datum ist ung', # ültig'
22
- '500 Internal Server Error',
23
- 'Vereinfachte Trefferliste anzeigen',
24
- 'Es wurden keine Daten gefunden.',
25
- 'Die Suchkriterien sind teilweise unzul', # ässig',
26
- 'Geben Sie mindestens ein Suchkriterium ein',
27
- 'Die Suche wurde abgebrochen, da die maximale Suchzeit von 60 Sekunden',
28
- 'Erweiterte Suche',
29
- ]
30
- Base_uri = 'https://www.swissreg.ch'
31
- Start_uri = "#{Base_uri}/srclient/faces/jsp/start.jsp"
32
- Sr1 = "#{Base_uri}/srclient/faces/jsp/trademark/sr1.jsp"
33
- Sr2 = "#{Base_uri}/srclient/faces/jsp/trademark/sr2.jsp"
34
- Sr3 = "#{Base_uri}/srclient/faces/jsp/trademark/sr3.jsp"
35
- Sr30 = "#{Base_uri}/srclient/faces/jsp/trademark/sr30.jsp"
36
- Sr300 = "#{Base_uri}/srclient/faces/jsp/trademark/sr300.jsp"
37
- DetailRegexp = /d_swissreg:mainContent:data:(\d*):tm_no_detail:id_detail/i
38
- AddressRegexp = /^(\d\d\d\d)\W*(.*)/
39
- LineSplit = ', '
40
- DefaultCountry = 'Schweiz'
41
- # Angezeigte Spalten "id_swissreg:mainContent:id_ckbTMChoice"
42
- TMChoiceFields = [
43
- "tm_lbl_tm_text", # Marke
44
- # "tm_lbl_state"], # Status
45
- # "tm_lbl_nizza_class"], # Nizza Klassifikation Nr.
46
- # "tm_lbl_no"], # disabled="disabled"], # Nummer
47
- "tm_lbl_applicant", # Inhaber/in
48
- "tm_lbl_country", # Land (Inhaber/in)
49
- "tm_lbl_agent", # Vertreter/in
50
- # "tm_lbl_licensee"], # Lizenznehmer/in
51
- "tm_lbl_app_date", # Hinterlegungsdatum
52
- ]
53
- # Alle Felder mit sprechenden Namen
54
- # ["id_swissreg:mainContent:id_txf_tm_no", nummer],# Marken Nr
55
- # ["id_swissreg:mainContent:id_txf_app_no", ""], # Gesuch Nr.
56
- # ["id_swissreg:mainContent:id_txf_tm_text", marke],
57
- # ["id_swissreg:mainContent:id_txf_applicant", ""], # Inhaber/in
58
- # ["id_swissreg:mainContent:id_cbxCountry", "_ALL"], # Auswahl Länder _ALL
59
- # ["id_swissreg:mainContent:id_txf_agent", ""], # Vertreter/in
60
- # ["id_swissreg:mainContent:id_txf_licensee", ""], # Lizenznehmer
61
- # ["id_swissreg:mainContent:id_txf_nizza_class", ""], # Nizza Klassifikation Nr.
62
- # # ["id_swissreg:mainContent:id_txf_appDate", timespan], # Hinterlegungsdatum
63
- # ["id_swissreg:mainContent:id_txf_appDate", "%s" % timespan] ,
64
- # ["id_swissreg:mainContent:id_txf_expiryDate", ""], # Ablauf Schutzfrist
65
- # Markenart: Individualmarke 1 Kollektivmarke 2 Garantiemarke 3
66
- # ["id_swissreg:mainContent:id_cbxTMTypeGrp", "_ALL"], # Markenart
67
- # ["id_swissreg:mainContent:id_cbxTMForm", "_ALL"], # Markentyp
68
- # ["id_swissreg:mainContent:id_cbxTMColorClaim", "_ALL"], # Farbanspruch
69
- # ["id_swissreg:mainContent:id_txf_pub_date", ""], # Publikationsdatum
70
-
71
- # info zu Publikationsgrund id_swissreg:mainContent:id_ckbTMPubReason
72
- # ["id_swissreg:mainContent:id_ckbTMPubReason", "1"], #Neueintragungen
73
- # ["id_swissreg:mainContent:id_ckbTMPubReason", "2"], #Berichtigungen
74
- # ["id_swissreg:mainContent:id_ckbTMPubReason", "3"], #Verlängerungen
75
- # ["id_swissreg:mainContent:id_ckbTMPubReason", "4"], #Löschungen
76
- # ["id_swissreg:mainContent:id_ckbTMPubReason", "5"], #Inhaberänderungen
77
- # ["id_swissreg:mainContent:id_ckbTMPubReason", "6"], #Vertreteränderungen
78
- # ["id_swissreg:mainContent:id_ckbTMPubReason", "7"], #Lizenzänderungen
79
- # ["id_swissreg:mainContent:id_ckbTMPubReason", "8"], #Weitere Registeränderungen
80
- # ["id_swissreg:mainContent:id_ckbTMEmptyHits", "0"], # Leere Trefferliste anzeigen
81
- # ["id_swissreg:mainContent:id_ckbTMState", "1"], # "Hängige Gesuche 1
82
- # # ["id_swissreg:mainContent:id_ckbTMState", "2"], # "Gelöschte Gesuche 2
83
- # ["id_swissreg:mainContent:id_ckbTMState", "3"], # aktive Marken 3
84
- # # ["id_swissreg:mainContent:id_ckbTMState", "4"], # gelöschte Marken 4
85
-
86
-
87
- MaxZeilen = 5
88
- HitsPerPage = 250
89
- LogDir = 'log'
90
-
91
- attr_accessor :marke, :results, :timespan
92
-
93
- def initialize(timespan, marke = nil, swiss_only=false)
94
- @timespan = timespan
95
- @marke = marke
96
- @swiss_only = swiss_only
97
- @number = nil
98
- @results = []
99
- @all_trademark_numbers = []
100
- @errors = Hash.new
101
- @lastDetail =nil
102
- @counterDetails = 0
103
- end
104
-
105
- def writeResponse(filename)
106
- if defined?(RSpec) or $VERBOSE
107
- ausgabe = File.open(filename, 'w+')
108
- ausgabe.puts @agent.page.body
109
- ausgabe.close
110
- else
111
- puts "Skipping writing #{filename}" if $VERBOSE
112
- end
113
- end
114
-
115
- def checkErrors(body, exitIfFailure = true)
116
- BekannteFehler.each {
117
- |errMsg|
118
- if body.to_s.index(errMsg)
119
- if exitIfFailure
120
- puts "Tut mir leid. Suche wurde mit Fehlermeldung <#{errMsg}> abgebrochen."
121
- exit 2
122
- else
123
- puts "Info: Suche meldet <#{errMsg}> "
124
- end
125
- end
126
- }
127
- end
128
-
129
- UseClick = false
130
-
131
- # Initialize a session with swissreg and save the cookie as @state
132
- def init_swissreg
133
- begin
134
- @agent = Mechanize.new { |agent|
135
- agent.user_agent = 'Mozilla/5.0 (X11; Linux x86_64; rv:16.0) Gecko/20100101 Firefox/16.0'
136
- agent.verify_mode = OpenSSL::SSL::VERIFY_NONE
137
- FileUtils.makedirs(LogDir) if $VERBOSE or defined?(RSpec)
138
- agent.log = Logger.new("#{LogDir}/mechanize.log") if $VERBOSE
139
- }
140
- @agent.get_file Start_uri # 'https://www.swissreg.ch/srclient/faces/jsp/start.jsp'
141
- writeResponse("#{LogDir}/session_expired.html")
142
- checkErrors(@agent.page.body, false)
143
- @agent.page.links[3].click
144
- writeResponse("#{LogDir}/homepage.html")
145
- @state = @agent.page.form["javax.faces.ViewState"]
146
- rescue Net::HTTPInternalServerError, Mechanize::ResponseCodeError
147
- puts "Net::HTTPInternalServerError oder Mechanize::ResponseCodeError gesehen.\n #{Base_uri} hat wahrscheinlich Probleme"
148
- exit 3
149
- end
150
- end
151
-
152
- def parse_swissreg(timespan = @timespan, # sollte 377 Treffer ergeben, für 01.06.2007-10.06.2007, 559271 wurde in diesem Zeitraum registriert
153
- marke = @marke,
154
- nummer =@number) # nummer = "559271" ergibt genau einen treffer
155
-
156
- init_swissreg
157
- data = [
158
- ["autoScroll", "0,0"],
159
- ["id_swissreg:_link_hidden_", ""],
160
- ["id_swissreg_SUBMIT", "1"],
161
- ["id_swissreg:_idcl", "id_swissreg_sub_nav_ipiNavigation_item0"],
162
- ["javax.faces.ViewState", @state],
163
- ]
164
- @agent.page.form['id_swissreg:_idcl'] = 'id_swissreg_sub_nav_ipiNavigation_item0'
165
- @agent.page.forms.first.submit
166
- writeResponse("#{LogDir}/trademark_simple.html")
167
- data = [
168
- ["autoScroll", "0,0"],
169
- ["id_swissreg:_link_hidden_", ""],
170
- ["id_swissreg_SUBMIT", "1"],
171
- ["id_swissreg:_idcl", "id_swissreg_sub_nav_ipiNavigation_item0_item3"],
172
- ["javax.faces.ViewState", @state],
173
- ]
174
- @agent.page.form['id_swissreg:_idcl'] = 'id_swissreg_sub_nav_ipiNavigation_item0_item3'
175
- @agent.page.forms.first.submit
176
- writeResponse("#{LogDir}/trademark_extended.html")
177
-
178
- data = [
179
- ["autoScroll", "0,829"],
180
- ["id_swissreg:_link_hidden_", ""],
181
- ["id_swissreg:mainContent:id_ckbTMState", "1"], # Hängige Gesuche 1
182
- ["id_swissreg:mainContent:id_ckbTMState", "3"], # Aktive Marken 3
183
- ["id_swissreg:mainContent:id_txf_tm_no", ""],# Marken Nr
184
- ["id_swissreg:mainContent:id_txf_app_no", ""], # Gesuch Nr.
185
- ["id_swissreg:mainContent:id_txf_tm_text", "#{marke}"],
186
- ["id_swissreg:mainContent:id_txf_applicant", ""], # Inhaber/in
187
- ["id_swissreg:mainContent:id_cbxCountry", @swiss_only ? 'CH' : '_ALL'],
188
- ["id_swissreg:mainContent:id_txf_agent", ""], # Vertreter/in
189
- ["id_swissreg:mainContent:id_txf_licensee", ""], # Lizenznehmer
190
- ["id_swissreg:mainContent:id_txf_nizza_class", ""], # Nizza Klassifikation Nr.
191
- ["id_swissreg:mainContent:id_txf_appDate", "#{timespan}"] ,
192
- ["id_swissreg:mainContent:id_txf_expiryDate", ""], # Ablauf Schutzfrist
193
- ["id_swissreg:mainContent:id_cbxTMTypeGrp", "_ALL"], # Markenart
194
- ["id_swissreg:mainContent:id_cbxTMForm", "_ALL"], # Markentyp
195
- ["id_swissreg:mainContent:id_cbxTMColorClaim", "_ALL"], # Farbanspruch
196
- ["id_swissreg:mainContent:id_txf_pub_date", ""], # Publikationsdatum
197
- ["id_swissreg:mainContent:id_ckbTMPubReason", '1'],
198
- ["id_swissreg:mainContent:id_ckbTMPubReason", '2'],
199
- ["id_swissreg:mainContent:id_ckbTMPubReason", '3'],
200
- ["id_swissreg:mainContent:id_ckbTMPubReason", '4'],
201
- ["id_swissreg:mainContent:id_ckbTMPubReason", '5'],
202
- ["id_swissreg:mainContent:id_ckbTMPubReason", '6'],
203
- ["id_swissreg:mainContent:id_ckbTMPubReason", '7'],
204
- ["id_swissreg:mainContent:id_ckbTMPubReason", '8'],
205
- ["id_swissreg:mainContent:id_cbxFormatChoice", "1"],
206
- ["id_swissreg:mainContent:id_ckbTMChoice", "tm_lbl_tm_text"],
207
- ["id_swissreg:mainContent:id_ckbTMChoice", "tm_lbl_applicant"],
208
- ["id_swissreg:mainContent:id_ckbTMChoice", "tm_lbl_country"],
209
- ["id_swissreg:mainContent:id_ckbTMChoice", "tm_lbl_agent"],
210
- ["id_swissreg:mainContent:id_ckbTMChoice", "tm_lbl_app_date"],
211
- ["id_swissreg:mainContent:id_cbxHitsPerPage", HitsPerPage], # Treffer pro Seite
212
- ["id_swissreg:mainContent:sub_fieldset:id_submit", "suchen"],
213
- ["id_swissreg_SUBMIT", "1"],
214
- ["id_swissreg:_idcl", ""],
215
- ["id_swissreg:_link_hidden_", ""],
216
- ["javax.faces.ViewState", @state],
217
- ]
218
- begin
219
- @agent.post(Sr3, data)
220
- rescue Timeout::Error
221
- puts "Timeout!"
222
- retry
223
- end
224
- writeResponse("#{LogDir}/first_results.html")
225
- checkErrors(@agent.page.body, false)
226
- end
227
-
228
- # the number is only passed to facilitate debugging
229
- # lines are the address lines
230
- def Swissreg::parseAddress(number, inhaber)
231
- ort = nil
232
- plz = nil
233
- if inhaber
234
- lines = CGI.unescapeHTML(inhaber).split(LineSplit)
235
- # Search for plz/address
236
- 1.upto(lines.length-1).each {
237
- |cnt|
238
- if m = AddressRegexp.match(lines[cnt])
239
- lines[cnt+1] = nil
240
- plz = m[1]; ort = m[2]
241
- cnt.upto(MaxZeilen-1).each{ |cnt2| lines[cnt2] = nil }
242
- break
243
- end
244
- }
245
- end
246
- unless plz
247
- puts "Achtung! Konnte Marke #{number} mit Inhaber #{lines.inspect} nicht parsen" if $VERBOSE
248
- return nil, nil, nil, nil, nil, nil, nil, nil
249
- end
250
- # search for lines with only digits
251
- found = false
252
- 1.upto(lines.length-1).each {
253
- |cnt|
254
- break if lines[cnt] == nil
255
- if /^\d*$/.match(lines[cnt])
256
- found = true
257
- if lines[cnt+1] == nil
258
- found = 'before'
259
- lines[cnt-1] += LineSplit + lines[cnt]
260
- lines.delete_at(cnt)
261
- else
262
- found = 'after'
263
- lines[cnt] += LineSplit + lines[cnt+1]
264
- lines.delete_at(cnt+1)
265
- end
266
- end
267
- }
268
- puts "found #{found}: #{lines.inspect}" if found and $VERBOSE
269
- return lines[0], lines[1], lines[2], lines[3], lines[4], plz, ort
270
- end
271
-
272
- def Swissreg::getInputValuesFromPage(body) # body of HTML page
273
- contentData = []
274
- body.search('input').each{ |input|
275
- # puts "name: #{input.attribute('name')} value #{input.attribute('value')}"
276
- contentData << [ input.attribute('name').to_s, input.attribute('value').to_s ]
277
- }
278
- contentData
279
- end
280
-
281
- # return value of an array of POST values
282
- def Swissreg::inputValue(values, key)
283
- values.each{ |val|
284
- return val[1] if key.eql?(val[0])
285
- }
286
- return nil
287
- end
288
-
289
- # set value for a key of an array of POST values
290
- def Swissreg::setInputValue(values, key, newValue)
291
- values.each{ |val|
292
- if key.eql?(val[0])
293
- val[1] = newValue
294
- return
295
- end
296
- }
297
- return
298
- end
299
-
300
- def Swissreg::setAllInputValue(form, values)
301
- values.each{ |newValue|
302
- # puts "x: 0 #{ newValue[0].to_s} 1 #{newValue[1].to_s}"
303
- form.field(:name => newValue[0].to_s) { |elem|
304
- next if elem == nil # puts "Cannot set #{newValue[0].to_s}"
305
- elem.value = newValue[1].to_s
306
- }
307
- }
308
- end
309
-
310
- def Swissreg::getMarkenInfoFromDetail(doc)
311
- marke = nil
312
- number = 'invalid'
313
- bezeichnung = nil
314
- inhaber = nil
315
- hinterlegungsdatum = nil
316
- hatVertreter = 'Nein'
317
- doc.xpath("//html/body/form/div/div/fieldset/div/table/tbody/tr").each{
318
- |x|
319
- if x.children.first.text.eql?('Marke')
320
- if x.children[1].text.index('Markenabbildung')
321
- # we must fetch the link to the image
322
- bezeichnung = x.children[1].elements.first.attribute('href').text
323
- else # we got a trademark
324
- bezeichnung = x.children[1].text
325
- end
326
- end
327
-
328
- if x.children.first.text.eql?('Inhaber/in')
329
- inhaber = />(.*)<\/td/.match(x.children[1].to_s)[1].gsub('<br>',LineSplit)
330
- end
331
-
332
- if x.children.first.text.eql?('Vertreter/in')
333
- hatVertreter = 'Ja' if x.children[1].text.length > 0
334
- end
335
- hinterlegungsdatum = x.children[1].text if x.children.first.text.eql?('Hinterlegungsdatum')
336
- number = x.children[1].text if x.children.first.text.eql?('Gesuch Nr.')
337
- }
338
- zeile_1, zeile_2, zeile_3, zeile_4, zeile_5, plz, ort = Swissreg::parseAddress(number, inhaber)
339
- inhaber = inhaber.split(', , ')[0] # Catch cases where Inhaber has several postal addresses
340
- marke = Marke.new(bezeichnung, number, inhaber, DefaultCountry, hatVertreter, hinterlegungsdatum, zeile_1, zeile_2, zeile_3, zeile_4, zeile_5, plz, ort )
341
- end
342
-
343
- def fetchDetails(nummer) # takes a long time!
344
- @counterDetails += 1
345
- init_swissreg if @counterDetails % 90 == 0 # it seems that swissreg is artificially slowing down serving request after 100 hits
346
- filename = "#{LogDir}/detail_#{sprintf('%05d', @counterDetails)}_#{nummer.gsub('/','.')}.html"
347
- if File.exists?(filename)
348
- doc = Nokogiri::Slop(File.open(filename))
349
- else
350
- url = "#{Sr300}?language=de&section=tm&id=#{nummer}"
351
- pp "#{Time.now.strftime("%H:%M:%S")}: Opening #{filename}" if $VERBOSE
352
- $stdout.flush
353
- nrRetries = 0
354
- begin
355
- content = @agent.get_file url
356
- body = @agent.page.body
357
- rescue 'getaddrinfo: Name or service not known', Exception => e
358
- nrRetries += 1
359
- puts e.backtrace
360
- if nrRetries <= 3
361
- puts "get_file did not work reinit session and retry for #{nr}. nrRetries #{nrRetries}/3. e #{e}"
362
- sleep 60 # Sleep a minute to let network recover
363
- init_swissreg
364
- retry
365
- else
366
- puts "get_file did not work reinit session raise Interrupt"
367
- raise Interrupt
368
- end
369
- end
370
- body.force_encoding('utf-8') unless /^1\.8/.match(RUBY_VERSION)
371
- doc = Nokogiri::Slop(body)
372
- writeResponse(filename)
373
- end
374
- marke = Swissreg::getMarkenInfoFromDetail(doc)
375
- @results << marke
376
- end
377
-
378
- def Swissreg::emitCsv(results, filename='ausgabe.csv')
379
- return if results == nil or results.size == 0
380
- if /^1\.8/.match(RUBY_VERSION)
381
- ausgabe = File.open(filename, 'w+')
382
- # Write header
383
- s=''
384
- results[0].members.each { |member| s += member + ';' }
385
- ausgabe.puts s.chop
386
- # write all line
387
- results.each{
388
- |result|
389
- s = ''
390
- result.members.each{ |member|
391
- unless eval("result.#{member}")
392
- s += ';'
393
- else
394
- value = eval("result.#{member.to_s}")
395
- value = "\"#{value}\"" if value.index(';')
396
- s += value + ';'
397
- end
398
- }
399
- ausgabe.puts s.chop
400
- }
401
- ausgabe.close
402
- else
403
-
404
- CSV.open(filename, 'w', :headers=>results[0].members,
405
- :write_headers => true,
406
- :col_sep => ';'
407
- ) do |csv| results.each{ |x| csv << x }
408
- end
409
- end
410
- end
411
-
412
- def Swissreg::getTrademarkNumbers(doc)
413
- trademark_numbers = []
414
- doc.search('a').each{
415
- |link|
416
- if DetailRegexp.match(link.attribute('id'))
417
- trademark_numbers << link.children.first.children.first.content
418
- end
419
- }
420
- trademark_numbers
421
- end
422
-
423
- class Swissreg::Vereinfachte
424
- attr_reader :links2details, :trademark_search_id, :inputData, :firstHit, :nrHits, :nrSubPages, :pageNr
425
- HitRegexpDE = /Seite (\d*) von ([\d']*) - Treffer ([\d']*)-([\d']*) von ([\d']*)/
426
- Vivian = 'id_swissreg:mainContent:vivian'
427
-
428
- # Parse a HTML page from swissreg sr3.jsp
429
- # There we find info like "Seite 1 von 26 - Treffer 1-250 von 6'349" and upto 250 links to details
430
- def initialize(doc)
431
- @inputData = []
432
- @pageNr = @nrSubPages = @firstHit = @nrHits = 0
433
- m = HitRegexpDE.match(doc.text)
434
- if m
435
- begin
436
- c = m.to_a.map{|n| n.gsub(/'/, "").to_i }
437
- @pageNr = c[1]
438
- @nrSubPages = c[2]
439
- @firstHit = c[3]
440
- @nrHits = c[5]
441
- rescue NoMethodError
442
- end
443
- end
444
- @trademark_search_id = Swissreg::inputValue(Swissreg::getInputValuesFromPage(doc), Vivian)
445
- @links2details = []
446
- doc.search('input').each{ |input|
447
- # puts "name: #{input.attribute('name')} value #{input.attribute('value')}" if $VERBOSE
448
- @inputData << [ input.attribute('name').to_s, input.attribute('value').to_s ]
449
- }
450
-
451
- @state = Swissreg::inputValue(Swissreg::getInputValuesFromPage(doc), 'javax.faces.ViewState')
452
- doc.search('a').each{
453
- |link|
454
- if m = DetailRegexp.match(link.attribute('id'))
455
- # puts "XXX #{link.attribute('onclick').to_s} href: #{link.attribute('href').to_s} value #{link.attribute('value').to_s}" if $VERBOSE
456
- m = /'tmMainId','(\d*)'/.match(link.attribute('onclick').to_s)
457
- tmMainId = m[1].to_i
458
- @links2details << tmMainId
459
- end
460
- }
461
- end
462
-
463
- def getPostDataForDetail(position, id)
464
- [
465
- [ "autoScroll", "0,0"],
466
- [ "id_swissreg:mainContent:sub_options_result:sub_fieldset:cbxHitsPerPage", "#{HitsPerPage}"],
467
- [ "id_swissreg:mainContent:vivian", @trademark_search_id],
468
- [ "id_swissreg_SUBMIT", "1"],
469
- [ "id_swissreg:_idcl", "id_swissreg:mainContent:data:#{position}:tm_no_detail:id_detail", ""],
470
- [ "id_swissreg:mainContent:scroll_1", ""],
471
- [ "tmMainId", "#{id}"],
472
- [ "id_swissreg:_link_hidden_ "],
473
- [ "javax.faces.ViewState", @state]
474
- ]
475
- end
476
-
477
- def getPostDataForSubpage(pageNr)
478
- [
479
- [ "autoScroll", "0,0"],
480
- [ "id_swissreg:mainContent:sub_options_result:sub_fieldset:cbxHitsPerPage", "#{HitsPerPage}"],
481
- [ "id_swissreg:mainContent:vivian", @trademark_search_id],
482
- [ "id_swissreg_SUBMIT", "1"],
483
- [ "id_swissreg:_idcl", "id_swissreg:mainContent:scroll_1idx#{pageNr}"],
484
- [ "id_swissreg:mainContent:scroll_1", "idx#{pageNr}"],
485
- [ "tmMainId", ""],
486
- [ "id_swissreg:_link_hidden_ "],
487
- [ "javax.faces.ViewState", @state]
488
- ]
489
- end
490
-
491
- end
492
-
493
- def getAllHits(filename = nil, pageNr = 1)
494
- if filename && File.exists?(filename)
495
- doc = Nokogiri::Slop(File.open(filename))
496
- else
497
- form = @agent.page.form
498
- btn = form.buttons.last
499
- if btn && btn.name == "id_swissreg:mainContent:id_show_simple_view_hitlist"
500
- res = @agent.submit(form, btn)
501
- body = res.body
502
- else
503
- body = @agent.page.body
504
- end
505
- body.force_encoding('utf-8') unless /^1\.8/.match(RUBY_VERSION)
506
- doc = Nokogiri::Slop(body)
507
- filename = "#{LogDir}/vereinfachte_#{pageNr}.html"
508
- writeResponse(filename)
509
- end
510
- einfach = Swissreg::Vereinfachte.new(doc)
511
- puts "#{Time.now.strftime("%H:%M:%S")} status: getAllHits for #{pageNr} of #{einfach.nrSubPages} pages" if $VERBOSE
512
- subPage2Fetch = pageNr + 1
513
- data2 = einfach.getPostDataForSubpage(subPage2Fetch).clone
514
- if (HitsPerPage < einfach.nrHits - einfach.firstHit)
515
- itemsToFetch = HitsPerPage
516
- else
517
- itemsToFetch = einfach.nrHits - einfach.firstHit
518
- end
519
- @all_trademark_numbers += Swissreg::getTrademarkNumbers(doc)
520
-
521
- filename = "#{LogDir}/vereinfachte_#{pageNr}_back.html"
522
- writeResponse(filename)
523
- if pageNr < (einfach.nrSubPages)
524
- Swissreg::setAllInputValue(@agent.page.forms.first, data2)
525
- @agent.page.forms.first.submit
526
- getAllHits(nil, subPage2Fetch)
527
- end
528
- @all_trademark_numbers
529
- end
530
-
531
- def fetchresult(filename = "#{LogDir}/fetch_1.html", counter = 1)
532
- if filename && File.exists?(filename)
533
- doc = Nokogiri::Slop(File.open(filename))
534
- else
535
- body = @agent.page.body
536
- body.force_encoding('utf-8') unless /^1\.8/.match(RUBY_VERSION)
537
- doc = Nokogiri::Slop(body)
538
- writeResponse(filename)
539
- end
540
-
541
- if /Vereinfachte Trefferliste anzeigen/i.match(doc.text)
542
- form = @agent.page.forms.first
543
- button = form.button_with(:value => /Vereinfachte/i)
544
- # submit the form using that button
545
- @agent.submit(form, button)
546
- filename = "#{LogDir}/vereinfacht.html"
547
- writeResponse(filename)
548
- end
549
- getAllHits(doc, counter)
550
- puts"getAllHits: returned #{@all_trademark_numbers ? @all_trademark_numbers.size : 0} hits "
551
- if @all_trademark_numbers
552
- @all_trademark_numbers.each{
553
- |nr|
554
- nrRetries = 0
555
- begin
556
- fetchDetails(nr)
557
- rescue SocketError, Exception => e
558
- nrRetries += 1
559
- puts e.backtrace
560
- if nrRetries <= 3
561
- puts "fetchDetails did not work reinit session and retry for #{nr}. nrRetries #{nrRetries}/3. e #{e}"
562
- sleep 60 # Sleep a minute to let network recover
563
- init_swissreg
564
- retry
565
- else
566
- puts "fetchDetails did not work reinit session raise Interrupt"
567
- raise Interrupt
568
- end
569
- end
570
-
571
- }
572
- else
573
- puts "Could not find any trademarks in #{filename}"
574
- end
575
- end
576
- end # class Swissreg
577
-
578
- def Brand2csv::run(timespan, marke = 'a*', swiss_only = false)
579
- session = Swissreg.new(timespan, marke, swiss_only)
580
- begin
581
- session.parse_swissreg
582
- session.fetchresult
583
- rescue Interrupt, Net::HTTP::Persistent::Error
584
- puts "Unterbrochen. Vesuche #{session.results.size} Resultate zu speichern"
585
- end
586
- Swissreg::emitCsv(session.results, "#{timespan}.csv")
587
- session.results
588
- end
589
-
590
- end # module Brand2csv