brand2csv 0.3.0 → 0.3.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (57) hide show
  1. checksums.yaml +4 -4
  2. metadata +117 -93
  3. data/.gemtest +0 -0
  4. data/.gitignore +0 -10
  5. data/.rspec +0 -1
  6. data/.travis.yml +0 -14
  7. data/Gemfile +0 -14
  8. data/Gemfile.lock +0 -76
  9. data/History.txt +0 -111
  10. data/LICENCE.txt +0 -515
  11. data/Manifest.txt +0 -54
  12. data/README.md +0 -27
  13. data/Rakefile +0 -25
  14. data/bin/brand2csv +0 -100
  15. data/lib/brand2csv.rb +0 -590
  16. data/lib/brand2csv/version.rb +0 -3
  17. data/logs/aspen_08_08_1986.html +0 -598
  18. data/logs/post.rohdaten.httpfox +0 -1
  19. data/logs/post.rohdaten.mechanize +0 -1
  20. data/logs/protocol_swissreg.log +0 -86
  21. data/logs/result_01.10.2005.jsp +0 -598
  22. data/logs/sr1.jsp +0 -449
  23. data/logs/sr3.jsp +0 -598
  24. data/logs/start.jsp +0 -350
  25. data/logs/start2.jsp +0 -434
  26. data/protocol.2013.05.12.textile +0 -56
  27. data/protocol.2013.05.15.textile +0 -49
  28. data/protocol.2013.05.21.textile +0 -84
  29. data/spec/brand2csv_spec.rb +0 -62
  30. data/spec/csv_spec.rb +0 -27
  31. data/spec/data/aspectra/detail_00001_P-480296.html +0 -531
  32. data/spec/data/aspectra/detail_00002_P-482236.html +0 -531
  33. data/spec/data/aspectra/detail_00003_641074.html +0 -539
  34. data/spec/data/aspectra/first_results.html +0 -600
  35. data/spec/data/einfache_suche.html +0 -434
  36. data/spec/data/erweiterte_suche.html +0 -446
  37. data/spec/data/main.html +0 -350
  38. data/spec/data/result_short.html +0 -606
  39. data/spec/data/resultate_1.html +0 -446
  40. data/spec/data/resultate_2.html +0 -446
  41. data/spec/data/urner_wildheu/detail_00001_57862.2013.html +0 -516
  42. data/spec/data/urner_wildheu/first_results.html +0 -598
  43. data/spec/data/vereinfachte_1.html +0 -847
  44. data/spec/data/vereinfachte_detail_33.html +0 -516
  45. data/spec/detail_spec.rb +0 -28
  46. data/spec/short_spec.rb +0 -55
  47. data/spec/simple_search.rb +0 -43
  48. data/spec/spec_helper.rb +0 -34
  49. data/spec/support/core_ext/kernel.rb +0 -26
  50. data/spec/support/server_mock_helper.rb +0 -142
  51. data/spec/swissreg_spec.rb +0 -44
  52. data/spec/trademark_numbers_spec.rb +0 -21
  53. data/spec/utilities_spec.rb +0 -83
  54. data/spike.rb +0 -491
  55. data/spike_mechanize_swissreg.rb +0 -312
  56. data/spike_watir.rb +0 -58
  57. data/swissreg.rb +0 -75
@@ -1,54 +0,0 @@
1
- .gitignore
2
- .rspec
3
- .travis.yml
4
- Gemfile
5
- Gemfile.lock
6
- History.txt
7
- LICENCE.txt
8
- Manifest.txt
9
- README.md
10
- Rakefile
11
- bin/brand2csv
12
- lib/brand2csv.rb
13
- lib/brand2csv/version.rb
14
- logs/aspen_08_08_1986.html
15
- logs/post.rohdaten.httpfox
16
- logs/post.rohdaten.mechanize
17
- logs/protocol_swissreg.log
18
- logs/result_01.10.2005.jsp
19
- logs/sr1.jsp
20
- logs/sr3.jsp
21
- logs/start.jsp
22
- logs/start2.jsp
23
- protocol.2013.05.12.textile
24
- protocol.2013.05.15.textile
25
- protocol.2013.05.21.textile
26
- spec/brand2csv_spec.rb
27
- spec/csv_spec.rb
28
- spec/data/aspectra/detail_00001_P-480296.html
29
- spec/data/aspectra/detail_00002_P-482236.html
30
- spec/data/aspectra/detail_00003_641074.html
31
- spec/data/aspectra/first_results.html
32
- spec/data/einfache_suche.html
33
- spec/data/erweiterte_suche.html
34
- spec/data/main.html
35
- spec/data/result_short.html
36
- spec/data/resultate_1.html
37
- spec/data/resultate_2.html
38
- spec/data/urner_wildheu/detail_00001_57862.2013.html
39
- spec/data/urner_wildheu/first_results.html
40
- spec/data/vereinfachte_1.html
41
- spec/data/vereinfachte_detail_33.html
42
- spec/detail_spec.rb
43
- spec/short_spec.rb
44
- spec/simple_search.rb
45
- spec/spec_helper.rb
46
- spec/support/core_ext/kernel.rb
47
- spec/support/server_mock_helper.rb
48
- spec/swissreg_spec.rb
49
- spec/trademark_numbers_spec.rb
50
- spec/utilities_spec.rb
51
- spike.rb
52
- spike_mechanize_swissreg.rb
53
- spike_watir.rb
54
- swissreg.rb
data/README.md DELETED
@@ -1,27 +0,0 @@
1
- # brand2csv
2
-
3
- [![Build Status](https://secure.travis-ci.org/zdavatz/brand2csv.png)](http://travis-ci.org/zdavatz/brand2csv)
4
-
5
- brand2csv using swissreg.ch to get addresses.
6
-
7
- ## Usage
8
- ```
9
- brand2csv 01.01.2013 "b*"
10
- brand2csv 1.10.2005-31.10.2005
11
- ```
12
- ## Help
13
- ```
14
- ~> brand2csv --help
15
- /usr/local/bin/brand2csv ver.0.1.9
16
- Usage:
17
- brand2csv timespan
18
- Find all brands registered in switzerland during the given timespan.
19
- The following examples valid timespan periods:
20
- brand2csv 01.01.2013 "b*" #will search for all brand starting with "b"
21
- brand2csv 1.10.2005-31.10.2005 #this will work as well from version 0.1.9
22
- The results are stored in the file <date_selected>.csv.
23
- The trademark name is either a real brand name or a link to an image.
24
- ```
25
- ## Travis
26
- You can find Travis builds here:
27
- * https://travis-ci.org/zdavatz/brand2csv
data/Rakefile DELETED
@@ -1,25 +0,0 @@
1
- #!/usr/bin/env ruby
2
- # encoding: utf-8
3
-
4
- require 'rubygems'
5
- require 'hoe'
6
-
7
- Hoe.spec "brand2csv" do
8
- self.author = "Niklaus Giger, Yasuhiro Asaka, Zeno R.R. Davatz" # gem.authors
9
- self.email = "ngiger@ywesee.com, yasaka@ywesee.com, zdavatz@ywesee.com"
10
- self.description = "brand2csv creates csv files for swiss brand registered in a specific time period.
11
- The csv contains the brand, link to image (if present), link to the detailinfo at swissreg.ch, name and address of owner (Inhaber)"
12
- self.summary = "brand2csv creates csv files for swiss brands."
13
- self.urls = ["https://github.com/zdavatz/brand2csv"] # gem.homepage
14
-
15
- license "GPLv3.0"
16
-
17
- # gem.add_runtime_dependency
18
- self.extra_deps << ['mechanize', '>= 2.6']
19
-
20
- # gem.add_development_dependency
21
- self.extra_dev_deps << ['rspec']
22
- self.extra_dev_deps << ['webmock']
23
- self.extra_dev_deps << ['hoe', '>= 3.4']
24
- self.extra_dev_deps << ['rdoc']
25
- end
@@ -1,100 +0,0 @@
1
- #!/usr/bin/env ruby
2
-
3
- require 'pathname'
4
- root = Pathname.new(__FILE__).realpath.parent.parent
5
- $:.unshift root.join('lib') if $0 == __FILE__
6
-
7
- require 'optparse'
8
- require "date"
9
- require 'brand2csv'
10
-
11
- def help
12
- <<EOS
13
- #$0 ver.#{Brand2csv::VERSION}
14
- Usage:
15
- #{File.basename(__FILE__)} timespan
16
- Find all brands registered in switzerland during the given timespan.
17
- The following examples valid timespan periods:
18
- brand2csv 01.01.2013 "b*" #will search for all brand starting with "b"
19
- brand2csv 1.10.2005-31.10.2005 #this will work as well from version 0.1.9
20
- The results are stored in the file <date_selected>.csv.
21
- The trademark name is either a real brand name or a link to an image.
22
- --swiss_only Fetch only trademarks from swiss owner
23
- EOS
24
- end
25
-
26
- def validates_timespan(arg)
27
- valid = true
28
- timespan = ""
29
- dates = arg.gsub(/[^\d\.-]/, '').split("-")
30
- catch (:error) do
31
- dates.each_with_index do |d, i|
32
- sep = (dates.length > 1 && i != 0) ? "-" : ""
33
- begin
34
- Date.parse(d)
35
- timespan << sep + d
36
- rescue ArgumentError
37
- valid = false
38
- elms = d.split(".")
39
- prms = [elms[2], elms[1], -1].map(&:to_i)
40
- begin
41
- cand = Date.new(*prms).strftime("%d.%m.%Y")
42
- if elms[0] == (elms - cand.to_s.split(".")).first
43
- timespan << sep + cand.to_s
44
- else
45
- raise
46
- end
47
- rescue ArgumentError
48
- timespan = "" # unknown
49
- throw :error
50
- end
51
- end
52
- end
53
- end
54
- message = nil
55
- unless valid
56
- if timespan.empty?
57
- message = "Timespan is invalid"
58
- else
59
- message = "Did you mean #{timespan} ?"
60
- end
61
- end
62
- [valid, message]
63
- end
64
-
65
- parser = OptionParser.new
66
- opts = {}
67
- parser.on('--swiss_only') {|v| opts[:swiss_only] = true }
68
- parser.on_tail('-h', '--help') { puts help; exit }
69
-
70
- args = ARGV.dup
71
- begin
72
- parser.parse!(args)
73
- rescue OptionParser::MissingArgument,
74
- OptionParser::InvalidArgument,
75
- OptionParser::InvalidOption
76
- puts help
77
- exit 1
78
- end
79
-
80
- unless args.size >= 1
81
- puts help
82
- exit 1
83
- end
84
-
85
- unless args.empty?
86
- valid,message = validates_timespan(args[0])
87
- unless valid
88
- puts message
89
- exit 1
90
- end
91
- end
92
-
93
- begin
94
- Brand2csv::run(args[0], args[1], opts[:swiss_only])
95
- rescue Interrupt
96
- puts "Unterbrochen. Breche mit Fehler ab"
97
- exit 1
98
- end
99
-
100
- puts "#{__FILE__} completed successfully" if $VERBOSE
@@ -1,590 +0,0 @@
1
- #!/usr/bin/env ruby
2
- # encoding: utf-8
3
- require 'rubygems' if /^1\.8/.match(RUBY_VERSION)
4
- require "brand2csv/version"
5
- require 'mechanize'
6
- require 'prettyprint'
7
- require 'optparse'
8
- require 'csv'
9
- require 'logger'
10
-
11
- module Brand2csv
12
-
13
-
14
- class Marke < Struct.new(:name, :markennummer, :inhaber, :land, :hatVertreter, :hinterlegungsdatum, :zeile_1, :zeile_2, :zeile_3, :zeile_4, :zeile_5, :plz, :ort)
15
- end
16
-
17
- class Swissreg
18
-
19
- # Weitere gesehene Fehler
20
- BekannteFehler =
21
- ['Das Datum ist ung', # ültig'
22
- '500 Internal Server Error',
23
- 'Vereinfachte Trefferliste anzeigen',
24
- 'Es wurden keine Daten gefunden.',
25
- 'Die Suchkriterien sind teilweise unzul', # ässig',
26
- 'Geben Sie mindestens ein Suchkriterium ein',
27
- 'Die Suche wurde abgebrochen, da die maximale Suchzeit von 60 Sekunden',
28
- 'Erweiterte Suche',
29
- ]
30
- Base_uri = 'https://www.swissreg.ch'
31
- Start_uri = "#{Base_uri}/srclient/faces/jsp/start.jsp"
32
- Sr1 = "#{Base_uri}/srclient/faces/jsp/trademark/sr1.jsp"
33
- Sr2 = "#{Base_uri}/srclient/faces/jsp/trademark/sr2.jsp"
34
- Sr3 = "#{Base_uri}/srclient/faces/jsp/trademark/sr3.jsp"
35
- Sr30 = "#{Base_uri}/srclient/faces/jsp/trademark/sr30.jsp"
36
- Sr300 = "#{Base_uri}/srclient/faces/jsp/trademark/sr300.jsp"
37
- DetailRegexp = /d_swissreg:mainContent:data:(\d*):tm_no_detail:id_detail/i
38
- AddressRegexp = /^(\d\d\d\d)\W*(.*)/
39
- LineSplit = ', '
40
- DefaultCountry = 'Schweiz'
41
- # Angezeigte Spalten "id_swissreg:mainContent:id_ckbTMChoice"
42
- TMChoiceFields = [
43
- "tm_lbl_tm_text", # Marke
44
- # "tm_lbl_state"], # Status
45
- # "tm_lbl_nizza_class"], # Nizza Klassifikation Nr.
46
- # "tm_lbl_no"], # disabled="disabled"], # Nummer
47
- "tm_lbl_applicant", # Inhaber/in
48
- "tm_lbl_country", # Land (Inhaber/in)
49
- "tm_lbl_agent", # Vertreter/in
50
- # "tm_lbl_licensee"], # Lizenznehmer/in
51
- "tm_lbl_app_date", # Hinterlegungsdatum
52
- ]
53
- # Alle Felder mit sprechenden Namen
54
- # ["id_swissreg:mainContent:id_txf_tm_no", nummer],# Marken Nr
55
- # ["id_swissreg:mainContent:id_txf_app_no", ""], # Gesuch Nr.
56
- # ["id_swissreg:mainContent:id_txf_tm_text", marke],
57
- # ["id_swissreg:mainContent:id_txf_applicant", ""], # Inhaber/in
58
- # ["id_swissreg:mainContent:id_cbxCountry", "_ALL"], # Auswahl Länder _ALL
59
- # ["id_swissreg:mainContent:id_txf_agent", ""], # Vertreter/in
60
- # ["id_swissreg:mainContent:id_txf_licensee", ""], # Lizenznehmer
61
- # ["id_swissreg:mainContent:id_txf_nizza_class", ""], # Nizza Klassifikation Nr.
62
- # # ["id_swissreg:mainContent:id_txf_appDate", timespan], # Hinterlegungsdatum
63
- # ["id_swissreg:mainContent:id_txf_appDate", "%s" % timespan] ,
64
- # ["id_swissreg:mainContent:id_txf_expiryDate", ""], # Ablauf Schutzfrist
65
- # Markenart: Individualmarke 1 Kollektivmarke 2 Garantiemarke 3
66
- # ["id_swissreg:mainContent:id_cbxTMTypeGrp", "_ALL"], # Markenart
67
- # ["id_swissreg:mainContent:id_cbxTMForm", "_ALL"], # Markentyp
68
- # ["id_swissreg:mainContent:id_cbxTMColorClaim", "_ALL"], # Farbanspruch
69
- # ["id_swissreg:mainContent:id_txf_pub_date", ""], # Publikationsdatum
70
-
71
- # info zu Publikationsgrund id_swissreg:mainContent:id_ckbTMPubReason
72
- # ["id_swissreg:mainContent:id_ckbTMPubReason", "1"], #Neueintragungen
73
- # ["id_swissreg:mainContent:id_ckbTMPubReason", "2"], #Berichtigungen
74
- # ["id_swissreg:mainContent:id_ckbTMPubReason", "3"], #Verlängerungen
75
- # ["id_swissreg:mainContent:id_ckbTMPubReason", "4"], #Löschungen
76
- # ["id_swissreg:mainContent:id_ckbTMPubReason", "5"], #Inhaberänderungen
77
- # ["id_swissreg:mainContent:id_ckbTMPubReason", "6"], #Vertreteränderungen
78
- # ["id_swissreg:mainContent:id_ckbTMPubReason", "7"], #Lizenzänderungen
79
- # ["id_swissreg:mainContent:id_ckbTMPubReason", "8"], #Weitere Registeränderungen
80
- # ["id_swissreg:mainContent:id_ckbTMEmptyHits", "0"], # Leere Trefferliste anzeigen
81
- # ["id_swissreg:mainContent:id_ckbTMState", "1"], # "Hängige Gesuche 1
82
- # # ["id_swissreg:mainContent:id_ckbTMState", "2"], # "Gelöschte Gesuche 2
83
- # ["id_swissreg:mainContent:id_ckbTMState", "3"], # aktive Marken 3
84
- # # ["id_swissreg:mainContent:id_ckbTMState", "4"], # gelöschte Marken 4
85
-
86
-
87
- MaxZeilen = 5
88
- HitsPerPage = 250
89
- LogDir = 'log'
90
-
91
- attr_accessor :marke, :results, :timespan
92
-
93
- def initialize(timespan, marke = nil, swiss_only=false)
94
- @timespan = timespan
95
- @marke = marke
96
- @swiss_only = swiss_only
97
- @number = nil
98
- @results = []
99
- @all_trademark_numbers = []
100
- @errors = Hash.new
101
- @lastDetail =nil
102
- @counterDetails = 0
103
- end
104
-
105
- def writeResponse(filename)
106
- if defined?(RSpec) or $VERBOSE
107
- ausgabe = File.open(filename, 'w+')
108
- ausgabe.puts @agent.page.body
109
- ausgabe.close
110
- else
111
- puts "Skipping writing #{filename}" if $VERBOSE
112
- end
113
- end
114
-
115
- def checkErrors(body, exitIfFailure = true)
116
- BekannteFehler.each {
117
- |errMsg|
118
- if body.to_s.index(errMsg)
119
- if exitIfFailure
120
- puts "Tut mir leid. Suche wurde mit Fehlermeldung <#{errMsg}> abgebrochen."
121
- exit 2
122
- else
123
- puts "Info: Suche meldet <#{errMsg}> "
124
- end
125
- end
126
- }
127
- end
128
-
129
- UseClick = false
130
-
131
- # Initialize a session with swissreg and save the cookie as @state
132
- def init_swissreg
133
- begin
134
- @agent = Mechanize.new { |agent|
135
- agent.user_agent = 'Mozilla/5.0 (X11; Linux x86_64; rv:16.0) Gecko/20100101 Firefox/16.0'
136
- agent.verify_mode = OpenSSL::SSL::VERIFY_NONE
137
- FileUtils.makedirs(LogDir) if $VERBOSE or defined?(RSpec)
138
- agent.log = Logger.new("#{LogDir}/mechanize.log") if $VERBOSE
139
- }
140
- @agent.get_file Start_uri # 'https://www.swissreg.ch/srclient/faces/jsp/start.jsp'
141
- writeResponse("#{LogDir}/session_expired.html")
142
- checkErrors(@agent.page.body, false)
143
- @agent.page.links[3].click
144
- writeResponse("#{LogDir}/homepage.html")
145
- @state = @agent.page.form["javax.faces.ViewState"]
146
- rescue Net::HTTPInternalServerError, Mechanize::ResponseCodeError
147
- puts "Net::HTTPInternalServerError oder Mechanize::ResponseCodeError gesehen.\n #{Base_uri} hat wahrscheinlich Probleme"
148
- exit 3
149
- end
150
- end
151
-
152
- def parse_swissreg(timespan = @timespan, # sollte 377 Treffer ergeben, für 01.06.2007-10.06.2007, 559271 wurde in diesem Zeitraum registriert
153
- marke = @marke,
154
- nummer =@number) # nummer = "559271" ergibt genau einen treffer
155
-
156
- init_swissreg
157
- data = [
158
- ["autoScroll", "0,0"],
159
- ["id_swissreg:_link_hidden_", ""],
160
- ["id_swissreg_SUBMIT", "1"],
161
- ["id_swissreg:_idcl", "id_swissreg_sub_nav_ipiNavigation_item0"],
162
- ["javax.faces.ViewState", @state],
163
- ]
164
- @agent.page.form['id_swissreg:_idcl'] = 'id_swissreg_sub_nav_ipiNavigation_item0'
165
- @agent.page.forms.first.submit
166
- writeResponse("#{LogDir}/trademark_simple.html")
167
- data = [
168
- ["autoScroll", "0,0"],
169
- ["id_swissreg:_link_hidden_", ""],
170
- ["id_swissreg_SUBMIT", "1"],
171
- ["id_swissreg:_idcl", "id_swissreg_sub_nav_ipiNavigation_item0_item3"],
172
- ["javax.faces.ViewState", @state],
173
- ]
174
- @agent.page.form['id_swissreg:_idcl'] = 'id_swissreg_sub_nav_ipiNavigation_item0_item3'
175
- @agent.page.forms.first.submit
176
- writeResponse("#{LogDir}/trademark_extended.html")
177
-
178
- data = [
179
- ["autoScroll", "0,829"],
180
- ["id_swissreg:_link_hidden_", ""],
181
- ["id_swissreg:mainContent:id_ckbTMState", "1"], # Hängige Gesuche 1
182
- ["id_swissreg:mainContent:id_ckbTMState", "3"], # Aktive Marken 3
183
- ["id_swissreg:mainContent:id_txf_tm_no", ""],# Marken Nr
184
- ["id_swissreg:mainContent:id_txf_app_no", ""], # Gesuch Nr.
185
- ["id_swissreg:mainContent:id_txf_tm_text", "#{marke}"],
186
- ["id_swissreg:mainContent:id_txf_applicant", ""], # Inhaber/in
187
- ["id_swissreg:mainContent:id_cbxCountry", @swiss_only ? 'CH' : '_ALL'],
188
- ["id_swissreg:mainContent:id_txf_agent", ""], # Vertreter/in
189
- ["id_swissreg:mainContent:id_txf_licensee", ""], # Lizenznehmer
190
- ["id_swissreg:mainContent:id_txf_nizza_class", ""], # Nizza Klassifikation Nr.
191
- ["id_swissreg:mainContent:id_txf_appDate", "#{timespan}"] ,
192
- ["id_swissreg:mainContent:id_txf_expiryDate", ""], # Ablauf Schutzfrist
193
- ["id_swissreg:mainContent:id_cbxTMTypeGrp", "_ALL"], # Markenart
194
- ["id_swissreg:mainContent:id_cbxTMForm", "_ALL"], # Markentyp
195
- ["id_swissreg:mainContent:id_cbxTMColorClaim", "_ALL"], # Farbanspruch
196
- ["id_swissreg:mainContent:id_txf_pub_date", ""], # Publikationsdatum
197
- ["id_swissreg:mainContent:id_ckbTMPubReason", '1'],
198
- ["id_swissreg:mainContent:id_ckbTMPubReason", '2'],
199
- ["id_swissreg:mainContent:id_ckbTMPubReason", '3'],
200
- ["id_swissreg:mainContent:id_ckbTMPubReason", '4'],
201
- ["id_swissreg:mainContent:id_ckbTMPubReason", '5'],
202
- ["id_swissreg:mainContent:id_ckbTMPubReason", '6'],
203
- ["id_swissreg:mainContent:id_ckbTMPubReason", '7'],
204
- ["id_swissreg:mainContent:id_ckbTMPubReason", '8'],
205
- ["id_swissreg:mainContent:id_cbxFormatChoice", "1"],
206
- ["id_swissreg:mainContent:id_ckbTMChoice", "tm_lbl_tm_text"],
207
- ["id_swissreg:mainContent:id_ckbTMChoice", "tm_lbl_applicant"],
208
- ["id_swissreg:mainContent:id_ckbTMChoice", "tm_lbl_country"],
209
- ["id_swissreg:mainContent:id_ckbTMChoice", "tm_lbl_agent"],
210
- ["id_swissreg:mainContent:id_ckbTMChoice", "tm_lbl_app_date"],
211
- ["id_swissreg:mainContent:id_cbxHitsPerPage", HitsPerPage], # Treffer pro Seite
212
- ["id_swissreg:mainContent:sub_fieldset:id_submit", "suchen"],
213
- ["id_swissreg_SUBMIT", "1"],
214
- ["id_swissreg:_idcl", ""],
215
- ["id_swissreg:_link_hidden_", ""],
216
- ["javax.faces.ViewState", @state],
217
- ]
218
- begin
219
- @agent.post(Sr3, data)
220
- rescue Timeout::Error
221
- puts "Timeout!"
222
- retry
223
- end
224
- writeResponse("#{LogDir}/first_results.html")
225
- checkErrors(@agent.page.body, false)
226
- end
227
-
228
- # the number is only passed to facilitate debugging
229
- # lines are the address lines
230
- def Swissreg::parseAddress(number, inhaber)
231
- ort = nil
232
- plz = nil
233
- if inhaber
234
- lines = CGI.unescapeHTML(inhaber).split(LineSplit)
235
- # Search for plz/address
236
- 1.upto(lines.length-1).each {
237
- |cnt|
238
- if m = AddressRegexp.match(lines[cnt])
239
- lines[cnt+1] = nil
240
- plz = m[1]; ort = m[2]
241
- cnt.upto(MaxZeilen-1).each{ |cnt2| lines[cnt2] = nil }
242
- break
243
- end
244
- }
245
- end
246
- unless plz
247
- puts "Achtung! Konnte Marke #{number} mit Inhaber #{lines.inspect} nicht parsen" if $VERBOSE
248
- return nil, nil, nil, nil, nil, nil, nil, nil
249
- end
250
- # search for lines with only digits
251
- found = false
252
- 1.upto(lines.length-1).each {
253
- |cnt|
254
- break if lines[cnt] == nil
255
- if /^\d*$/.match(lines[cnt])
256
- found = true
257
- if lines[cnt+1] == nil
258
- found = 'before'
259
- lines[cnt-1] += LineSplit + lines[cnt]
260
- lines.delete_at(cnt)
261
- else
262
- found = 'after'
263
- lines[cnt] += LineSplit + lines[cnt+1]
264
- lines.delete_at(cnt+1)
265
- end
266
- end
267
- }
268
- puts "found #{found}: #{lines.inspect}" if found and $VERBOSE
269
- return lines[0], lines[1], lines[2], lines[3], lines[4], plz, ort
270
- end
271
-
272
- def Swissreg::getInputValuesFromPage(body) # body of HTML page
273
- contentData = []
274
- body.search('input').each{ |input|
275
- # puts "name: #{input.attribute('name')} value #{input.attribute('value')}"
276
- contentData << [ input.attribute('name').to_s, input.attribute('value').to_s ]
277
- }
278
- contentData
279
- end
280
-
281
- # return value of an array of POST values
282
- def Swissreg::inputValue(values, key)
283
- values.each{ |val|
284
- return val[1] if key.eql?(val[0])
285
- }
286
- return nil
287
- end
288
-
289
- # set value for a key of an array of POST values
290
- def Swissreg::setInputValue(values, key, newValue)
291
- values.each{ |val|
292
- if key.eql?(val[0])
293
- val[1] = newValue
294
- return
295
- end
296
- }
297
- return
298
- end
299
-
300
- def Swissreg::setAllInputValue(form, values)
301
- values.each{ |newValue|
302
- # puts "x: 0 #{ newValue[0].to_s} 1 #{newValue[1].to_s}"
303
- form.field(:name => newValue[0].to_s) { |elem|
304
- next if elem == nil # puts "Cannot set #{newValue[0].to_s}"
305
- elem.value = newValue[1].to_s
306
- }
307
- }
308
- end
309
-
310
- def Swissreg::getMarkenInfoFromDetail(doc)
311
- marke = nil
312
- number = 'invalid'
313
- bezeichnung = nil
314
- inhaber = nil
315
- hinterlegungsdatum = nil
316
- hatVertreter = 'Nein'
317
- doc.xpath("//html/body/form/div/div/fieldset/div/table/tbody/tr").each{
318
- |x|
319
- if x.children.first.text.eql?('Marke')
320
- if x.children[1].text.index('Markenabbildung')
321
- # we must fetch the link to the image
322
- bezeichnung = x.children[1].elements.first.attribute('href').text
323
- else # we got a trademark
324
- bezeichnung = x.children[1].text
325
- end
326
- end
327
-
328
- if x.children.first.text.eql?('Inhaber/in')
329
- inhaber = />(.*)<\/td/.match(x.children[1].to_s)[1].gsub('<br>',LineSplit)
330
- end
331
-
332
- if x.children.first.text.eql?('Vertreter/in')
333
- hatVertreter = 'Ja' if x.children[1].text.length > 0
334
- end
335
- hinterlegungsdatum = x.children[1].text if x.children.first.text.eql?('Hinterlegungsdatum')
336
- number = x.children[1].text if x.children.first.text.eql?('Gesuch Nr.')
337
- }
338
- zeile_1, zeile_2, zeile_3, zeile_4, zeile_5, plz, ort = Swissreg::parseAddress(number, inhaber)
339
- inhaber = inhaber.split(', , ')[0] # Catch cases where Inhaber has several postal addresses
340
- marke = Marke.new(bezeichnung, number, inhaber, DefaultCountry, hatVertreter, hinterlegungsdatum, zeile_1, zeile_2, zeile_3, zeile_4, zeile_5, plz, ort )
341
- end
342
-
343
- def fetchDetails(nummer) # takes a long time!
344
- @counterDetails += 1
345
- init_swissreg if @counterDetails % 90 == 0 # it seems that swissreg is artificially slowing down serving request after 100 hits
346
- filename = "#{LogDir}/detail_#{sprintf('%05d', @counterDetails)}_#{nummer.gsub('/','.')}.html"
347
- if File.exists?(filename)
348
- doc = Nokogiri::Slop(File.open(filename))
349
- else
350
- url = "#{Sr300}?language=de&section=tm&id=#{nummer}"
351
- pp "#{Time.now.strftime("%H:%M:%S")}: Opening #{filename}" if $VERBOSE
352
- $stdout.flush
353
- nrRetries = 0
354
- begin
355
- content = @agent.get_file url
356
- body = @agent.page.body
357
- rescue 'getaddrinfo: Name or service not known', Exception => e
358
- nrRetries += 1
359
- puts e.backtrace
360
- if nrRetries <= 3
361
- puts "get_file did not work reinit session and retry for #{nr}. nrRetries #{nrRetries}/3. e #{e}"
362
- sleep 60 # Sleep a minute to let network recover
363
- init_swissreg
364
- retry
365
- else
366
- puts "get_file did not work reinit session raise Interrupt"
367
- raise Interrupt
368
- end
369
- end
370
- body.force_encoding('utf-8') unless /^1\.8/.match(RUBY_VERSION)
371
- doc = Nokogiri::Slop(body)
372
- writeResponse(filename)
373
- end
374
- marke = Swissreg::getMarkenInfoFromDetail(doc)
375
- @results << marke
376
- end
377
-
378
- def Swissreg::emitCsv(results, filename='ausgabe.csv')
379
- return if results == nil or results.size == 0
380
- if /^1\.8/.match(RUBY_VERSION)
381
- ausgabe = File.open(filename, 'w+')
382
- # Write header
383
- s=''
384
- results[0].members.each { |member| s += member + ';' }
385
- ausgabe.puts s.chop
386
- # write all line
387
- results.each{
388
- |result|
389
- s = ''
390
- result.members.each{ |member|
391
- unless eval("result.#{member}")
392
- s += ';'
393
- else
394
- value = eval("result.#{member.to_s}")
395
- value = "\"#{value}\"" if value.index(';')
396
- s += value + ';'
397
- end
398
- }
399
- ausgabe.puts s.chop
400
- }
401
- ausgabe.close
402
- else
403
-
404
- CSV.open(filename, 'w', :headers=>results[0].members,
405
- :write_headers => true,
406
- :col_sep => ';'
407
- ) do |csv| results.each{ |x| csv << x }
408
- end
409
- end
410
- end
411
-
412
- def Swissreg::getTrademarkNumbers(doc)
413
- trademark_numbers = []
414
- doc.search('a').each{
415
- |link|
416
- if DetailRegexp.match(link.attribute('id'))
417
- trademark_numbers << link.children.first.children.first.content
418
- end
419
- }
420
- trademark_numbers
421
- end
422
-
423
- class Swissreg::Vereinfachte
424
- attr_reader :links2details, :trademark_search_id, :inputData, :firstHit, :nrHits, :nrSubPages, :pageNr
425
- HitRegexpDE = /Seite (\d*) von ([\d']*) - Treffer ([\d']*)-([\d']*) von ([\d']*)/
426
- Vivian = 'id_swissreg:mainContent:vivian'
427
-
428
- # Parse a HTML page from swissreg sr3.jsp
429
- # There we find info like "Seite 1 von 26 - Treffer 1-250 von 6'349" and upto 250 links to details
430
- def initialize(doc)
431
- @inputData = []
432
- @pageNr = @nrSubPages = @firstHit = @nrHits = 0
433
- m = HitRegexpDE.match(doc.text)
434
- if m
435
- begin
436
- c = m.to_a.map{|n| n.gsub(/'/, "").to_i }
437
- @pageNr = c[1]
438
- @nrSubPages = c[2]
439
- @firstHit = c[3]
440
- @nrHits = c[5]
441
- rescue NoMethodError
442
- end
443
- end
444
- @trademark_search_id = Swissreg::inputValue(Swissreg::getInputValuesFromPage(doc), Vivian)
445
- @links2details = []
446
- doc.search('input').each{ |input|
447
- # puts "name: #{input.attribute('name')} value #{input.attribute('value')}" if $VERBOSE
448
- @inputData << [ input.attribute('name').to_s, input.attribute('value').to_s ]
449
- }
450
-
451
- @state = Swissreg::inputValue(Swissreg::getInputValuesFromPage(doc), 'javax.faces.ViewState')
452
- doc.search('a').each{
453
- |link|
454
- if m = DetailRegexp.match(link.attribute('id'))
455
- # puts "XXX #{link.attribute('onclick').to_s} href: #{link.attribute('href').to_s} value #{link.attribute('value').to_s}" if $VERBOSE
456
- m = /'tmMainId','(\d*)'/.match(link.attribute('onclick').to_s)
457
- tmMainId = m[1].to_i
458
- @links2details << tmMainId
459
- end
460
- }
461
- end
462
-
463
- def getPostDataForDetail(position, id)
464
- [
465
- [ "autoScroll", "0,0"],
466
- [ "id_swissreg:mainContent:sub_options_result:sub_fieldset:cbxHitsPerPage", "#{HitsPerPage}"],
467
- [ "id_swissreg:mainContent:vivian", @trademark_search_id],
468
- [ "id_swissreg_SUBMIT", "1"],
469
- [ "id_swissreg:_idcl", "id_swissreg:mainContent:data:#{position}:tm_no_detail:id_detail", ""],
470
- [ "id_swissreg:mainContent:scroll_1", ""],
471
- [ "tmMainId", "#{id}"],
472
- [ "id_swissreg:_link_hidden_ "],
473
- [ "javax.faces.ViewState", @state]
474
- ]
475
- end
476
-
477
- def getPostDataForSubpage(pageNr)
478
- [
479
- [ "autoScroll", "0,0"],
480
- [ "id_swissreg:mainContent:sub_options_result:sub_fieldset:cbxHitsPerPage", "#{HitsPerPage}"],
481
- [ "id_swissreg:mainContent:vivian", @trademark_search_id],
482
- [ "id_swissreg_SUBMIT", "1"],
483
- [ "id_swissreg:_idcl", "id_swissreg:mainContent:scroll_1idx#{pageNr}"],
484
- [ "id_swissreg:mainContent:scroll_1", "idx#{pageNr}"],
485
- [ "tmMainId", ""],
486
- [ "id_swissreg:_link_hidden_ "],
487
- [ "javax.faces.ViewState", @state]
488
- ]
489
- end
490
-
491
- end
492
-
493
- def getAllHits(filename = nil, pageNr = 1)
494
- if filename && File.exists?(filename)
495
- doc = Nokogiri::Slop(File.open(filename))
496
- else
497
- form = @agent.page.form
498
- btn = form.buttons.last
499
- if btn && btn.name == "id_swissreg:mainContent:id_show_simple_view_hitlist"
500
- res = @agent.submit(form, btn)
501
- body = res.body
502
- else
503
- body = @agent.page.body
504
- end
505
- body.force_encoding('utf-8') unless /^1\.8/.match(RUBY_VERSION)
506
- doc = Nokogiri::Slop(body)
507
- filename = "#{LogDir}/vereinfachte_#{pageNr}.html"
508
- writeResponse(filename)
509
- end
510
- einfach = Swissreg::Vereinfachte.new(doc)
511
- puts "#{Time.now.strftime("%H:%M:%S")} status: getAllHits for #{pageNr} of #{einfach.nrSubPages} pages" if $VERBOSE
512
- subPage2Fetch = pageNr + 1
513
- data2 = einfach.getPostDataForSubpage(subPage2Fetch).clone
514
- if (HitsPerPage < einfach.nrHits - einfach.firstHit)
515
- itemsToFetch = HitsPerPage
516
- else
517
- itemsToFetch = einfach.nrHits - einfach.firstHit
518
- end
519
- @all_trademark_numbers += Swissreg::getTrademarkNumbers(doc)
520
-
521
- filename = "#{LogDir}/vereinfachte_#{pageNr}_back.html"
522
- writeResponse(filename)
523
- if pageNr < (einfach.nrSubPages)
524
- Swissreg::setAllInputValue(@agent.page.forms.first, data2)
525
- @agent.page.forms.first.submit
526
- getAllHits(nil, subPage2Fetch)
527
- end
528
- @all_trademark_numbers
529
- end
530
-
531
- def fetchresult(filename = "#{LogDir}/fetch_1.html", counter = 1)
532
- if filename && File.exists?(filename)
533
- doc = Nokogiri::Slop(File.open(filename))
534
- else
535
- body = @agent.page.body
536
- body.force_encoding('utf-8') unless /^1\.8/.match(RUBY_VERSION)
537
- doc = Nokogiri::Slop(body)
538
- writeResponse(filename)
539
- end
540
-
541
- if /Vereinfachte Trefferliste anzeigen/i.match(doc.text)
542
- form = @agent.page.forms.first
543
- button = form.button_with(:value => /Vereinfachte/i)
544
- # submit the form using that button
545
- @agent.submit(form, button)
546
- filename = "#{LogDir}/vereinfacht.html"
547
- writeResponse(filename)
548
- end
549
- getAllHits(doc, counter)
550
- puts"getAllHits: returned #{@all_trademark_numbers ? @all_trademark_numbers.size : 0} hits "
551
- if @all_trademark_numbers
552
- @all_trademark_numbers.each{
553
- |nr|
554
- nrRetries = 0
555
- begin
556
- fetchDetails(nr)
557
- rescue SocketError, Exception => e
558
- nrRetries += 1
559
- puts e.backtrace
560
- if nrRetries <= 3
561
- puts "fetchDetails did not work reinit session and retry for #{nr}. nrRetries #{nrRetries}/3. e #{e}"
562
- sleep 60 # Sleep a minute to let network recover
563
- init_swissreg
564
- retry
565
- else
566
- puts "fetchDetails did not work reinit session raise Interrupt"
567
- raise Interrupt
568
- end
569
- end
570
-
571
- }
572
- else
573
- puts "Could not find any trademarks in #{filename}"
574
- end
575
- end
576
- end # class Swissreg
577
-
578
- def Brand2csv::run(timespan, marke = 'a*', swiss_only = false)
579
- session = Swissreg.new(timespan, marke, swiss_only)
580
- begin
581
- session.parse_swissreg
582
- session.fetchresult
583
- rescue Interrupt, Net::HTTP::Persistent::Error
584
- puts "Unterbrochen. Vesuche #{session.results.size} Resultate zu speichern"
585
- end
586
- Swissreg::emitCsv(session.results, "#{timespan}.csv")
587
- session.results
588
- end
589
-
590
- end # module Brand2csv