medreg 0.1

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,37 @@
1
+ #!/usr/bin/env ruby
2
+ # encoding: utf-8
3
+ require 'fileutils'
4
+ require 'medreg/company_importer'
5
+ require 'medreg/person_importer'
6
+
7
# Top-level namespace of the medreg importer: shared paths, a tiny logger and
# the entry point that runs the company and/or person import.
module Medreg
  # Directory holding downloaded and generated data files.
  ARCHIVE_PATH = File.expand_path(File.join(File.dirname(__FILE__), '../../data'))
  # Directory receiving all log files.
  LOG_PATH = File.expand_path(File.join(File.dirname(__FILE__), '../../log'))
  # Log file handed to Mechanize; named after this source file.
  Mechanize_Log = File.join(LOG_PATH, File.basename(__FILE__).sub('.rb', '.log'))
  FileUtils.mkdir_p(LOG_PATH)
  FileUtils.mkdir_p(ARCHIVE_PATH)
  FileUtils.mkdir_p(File.dirname(Mechanize_Log))
  # Name of the running script without '.rb'; used as log prefix and log file name.
  ID = File.basename($0, '.rb')

  # Writes +msg+ with a timestamp to stdout and appends it to LOG_PATH/<ID>.log.
  #
  # The log file is opened lazily on first use and kept open. Every entry is
  # flushed immediately so nothing is lost when a long-running import is
  # interrupted.
  def Medreg.log(msg)
    now = Time.now
    $stdout.puts "#{now}: #{ID} #{msg}"
    $stdout.flush
    @@logfile ||= File.open(File.join(LOG_PATH, "#{ID}.log"), 'a+')
    @@logfile.puts "#{now}: #{msg}"
    @@logfile.flush
  end

  # Runs the importers.
  #
  # only_run - false/nil to run everything, otherwise a value whose string
  #            form selects the importers: anything matching /compan/i runs
  #            the company import, anything matching /person/i the person
  #            import.
  def Medreg.run(only_run=false)
    Medreg.log("Starting with only_run #{only_run}")
    # to_s makes symbols and other non-string selectors safe to match against.
    selector = only_run ? only_run.to_s : nil
    import_company = selector.nil? || !(selector =~ /compan/i).nil?
    import_person = selector.nil? || !(selector =~ /person/i).nil?
    if import_company
      importer = Medreg::CompanyImporter.new
      importer.update
    end
    if import_person
      importer = Medreg::PersonImporter.new
      importer.update
    end
    Medreg.log("Finished.")
  end
end
@@ -0,0 +1,45 @@
1
+ #!/usr/bin/env ruby
2
+ # encoding: utf-8
3
+
4
+ $: << File.expand_path("../../src", File.dirname(__FILE__))
5
+
6
+ require 'medreg'
7
+ require 'medreg/address'
8
module Medreg
  # Plain data holder for one person of the register, together with the
  # addresses attached to that person.
  class Person
    attr_accessor :capabilities, :title, :name, :firstname,
                  :email, :exam, :language, :specialities,
                  :praxis, :member, :salutation,
                  :origin_db, :origin_id, :addresses, :ean13,
                  :dummy_id,
                  :experiences,
                  :may_dispense_narcotics, :may_sell_drugs, :remark_sell_drugs
    alias :name_first :firstname
    alias :name_first= :firstname=
    alias :correspondence :language
    alias :correspondence= :language=

    # Starts with empty address and experience lists; all other attributes are nil.
    def initialize
      @addresses = []
      @experiences = []
    end

    # First name and family name joined by a single space. Missing (nil) or
    # empty parts are left out so the result never has a stray blank.
    def fullname
      [@firstname, @name].reject { |part| part.nil? || part.to_s.empty? }.join(' ')
    end

    # First address whose type is 'at_praxis', or nil when there is none.
    def praxis_address
      @addresses.find { |addr| addr.type == 'at_praxis' }
    end

    # All addresses whose type is 'at_praxis'.
    def praxis_addresses
      addresses_of_type('at_praxis')
    end

    # All addresses whose type is 'at_work'.
    def work_addresses
      addresses_of_type('at_work')
    end

    private

    # Addresses whose #type equals +type+.
    def addresses_of_type(type)
      @addresses.select { |addr| addr.type == type }
    end
  end
end
@@ -0,0 +1,417 @@
1
+ #!/usr/bin/env ruby
2
+ # encoding: utf-8
3
+
4
+ $: << File.expand_path("../../src", File.dirname(__FILE__))
5
+
6
+ require 'medreg'
7
+ require 'medreg/address'
8
+ require 'medreg/person'
9
+ #require 'util/resilient_loop'
10
+ require 'rubyXL'
11
+ require 'mechanize'
12
+ require 'logger'
13
+ require 'cgi'
14
+ require 'psych' if RUBY_VERSION.match(/^1\.9/)
15
+ require "yaml"
16
+ require 'timeout'
17
+
18
+ module Medreg
19
# When true, extra progress lines are logged and each fetched detail page is
# written to LOG_PATH.
DebugImport = false
Personen_Candidates = File.expand_path(File.join(__FILE__, '../../../data/Personen_20*.xlsx'))
# One timestamp shared by both output files, so their names cannot end up with
# different minute stamps when the clock ticks over between the two constants.
personen_stamp = Time.now.strftime('%Y.%m.%d-%H%M')
Personen_YAML = File.expand_path(File.join(__FILE__, "../../../data/persons_#{personen_stamp}.yaml"))
Personen_CSV = File.expand_path(File.join(__FILE__, "../../../data/persons_#{personen_stamp}.csv"))
MedRegOmURL = 'http://www.medregom.admin.ch/'
MedRegPerson_XLS_URL = "https://www.medregbm.admin.ch/Publikation/CreateExcelListMedizinalPersons"
# One row of the downloaded spreadsheet (see COL for the column layout).
PersonInfo = Struct.new("PersonInfo",
                        :gln,
                        # :exam,
                        :address,
                        :family_name,
                        :first_name,
                        :addresses,
                        :authority,
                        :diploma,
                        :may_dispense_narcotics,
                        :may_sell_drugs,
                        :remark_sell_drugs)
# Zero-based spreadsheet column of each field. Header row of the sheet:
# GLN Person Name Vorname PLZ Ort Bewilligungskanton Land Diplom BTM Berechtigung Bewilligung Selbstdispensation Bemerkung Selbstdispensation
COL = {
  :gln                    => 0, # A
  :family_name            => 1, # B
  :first_name             => 2, # C
  :zip_code               => 3, # D
  :place                  => 4, # E
  :authority              => 5, # F
  :country                => 6, # G
  :diploma                => 7, # H
  :may_dispense_narcotics => 8, # I
  :may_sell_drugs         => 9, # J
  :remark_sell_drugs      => 10, # K
}.freeze
52
+ class PersonImporter
53
+ RECIPIENTS = []
54
+
55
# Logs +msg+ through Medreg.log and remembers it in the class-wide @@logInfo list.
#
# Outside of unit tests the remembered entry is prefixed with a timestamp.
# When Minitest is loaded the plain message is stored instead, so the list
# never receives nil entries.
def save_for_log(msg)
  Medreg.log(msg)
  under_test = defined?(Minitest) || defined?(MiniTest)
  entry = under_test ? msg.to_s : "#{Time.now.strftime('%Y-%m-%d %H:%M:%S')}: #{msg}"
  @@logInfo << entry
end
60
# Prepares a fresh import run.
#
# app            - not used by this method; kept so existing callers can pass it.
# glns_to_import - optional list of GLNs to restrict the import to; entries
#                  of size 0 are dropped. The caller's list is not modified.
#
# Resets the class-wide log and doctor collections, recreates the (empty)
# YAML and CSV output files and sets up the Mechanize agent.
def initialize(app=nil, glns_to_import = [])
  @glns_to_import = glns_to_import.reject { |item| item.size == 0 }
  @info_to_gln = {}
  @@logInfo = []
  FileUtils.mkdir_p(File.dirname(Personen_YAML))
  [Personen_YAML, Personen_CSV].each do |path|
    # rm_f is a no-op for missing files, so no existence check is needed.
    FileUtils.rm_f(path)
    # Create the empty file without keeping a handle open.
    File.open(path, 'w+') { }
  end
  @persons_created = 0
  @persons_updated = 0
  @persons_skipped = 0
  @persons_deleted = 0
  @skip_to_doctor = nil
  @archive = ARCHIVE_PATH
  @@all_doctors = {}
  setup_default_agent
end
79
# Writes Personen_CSV: a header row, one fixed placeholder row and one empty
# row per entry of @@all_doctors.
#
# NOTE(review): the rows carry no doctor data yet and the header lists
# "address_revision" twice; this looks unfinished, and the only visible call
# to it is commented out. Confirm the intended columns before enabling it.
def write_csv_file
  # Loaded here because nothing else in this file requires the CSV library.
  require 'csv'
  header = [
    "ean13", "name", "firstname", "may_dispense_narcotics", "remark_sell_drugs", "specialities", "capabilities",
    "address_additional_lines",
    "address_canton",
    "address_fax",
    "address_fon",
    "address_location",
    "address_revision",
    "address_type",
    "address_revision",
  ]
  CSV.open(Personen_CSV, "wb") do |csv|
    csv << header
    csv << ["another", "row"]
    @@all_doctors.each { |_doctor| csv << [] }
  end
end
95
# Runs the complete person import: fetch the latest spreadsheet, parse it and
# look up the details of every GLN.
#
# When GLNs were passed to the constructor only those are looked up,
# otherwise every GLN found in the spreadsheet.
#
# Returns [created, updated, deleted, skipped] counters.
# The collected doctors are always written to Personen_YAML, even when the
# import aborts with an exception.
def update
  requested = @glns_to_import.clone # parse_xls overwrites @glns_to_import
  latest = get_latest_file
  save_for_log "parse_xls #{latest} specified GLN glns #{requested.inspect}"
  parse_xls(latest)
  get_detail_to_glns(requested.empty? ? @glns_to_import : requested)
  return @persons_created, @persons_updated, @persons_deleted, @persons_skipped
ensure
  # write_csv_file
  File.open(Personen_YAML, 'w+') { |f| f.write(@@all_doctors.to_yaml) }
  save_for_log "Saved #{@@all_doctors.size} doctors in #{Personen_YAML}"
end
108
# Creates the Mechanize agent used for all requests and stores it in @agent.
#
# The agent follows every redirect (up to 55) and meta refreshes, and
# tolerates bad chunking. When Minitest is loaded, requests are logged to
# Mechanize_Log.
#
# Returns the agent.
def setup_default_agent
  @agent = Mechanize.new
  @agent.user_agent = 'Mozilla/5.0 (X11; Linux x86_64; rv:31.0) Gecko/20100101 Firefox/31.0 Iceweasel/31.1.0'
  @agent.redirect_ok = :all
  @agent.follow_meta_refresh_self = true
  @agent.redirection_limit = 55
  # A former assignment of the misspelled value :everwhere was always
  # overwritten by this one, so `true` is the setting that was in effect.
  @agent.follow_meta_refresh = true
  @agent.ignore_bad_chunking = true
  @agent.log = Logger.new(Mechanize_Log) if defined?(Minitest) || defined?(MiniTest)
  @agent
end
120
+
121
# Builds the attribute hash of one doctor from the detail page.
#
# doc  - parsed HTML document of the detail page (queried via #xpath).
# gln  - GLN of the doctor, used for the :ean13 entry and in log messages.
# info - spreadsheet row of the doctor (family_name, first_name,
#        may_dispense_narcotics, may_sell_drugs, remark_sell_drugs).
#
# Rows between the "Weiterbildungstitel" and "Weitere Qualifikationen"
# headers become :specialities, all rows after the latter :capabilities.
#
# Returns the hash, or nil (after logging an error) when the page has too few
# table rows or one of the two section headers is missing.
def parse_details(doc, gln, info)
  # Query the table rows once instead of once per loop iteration.
  rows = doc.xpath("//tr").to_a
  unless rows.size > 3
    Medreg.log "ERROR: Could not find a table with info for #{gln}"
    return nil
  end
  idx_titel = rows.index { |row| row.text.match(/^\s*Weiterbildungstitel/) }
  idx_privat = rows.index { |row| row.text.match(/^\s*Weitere Qualifikationen/) }
  unless idx_titel && idx_privat
    Medreg.log "ERROR: Could not find the qualification sections for #{gln}"
    return nil
  end

  yes = lambda { |value| !(value.to_s =~ /ja/i).nil? }
  doc_hash = Hash.new
  doc_hash[:ean13] = gln.to_s.clone
  doc_hash[:name] = info.family_name
  doc_hash[:firstname] = info.first_name
  doc_hash[:may_dispense_narcotics] = yes.call(info.may_dispense_narcotics)
  doc_hash[:may_sell_drugs] = yes.call(info.may_sell_drugs)
  doc_hash[:remark_sell_drugs] = info.remark_sell_drugs
  # The year of the state exam is not shown on the page, so :exam is not set.

  specialities = []
  rows[(idx_titel + 1)...idx_privat].to_a.each do |row|
    line = row.text
    next if line.match(/Keine Angaben vorhanden/)
    specialities << string_to_qualification(line.gsub("\r\n", ''), gln)
  end
  doc_hash[:specialities] = specialities

  capabilities = []
  rows[(idx_privat + 1)..-1].each do |row|
    line = row.text
    next if line.match(/Keine Angaben vorhanden/)
    capabilities << string_to_qualification(line, gln)
  end
  doc_hash[:capabilities] = capabilities

  doc_hash[:addresses] = get_detail_info(info, doc)
  doc_hash
end
162
+
163
# Fetches and stores the details of the doctor with the given GLN.
#
# r_loop - loop helper; its try_run(gln, seconds) wraps the whole lookup.
# gln    - GLN to look up; its spreadsheet row must be in @info_to_gln.
#
# The doctor is skipped (with an ERROR log line) when it was already fetched,
# when no spreadsheet row exists, or when the search returns no id. A detail
# page that cannot be parsed is not recorded in @@all_doctors.
def get_one_doctor(r_loop, gln)
  maxSeconds = (defined?(Minitest) || defined?(MiniTest)) ? 3600 : 120
  r_loop.try_run(gln, maxSeconds) do # increase timeout from default of 10 seconds. Measured 46 seconds for the first gln
    if @@all_doctors[gln.to_s]
      Medreg.log "ERROR: Skip search GLN #{gln} as already found"
      next
    end
    info = @info_to_gln[gln.to_s]
    unless info
      Medreg.log "ERROR: could not find info for GLN #{gln}"
      next
    end
    # CGI.escape turns blanks into '+' and also encodes characters such as
    # '&' that would otherwise break the query string.
    url = MedRegOmURL + "de/Suche/Detail/?gln=#{gln}" \
          "&vorname=#{CGI.escape(info.first_name.to_s)}&name=#{CGI.escape(info.family_name.to_s)}"
    search_fields = [
      ['Name', info.family_name],
      ['Vorname', info.first_name],
      ['Gln', gln.to_s],
      ['AutomatischeSuche', 'True'],
    ]
    paging_fields = [
      ['currentpage', '1'],
      ['pagesize', '10'],
      ['sortfield', ''],
      ['sortorder', 'Ascending'],
      ['pageraction', ''],
      ['filter', ''],
    ]
    # The responses of the first three requests are not read.
    # NOTE(review): presumably they establish the server-side search state
    # needed by the last request - confirm before removing any of them.
    @agent.get(url)
    @agent.post(MedRegOmURL + 'Suche/GetSearchCount', search_fields)
    @agent.post(MedRegOmURL + 'Suche/GetSearchData', paging_fields)
    result = @agent.post(MedRegOmURL + 'Suche/GetSearchData', search_fields + paging_fields)

    found = result.body.match(/id"\:(\d\d+)/i)
    unless found
      File.open(File.join(LOG_PATH, 'page_4.body'), 'w+') { |f| f.write result.body }
      Medreg.log "ERROR: Could not find an gln #{gln} via url #{url}"
      next
    end
    detail = @agent.get(MedRegOmURL + "de/Detail/Detail?pid=#{found[1]}")
    File.open(File.join(LOG_PATH, "#{gln}.html"), 'w+') { |f| f.write detail.content } if DebugImport
    doc_hash = parse_details(Nokogiri::HTML(detail.content), gln, info)
    next unless doc_hash # parse_details already logged the reason
    store_doctor(doc_hash)
    @@all_doctors[gln.to_s] = doc_hash
  end
end
221
# Looks up the details of every GLN in +glns+.
#
# GLNs the ResilientLoop reports as must_skip? are counted as skipped. Each
# remaining GLN is tried up to 100 times, waiting 10 minutes between
# attempts. Under Minitest errors are re-raised immediately instead.
#
# Raises RuntimeError when a GLN still fails after the last attempt.
#
# NOTE(review): ResilientLoop is not required by this file (the require is
# commented out); it has to be loaded by the caller.
def get_detail_to_glns(glns)
  max_retries = 100
  @idx = 0
  r_loop = ResilientLoop.new(File.basename(__FILE__, '.rb'))
  @skip_to_doctor ||= r_loop.state_id
  Medreg.log "get_detail_to_glns #{glns.size}. first 10 are #{glns[0..9]} state_id is #{r_loop.state_id.inspect}" if DebugImport
  glns.each do |gln|
    if r_loop.must_skip?(gln.to_s)
      Medreg.log "Skipping #{gln.inspect}. Waiting for #{r_loop.state_id.inspect}" if DebugImport
      @persons_skipped += 1
      next
    end
    @idx += 1
    nr_tries = 0
    while nr_tries < max_retries
      begin
        Medreg.log "Searching for doctor with GLN #{gln}. Skipped #{@persons_skipped}, created #{@persons_created} updated #{@persons_updated} of #{glns.size}).#{nr_tries > 0 ? ' nr_tries is ' + nr_tries.to_s : ''}"
        get_one_doctor(r_loop, gln)
        break
      rescue StandardError => e
        # Covers Mechanize::ResponseCodeError and Timeout::Error as well;
        # the log line names the class that was actually raised.
        raise e if defined?(Minitest) || defined?(MiniTest)
        nr_tries += 1
        Medreg.log "rescue #{e.class} #{gln.inspect}. nr_tries #{nr_tries} error was #{e}"
        # wait 10 minutes till medreg server is back again - but not when we
        # are about to give up anyway
        sleep(10 * 60) if nr_tries < max_retries
      end
    end
    raise "Max retries #{nr_tries} for #{gln.to_s} reached. Aborting import" if nr_tries == max_retries
    @persons_created += 1
    if (@persons_created + @persons_updated) % 100 == 99
      Medreg.log "Start saving after #{@persons_created} created #{@persons_updated} updated"
    end
  end
  r_loop.finished
end
260
# Extracts the addresses listed on a detail page.
#
# info - spreadsheet row of the doctor; its gln must match the number found
#        after "Nationalität:" on the page, its authority becomes the canton.
# doc  - parsed HTML document of the detail page.
#
# Every '//ol/li/div' node yields one Address2 of type 'at_praxis': the first
# child line is the name, "Telefon: "/"Fax: " lines fill fon/fax, a line
# containing a four-digit number followed by a word becomes the location and
# all other non-empty lines are additional lines.
#
# Returns an Array of Address2, or [] (after logging) when the GLN on the
# page does not match.
def get_detail_info(info, doc)
  text = doc.xpath('//div').text
  # NOTE(review): `[Ö\w+]` matches a single character; only the second
  # capture (the GLN) is used, so the pattern is left untouched.
  m = text.match(/Nationalität:\s*([Ö\w+])[^:]+:\s+(\d+)/) # Special case Österreich
  unless m and m[2] == info.gln.to_s
    File.open(File.join(LOG_PATH, 'doc_div.txt'), 'w+') { |f| f.write text }
    Medreg.log "ERROR: Id in text does not match #{info.gln } match was #{m.inspect}"
    return []
  end

  doc.xpath('//ol/li/div').map do |node|
    lines = node.children.map { |child| child.text }
    address = Address2.new
    address.fon = []
    address.fax = []
    address.type = 'at_praxis'
    address.additional_lines = []
    address.canton = info.authority
    address.name = lines[0]

    # Everything after the name; may be empty for a one-line address block.
    rest = lines[1..-1] || []
    rest[0] = rest[0].sub(/^[A-Z]\. /, '') unless rest.empty?
    rest.each do |line|
      if line =~ /^Telefon: /
        number = line.split('Telefon: ')[1]
        address.fon << number.gsub(/\-/, ' ') if number
      elsif line =~ /^Fax: /
        number = line.split('Fax: ')[1]
        address.fax << number.gsub(/\-/, ' ') if number
      elsif line.length == 0
        next
      elsif line =~ /(|\w\w[-\. ])(\d{4})\s+(\S+)/
        address.location = line
      else
        address.additional_lines << line
      end
    end
    address
  end
end
302
# Makes sure today's spreadsheet exists in the archive and returns its path.
#
# When today's file is not there yet it is downloaded from
# MedRegPerson_XLS_URL. If the download fails with a connection or timeout
# error, the most recently modified earlier download of this year is copied
# instead.
#
# Raises the download error when no earlier download exists.
def get_latest_file
  target = File.join @archive, Time.now.strftime("persons_%Y.%m.%d.xlsx")
  save_for_log "get_latest_file target #{target} #{File.exist?(target)} from URL #{MedRegPerson_XLS_URL}"
  return target if File.exist?(target)

  begin
    content = Mechanize.new.get(MedRegPerson_XLS_URL).body
  rescue Net::HTTP::Persistent::Error, Timeout::Error => e
    Medreg.log "Catched error #{e}"
    search_name = File.join @archive, Time.now.strftime("persons_%Y*.xlsx")
    candidates = Dir.glob(search_name)
    if candidates.size == 0
      save_for_log "getting file from MedRegPerson_XLS_URL failed. Could not find any prior downloads via #{search_name}"
      raise e
    end
    best = candidates.max_by { |f| File.mtime(f) }
    save_for_log "getting file from MedRegPerson_XLS_URL failed. Using #{best} #{File.mtime(best)} #{File.size(best)} bytes"
    # xlsx is a binary format: read it without newline or encoding conversion.
    content = File.open(best, 'rb') { |f| f.read }
  end
  File.open(target, 'wb') { |f| f.write content }
  target
end
328
# Summarises the import counters as a multi-line text.
#
# The "Skipped doctors" line additionally names the doctor the import waited
# for when @skip_to_doctor is set.
def report
  waited_for = @skip_to_doctor ? '. Waited for ' + @skip_to_doctor.to_s : ''
  counters = [
    "Skipped doctors: #{@persons_skipped}#{waited_for}",
    "New doctors: #{@persons_created}",
    "Updated doctors: #{@persons_updated}",
    "Deleted doctors: #{@persons_deleted}",
  ]
  "Persons update \n\n" + counters.map { |entry| entry + "\n" }.join
end
336
# Normalises the attribute hash of one doctor.
#
# hash - attributes as produced by parse_details; nil is accepted.
#
# Only the known attribute keys are taken over. :praxis is converted to a
# boolean ('Ja' => true), a single String in :specialities/:capabilities is
# wrapped in an Array, and entries that are nil are left out (false is kept).
#
# Returns the normalised Hash, or nil when +hash+ is nil.
# NOTE(review): nothing is persisted here; the only visible caller ignores
# the return value.
def store_doctor(hash)
  return unless hash
  extract = [
    :ean13,
    # :exam,
    :email,
    :firstname,
    :language,
    :name,
    :praxis,
    :salutation,
    :specialities,
    :capabilities,
    :title,
    :addresses,
    :may_dispense_narcotics,
    :may_sell_drugs,
    :remark_sell_drugs,
  ]
  doc_hash = {}
  extract.each do |key|
    value = hash[key]
    next if value.nil?
    case key
    when :praxis
      value = (value == 'Ja')
    when :specialities, :capabilities
      value = [value] if value.is_a?(String)
    end
    doc_hash.store(key, value)
  end
  doc_hash
end
377
# Reads the downloaded spreadsheet into @info_to_gln.
#
# path - path of the xlsx file.
#
# Rows without a GLN cell are ignored; the first remaining row is the header
# and is skipped, as are rows whose GLN is blank. Every other row becomes a
# PersonInfo stored under the GLN as a String - the form all lookups in this
# class use (gln.to_s).
#
# Returns the sorted list of GLNs, which is also stored in @glns_to_import.
def parse_xls(path)
  Medreg.log "parsing #{path}"
  workbook = RubyXL::Parser.parse(path)
  fields = [:gln, :family_name, :first_name, :authority, :diploma,
            :may_dispense_narcotics, :may_sell_drugs, :remark_sell_drugs]
  rows = 0
  workbook[0].each do |row|
    gln_cell = row && row[COL[:gln]]
    next unless gln_cell
    rows += 1
    next if rows == 1 # header row
    gln = gln_cell.value.to_s
    next if gln.empty?
    info = PersonInfo.new
    fields.each do |field|
      cell = row[COL[field]]
      info[field] = cell ? cell.value : nil
    end
    @info_to_gln[gln] = info
  end
  @glns_to_import = @info_to_gln.keys.sort
end
397
+
398
+ # just for debugging when running unit tests
399
# Makes the importer's @@all_doctors collection readable as Person.all_doctors.
# NOTE(review): the method is defined on Person but reads a class variable that
# this file only assigns in PersonImporter#initialize; that lookup presumably
# relies on the definition staying lexically inside PersonImporter - confirm
# before moving it.
+ def Person.all_doctors
400
+ @@all_doctors
401
+ end
402
+ private
403
# "<title> <year> <country>"; Ö is listed explicitly because \w does not
# match it (special case Österreich).
Match_qualification_with_austria = /(.*)\s+(\d+)\s+([Ö\w]+)/

# Turns one table row text into "title, year, country".
#
# line - text of the row.
# gln  - GLN of the doctor, only used in the log message.
#
# Each of the three parts is cleaned on its own (carriage returns removed,
# runs of whitespace collapsed, surrounding blanks stripped), so a comma
# inside the title is kept as written.
#
# Returns the String, or nil for section header rows and for rows that do not
# match (the latter are logged as PROBLEM).
def string_to_qualification(line, gln)
  return nil if line.match(/Weiterbildungstitel|Weitere Qualifikationen|Beruf.*Jahr.*Land/im)
  m = line.match(Match_qualification_with_austria)
  unless m
    Medreg.log "PROBLEM: could not find speciality for GLN #{gln} in line '#{line}'"
    return nil
  end
  m[1..3].map { |part| part.gsub("\r", "").gsub(/\s\s+/, ' ').strip }.join(', ')
end
416
+ end
417
+ end