medreg 0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,37 @@
1
+ #!/usr/bin/env ruby
2
+ # encoding: utf-8
3
+ require 'fileutils'
4
+ require 'medreg/company_importer'
5
+ require 'medreg/person_importer'
6
+
7
+ module Medreg
8
# Directory for data files: "../../data" relative to the directory of this file.
ARCHIVE_PATH = File.expand_path('../../data', File.dirname(__FILE__))
# Directory for log files: "../../log" relative to the directory of this file.
LOG_PATH = File.expand_path('../../log', File.dirname(__FILE__))
# Log file handed to Mechanize; this file's name with '.rb' replaced by '.log'.
Mechanize_Log = File.join(LOG_PATH, File.basename(__FILE__).sub('.rb', '.log'))
# Loading this file creates all directories referenced above.
[LOG_PATH, ARCHIVE_PATH, File.dirname(Mechanize_Log)].each { |dir| FileUtils.mkdir_p(dir) }
# Name of the running script without '.rb'; Medreg.log uses it as prefix and log file name.
ID = File.basename($0, '.rb')
15
+
16
# Writes +msg+ to standard output and appends it to "#{ID}.log" inside LOG_PATH.
#
# The log file is opened lazily on the first call and kept open for later
# calls. It is switched to sync mode so each line reaches the file right away
# instead of waiting in Ruby's write buffer (the handle is never closed
# explicitly, so buffered lines could otherwise be lost or show up late).
#
# msg:: text to log; it is interpolated into the output line
def Medreg.log(msg)
  $stdout.puts "#{Time.now}: #{ID} #{msg}" # unless defined?(Minitest)
  $stdout.flush
  @@logfile ||= File.open(File.join(LOG_PATH, "#{ID}.log"), 'a+').tap { |file| file.sync = true }
  @@logfile.puts "#{Time.now}: #{msg}"
end
22
+
23
# Runs the company and/or person import.
#
# only_run:: falsy to run both importers; otherwise an object responding to
#            +match+ (e.g. a String). The company import runs when it matches
#            /compan/i, the person import when it matches /person/i.
def Medreg.run(only_run=false)
  Medreg.log("Starting with only_run #{only_run}")
  selected = lambda { |pattern| !only_run || only_run.match(pattern) }
  run_companies = selected.call(/compan/i)
  run_persons = selected.call(/person/i)
  Medreg::CompanyImporter.new.update if run_companies
  Medreg::PersonImporter.new.update if run_persons
  Medreg.log("Finished.")
end
37
+ end
@@ -0,0 +1,45 @@
1
+ #!/usr/bin/env ruby
2
+ # encoding: utf-8
3
+
4
+ $: << File.expand_path("../../src", File.dirname(__FILE__))
5
+
6
+ require 'medreg'
7
+ require 'medreg/address'
8
+ module Medreg
9
# Plain data holder for one person of the register.
#
# All attributes are simple accessors; +addresses+ and +experiences+ start
# out as empty arrays. Address objects only need to respond to +type+.
class Person
  attr_accessor :capabilities, :title, :name, :firstname,
    :email, :exam, :language, :specialities,
    :praxis, :member, :salutation,
    :origin_db, :origin_id, :addresses, :ean13,
    :dummy_id,
    :experiences,
    :may_dispense_narcotics, :may_sell_drugs, :remark_sell_drugs
  alias :name_first :firstname
  alias :name_first= :firstname=
  alias :correspondence :language
  alias :correspondence= :language=

  def initialize
    @addresses = []
    @experiences = []
  end

  # First name and family name separated by one blank. Missing (nil) or
  # empty parts are left out, so no leading/trailing blank is produced.
  def fullname
    [@firstname, @name].reject { |part| part.nil? || part.to_s.empty? }.join(' ')
  end

  # First address whose type is 'at_praxis', or nil when there is none.
  def praxis_address
    @addresses.find { |addr| addr.type == 'at_praxis' }
  end

  # All addresses whose type is 'at_praxis'.
  def praxis_addresses
    addresses_of_type('at_praxis')
  end

  # All addresses whose type is 'at_work'.
  def work_addresses
    addresses_of_type('at_work')
  end

  private

  # Selects the addresses whose +type+ equals the given string.
  def addresses_of_type(type)
    @addresses.select { |addr| addr.type == type }
  end
end
45
+ end
@@ -0,0 +1,417 @@
1
+ #!/usr/bin/env ruby
2
+ # encoding: utf-8
3
+
4
+ $: << File.expand_path("../../src", File.dirname(__FILE__))
5
+
6
+ require 'medreg'
7
+ require 'medreg/address'
8
+ require 'medreg/person'
9
+ #require 'util/resilient_loop'
10
+ require 'rubyXL'
11
+ require 'mechanize'
12
+ require 'logger'
13
+ require 'cgi'
14
+ require 'psych' if RUBY_VERSION.match(/^1\.9/)
15
+ require "yaml"
16
+ require 'timeout'
17
+
18
+ module Medreg
19
# Switch for extra diagnostics in the importer (checked with `if DebugImport`).
DebugImport = false

# File locations, all resolved via '../../../data' relative to this file's path.
# The YAML and CSV names contain the load time of this file, to the minute.
Personen_Candidates = File.expand_path('../../../data/Personen_20*.xlsx', __FILE__)
Personen_YAML = File.expand_path("../../../data/persons_#{Time.now.strftime('%Y.%m.%d-%H%M')}.yaml", __FILE__)
Personen_CSV = File.expand_path("../../../data/persons_#{Time.now.strftime('%Y.%m.%d-%H%M')}.csv", __FILE__)

# Base URL used for the per-person search and detail requests.
MedRegOmURL = 'http://www.medregom.admin.ch/'
# URL from which the spreadsheet listing all persons is downloaded.
MedRegPerson_XLS_URL = "https://www.medregbm.admin.ch/Publikation/CreateExcelListMedizinalPersons"

# One spreadsheet row; registered as Struct::PersonInfo.
# (:exam is intentionally not part of the member list.)
PersonInfo = Struct.new(
  "PersonInfo",
  :gln, :address, :family_name, :first_name, :addresses,
  :authority, :diploma,
  :may_dispense_narcotics, :may_sell_drugs, :remark_sell_drugs
)

# Zero-based spreadsheet column of each field (column letters A..K). Header row:
# GLN Person Name Vorname PLZ Ort Bewilligungskanton Land Diplom BTM Berechtigung Bewilligung Selbstdispensation Bemerkung Selbstdispensation
COL = Hash[
  [
    :gln,                    # A
    :family_name,            # B
    :first_name,             # C
    :zip_code,               # D
    :place,                  # E
    :authority,              # F
    :country,                # G
    :diploma,                # H
    :may_dispense_narcotics, # I
    :may_sell_drugs,         # J
    :remark_sell_drugs,      # K
  ].each_with_index.to_a
]
52
+ class PersonImporter
53
RECIPIENTS = []

# Logs +msg+ via Medreg.log and additionally appends an entry to the
# class-wide @@logInfo list. The entry is +msg+ prefixed with the current
# time; when MiniTest is defined no entry text is built and nil is appended.
def save_for_log(msg)
  Medreg.log(msg)
  entry = nil
  entry = "#{Time.now.strftime('%Y-%m-%d %H:%M:%S')}: #{msg}" unless defined?(MiniTest)
  @@logInfo << entry
end
60
# Prepares a fresh import run.
#
# app::            unused by this method; kept for interface compatibility
# glns_to_import:: GLNs to restrict the run to; entries whose +size+ is 0
#                  are dropped. The caller's array is not modified.
#
# Side effects: resets the class-wide @@logInfo and @@all_doctors, removes
# and recreates the Personen_YAML / Personen_CSV files, and builds the
# Mechanize agent via setup_default_agent.
def initialize(app=nil, glns_to_import = [])
  # reject (instead of clone + delete_if) also works for a frozen argument
  @glns_to_import = glns_to_import.reject { |item| item.size == 0 }
  @info_to_gln = {}
  @@logInfo = []
  # rm_f ignores missing files, so no existence check is needed
  # (File.exists? was removed in Ruby 3.2).
  FileUtils.rm_f(Personen_YAML)
  FileUtils.rm_f(Personen_CSV)
  FileUtils.mkdir_p(File.dirname(Personen_YAML))
  # NOTE(review): these two handles are not used or closed anywhere in the
  # visible code; they are kept because opening creates the (empty) files.
  @yaml_file = File.open(Personen_YAML, 'w+')
  @csv_file = File.open(Personen_CSV, 'w+')
  @persons_created = 0
  @persons_updated = 0
  @persons_skipped = 0
  @persons_deleted = 0
  @skip_to_doctor = nil
  @archive = ARCHIVE_PATH
  @@all_doctors = {}
  setup_default_agent
end
79
# Writes Personen_CSV: a header row, one fixed two-column row and one empty
# row per entry of @@all_doctors.
#
# NOTE(review): the rows after the header look like placeholders (no doctor
# data is written, "address_revision" appears twice in the header) and the
# only call site in #update is commented out — confirm before relying on it.
def write_csv_file
  # Loaded here because nothing else in this file requires 'csv'; without it
  # the CSV constant is undefined when this method runs.
  require 'csv'
  CSV.open(Personen_CSV, "wb") do |csv|
    csv << ["ean13", "name", "firstname", "may_dispense_narcotics", "remark_sell_drugs", "specialities", "capabilities",
            "address_additional_lines",
            "address_canton",
            "address_fax",
            "address_fon",
            "address_location",
            "address_revision",
            "address_type",
            "address_revision",
            ]
    csv << ["another", "row"]
    @@all_doctors.each { |_doctor| csv << [] }
  end
end
95
# Runs one complete import: fetches the latest spreadsheet, parses it and
# loads the detail data for each GLN. If GLNs were given to the constructor
# only those are looked up, otherwise all GLNs found in the spreadsheet.
#
# Returns [created, updated, deleted, skipped] counters.
# Whether or not an error occurred, @@all_doctors is dumped as YAML into
# Personen_YAML before the method is left.
def update
  requested = @glns_to_import.clone
  latest = get_latest_file
  save_for_log "parse_xls #{latest} specified GLN glns #{requested.inspect}"
  parse_xls(latest) # replaces @glns_to_import with every GLN of the sheet
  get_detail_to_glns(requested.empty? ? @glns_to_import : requested)
  [@persons_created, @persons_updated, @persons_deleted, @persons_skipped]
ensure
  # write_csv_file
  File.open(Personen_YAML, 'w+') { |out| out.write(@@all_doctors.to_yaml) }
  save_for_log "Saved #{@@all_doctors.size} doctors in #{Personen_YAML}"
end
108
# Creates the Mechanize agent used for all requests, stores it in @agent and
# returns it. Redirects and meta refreshes are followed (up to 55
# redirections) and bad chunking is tolerated. Only when MiniTest is defined
# the agent logs to Mechanize_Log.
def setup_default_agent
  @agent = Mechanize.new
  @agent.user_agent = 'Mozilla/5.0 (X11; Linux x86_64; rv:31.0) Gecko/20100101 Firefox/31.0 Iceweasel/31.1.0'
  @agent.redirect_ok = :all
  @agent.follow_meta_refresh_self = true
  @agent.redirection_limit = 55
  # A preceding assignment of the misspelled symbol :everwhere was dropped:
  # it was overwritten by this line before the agent was ever used.
  @agent.follow_meta_refresh = true
  @agent.ignore_bad_chunking = true
  @agent.log = Logger.new(Mechanize_Log) if defined?(MiniTest)
  @agent
end
120
+
121
# Builds the attribute hash for one person from the detail page.
#
# doc::  parsed detail page; must respond to xpath("//tr") with a list of
#        rows that respond to +text+
# gln::  GLN of the person (stored as String under :ean13)
# info:: spreadsheet data of the person (names and the dispensing columns)
#
# Returns nil (after logging an error) when the page has 3 or fewer table
# rows. Otherwise returns a hash with :ean13, :name, :firstname,
# :may_dispense_narcotics, :may_sell_drugs, :remark_sell_drugs,
# :specialities, :capabilities and :addresses.
#
# Rows between the "Weiterbildungstitel" and "Weitere Qualifikationen"
# headings become specialities, rows after the latter become capabilities.
# If a heading is missing the affected list stays empty and a problem is
# logged, instead of failing with NoMethodError on nil.
def parse_details(doc, gln, info)
  rows = doc.xpath("//tr").to_a # evaluate the XPath only once
  unless rows.size > 3
    Medreg.log "ERROR: Could not find a table with info for #{gln}"
    return nil
  end
  said_yes = lambda { |value| (value && value.match(/ja/i)) ? true : false }
  doc_hash = {}
  doc_hash[:ean13] = gln.to_s.clone
  doc_hash[:name] = info.family_name
  doc_hash[:firstname] = info.first_name
  doc_hash[:may_dispense_narcotics] = said_yes.call(info.may_dispense_narcotics)
  doc_hash[:may_sell_drugs] = said_yes.call(info.may_sell_drugs)
  doc_hash[:remark_sell_drugs] = info.remark_sell_drugs

  idx_titel = rows.index { |row| row.text.match(/^\s*Weiterbildungstitel/) }
  idx_privat = rows.index { |row| row.text.match(/^\s*Weitere Qualifikationen/) }
  unless idx_titel && idx_privat
    Medreg.log "PROBLEM: could not find all section headings for GLN #{gln}"
  end
  # The year of the state exam is not shown on the page, so :exam is not set.

  specialities = []
  if idx_titel && idx_privat
    (rows[(idx_titel + 1)...idx_privat] || []).each do |row|
      line = row.text
      next if line.match(/Keine Angaben vorhanden/)
      specialities << string_to_qualification(line.gsub("\r\n", ''), gln)
    end
  end
  doc_hash[:specialities] = specialities

  capabilities = []
  if idx_privat
    rows.drop(idx_privat + 1).each do |row|
      line = row.text
      next if line.match(/Keine Angaben vorhanden/)
      capabilities << string_to_qualification(line, gln)
    end
  end
  doc_hash[:capabilities] = capabilities

  doc_hash[:addresses] = get_detail_info(info, doc)
  doc_hash
end
162
+
163
# Fetches and stores the detail data of one person.
#
# r_loop:: loop helper; the work runs inside r_loop.try_run with a time
#          limit of 120 seconds (3600 when Minitest is defined)
# gln::    GLN of the person to fetch
#
# Nothing is fetched when the GLN is already in @@all_doctors or has no
# spreadsheet info. Otherwise four requests are sent in a fixed order
# (search page, search count, search data with paging only, search data with
# name and paging), the internal id is taken from the last response and the
# detail page for that id is parsed. The resulting hash is passed to
# store_doctor and kept in @@all_doctors under the GLN string.
def get_one_doctor(r_loop, gln)
  time_limit = defined?(Minitest) ? 3600 : 120
  r_loop.try_run(gln, time_limit) do # increase timeout from default of 10 seconds. Measured 46 seconds for the first gln
    gln_key = gln.to_s
    if @@all_doctors[gln_key]
      Medreg.log "ERROR: Skip search GLN #{gln} as already found"
      next
    end
    info = @info_to_gln[gln_key]
    unless info
      Medreg.log "ERROR: could not find info for GLN #{gln}"
      next
    end

    # Fresh arrays are built for every request so no request shares data with another.
    search_fields = lambda do
      [
        ['Name', info.family_name],
        ['Vorname', info.first_name],
        ['Gln', gln.to_s],
        ['AutomatischeSuche', 'True'],
      ]
    end
    paging_fields = lambda do
      [
        ['currentpage', '1'],
        ['pagesize', '10'],
        ['sortfield', ''],
        ['sortorder', 'Ascending'],
        ['pageraction', ''],
        ['filter', ''],
      ]
    end

    url = MedRegOmURL + "de/Suche/Detail/?gln=#{gln}&vorname=#{info.first_name.gsub(/ /, '+')}&name=#{info.family_name.gsub(/ /, '+')}"
    # The responses of the first three requests are not inspected.
    @agent.get(url)
    @agent.post(MedRegOmURL + 'Suche/GetSearchCount', search_fields.call)
    @agent.post(MedRegOmURL + 'Suche/GetSearchData', paging_fields.call)
    result_page = @agent.post(MedRegOmURL + 'Suche/GetSearchData', search_fields.call + paging_fields.call)

    id_match = result_page.body.match(/id"\:(\d\d+)/i)
    unless id_match
      File.open(File.join(LOG_PATH, 'page_4.body'), 'w+') { |f| f.write result_page.body }
      Medreg.log "ERROR: Could not find an gln #{gln} via url #{url}"
      next
    end

    detail_page = @agent.get(MedRegOmURL + "de/Detail/Detail?pid=#{id_match[1]}")
    File.open(File.join(LOG_PATH, "#{gln}.html"), 'w+') { |f| f.write detail_page.content } if DebugImport
    doc_hash = parse_details(Nokogiri::HTML(detail_page.content), gln, info)
    store_doctor(doc_hash)
    @@all_doctors[gln_key] = doc_hash
  end
end
221
# Fetches the details for every GLN in +glns+ and returns r_loop.finished.
#
# GLNs for which the ResilientLoop reports must_skip? are counted in
# @persons_skipped. Every other GLN is fetched via get_one_doctor and counted
# in @persons_created. A failing fetch is logged with the class and message
# of the actual error, followed by a 10 minute pause, and is attempted up to
# 100 times; after that a RuntimeError aborts the import. When MiniTest is
# defined errors are re-raised immediately.
#
# NOTE(review): the require for ResilientLoop is commented out at the top of
# this file — confirm the class is loaded elsewhere.
def get_detail_to_glns(glns)
  max_retries = 100
  @idx = 0
  r_loop = ResilientLoop.new(File.basename(__FILE__, '.rb'))
  @skip_to_doctor ||= r_loop.state_id
  Medreg.log "get_detail_to_glns #{glns.size}. first 10 are #{glns[0..9]} state_id is #{r_loop.state_id.inspect}" if DebugImport
  glns.each do |gln|
    if r_loop.must_skip?(gln.to_s)
      Medreg.log "Skipping #{gln.inspect}. Waiting for #{r_loop.state_id.inspect}" if DebugImport
      @persons_skipped += 1
      next
    end
    @idx += 1
    nr_tries = 0
    while nr_tries < max_retries
      begin
        Medreg.log "Searching for doctor with GLN #{gln}. Skipped #{@persons_skipped}, created #{@persons_created} updated #{@persons_updated} of #{glns.size}).#{nr_tries > 0 ? ' nr_tries is ' + nr_tries.to_s : ''}"
        get_one_doctor(r_loop, gln)
        break
      rescue StandardError => e
        # One handler for all errors (response-code and timeout errors are
        # StandardErrors too); the log names the class that was really raised.
        raise e if defined?(MiniTest)
        nr_tries += 1
        Medreg.log "rescue #{e.class} #{gln.inspect}. nr_tries #{nr_tries} error was #{e}"
        sleep(10 * 60) # wait 10 minutes till medreg server is back again
      end
    end
    raise "Max retries #{nr_tries} for #{gln.to_s} reached. Aborting import" if nr_tries == max_retries
    @persons_created += 1
    if (@persons_created + @persons_updated) % 100 == 99
      Medreg.log "Start saving after #{@persons_created} created #{@persons_updated} updated"
    end
  end
  r_loop.finished
end
260
# Extracts the addresses listed on a detail page.
#
# info:: spreadsheet data of the person; +gln+ is used to verify the page,
#        +authority+ becomes the canton of every address
# doc::  parsed detail page (responds to xpath)
#
# Returns [] (after logging and dumping the page text to doc_div.txt in
# LOG_PATH) when the number found after "Nationalität: ..." differs from
# info.gln. Otherwise returns one Address2 per '//ol/li/div' element, all of
# type 'at_praxis': the first child text is the name, "Telefon: "/"Fax: "
# lines fill fon/fax (dashes become blanks), a line containing a 4-digit
# number followed by a word is the location, other non-empty lines are
# additional lines.
#
# Phone/fax lines without a number and address elements with fewer than two
# children are tolerated instead of raising NoMethodError.
def get_detail_info(info, doc)
  text = doc.xpath('//div').text
  m = text.match(/Nationalität:\s*([Ö\w+])[^:]+:\s+(\d+)/) # Special case Österreich
  unless m and m[2] == info.gln.to_s
    File.open(File.join(LOG_PATH, 'doc_div.txt'), 'w+') { |f| f.write text }
    Medreg.log "ERROR: Id in text does not match #{info.gln } match was #{m.inspect}"
    return []
  end
  doc.xpath('//ol/li/div').map do |div|
    lines = div.children.map { |child| child.text }
    address = Address2.new
    address.fon = []
    address.fax = []
    address.type = 'at_praxis'
    address.additional_lines = []
    address.canton = info.authority
    address.name = lines[0]
    # drop a leading one-letter prefix such as "A. " from the second line
    lines[1] = lines[1].sub(/^[A-Z]\. /, '') if lines[1]
    (lines[1..-1] || []).each do |line|
      if /^Telefon: /.match(line)
        number = line.split('Telefon: ')[1]
        address.fon << number.gsub(/\-/, ' ') if number
      elsif /^Fax: /.match(line)
        number = line.split('Fax: ')[1]
        address.fax << number.gsub(/\-/, ' ') if number
      elsif line.length == 0
        next
      elsif line.match(/(|\w\w[-\. ])(\d{4})\s+(\S+)/)
        address.location = line
      else
        address.additional_lines << line
      end
    end
    address
  end
end
302
# Returns the path of today's spreadsheet ("persons_YYYY.MM.DD.xlsx" in
# @archive), downloading it from MedRegPerson_XLS_URL when it does not exist.
#
# When the download fails with Net::HTTP::Persistent::Error or
# Timeout::Error, the most recently modified "persons_<year>*.xlsx" file in
# @archive is copied to today's name instead; if there is none the error is
# re-raised.
#
# The spreadsheet is binary data, so it is read and written in binary mode.
def get_latest_file
  agent = Mechanize.new
  target = File.join @archive, Time.now.strftime("persons_%Y.%m.%d.xlsx")
  save_for_log "get_latest_file target #{target} #{File.exist?(target)} from URL #{MedRegPerson_XLS_URL}"
  return target if File.exist?(target)
  @download = nil
  begin
    @download = agent.get(MedRegPerson_XLS_URL).body
  rescue Net::HTTP::Persistent::Error, Timeout::Error => e
    Medreg.log "Catched error #{e}"
    search_name = File.join @archive, Time.now.strftime("persons_%Y*.xlsx")
    candidates = Dir.glob(search_name)
    if candidates.size == 0
      save_for_log "getting file from MedRegPerson_XLS_URL failed. Could not find any prior downloads via #{search_name}"
      raise e
    end
    best = candidates.max_by { |f| File.mtime(f) }
    save_for_log "getting file from MedRegPerson_XLS_URL failed. Using #{best} #{File.mtime(best)} #{File.size(best)} bytes"
    @download = File.open(best, 'rb') { |f| f.read }
  end
  File.open(target, 'wb') { |f| f.write @download }
  @download = nil # release it
  target
end
328
# Returns a multi-line text summarising the counters of this run: skipped
# (with the doctor that was waited for, if any), new, updated and deleted.
def report
  waited_for = @skip_to_doctor ? '. Waited for ' + @skip_to_doctor.to_s : ''
  [
    "Persons update \n",
    "Skipped doctors: #{@persons_skipped}#{waited_for}",
    "New doctors: #{@persons_created}",
    "Updated doctors: #{@persons_updated}",
    "Deleted doctors: #{@persons_deleted}",
  ].map { |entry| entry + "\n" }.join
end
336
# Normalises the attributes of one parsed doctor.
#
# hash:: attribute hash as produced by parse_details, or nil
#
# Returns nil when +hash+ is nil, otherwise the list of attribute keys that
# were examined. For every key with a truthy value the value is copied into
# a local hash; :praxis is turned into a boolean (value == 'Ja') and a
# String under :specialities / :capabilities is wrapped in an array.
#
# NOTE(review): neither the Person created here nor the assembled hash is
# stored or returned by the visible code — confirm whether persisting them
# is still intended.
def store_doctor(hash)
  return unless hash
  doctor = Person.new
  doctor.ean13 = hash[:ean13]
  list_keys = [:specialities, :capabilities]
  wanted = [
    :ean13,
    # :exam,
    :email,
    :firstname,
    :language,
    :name,
    :praxis,
    :salutation,
    :specialities,
    :capabilities,
    :title,
    :addresses,
    :may_dispense_narcotics,
    :may_sell_drugs,
    :remark_sell_drugs,
  ]
  doc_hash = {}
  wanted.each do |key|
    value = hash[key]
    next unless value
    if key == :praxis
      value = (value == 'Ja')
    elsif list_keys.include?(key) && value.is_a?(String)
      value = [value]
    end
    doc_hash.store(key, value)
  end
end
377
# Reads the spreadsheet at +path+ and fills @info_to_gln with one PersonInfo
# per data row, keyed by the value of the GLN cell.
#
# Rows that are nil or have no GLN cell are ignored; the first remaining row
# is treated as the header and skipped. Missing cells yield nil fields.
#
# Returns (and stores in @glns_to_import) the sorted, unique list of GLNs.
def parse_xls(path)
  Medreg.log "parsing #{path}"
  workbook = RubyXL::Parser.parse(path)
  fields = [:gln, :family_name, :first_name, :authority, :diploma,
            :may_dispense_narcotics, :may_sell_drugs, :remark_sell_drugs]
  rows = 0
  workbook[0].each do |row|
    next unless row && row[COL[:gln]]
    rows += 1
    next unless rows > 1 # the first populated row holds the column titles
    info = PersonInfo.new
    # Struct#[]= assigns by member name; no string eval needed
    fields.each do |field|
      cell = row[COL[field]]
      info[field] = cell ? cell.value : nil
    end
    @info_to_gln[row[COL[:gln]].value] = info
  end
  @glns_to_import = @info_to_gln.keys.sort.uniq
end
397
+
398
# Debugging aid for unit tests: a reader on Person that returns the
# @@all_doctors hash collected by the importer.
def Person.all_doctors
  @@all_doctors
end
402
private

# Splits a qualification line into "<text> <number> <word>"; the word may
# contain Ö so that "Österreich" is accepted.
Match_qualification_with_austria = /(.*)\s+(\d+)\s+([Ö\w]+)/

# Converts one table line into "<title>, <number>, <word>".
#
# line:: text of a table row
# gln::  GLN of the person, only used in the log message
#
# Returns nil for heading lines (Weiterbildungstitel, Weitere
# Qualifikationen, Beruf ... Jahr ... Land) and, after logging a problem,
# for lines that do not match Match_qualification_with_austria.
def string_to_qualification(line, gln)
  return nil if line.match(/Weiterbildungstitel|Weitere Qualifikationen|Beruf.*Jahr.*Land/im)
  found = line.match(Match_qualification_with_austria)
  unless found
    Medreg.log "PROBLEM: could not find speciality for GLN #{gln} in line '#{line}'"
    return nil
  end
  joined = found.captures.join(',')
  cleaned = joined.delete("\r").gsub(/\s\s+/, ' ').strip
  cleaned.split(/ ,|,/).join(', ')
end
416
+ end
417
+ end