medreg 0.1

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,22 @@
1
+ h1. medreg
2
+
3
+ * https://github.com/zdavatz/medreg
4
+
5
+ h2. DESCRIPTION
6
+
7
+ Create yaml file for all
8
+ * companies in the health care sector from https://www.medregbm.admin.ch/
9
+ * medical persons from http://www.medregom.admin.ch/
10
+
11
+ h2. INSTALL:
12
+
13
+ * gem install medreg
14
+
15
+ h2. DEVELOPERS:
16
+
17
+ * Zeno R.R. Davatz
18
+ * Niklaus Giger
19
+
20
+ h2. LICENSE:
21
+
22
+ * GPLv3
@@ -0,0 +1,30 @@
1
+ #!/usr/bin/env ruby
2
+ # encoding: utf-8
3
+ lib = File.expand_path('../lib', __FILE__)
4
+ $LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
5
+ require 'medreg/version'
6
+ require "bundler/gem_tasks"
7
+ require 'rake/testtask'
8
+
9
+ # dependencies are declared in medreg.gemspec
10
+
11
# `rake gem` is offered as a second name for the :build task (the task that
# the "bundler/gem_tasks" require above is expected to provide).
desc 'Offer a gem task like hoe'
task :gem => [:build] do
  Rake::Task[:build].invoke
end

# Plain `rake` runs the test suite.
desc "Run tests"
task :default => [:test]

# Runs test/suite.rb through bundler inside bash. `pipefail` makes the
# pipeline report the exit status of the ruby process rather than that of
# `tee`, so a failing suite still fails the task while its output is copied
# into the log file.
desc 'Run medreg tests'
task :test do
  log_file = 'suite.log'
  command = "bash -c 'set -o pipefail && bundle exec ruby test/suite.rb 2>&1 | tee #{log_file}'"
  res = system(command)
  puts "Running test/suite.rb returned #{res.inspect}. Output was redirected to #{log_file}"
  exit(1) unless res
end

# `rake clean` removes the gem packages found under pkg/.
require 'rake/clean'
CLEAN.include(FileList['pkg/*.gem'])
29
+
30
+ # vim: syntax=ruby
@@ -0,0 +1,44 @@
1
+ #!/usr/bin/env ruby
2
+
3
+ require 'pathname'
4
+ root = Pathname.new(__FILE__).realpath.parent.parent
5
+ $:.unshift root.join('lib') if $0 == __FILE__
6
+
7
+ require 'optparse'
8
+ require "date"
9
+ require 'medreg'
10
+ require 'medreg/version'
11
+
12
# Builds the usage text that is printed for -h/--help and whenever the
# command line options cannot be parsed.
#
# @return [String] multi-line usage message terminated by a newline
def help
  usage_lines = [
    "#{$0} ver.#{Medreg::VERSION}",
    "Usage:",
    "#{File.basename(__FILE__)} [persons | companies]",
    "Create yaml and csv files under data for all companies/persons known to the",
    "swiss health authorities. If no parameter given, fetches companies and persons.",
  ]
  usage_lines.join("\n") + "\n"
end
21
+
22
# Command line handling: --swiss_only is accepted and recorded in `opts`
# (nothing further down in this script reads `opts`), -h/--help prints the
# usage text and exits.
opts = {}
parser = OptionParser.new
parser.on('--swiss_only') { |_flag| opts[:swiss_only] = true }
parser.on_tail('-h', '--help') do
  puts help
  exit
end

# Work on a copy so ARGV itself is left untouched; parse! removes the
# recognised options and leaves the positional arguments in `args`.
args = ARGV.dup
option_errors = [
  OptionParser::MissingArgument,
  OptionParser::InvalidArgument,
  OptionParser::InvalidOption,
]
begin
  parser.parse!(args)
rescue *option_errors
  puts help
  exit 1
end

# The first positional argument is handed to Medreg.run; it is nil when no
# argument was given.
begin
  Medreg.run(args.first)
rescue Interrupt
  # German for "Interrupted. Aborting with an error."
  puts "Unterbrochen. Breche mit Fehler ab"
  exit 1
end

puts "#{__FILE__} completed successfully" if $VERBOSE
@@ -0,0 +1,3 @@
1
+ #!/usr/bin/env ruby
2
+
3
+ require 'medreg/medreg'
@@ -0,0 +1,67 @@
1
+ #!/usr/bin/env ruby
2
+ # encoding: utf-8
3
+
4
module Medreg
  # Postal address with a few helpers that pick the city, postal code, street
  # and house number out of the free-text +address+ and +location+ fields.
  # (copied from oddb)
  class Address2
    # Matches the non-numeric part of a location such as "8000 Zuerich".
    @@city_pattern = /[^0-9]+[^0-9\-](?!-)([0-9]+)?/u

    attr_accessor :name, :additional_lines, :address, :location, :title, :fon, :fax, :canton, :type
    alias :address_type :type

    def initialize
      super
      @additional_lines = []
      @fon = []
      @fax = []
    end

    # @return [String, nil] the city part of +location+, or nil when the
    #   location contains no non-numeric text
    def city
      match = @@city_pattern.match(@location.to_s)
      match.to_s.strip if match
    end

    # @return [Array] all non-empty address lines, preceded by the title when
    #   one is set
    def lines
      result = lines_without_title
      result.unshift(@title) unless @title.to_s.empty?
      result
    end

    # @return [Array] name, additional lines, address and location (with
    #   canton), with nil and empty entries removed
    def lines_without_title
      ([@name] + @additional_lines + [@address, location_canton]).delete_if { |line| line.to_s.empty? }
    end

    # @return [String, nil] the location, suffixed with " (CANTON)" when both
    #   a canton and a location are set
    def location_canton
      if @canton && @location
        @location + " (#{@canton})"
      else
        @location
      end
    end

    # @return [String, nil] house number taken from +address+; falls back to
    #   the last word of the last additional line
    def number
      if (match = /[0-9][^\s,]*/u.match(@address.to_s))
        match.to_s.strip
      elsif @additional_lines[-1]
        @additional_lines[-1].split(/\s/)[-1]
      end
    end

    # @return [String, nil] first four digit group (not starting with 0)
    #   found in +location+
    def plz
      match = /[1-9][0-9]{3}/u.match(@location.to_s)
      match.to_s if match
    end

    # @return [String, nil] street name taken from +address+; falls back to
    #   the first word of the first additional line
    def street
      if (match = /[^0-9,]+/u.match(@address.to_s))
        match.to_s.strip
      elsif @additional_lines[0]
        # Guard on the element that is actually read: the previous check on
        # the last element raised NoMethodError when the first entry was nil.
        @additional_lines[0].split(/\s/)[0]
      end
    end

    # Orders addresses by their printable lines.
    #
    # @return [Integer, nil] nil when +other+ cannot provide +lines+, as the
    #   <=> protocol expects for incomparable operands
    def <=>(other)
      return nil unless other.respond_to?(:lines)
      lines <=> other.lines
    end
  end
end
@@ -0,0 +1,38 @@
1
+ #!/usr/bin/env ruby
2
+ # encoding: utf-8
3
+ # Business area for companies
4
+ require 'set'
5
+ require 'medreg'
6
+
7
module Medreg
  # Business areas a company can be assigned to. The string constants are
  # the values stored in Company#business_area.
  class BA_type
    include Enumerable

    BA_cantonal_authority = 'ba_cantonal_authority'
    BA_doctor = 'ba_doctor'
    BA_health = 'ba_health'
    BA_hospital = 'ba_hospital'
    BA_hospital_pharmacy = 'ba_hospital_pharmacy'
    BA_info = 'ba_info'
    BA_insurance = 'ba_insurance'
    BA_pharma = 'ba_pharma'
    BA_public_pharmacy = 'ba_public_pharmacy'
    BA_research_institute = 'ba_research_institute'

    # @return [Set] the set of all known business areas (Medreg::BA_types)
    def BA_type.collect
      BA_types
    end

    # Backs the included Enumerable: without an +each+ every Enumerable
    # method called on an instance raised NoMethodError.
    #
    # @yield [String, nil] each member of Medreg::BA_types
    def each(&block)
      BA_types.each(&block)
    end
  end

  # All valid business areas; nil stands for "no business area assigned".
  BA_types = Set[
    nil,
    BA_type::BA_cantonal_authority,
    BA_type::BA_doctor,
    BA_type::BA_health,
    BA_type::BA_hospital,
    BA_type::BA_hospital_pharmacy,
    BA_type::BA_info,
    BA_type::BA_insurance,
    BA_type::BA_pharma,
    BA_type::BA_public_pharmacy,
    BA_type::BA_research_institute,
  ]
end
@@ -0,0 +1,33 @@
1
+ #!/usr/bin/env ruby
2
+ # encoding: utf-8
3
+
4
+ require 'medreg/address'
5
+ require 'medreg/ba_type'
6
module Medreg
  # Plain data holder for one company. A new company starts with a single
  # empty Address2 in +addresses+.
  class Company
    attr_accessor :address_email, :business_area, :business_unit, :cl_status,
      :competition_email, :complementary_type, :contact, :deductible_display,
      :disable_patinfo, :ean13, :generic_type, :addresses,
      :invoice_htmlinfos, :logo_filename, :lookandfeel_member_count, :name,
      :powerlink, :regulatory_email, :swissmedic_email, :swissmedic_salutation,
      :url, :ydim_id, :limit_invoice_duration, :force_new_ydim_debitor,
      :narcotics
    attr_reader :disabled_invoices

    alias :fullname :name
    alias :power_link= :powerlink=
    alias :power_link :powerlink
    alias :email :address_email

    def initialize
      @addresses = [Address2.new]
    end

    # @return [String] the company name; an empty string when no name is set.
    #   (Previously an alias of +name+, which returned nil for an unnamed
    #   company although callers of to_s expect a String.)
    def to_s
      @name.to_s
    end

    # @return [Boolean] true when the business area is a public or a
    #   hospital pharmacy
    def is_pharmacy?
      case @business_area
      when BA_type::BA_public_pharmacy, BA_type::BA_hospital_pharmacy
        true
      else
        false
      end
    end
  end
end
@@ -0,0 +1,262 @@
1
+ #!/usr/bin/env ruby
2
+ # encoding: utf-8
3
+
4
+ require 'medreg/address'
5
+ require 'medreg/ba_type'
6
+ require 'medreg/company'
7
+ require 'medreg/resilient_loop'
8
+ require 'rubyXL'
9
+ require 'mechanize'
10
+ require 'logger'
11
+ require 'cgi'
12
+ require 'psych' if RUBY_VERSION.match(/^1\.9/)
13
+ require "yaml"
14
+
15
module Medreg
  # Truthy when a Minitest run has loaded this file (either spelling of the
  # constant); enables additional log output.
  DebugImport         = defined?(MiniTest) || defined?(Minitest)
  BetriebeURL         = 'https://www.medregbm.admin.ch/Betrieb/Search'
  BetriebeXLS_URL     = "https://www.medregbm.admin.ch/Publikation/CreateExcelListBetriebs"
  RegExpBetriebDetail = /\/Betrieb\/Details\//
  # Both file names carry the date on which this file was loaded.
  Companies_curr      = File.expand_path(File.join(__FILE__, "../../../data/companies_#{Time.now.strftime('%Y.%m.%d')}.xlsx"))
  Companies_YAML      = File.expand_path(File.join(__FILE__, "../../../data/companies_#{Time.now.strftime('%Y.%m.%d')}.yaml"))
  # MedRegURL = 'http://www.medreg.admin.ch/'

  # One row of the downloaded company spreadsheet.
  CompanyInfo = Struct.new("CompanyInfo",
                           :gln,
                           :exam,
                           :address,
                           :name_1,
                           :name_2,
                           :addresses,
                           :plz,
                           :canton_giving_permit,
                           :country,
                           :company_type,
                           :drug_permit)

  # Zero-based cell index of each field within a spreadsheet row; the
  # trailing letters are the corresponding spreadsheet columns.
  COMPANY_COL = {
    :gln                  => 0, # A
    :name_1               => 1, # B
    :name_2               => 2, # C
    :street               => 3, # D
    :street_number        => 4, # E
    :plz                  => 5, # F
    :locality             => 6, # G
    :canton_giving_permit => 7, # H
    :country              => 8, # I
    :company_type         => 9, # J
    :drug_permit          => 10, # K
  }

  # Downloads the list of companies as a spreadsheet, looks up the detail
  # page of every GLN and writes the collected data to Companies_YAML.
  class CompanyImporter
    RECIPIENTS = []
    # Message of the RuntimeError raised when the site answers with its
    # "search took too long" page.
    Search_failure = 'search_took_to_long'
    # Spreadsheet fields copied into a CompanyInfo by parse_xls.
    XLS_FIELDS = [:gln, :name_1, :name_2, :plz, :canton_giving_permit, :country, :company_type, :drug_permit]

    @@logInfo = []
    @@all_companies = []

    # @return [Array<Hash>] the company hashes collected by the most recently
    #   created importer
    def self.all_companies
      @@all_companies
    end

    # Logs +msg+ and keeps a time-stamped copy in the class-wide log buffer.
    def save_for_log(msg)
      Medreg.log(msg)
      withTimeStamp = "#{Time.now.strftime('%Y-%m-%d %H:%M:%S')}: #{msg}"
      @@logInfo << withTimeStamp
    end

    # @param glns_to_import [Array] GLNs to restrict the import to; entries
    #   of size 0 are ignored. When empty, all GLNs of the spreadsheet are
    #   imported.
    def initialize(glns_to_import = [])
      @glns_to_import = glns_to_import.clone
      @glns_to_import.delete_if { |item| item.size == 0 }
      @info_to_gln = {}
      @@logInfo = []
      # Start with an empty YAML file. The block form closes the handle
      # again; the previous code kept it open for the life of the importer.
      File.delete(Companies_YAML) if File.exist?(Companies_YAML)
      File.open(Companies_YAML, 'w+') { }
      @companies_created = 0
      @companies_updated = 0
      @companies_skipped = 0
      @companies_deleted = 0
      @archive = ARCHIVE_PATH
      @@all_companies = []
      setup_default_agent
    end

    # Runs the complete import. The YAML file is written even when the
    # import is aborted by an exception.
    #
    # @return [Array<Integer>] created, updated, deleted and skipped counters
    def update
      saved = @glns_to_import.clone
      latest = get_latest_file
      save_for_log "parse_xls #{latest} specified GLN ids #{saved.inspect}"
      parse_xls(latest)
      get_detail_to_glns(saved.size > 0 ? saved : @glns_to_import)
      return @companies_created, @companies_updated, @companies_deleted, @companies_skipped
    ensure
      File.open(Companies_YAML, 'w+') { |f| f.write(@@all_companies.to_yaml) }
      save_for_log "Saved #{@@all_companies.size} companies in #{Companies_YAML}"
    end

    # Creates the Mechanize agent used for all requests to the search site.
    #
    # @return [Mechanize]
    def setup_default_agent
      @agent = Mechanize.new
      @agent.user_agent = 'Mozilla/5.0 (X11; Linux x86_64; rv:31.0) Gecko/20100101 Firefox/31.0 Iceweasel/31.1.0'
      @agent.redirect_ok = :all
      @agent.follow_meta_refresh_self = true
      @agent.redirection_limit = 55
      # A preceding assignment of the misspelled :everwhere was overwritten
      # by this one and has been dropped.
      @agent.follow_meta_refresh = true
      @agent.ignore_bad_chunking = true
      @agent
    end

    # Extracts the company data from a detail page.
    #
    # @param html [#at] parsed detail page
    # @param gln the GLN that was searched for
    # @return [Hash, nil] company data including :addresses, or nil when the
    #   page belongs to another GLN or lacks the expected labels
    def parse_details(html, gln)
      left = html.at('div[class="colLeft"]').text
      btm = html.at('div[class="twoColSpan"]').text
      infos = left.split(/\r\n\s*/)
      unless infos[2].eql?(gln.to_s)
        Medreg.log "Mismatch between searched gln #{gln} and details #{infos[2]}"
        return nil
      end
      idx_plz = infos.index("PLZ \\ Ort")
      idx_typ = infos.index('Betriebstyp')
      unless idx_plz && idx_typ
        # Without these labels the index arithmetic below used to fail
        # with NoMethodError on nil.
        Medreg.log "Unexpected layout of details for gln #{gln}: #{infos.inspect}"
        return nil
      end
      company = {}
      company[:ean13] = gln.to_s.clone
      company[:name] = infos[4]
      company[:plz] = infos[idx_plz + 1]
      company[:location] = infos[idx_plz + 2]
      # Everything between the name block and the postal code label.
      company[:address] = (infos[6...idx_plz] || []).join(' ')
      company[:ba_type] = infos[idx_typ + 1]
      company[:narcotics] = btm.split(/\r\n\s*/)[-1]
      update_address(company)
      Medreg.log company if $VERBOSE
      company
    end

    # Looks up every GLN on the search site and stores what is found. Each
    # GLN is tried up to three times when the request times out.
    #
    # @param glns [Array] GLNs to look up
    def get_detail_to_glns(glns)
      r_loop = ResilientLoop.new(File.basename(__FILE__, '.rb'))
      failure = 'Die Personensuche dauerte zu lange'
      max_retries = 3
      Medreg.log "get_detail_to_glns for #{glns.size} glns. first 10 are #{glns[0..9]} state_id is #{r_loop.state_id.inspect}" if DebugImport
      glns.each do |gln|
        if r_loop.must_skip?(gln)
          Medreg.log "Skipping #{gln}. Waiting for #{r_loop.state_id.inspect}" if DebugImport
          next
        end
        nr_tries = 0
        success = false
        while nr_tries < max_retries && !success
          begin
            r_loop.try_run(gln, under_test? ? 500 : 5) do
              Medreg.log "Searching for company with GLN #{gln}. Skipped #{@companies_skipped}, created #{@companies_created} updated #{@companies_updated} of #{glns.size}).#{nr_tries > 0 ? ' nr_tries is ' + nr_tries.to_s : ''}"
              fetch_and_store(gln, failure)
              success = true
            end
          rescue Timeout::Error => e
            # `rescue Timeout` named the module, which Timeout::Error does
            # not include, so this branch was never reached.
            nr_tries += max_retries if under_test? # no retries in tests
            Medreg.log "rescue #{e} will retry #{max_retries - nr_tries} times"
            nr_tries += 1
            sleep(under_test? ? 0.01 : 60)
          end
          if (@companies_created + @companies_updated) % 100 == 99
            Medreg.log "Start saving #{gln} after #{@companies_created} created #{@companies_updated} updated"
          end
        end
      end
      r_loop.finished
    ensure
      Medreg.log "Start saving"
      Medreg.log "Finished"
    end

    # Returns today's spreadsheet, downloading it first when it is not yet
    # present.
    #
    # @return [String] path of the spreadsheet
    def get_latest_file
      target = Companies_curr
      return target if File.exist?(target)
      file = Mechanize.new.get(BetriebeXLS_URL)
      download = file.body
      # Binary mode: the download is an xlsx archive, not text.
      File.open(target, 'wb') { |f| f.write download }
      save_for_log "saved #{download.size} bytes as #{target}"
      target
    end

    # @return [String] human readable summary of the import counters
    def report
      report = "Companies update \n\n"
      report << "New companies: " << @companies_created.to_s << "\n"
      report << "Updated companies: " << @companies_updated.to_s << "\n"
      report << "Deleted companies: " << @companies_deleted.to_s << "\n"
      report
    end

    # Builds an Address2 from +data+ and stores it under data[:addresses].
    #
    # @param data [Hash] reads :name, :address, :plz, :location, :phone, :fax
    # @return [Array<Address2>] the new value of data[:addresses]
    def update_address(data)
      addr = Address2.new
      addr.name    = data[:name]
      addr.address = data[:address]
      # addr.additional_lines = [data[:address] ]
      addr.location = [data[:plz], data[:location]].compact.join(' ')
      if (fon = data[:phone])
        addr.fon = [fon]
      end
      if (fax = data[:fax])
        addr.fax = [fax]
      end
      data[:addresses] = [addr]
    end

    # Builds a Company from the parsed details and counts it as created.
    #
    # @param data [Hash] result of parse_details
    # @return [Company] the populated company (previously discarded)
    def store_company(data)
      @companies_created += 1
      ba_type = case data[:ba_type]
                when /kantonale Beh/i
                  Medreg::BA_type::BA_cantonal_authority
                when /ffentliche Apotheke/i
                  Medreg::BA_type::BA_public_pharmacy
                when /Spitalapotheke/i
                  Medreg::BA_type::BA_hospital_pharmacy
                when /wissenschaftliches Institut/i
                  Medreg::BA_type::BA_research_institute
                else
                  'unknown'
                end
      company = Company.new
      company.ean13         = data[:ean13]
      company.name          = data[:name]
      company.business_area = ba_type
      company.narcotics     = data[:narcotics]
      company.addresses     = data[:addresses]
      Medreg.log "store_company updated #{data[:ean13]} database. ba_type #{ba_type}." if $VERBOSE
      company
    end

    # Reads the spreadsheet and remembers one CompanyInfo per row, keyed by
    # its GLN (or by name_1 when the row has no GLN). The first non-empty
    # row is treated as the header and skipped.
    #
    # @param path [String] path of the xlsx file
    # @return [Array] sorted keys of all rows read
    def parse_xls(path)
      Medreg.log "parsing #{path}"
      workbook = RubyXL::Parser.parse(path)
      header_seen = false
      workbook[0].each do |row|
        next unless row && (row[COMPANY_COL[:gln]] || row[COMPANY_COL[:name_1]])
        unless header_seen
          header_seen = true
          next
        end
        info = CompanyInfo.new
        XLS_FIELDS.each do |field|
          cell = row[COMPANY_COL[field]]
          # Plain Struct assignment replaces the former eval of generated code.
          info[field] = cell ? cell.value : nil
        end
        key_cell = row[COMPANY_COL[:gln]] || row[COMPANY_COL[:name_1]]
        @info_to_gln[key_cell.value] = info
      end
      @glns_to_import = @info_to_gln.keys.sort.uniq
    end

    # Historical accessor on Company, kept for existing callers. It can only
    # be defined when Company has been loaded before this file.
    if defined?(Company)
      def Company.all_companies
        CompanyImporter.all_companies
      end
    end

    private

    # @return [Boolean] whether Minitest is loaded (either constant name)
    def under_test?
      !!(defined?(MiniTest) || defined?(Minitest))
    end

    # Searches the site for one GLN and stores the company when a detail
    # page is found; otherwise the GLN is counted as skipped.
    #
    # @param gln the GLN to search for
    # @param failure [String] text of the site's "search took too long" page
    # @raise [RuntimeError] with Search_failure when that page is returned
    def fetch_and_store(gln, failure)
      page_1 = @agent.get(BetriebeURL)
      raise Search_failure if page_1.content.match(failure)
      form_data = [
        ['Betriebsname', ''],
        ['Plz', ''],
        ['Ort', ''],
        ['GlnBetrieb', gln.to_s],
        ['BetriebsCodeId', '0'],
        ['KantonsCodeId', '0'],
      ]
      res_2 = @agent.post(BetriebeURL, form_data)
      detail_link = res_2.link(:href => RegExpBetriebDetail)
      unless detail_link
        Medreg.log "could not find gln #{gln}"
        @companies_skipped += 1
        return
      end
      page_3 = detail_link.click
      raise Search_failure if page_3.content.match(failure)
      company = parse_details(page_3, gln)
      if company
        store_company(company)
        @@all_companies << company
      else
        # parse_details already logged why the page was rejected; passing
        # nil on to store_company used to raise NoMethodError.
        @companies_skipped += 1
      end
    end
  end
end