medreg 0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,22 @@
1
+ h1. medreg
2
+
3
+ * https://github.com/zdavatz/medreg
4
+
5
+ h2. DESCRIPTION
6
+
7
+ Create yaml file for all
8
+ * companies in the health care sector from https://www.medregbm.admin.ch/
9
+ * medical persons from http://www.medregom.admin.ch/
10
+
11
+ h2. INSTALL:
12
+
13
+ * gem install medreg
14
+
15
+ h2. DEVELOPERS:
16
+
17
+ * Zeno R.R. Davatz
18
+ * Niklaus Giger
19
+
20
+ h2. LICENSE:
21
+
22
+ * GPLv3
@@ -0,0 +1,30 @@
1
+ #!/usr/bin/env ruby
2
+ # encoding: utf-8
3
# Put the gem's own lib/ directory on the load path (once) so that
# medreg/version is picked up from the source tree.
lib_dir = File.expand_path('../lib', __FILE__)
$LOAD_PATH.unshift(lib_dir) if !$LOAD_PATH.include?(lib_dir)
require 'medreg/version'
require "bundler/gem_tasks"
require 'rake/testtask'

# dependencies are declared in medreg.gemspec

# `rake gem` depends on bundler's :build task and invokes it again in its body.
desc 'Offer a gem task like hoe'
task(:gem => :build) { Rake::Task[:build].invoke }

# The default task simply runs the test task.
desc "Run tests"
task :default => :test

# Runs test/suite.rb through bash with pipefail set, so that the exit status
# is the one of the ruby process and not the one of `tee`. The combined
# stdout/stderr is shown on the console and copied into suite.log.
desc 'Run medreg tests'
task :test do
  suite_log = 'suite.log'
  command   = "bash -c 'set -o pipefail && bundle exec ruby test/suite.rb 2>&1 | tee #{suite_log}'"
  passed    = system(command)
  puts "Running test/suite.rb returned #{passed.inspect}. Output was redirected to #{suite_log}"
  exit 1 if !passed
end

# `rake clean` removes the built gem files.
require 'rake/clean'
CLEAN.include FileList['pkg/*.gem']
29
+
30
+ # vim: syntax=ruby
@@ -0,0 +1,44 @@
1
+ #!/usr/bin/env ruby
2
+
3
+ require 'pathname'
4
+ root = Pathname.new(__FILE__).realpath.parent.parent
5
+ $:.unshift root.join('lib') if $0 == __FILE__
6
+
7
+ require 'optparse'
8
+ require "date"
9
+ require 'medreg'
10
+ require 'medreg/version'
11
+
12
# Returns the usage text shown for -h/--help and on invalid options.
# The first line carries the invoked program name ($0) and the gem version;
# the text ends with a newline.
def help
  usage_lines = [
    "#{$0} ver.#{Medreg::VERSION}",
    "Usage:",
    "#{File.basename(__FILE__)} [persons | companies]",
    "Create yaml and csv files under data for all companies/persons known to the",
    "swiss health authorities. If no parameter given, fetches companies and persons.",
  ]
  usage_lines.join("\n") + "\n"
end
21
+
22
# Command line handling: --swiss_only is recorded in `options`,
# -h/--help prints the usage text and exits.
options = {}
cli = OptionParser.new
cli.on('--swiss_only') { |_value| options[:swiss_only] = true }
cli.on_tail('-h', '--help') do
  puts help
  exit
end

# Parse a copy of ARGV so that the original argument list stays untouched.
# Any option error prints the usage text and exits with status 1.
remaining = ARGV.dup
begin
  cli.parse!(remaining)
rescue OptionParser::MissingArgument, OptionParser::InvalidArgument, OptionParser::InvalidOption
  puts help
  exit 1
end

# The first non-option argument is handed to Medreg.run
# (nil when no argument was given).
begin
  Medreg.run(remaining.first)
rescue Interrupt
  puts "Unterbrochen. Breche mit Fehler ab"
  exit 1
end

puts "#{__FILE__} completed successfully" if $VERBOSE
@@ -0,0 +1,3 @@
1
+ #!/usr/bin/env ruby
2
+
3
+ require 'medreg/medreg'
@@ -0,0 +1,67 @@
1
+ #!/usr/bin/env ruby
2
+ # encoding: utf-8
3
+
4
module Medreg
  # Postal address with optional title, extra lines, phone and fax numbers
  # (copied from oddb). Street, house number, postal code and city are not
  # stored separately but extracted from #address and #location on demand.
  class Address2
    # Matches the non-numeric part of a location string such as "8000 Zurich".
    @@city_pattern = /[^0-9]+[^0-9\-](?!-)([0-9]+)?/u
    attr_accessor :name, :additional_lines, :address, :location, :title, :fon, :fax, :canton, :type
    alias :address_type :type

    # Starts with empty lists for additional lines, phone and fax numbers;
    # all other attributes are nil.
    def initialize
      super
      @additional_lines = []
      @fon = []
      @fax = []
    end

    # @return [String, nil] the city part of #location, or nil when
    #   #location contains no match for the city pattern
    def city
      match = @@city_pattern.match(@location.to_s)
      match.to_s.strip if match
    end

    # @return [Array] all non-empty address lines, preceded by the title
    #   when one is set
    def lines
      result = lines_without_title
      result.unshift(@title) unless @title.to_s.empty?
      result
    end

    # @return [Array] name, additional lines, address and location (with
    #   canton), leaving out every entry whose string form is empty
    def lines_without_title
      ([@name] + @additional_lines + [@address, location_canton]).delete_if { |line|
        line.to_s.empty?
      }
    end

    # @return [String, nil] "<location> (<canton>)" when both are set,
    #   otherwise #location unchanged
    def location_canton
      if @canton && @location
        @location + " (#{@canton})"
      else
        @location
      end
    end

    # @return [String, nil] the house number: the first token of #address
    #   starting with a digit, else the last word of the last additional line
    def number
      if (match = /[0-9][^\s,]*/u.match(@address.to_s))
        match.to_s.strip
      elsif @additional_lines[-1]
        @additional_lines[-1].split(/\s/)[-1]
      end
    end

    # @return [String, nil] the first four digit group of #location that
    #   does not start with 0
    def plz
      match = /[1-9][0-9]{3}/u.match(@location.to_s)
      match.to_s if match
    end

    # @return [String, nil] the leading part of #address that contains
    #   neither digits nor commas, else the first word of the first
    #   additional line
    def street
      if (match = /[^0-9,]+/u.match(@address.to_s))
        match.to_s.strip
      elsif @additional_lines[-1]
        # The guard looks at the last line while the value comes from the
        # first one; kept exactly as inherited from oddb.
        @additional_lines[0].split(/\s/)[0]
      end
    end

    # Orders addresses by their #lines.
    # @return [Integer, nil] nil when +other+ does not offer #lines, as the
    #   <=> convention asks for incomparable operands
    def <=>(other)
      return nil unless other.respond_to?(:lines)
      lines <=> other.lines
    end
  end
end
@@ -0,0 +1,38 @@
1
+ #!/usr/bin/env ruby
2
+ # encoding: utf-8
3
+ # Business area for companies
4
+ require 'set'
5
+ require 'medreg'
6
+
7
module Medreg
  # Names of the business areas a company can belong to.
  class BA_type
    include Enumerable

    BA_cantonal_authority = 'ba_cantonal_authority'
    BA_doctor             = 'ba_doctor'
    BA_health             = 'ba_health'
    BA_hospital           = 'ba_hospital'
    BA_hospital_pharmacy  = 'ba_hospital_pharmacy'
    BA_info               = 'ba_info'
    BA_insurance          = 'ba_insurance'
    BA_pharma             = 'ba_pharma'
    BA_public_pharmacy    = 'ba_public_pharmacy'
    BA_research_institute = 'ba_research_institute'

    # Class level accessor for the set of all business areas (Medreg::BA_types).
    def self.collect
      BA_types
    end
  end

  # Every known business area plus nil (no business area assigned).
  BA_types = Set.new([
    nil,
    BA_type::BA_cantonal_authority,
    BA_type::BA_doctor,
    BA_type::BA_health,
    BA_type::BA_hospital,
    BA_type::BA_hospital_pharmacy,
    BA_type::BA_info,
    BA_type::BA_insurance,
    BA_type::BA_pharma,
    BA_type::BA_public_pharmacy,
    BA_type::BA_research_institute,
  ])
end
@@ -0,0 +1,33 @@
1
+ #!/usr/bin/env ruby
2
+ # encoding: utf-8
3
+
4
+ require 'medreg/address'
5
+ require 'medreg/ba_type'
6
module Medreg
  # Plain data holder for a company. A new company starts with one empty
  # Address2 in #addresses; every other attribute is nil until assigned.
  class Company
    attr_accessor :address_email, :business_area, :business_unit, :cl_status,
                  :competition_email, :complementary_type, :contact, :deductible_display,
                  :disable_patinfo, :ean13, :generic_type, :addresses,
                  :invoice_htmlinfos, :logo_filename, :lookandfeel_member_count, :name,
                  :powerlink, :regulatory_email, :swissmedic_email, :swissmedic_salutation,
                  :url, :ydim_id, :limit_invoice_duration, :force_new_ydim_debitor,
                  :narcotics
    attr_reader :disabled_invoices

    # Alternative names for some of the accessors above.
    alias_method :fullname, :name
    alias_method :power_link=, :powerlink=
    alias_method :power_link, :powerlink
    alias_method :to_s, :name
    alias_method :email, :address_email

    def initialize
      @addresses = [Address2.new]
    end

    # @return [Boolean] true when the business area is a public or a
    #   hospital pharmacy
    def is_pharmacy?
      pharmacy_areas = [BA_type::BA_public_pharmacy, BA_type::BA_hospital_pharmacy]
      pharmacy_areas.include?(@business_area)
    end
  end
end
@@ -0,0 +1,262 @@
1
+ #!/usr/bin/env ruby
2
+ # encoding: utf-8
3
+
4
+ require 'medreg/address'
5
+ require 'medreg/ba_type'
6
+ require 'medreg/company'
7
+ require 'medreg/resilient_loop'
8
+ require 'rubyXL'
9
+ require 'mechanize'
10
+ require 'logger'
11
+ require 'cgi'
12
+ require 'psych' if RUBY_VERSION.match(/^1\.9/)
13
+ require "yaml"
14
+
15
module Medreg
  DebugImport = defined?(MiniTest)
  BetriebeURL = 'https://www.medregbm.admin.ch/Betrieb/Search'
  BetriebeXLS_URL = "https://www.medregbm.admin.ch/Publikation/CreateExcelListBetriebs"
  RegExpBetriebDetail = /\/Betrieb\/Details\//
  # Both file names carry the date on which this file was loaded.
  Companies_curr = File.expand_path(File.join(__FILE__, "../../../data/companies_#{Time.now.strftime('%Y.%m.%d')}.xlsx"))
  Companies_YAML = File.expand_path(File.join(__FILE__, "../../../data/companies_#{Time.now.strftime('%Y.%m.%d')}.yaml"))
  # MedRegURL = 'http://www.medreg.admin.ch/'
  CompanyInfo = Struct.new("CompanyInfo",
                           :gln,
                           :exam,
                           :address,
                           :name_1,
                           :name_2,
                           :addresses,
                           :plz,
                           :canton_giving_permit,
                           :country,
                           :company_type,
                           :drug_permit,
                          )
  # GLN Person Name Vorname PLZ Ort Bewilligungskanton Land Diplom BTM Berechtigung Bewilligung Selbstdispensation Bemerkung Selbstdispensation

  # Zero based column index of each field in the downloaded spreadsheet.
  COMPANY_COL = {
    :gln                  => 0, # A
    :name_1               => 1, # B
    :name_2               => 2, # C
    :street               => 3, # D
    :street_number        => 4, # E
    :plz                  => 5, # F
    :locality             => 6, # G
    :canton_giving_permit => 7, # H
    :country              => 8, # I
    :company_type         => 9, # J
    :drug_permit          => 10, # K
  }

  # Downloads the spreadsheet of all companies, looks up the detail page of
  # each GLN and writes the collected companies to Companies_YAML.
  class CompanyImporter
    RECIPIENTS = []
    # Message of the RuntimeError raised when the site reports a search timeout.
    Search_failure = 'search_took_to_long'

    # Logs +msg+ via Medreg.log and keeps a time stamped copy in @@logInfo.
    def save_for_log(msg)
      Medreg.log(msg)
      with_time_stamp = "#{Time.now.strftime('%Y-%m-%d %H:%M:%S')}: #{msg}"
      @@logInfo << with_time_stamp
    end

    # @param glns_to_import [Array] GLNs to restrict the import to; entries
    #   of size 0 are dropped. When empty, #update imports every GLN found
    #   in the spreadsheet.
    #
    # Resets the class level log and company list and leaves an empty
    # Companies_YAML behind.
    def initialize(glns_to_import = [])
      @glns_to_import = glns_to_import.clone
      @glns_to_import.delete_if { |item| item.size == 0 }
      @info_to_gln = {}
      @@logInfo = []
      # rm_f tolerates a missing file, so no existence check is needed
      # (File.exists? is gone since Ruby 3.2).
      FileUtils.rm_f(Companies_YAML)
      # Create the empty file without keeping a handle open; #update
      # reopens the file itself when it writes the result.
      File.open(Companies_YAML, 'w+') { |_file| }
      @companies_created = 0
      @companies_updated = 0
      @companies_skipped = 0
      @companies_deleted = 0
      @archive = ARCHIVE_PATH
      @@all_companies = []
      setup_default_agent
    end

    # Runs the import.
    # @return [Array] the counters created, updated, deleted, skipped
    #
    # The companies collected so far are written to Companies_YAML even
    # when the import is aborted by an exception.
    def update
      requested = @glns_to_import.clone
      latest = get_latest_file
      save_for_log "parse_xls #{latest} specified GLN ids #{requested.inspect}"
      # parse_xls replaces @glns_to_import by all GLNs of the spreadsheet
      parse_xls(latest)
      get_detail_to_glns(requested.size > 0 ? requested : @glns_to_import)
      return @companies_created, @companies_updated, @companies_deleted, @companies_skipped
    ensure
      File.open(Companies_YAML, 'w+') { |f| f.write(@@all_companies.to_yaml) }
      save_for_log "Saved #{@@all_companies.size} companies in #{Companies_YAML}"
    end

    # Creates the Mechanize agent used for the detail look-ups.
    # @return [Mechanize]
    def setup_default_agent
      @agent = Mechanize.new
      @agent.user_agent = 'Mozilla/5.0 (X11; Linux x86_64; rv:31.0) Gecko/20100101 Firefox/31.0 Iceweasel/31.1.0'
      @agent.redirect_ok = :all
      @agent.follow_meta_refresh_self = true
      @agent.redirection_limit = 55
      @agent.follow_meta_refresh = true
      @agent.ignore_bad_chunking = true
      @agent
    end

    # Extracts the company data from a detail page.
    # @param html the detail page; must answer #at with nodes answering #text
    # @param gln the GLN that was searched for
    # @return [Hash, nil] the company data, or nil when the page shows
    #   another GLN than the one searched for
    def parse_details(html, gln)
      left = html.at('div[class="colLeft"]').text
      btm = html.at('div[class="twoColSpan"]').text
      infos = left.split(/\r\n\s*/)
      unless infos[2].eql?(gln.to_s)
        Medreg.log "Mismatch between searched gln #{gln} and details #{infos[2]}"
        return nil
      end
      idx_plz = infos.index("PLZ \\ Ort")
      idx_typ = infos.index('Betriebstyp')
      company = {}
      company[:ean13] = gln.to_s.clone
      company[:name] = infos[4]
      company[:plz] = infos[idx_plz + 1]
      company[:location] = infos[idx_plz + 2]
      # everything between the name block and the "PLZ \ Ort" label
      company[:address] = infos[6..idx_plz - 1].join(' ')
      company[:ba_type] = infos[idx_typ + 1]
      company[:narcotics] = btm.split(/\r\n\s*/)[-1]
      update_address(company)
      Medreg.log company if $VERBOSE
      company
    end

    # Looks up the detail page for each GLN and collects the companies found
    # in @@all_companies.
    # @param glns [Array] the GLNs to search for
    #
    # A search which times out is retried up to three times per GLN. GLNs
    # without a detail link, or whose detail page shows another GLN, are
    # counted as skipped.
    def get_detail_to_glns(glns)
      r_loop = ResilientLoop.new(File.basename(__FILE__, '.rb'))
      failure = 'Die Personensuche dauerte zu lange'
      max_retries = 3
      Medreg.log "get_detail_to_glns for #{glns.size} glns. first 10 are #{glns[0..9]} state_id is #{r_loop.state_id.inspect}" if DebugImport
      glns.each { |gln|
        if r_loop.must_skip?(gln)
          Medreg.log "Skipping #{gln}. Waiting for #{r_loop.state_id.inspect}" if DebugImport
          next
        end
        nr_tries = 0
        success = false
        while nr_tries < max_retries && !success
          begin
            r_loop.try_run(gln, defined?(Minitest) ? 500 : 5 ) do
              Medreg.log "Searching for company with GLN #{gln}. Skipped #{@companies_skipped}, created #{@companies_created} updated #{@companies_updated} of #{glns.size}).#{nr_tries > 0 ? ' nr_tries is ' + nr_tries.to_s : ''}"
              page_1 = @agent.get(BetriebeURL)
              raise Search_failure if page_1.content.match(failure)
              search_params = [
                ['Betriebsname', ''],
                ['Plz', ''],
                ['Ort', ''],
                ['GlnBetrieb', gln.to_s],
                ['BetriebsCodeId', '0'],
                ['KantonsCodeId', '0'],
              ]
              res_2 = @agent.post(BetriebeURL, search_params)
              detail_link = res_2.link(:href => RegExpBetriebDetail)
              if detail_link
                page_3 = detail_link.click
                raise Search_failure if page_3.content.match(failure)
                company = parse_details(page_3, gln)
                if company
                  store_company(company)
                  @@all_companies << company
                else
                  # parse_details already logged the GLN mismatch
                  @companies_skipped += 1
                end
              else
                Medreg.log "could not find gln #{gln}"
                @companies_skipped += 1
              end
              success = true
            end
          rescue Timeout::Error => e
            # Timeout itself is a module which Timeout::Error does not
            # include, so the error class has to be named here.
            nr_tries += max_retries if defined?(MiniTest) # no retries when running under test
            Medreg.log "rescue #{e} will retry #{max_retries - nr_tries} times"
            nr_tries += 1
            sleep defined?(MiniTest) ? 0.01 : 60
          end
          if (@companies_created + @companies_updated) % 100 == 99
            Medreg.log "Start saving #{gln} after #{@companies_created} created #{@companies_updated} updated"
          end
        end
      }
      r_loop.finished
    ensure
      Medreg.log "Start saving"
      Medreg.log "Finished"
    end

    # Downloads today's spreadsheet unless it is already present.
    # @return [String] path of the spreadsheet (Companies_curr)
    def get_latest_file
      target = Companies_curr
      return target if File.exist?(target)
      file = Mechanize.new.get(BetriebeXLS_URL)
      # binary mode: the spreadsheet must be stored unaltered on every platform
      File.open(target, 'wb') { |f| f.write file.body }
      save_for_log "saved #{file.body.size} bytes as #{target}"
      target
    end

    # @return [String] a summary of the created, updated and deleted counters
    def report
      report = "Companies update \n\n"
      report << "New companies: " << @companies_created.to_s << "\n"
      report << "Updated companies: " << @companies_updated.to_s << "\n"
      report << "Deleted companies: " << @companies_deleted.to_s << "\n"
      report
    end

    # Builds an Address2 from +data+ and stores it as the only entry of
    # data[:addresses].
    # @param data [Hash] reads :name, :address, :plz, :location, :phone and :fax
    # @return [Array<Address2>] the new value of data[:addresses]
    def update_address(data)
      addr = Address2.new
      addr.name = data[:name]
      addr.address = data[:address]
      # addr.additional_lines = [data[:address] ]
      addr.location = [data[:plz], data[:location]].compact.join(' ')
      if (fon = data[:phone])
        addr.fon = [fon]
      end
      if (fax = data[:fax])
        addr.fax = [fax]
      end
      data[:addresses] = [addr]
    end

    # Counts +data+ as a created company and fills a Company from it.
    # The Company is neither returned nor kept by this method.
    # @param data [Hash] reads :ba_type, :ean13, :name, :narcotics and :addresses
    def store_company(data)
      @companies_created += 1
      company = Company.new
      ba_type = case data[:ba_type]
                when /kantonale Beh/i
                  Medreg::BA_type::BA_cantonal_authority
                when /ffentliche Apotheke/i
                  Medreg::BA_type::BA_public_pharmacy
                when /Spitalapotheke/i
                  Medreg::BA_type::BA_hospital_pharmacy
                when /wissenschaftliches Institut/i
                  Medreg::BA_type::BA_research_institute
                else
                  'unknown'
                end
      company.ean13 = data[:ean13]
      company.name = data[:name]
      company.business_area = ba_type
      company.narcotics = data[:narcotics]
      company.addresses = data[:addresses]
      Medreg.log "store_company updated #{data[:ean13]} database. ba_type #{ba_type}." if $VERBOSE
    end

    # Reads the first sheet of the spreadsheet at +path+ into @info_to_gln
    # and replaces @glns_to_import by the sorted keys found.
    # @param path [String] path of the xlsx file
    # @return [Array] the new @glns_to_import
    def parse_xls(path)
      Medreg.log "parsing #{path}"
      workbook = RubyXL::Parser.parse(path)
      fields = [:gln, :name_1, :name_2, :plz, :canton_giving_permit, :country, :company_type, :drug_permit]
      rows = 0
      workbook[0].each do |row|
        next unless row && (row[COMPANY_COL[:gln]] || row[COMPANY_COL[:name_1]])
        rows += 1
        next unless rows > 1 # the first row with content is not imported
        info = CompanyInfo.new
        fields.each do |field|
          cell = row[COMPANY_COL[field]]
          info[field] = cell ? cell.value : nil
        end
        # rows without a GLN are filed under their first name column
        gln_cell = row[COMPANY_COL[:gln]]
        key = gln_cell ? gln_cell.value : row[COMPANY_COL[:name_1]].value
        @info_to_gln[key] = info
      end
      @glns_to_import = @info_to_gln.keys.sort.uniq
    end

    # NOTE(review): this defines all_companies on Company, not on
    # CompanyImporter, although it returns the importer's list.
    # Kept because callers may rely on it - confirm.
    def Company.all_companies
      @@all_companies
    end
  end
end