gtin2atc 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.gitignore +22 -0
- data/.travis.yml +27 -0
- data/Gemfile +11 -0
- data/Gemfile.lock +115 -0
- data/History.txt +3 -0
- data/LICENSE +675 -0
- data/Rakefile +43 -0
- data/bin/gtin2atc +34 -0
- data/gtin2atc.gemspec +38 -0
- data/lib/gtin2atc/builder.rb +376 -0
- data/lib/gtin2atc/downloader.rb +252 -0
- data/lib/gtin2atc/options.rb +39 -0
- data/lib/gtin2atc/util.rb +47 -0
- data/lib/gtin2atc/version.rb +3 -0
- data/lib/gtin2atc/xml_definitions.rb +250 -0
- data/lib/gtin2atc.rb +9 -0
- data/readme.textile +19 -0
- data/spec/builder_spec.rb +147 -0
- data/spec/data/XMLPublications.zip +0 -0
- data/spec/data/swissindex_Pharma_DE.xml +179 -0
- data/spec/data/swissmedic_package.xlsx +0 -0
- data/spec/data/swissmedic_packages.html +12 -0
- data/spec/spec_helper.rb +69 -0
- metadata +227 -0
data/Rakefile
ADDED
@@ -0,0 +1,43 @@
|
|
1
|
+
#!/usr/bin/env ruby
# encoding: utf-8
#
# Rake tasks for the gtin2atc gem: running the specs, building the gem,
# running test_options.rb end-to-end and cleaning up generated artefacts.
lib = File.expand_path('../lib', __FILE__)
$LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
require 'gtin2atc/version'
require "bundler/gem_tasks"
require 'rspec/core/rake_task'

RSpec::Core::RakeTask.new(:spec)

# dependencies are now declared in gtin2atc.gemspec

desc 'Offer a gem task like hoe'
task gem: :build do
  Rake::Task[:build].invoke
end

# Always start the specs from a cleaned working directory.
task spec: :clean

desc 'Run gtin2atc with all commonly used combinations'
task test: [:clean, :spec, :gem] do
  log_file = 'test_options.log'
  puts "Running test_options.rb with Output redirected to #{log_file}. This will take some time (e.g. 20 minutes)"
  # must use bash -o pipefail to catch error in test_options.rb and not tee
  # see http://stackoverflow.com/questions/985876/tee-and-exit-status
  res = system("bash -c 'set -o pipefail && ./test_options.rb 2>&1 | tee #{log_file}'")
  puts "Running test_options.rb returned #{res.inspect}. Output was redirected to #{log_file}"
  exit(1) unless res
end

require 'rake/clean'

# Files and directories removed by `rake clean`. Each pattern is wrapped in a
# FileList before being added, exactly as in the one-line-per-pattern form.
[
  'pkg/*.gem',
  '*.zip*',
  '*.xls*',
  '*.xml*',
  '*.dat*',
  '*.tar.gz',
  '*.txt.*',
  '*.csv.*',
  '*.zip.*',
  'ruby*.tmp',
  'data/download',
  'duplicate_ean13_from_zur_rose.txt',
].each do |pattern|
  CLEAN.include FileList[pattern]
end
|
data/bin/gtin2atc
ADDED
@@ -0,0 +1,34 @@
|
|
1
|
+
#!/usr/bin/env ruby
# Command-line entry point of gtin2atc.
#
# Parses the options, collects the codes to look up (either directly from the
# command line or, when the single argument names an existing file, one code
# per line of that file) and hands them to Gtin2atc::Builder#run.

require 'pathname'

root = Pathname.new(__FILE__).realpath.parent.parent
# Prefer the lib directory next to this script when it is run directly.
$:.unshift root.join('lib') if $0 == __FILE__

require 'optparse'
require 'gtin2atc'
require 'gtin2atc/builder'
require "gtin2atc/util"

options = Gtin2atc::Options.new

begin
  options.parser.parse!(ARGV)
rescue OptionParser::MissingArgument,
       OptionParser::InvalidArgument,
       OptionParser::InvalidOption
  puts Gtin2atc::Options.help
  exit
end

opts = options.opts
startTime = Time.now
# File.exist? replaces File.exists?, which was removed in Ruby 3.2.
# File.readlines is used instead of IO.readlines so that an argument starting
# with "|" can never be interpreted as a command to run.
args = if ARGV.size == 1 && File.exist?(ARGV[0])
         File.readlines(ARGV[0]).map { |line| line.chomp }
       else
         ARGV.clone
       end
Gtin2atc::Builder.new(opts).run(args)
diff = (Time.now-startTime).to_i
Gtin2atc::Util.debug_msg "#{File.basename(__FILE__)} done. Took #{diff} seconds"
|
data/gtin2atc.gemspec
ADDED
@@ -0,0 +1,38 @@
|
|
1
|
+
# coding: utf-8
#
# Gem specification for gtin2atc. The file list is taken from git, executables
# are everything below bin/, test files everything below test/, spec/ or features/.
lib = File.expand_path('../lib', __FILE__)
$LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
require 'gtin2atc/version'

Gem::Specification.new do |s|
  s.name          = "gtin2atc"
  s.version       = Gtin2atc::VERSION
  s.author        = "Niklaus Giger, Zeno R.R. Davatz"
  s.email         = "ngiger@ywesee.com, zdavatz@ywesee.com"
  s.description   = "gtin2atc file with gtin, atc_code, pharmanr from input file with gtin"
  s.summary       = "gtin2atc creates csv files with GTIN and ATC."
  s.homepage      = "https://github.com/zdavatz/gtin2atc"
  s.license       = "GPL-v2"
  s.files         = `git ls-files -z`.split("\x0")
  s.executables   = s.files.grep(%r{^bin/}).map { |path| File.basename(path) }
  s.test_files    = s.files.grep(%r{^(test|spec|features)/})
  s.require_paths = ["lib"]

  # We fix the version of the spec to newer versions only in the third position
  # hoping that these version fix only security/severe bugs
  # Consulted the Gemfile.lock to get
  s.add_dependency 'rubyzip', '~> 1.1.3'
  # s.add_dependency 'archive-tar-minitar', '~> 0.5.2'
  s.add_dependency 'mechanize', '~> 2.5.1'
  s.add_dependency 'nokogiri', '~> 1.5.10'
  s.add_dependency 'savon'#, '~> 2.4.0'
  # s.add_dependency 'spreadsheet', '~> 1.0.0'
  s.add_dependency 'rubyXL'
  s.add_dependency 'sax-machine' #, '~> 0.1.0'

  # Development-only dependencies, all without a version constraint.
  ["bundler", "rake", "rspec", "webmock", "rdoc"].each do |dev_gem|
    s.add_development_dependency dev_gem
  end
end
|
38
|
+
|
@@ -0,0 +1,376 @@
|
|
1
|
+
require 'csv'
|
2
|
+
require 'rubyXL'
|
3
|
+
require "gtin2atc/options"
|
4
|
+
require "gtin2atc/downloader"
|
5
|
+
require "gtin2atc/xml_definitions"
|
6
|
+
require 'mechanize'
|
7
|
+
|
8
|
+
module Gtin2atc
|
9
|
+
class Builder
|
10
|
+
Strip_For_Sax_Machine = '<?xml version="1.0" encoding="utf-8"?>'+"\n"
|
11
|
+
# Sets up logging and empty result containers.
#
# @param opts [Hash] options; this method reads opts[:log] (passed to
#   Util.set_logging) and opts[:compare] (stored as @do_compare)
def initialize(opts)
  Util.set_logging(opts[:log])
  @do_compare = opts[:compare]
  Util.debug_msg "Builder: opts are #{opts} @do_compare is #{@do_compare}"
  # One GTIN-keyed hash per data source, filled by the *_extractor methods.
  @data_swissmedic, @data_bag, @data_swissindex = {}, {}, {}
  @bag_entries_without_gtin = 0
end
|
20
|
+
# Computes the check digit for the first 12 characters of +str+ using
# alternating weights 1 and 3 (the weighting used by EAN-13/GTIN-13).
#
# Surrounding whitespace is ignored; missing or non-numeric positions count as 0.
#
# @param str [String] the 12 leading digits of the code
# @return [String] the single check digit
def calc_checksum(str)
  digits = str.strip.split(//u)
  weighted_sum = (0...12).inject(0) do |acc, pos|
    weight = pos.even? ? 1 : 3
    acc + weight * digits[pos].to_i
  end
  ((10 - (weighted_sum % 10)) % 10).to_s
end
|
30
|
+
# Downloads the Swissmedic spreadsheet and extracts one entry per row.
#
# For every row after the first two, the values of columns 0 and 10 are
# zero-padded to 5 and 3 digits, prefixed with "7680" and completed with the
# check digit from calc_checksum to form the GTIN.
# NOTE(review): the first two rows are presumably headers - confirm against the sheet.
#
# Rows that are missing, lack one of the two number cells, or yield an
# all-zero number are skipped. A missing ATC or name cell yields ''.
#
# @return [Hash{Integer => Hash}] GTIN => { :gtin, :atc_code, :name }
def swissmedic_xls_extractor
  @swissmedic = SwissmedicDownloader.new
  filename = @swissmedic.download
  Util.debug_msg "swissmedic_xls_extractor xml is #{filename}"
  data = {}
  @sheet = RubyXL::Parser.parse(File.expand_path(filename)).worksheets[0]
  i_5,i_3  = 0,10 # :swissmedic_numbers
  atc      = 5    # :atc_code
  name_col = 2
  @sheet.each_with_index do |row, i|
    next if i <= 1
    # rubyXL may hand out nil for empty rows and empty cells.
    next unless row && row[i_5] && row[i_3]
    no8 = sprintf('%05d', row[i_5].value.to_i) + sprintf('%03d', row[i_3].value.to_i)
    next if no8.to_i == 0
    ean_base12 = "7680#{no8}"
    gtin = (ean_base12.ljust(12, '0') + calc_checksum(ean_base12)).to_i
    data[gtin] = {
      :gtin     => gtin,
      :atc_code => row[atc] ? row[atc].value.to_s : '',
      # Guarded like the ATC cell: an empty name cell must not abort the import.
      :name     => row[name_col] ? row[name_col].value.to_s : '',
    }
  end
  Util.debug_msg "swissmedic_xls_extractor extracted #{data.size} items"
  data
end
|
57
|
+
# Downloads the SwissIndex XML and extracts one entry per ITEM that has a GTIN.
#
# The XML declaration in Strip_For_Sax_Machine is removed before the document
# is handed to PharmaEntry.parse.
#
# @return [Hash{Integer => Hash}] GTIN => { :gtin, :pharmacode, :atc_code, :description }
def swissindex_xml_extractor
  @swissindex = SwissIndexDownloader.new
  xml = @swissindex.download
  Util.debug_msg "swissindex_xml_extractor xml is #{xml.size} bytes long"
  data = {}
  result = PharmaEntry.parse(xml.sub(Strip_For_Sax_Machine, ''), :lazy => true)
  result.PHARMA.ITEM.each do |pac|
    # Skip items without a GTIN. The previous guard tested `item[:gtin].to_i`
    # on a still-empty hash; that is 0, which is truthy in Ruby, so nothing was
    # ever skipped and such items were stored under the key nil.
    next unless pac.GTIN
    gtin = pac.GTIN.to_i
    data[gtin] = {
      :gtin        => gtin,
      :pharmacode  => pac.PHAR || '',
      :atc_code    => (code = pac.ATC) ? code.to_s : '',
      :description => pac.DSCR,
    }
  end
  Util.debug_msg "swissindex_xml_extractor extracted #{data.size} items"
  data
end
|
77
|
+
# Downloads the BAG XML and extracts one entry per Pack that has a GTIN.
#
# Packs without a GTIN are counted in @bag_entries_without_gtin and skipped.
# The ATC code is taken from the enclosing Preparation ('' when absent).
#
# @return [Hash{Integer => Hash}] GTIN => { :atc_code, :gtin, :name }
def bag_xml_extractor
  data = {}
  @bag = BagXmlDownloader.new
  xml = @bag.download
  Util.debug_msg "bag_xml_extractor xml is #{xml.size} bytes long"

  result = PreparationsEntry.parse(xml.sub(Strip_For_Sax_Machine, ''), :lazy => true)
  @bag_entries_without_gtin = 0
  result.Preparations.Preparation.each do |seq|
    atc_code = seq.AtcCode || ''
    seq.Packs.Pack.each do |pac|
      gtin = pac.GTIN
      unless gtin
        @bag_entries_without_gtin += 1
        Util.debug_msg "run_bag_extractor skip phar #{seq.NameDe}: #{seq.DescriptionDe} without gtin."
        next
      end
      gtin = gtin.to_i
      # A fresh hash per pack: previously one hash was shared by all packs of a
      # preparation, so every stored entry ended up with the last pack's
      # :gtin and :name.
      item = {
        :atc_code => atc_code,
        :gtin     => gtin,
        :name     => seq.NameDe + " " + pac.DescriptionDe,
      }
      data[gtin] = item
      Util.debug_msg "run_bag_extractor add #{item}" if $VERBOSE
    end
  end
  Util.debug_msg "bag_xml_extractor extracted #{data.size} items. Skipped #{@bag_entries_without_gtin} entries without gtin"
  data
end
|
105
|
+
# Writes the CSV output into the directory returned by Util.get_archive.
#
# SwissIndex is always extracted. Without the compare option only the entries
# whose GTIN or pharmacode is listed in +gtins_to_parse+ are written (all
# entries when the list is empty) to gtin2atc.csv, and the method returns.
# With the compare option all SwissIndex entries go to gtin2atc_swissindex.csv,
# BAG and Swissmedic are extracted and written to their own CSV files, and the
# three comparison reports (check_bag, check_swissmedic, compare) are produced.
#
# @param gtins_to_parse [Array<String>] GTINs and/or pharmacodes to select
def run(gtins_to_parse=[])
  Util.debug_msg("run #{gtins_to_parse}")
  Util.debug_msg("@use_swissindex true")
  @data_swissindex = swissindex_xml_extractor
  output_name = File.join(Util.get_archive, @do_compare ? 'gtin2atc_swissindex.csv' : 'gtin2atc.csv')
  CSV.open(output_name,'w+') do |csvfile|
    csvfile << ["gtin", "ATC", 'pharmacode', 'description']
    @data_swissindex.sort.each do |gtin, item|
      wanted = @do_compare || gtins_to_parse.size == 0 ||
               gtins_to_parse.include?(gtin.to_s) ||
               gtins_to_parse.include?(item[:pharmacode])
      next unless wanted
      csvfile << [gtin, item[:atc_code], item[:pharmacode], item[:description]]
    end
  end
  msg = "SwissIndex: Extracted #{gtins_to_parse.size} of #{@data_swissindex.size} items into #{output_name} for #{gtins_to_parse}"
  Util.debug_msg(msg)
  return unless @do_compare

  @data_bag = bag_xml_extractor
  output_name = File.join(Util.get_archive, 'gtin2atc_bag.csv')
  CSV.open(output_name,'w+') do |csvfile|
    csvfile << ["gtin", "ATC", 'description']
    @data_bag.sort.each do |gtin, item|
      # BAG items carry their text under :name (there is no :description key),
      # so the description column used to be empty.
      csvfile << [gtin, item[:atc_code], item[:name]]
    end
  end
  Util.debug_msg "BAG: Extracted #{gtins_to_parse.size} of #{@data_bag.size} items into #{output_name} for #{gtins_to_parse}"

  @data_swissmedic = swissmedic_xls_extractor
  output_name = File.join(Util.get_archive, 'gtin2atc_swissmedic.csv')
  CSV.open(output_name,'w+') do |csvfile|
    csvfile << ["gtin", "ATC", 'description']
    @data_swissmedic.sort.each do |gtin, item|
      # Swissmedic items have neither :pharmacode nor :description; write the
      # three cells announced by the header, using :name as the description.
      csvfile << [gtin, item[:atc_code], item[:name]]
    end
  end
  Util.debug_msg "SwissMedic: Extracted #{@data_swissmedic.size} items into #{output_name}"
  check_bag
  check_swissmedic
  compare
end
|
145
|
+
# require 'pry';
|
146
|
+
# Reports, via Util.info, how the ATC code of every BAG entry relates to the
# entry with the same GTIN in SwissIndex and in Swissmedic.
#
# Entries whose ATC code is identical in all three sources are counted once as
# matching; every other entry is classified separately against SwissIndex and
# against Swissmedic (missing, match, longer, shorter, different).
#
# Fixes compared to the previous version:
# * the number of BAG entries without GTIN is printed as the counter itself
#   (Integer#size returned the byte width of the integer, not the count);
# * the "BAG and Swissmedic" section now prints the Swissmedic counters for
#   "same" and "longer" instead of the SwissIndex ones;
# * the "different" line of the SwissIndex section is labelled SwissIndex.
def check_bag
  matching_atc_codes = 0
  swissindex = Hash.new(0) # classification => number of BAG entries
  swissmedic = Hash.new(0)
  j = 0
  @data_bag.each do |gtin, item|
    atc_code = item[:atc_code]
    j += 1
    Util.debug_msg "#{gtin}: j #{j} checking #{atc_code} in #{item}"
    if @data_swissmedic[gtin] && @data_swissindex[gtin] &&
       atc_code == @data_swissmedic[gtin][:atc_code] &&
       atc_code == @data_swissindex[gtin][:atc_code]
      Util.debug_msg "#{gtin}: matching_atc_codes SwissIndex #{item} #{@data_swissmedic[gtin][:atc_code]} and #{@data_swissindex[gtin][:atc_code]}"
      matching_atc_codes += 1
      next
    end

    swissindex[classify_bag_atc('SwissIndex', @data_swissindex[gtin], gtin, item)] += 1
    swissmedic[classify_bag_atc('SwissMedic', @data_swissmedic[gtin], gtin, item)] += 1
  end
  Util.info "Result of verifing data from BAG (SL):
BAG-data fetched from #{@bag.origin}.
BAG had #{@data_bag.size} entries
#{@bag_entries_without_gtin} entries had no GTIN field
Not in SwissMedic #{swissmedic[:missing]}
Not in SwissIndex #{swissindex[:missing]}
Comparing ATC-Codes between BAG and Swissmedic
#{sprintf("%6d", matching_atc_codes)} items had the same ATC code in BAG, SwissIndex and SwissMedic
#{sprintf("%6d", swissmedic[:match])} are the same in SwissMedic and BAG
#{sprintf("%6d", swissmedic[:different])} are different in SwissMedic and BAG
#{sprintf("%6d", swissmedic[:shorter])} are shorter in SwissMedic than in BAG
#{sprintf("%6d", swissmedic[:longer])} are longer in SwissMedic than in BAG
Comparing ATC-Codes between BAG and Swissindex
#{sprintf("%6d", matching_atc_codes)} items had the same ATC code in BAG, SwissIndex and SwissMedic
#{sprintf("%6d", swissindex[:match])} are the same in SwissIndex and BAG
#{sprintf("%6d", swissindex[:different])} are different in SwissIndex and BAG
#{sprintf("%6d", swissindex[:shorter])} are shorter in SwissIndex than in BAG
#{sprintf("%6d", swissindex[:longer])} are longer in SwissIndex than in BAG
"
end

# Classifies a BAG item's ATC code against the entry of another source and
# logs the outcome with Util.debug_msg.
#
# @param label [String] display name of the other source ('SwissIndex' or 'SwissMedic')
# @param other [Hash, nil] the other source's entry for this GTIN, nil when absent
# @param gtin [Integer] GTIN being checked (used for logging only)
# @param item [Hash] the BAG entry
# @return [Symbol] :missing, :match, :longer (other code is longer),
#   :shorter (other code is shorter) or :different (same length, other value)
def classify_bag_atc(label, other, gtin, item)
  unless other
    Util.debug_msg "#{gtin}: Not in #{label} #{item}"
    return :missing
  end
  atc_code  = item[:atc_code]
  other_atc = other[:atc_code]
  lower     = label.downcase
  if atc_code == other_atc
    Util.debug_msg "#{label} #{gtin}: ATC code #{atc_code} matches #{lower} #{other_atc}"
    :match
  elsif atc_code.length < other_atc.length
    Util.debug_msg "#{label} #{gtin}: ATC code #{atc_code} longer in #{lower} #{other_atc}"
    :longer
  elsif atc_code.length > other_atc.length
    Util.debug_msg "#{label} #{gtin}: ATC code #{atc_code} shorter in #{lower} #{other_atc}"
    :shorter
  else
    Util.debug_msg "#{label} #{gtin}: ATC code #{atc_code} differs from #{lower} #{other_atc}"
    :different
  end
end
private :classify_bag_atc
|
233
|
+
|
234
|
+
# Reports, via Util.info, how every Swissmedic entry relates to BAG and SwissIndex.
#
# An entry present in both BAG and SwissIndex with the same ATC code there is
# counted as matching. Otherwise entries missing in SwissIndex are counted,
# the remaining ones are compared with SwissIndex by ATC code (match, longer,
# shorter, different), and entries missing in BAG are counted.
#
# Fixes compared to the previous version:
# * the BAG/SwissIndex comparison used the key 1 on symbol-keyed hashes, which
#   compared nil with nil and was therefore always true; it now compares :atc_code;
# * the number of BAG entries without GTIN is printed as the counter itself
#   (Integer#size returned the byte width of the integer, not the count).
def check_swissmedic
  matching = 0
  not_in_bag = 0
  not_in_swissindex = 0
  matching_atc_codes = 0
  shorter_in_swissmedic = 0
  longer_in_swissindex = 0
  different_atc = 0
  @data_swissmedic.each do |gtin, item|
    bag_item   = @data_bag[gtin]
    index_item = @data_swissindex[gtin]
    if bag_item && index_item && bag_item[:atc_code] == index_item[:atc_code]
      matching += 1
      next
    end
    unless index_item
      Util.debug_msg "#{gtin}: Not in SwissIndex #{item}"
      not_in_swissindex += 1
      next
    end
    if item[:atc_code] == index_item[:atc_code]
      Util.debug_msg "SwissIndex #{gtin}: ATC code #{item[:atc_code]} matches swissindex #{index_item[:atc_code]}"
      matching_atc_codes += 1
    elsif item[:atc_code].length < index_item[:atc_code].length
      longer_in_swissindex += 1
      Util.debug_msg "SwissIndex #{gtin}: ATC code #{item[:atc_code]} longer in swissindex #{index_item[:atc_code]}"
    elsif item[:atc_code].length > index_item[:atc_code].length
      shorter_in_swissmedic += 1
      Util.debug_msg "SwissIndex #{gtin}: ATC code #{item[:atc_code]} shorter in swissindex #{index_item[:atc_code]}"
    else
      different_atc += 1
      Util.debug_msg "SwissIndex #{gtin}: ATC code #{item[:atc_code]} differs from swissindex #{index_item[:atc_code]}"
    end
    unless bag_item
      Util.debug_msg "#{gtin}: Not in BAG #{item}"
      not_in_bag += 1
    end
  end
  Util.info "Result of verifing data from swissmedic:
SwissMedic had #{@data_swissmedic.size} entries. Fetched from #{@swissmedic.origin}
SwissIndex #{@data_swissindex.size} entries. Fetched from #{@swissindex.origin}
BAG #{@data_bag.size} entries. #{@bag_entries_without_gtin} entries had no GTIN field. Fetched from #{@bag.origin}
Matching #{matching} items.
Not in BAG #{not_in_bag}
Not in SwissIndex #{not_in_swissindex}
Comparing ATC-Codes between Swissmedic and Swissindex
#{sprintf("%6d", matching_atc_codes)} match
#{sprintf("%6d", different_atc)} are different
#{sprintf("%6d", matching_atc_codes)} are the same in SwissIndex and SwissMedic
#{sprintf("%6d", shorter_in_swissmedic)} are shorter in SwissIndex
#{sprintf("%6d", longer_in_swissindex)} are longer in SwissIndex
"
end
|
287
|
+
|
288
|
+
# Reports, via Util.info, a summary over the union of all GTINs known to BAG,
# SwissIndex and Swissmedic.
#
# Each GTIN lands in exactly one bucket, checked in this order: same ATC code
# in all three sources, not in Swissmedic, not in SwissIndex, not in BAG,
# otherwise "ATC codes differ".
#
# Fixes compared to the previous version:
# * the "same in all three" test compared BAG with SwissIndex twice and never
#   looked at the Swissmedic ATC code; it now checks both pairs;
# * the number of BAG entries without GTIN is printed as the counter itself
#   (Integer#size returned the byte width of the integer, not the count).
def compare
  all_gtin = @data_bag.merge(@data_swissindex).merge(@data_swissmedic).sort
  matching_atc_codes = 0
  not_in_bag = 0
  not_in_swissmedic = 0
  not_in_swissindex = 0
  different_atc = 0
  all_gtin.each do |gtin, item|
    bag_item   = @data_bag[gtin]
    index_item = @data_swissindex[gtin]
    medic_item = @data_swissmedic[gtin]
    if bag_item && index_item && medic_item &&
       bag_item[:atc_code] == index_item[:atc_code] &&
       bag_item[:atc_code] == medic_item[:atc_code]
      matching_atc_codes += 1
    elsif !medic_item
      Util.debug_msg "#{gtin}: Not in SwissMedic #{item}"
      not_in_swissmedic += 1
    elsif !index_item
      Util.debug_msg "#{gtin}: Not in SwissIndex #{item}"
      not_in_swissindex += 1
    elsif !bag_item
      Util.debug_msg "#{gtin}: Not in BAG #{item}"
      not_in_bag += 1
    else
      different_atc += 1
      Util.debug_msg "#{gtin}: ATC code differs BAG #{bag_item[:atc_code]} swissindex #{index_item[:atc_code]} swissmedic #{medic_item[:atc_code]}"
    end
  end
  Util.info "Comparing all GTIN-codes:
Found infos about #{all_gtin.size} entries
BAG #{@data_bag.size} entries. #{@bag_entries_without_gtin} entries had no GTIN field. Fetched from #{@bag.origin}
SwissIndex #{@data_swissindex.size} entries. Fetched from #{@swissindex.origin}
SwissMedic #{@data_swissmedic.size} entries. Fetched from #{@swissmedic.origin}
#{sprintf("%6d", matching_atc_codes)} items had the same ATC code in BAG, SwissIndex and SwissMedic
#{sprintf("%6d", not_in_bag)} not in BAG
#{sprintf("%6d", not_in_swissindex)} not in SwissIndex
#{sprintf("%6d", not_in_swissmedic)} not in SwissMedic
#{sprintf("%6d", different_atc)} ATC-Codes differed
"
end
|
333
|
+
end
|
334
|
+
# Helper for fetching the Swissmedic "Packungen" spreadsheet.
class Swissmedic
  # Returns the path of a local copy of Packungen.xlsx, downloading it if needed.
  #
  # Util.get_latest_and_dated_name supplies two paths, called latest_name and
  # target here. If target exists it is returned; otherwise, if latest_name
  # exists it is returned. Only when neither exists is the index page fetched,
  # the first link whose title matches /Packungen/ followed, and the body
  # written to target.
  #
  # @return [String, nil] path of the spreadsheet; nil only when the download
  #   has the same size as an existing latest_name file
  # @raise [RuntimeError] when no matching link is found on the index page
  def Swissmedic.get_latest
    Util.debug_msg 'test'
    @index_url = 'https://www.swissmedic.ch/arzneimittel/00156/00221/00222/00230/index.html?lang=de'
    Util.debug_msg("SwissmedicPlugin @index_url #{@index_url}")
    latest_name, target = Util.get_latest_and_dated_name('Packungen', '.xlsx')
    if File.exist?(target)
      Util.debug_msg "#{__FILE__}: #{__LINE__} skip writing #{target} as it already exists and is #{File.size(target)} bytes."
      return target
    end
    Util.debug_msg "target #{target} #{latest_name}"
    # The cached file used to be read completely here although its content was
    # never used; only its existence matters.
    return latest_name if File.exist?(latest_name)

    agent = Mechanize.new
    page = agent.get @index_url
    link = page.links.find { |candidate| /Packungen/iu.match candidate.attributes['title'] }
    raise "could not identify url to Packungen.xlsx" unless link
    download = agent.get(link.href).body

    # A trailing newline is appended so that the size comparison below matches
    # files written by this method.
    download << "\n" if download[-1] != ?\n
    if !File.exist?(latest_name) || download.size != File.size(latest_name)
      # The xlsx is binary data: write it in binary mode, byte for byte.
      File.open(target, 'wb') { |fh| fh.write(download) }
      msg = "#{__FILE__}: #{__LINE__} updated download.size is #{download.size} -> #{target} #{File.size(target)}"
      # File.exist? replaces File.exists?, which was removed in Ruby 3.2.
      msg += "#{target} now #{File.size(target)} bytes != #{latest_name} #{File.size(latest_name)}" if File.exist?(latest_name)
      Util.debug_msg(msg)
      target
    else
      Util.debug_msg "#{__FILE__}: #{__LINE__} skip writing #{target} as #{latest_name} is #{File.size(latest_name)} bytes. Returning latest"
      nil
    end
  end

end
|
376
|
+
end
|