gtin2atc 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/Rakefile ADDED
@@ -0,0 +1,43 @@
1
#!/usr/bin/env ruby
# encoding: utf-8
# Rake tasks for the gtin2atc gem: gem packaging (via bundler/gem_tasks),
# the RSpec suite, a long-running option test and the `clean` task.
lib = File.expand_path('../lib', __FILE__)
$LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
require 'gtin2atc/version'
require "bundler/gem_tasks"
require 'rspec/core/rake_task'
require 'rake/clean'

RSpec::Core::RakeTask.new(:spec)

# dependencies are now declared in gtin2atc.gemspec

# The :build prerequisite already does all the work; invoking it again from
# the task body was a no-op because Rake runs a task only once per session.
desc 'Offer a gem task like hoe'
task :gem => :build

task :spec => :clean

desc 'Run gtin2atc with all commonly used combinations'
task :test => [:clean, :spec, :gem] do
  log_file = 'test_options.log'
  puts "Running test_options.rb with Output redirected to #{log_file}. This will take some time (e.g. 20 minutes)"
  # must use bash -o pipefail to catch error in test_options.rb and not tee
  # see http://stackoverflow.com/questions/985876/tee-and-exit-status
  res = system("bash -c 'set -o pipefail && ./test_options.rb 2>&1 | tee #{log_file}'")
  puts "Running test_options.rb returned #{res.inspect}. Output was redirected to #{log_file}"
  exit 1 unless res
end

# Hand the glob patterns to CLEAN as plain strings: wrapping them in a
# FileList first expands them while the Rakefile is loaded, so files created
# later in the same rake run would not be cleaned.
CLEAN.include(
  'pkg/*.gem',
  '*.zip*',
  '*.xls*',
  '*.xml*',
  '*.dat*',
  '*.tar.gz',
  '*.txt.*',
  '*.csv.*',
  '*.zip.*',
  'ruby*.tmp',
  'data/download',
  'duplicate_ean13_from_zur_rose.txt'
)
data/bin/gtin2atc ADDED
@@ -0,0 +1,34 @@
1
#!/usr/bin/env ruby

# Command line entry point: parses the options, collects the GTINs to look up
# (from the command line or from a file, one per line) and runs the builder.
require 'pathname'

root = Pathname.new(__FILE__).realpath.parent.parent
$:.unshift root.join('lib') if $0 == __FILE__

require 'optparse'
require 'gtin2atc'
require 'gtin2atc/builder'
require "gtin2atc/util"

options = Gtin2atc::Options.new

begin
  options.parser.parse!(ARGV)
rescue OptionParser::MissingArgument,
       OptionParser::InvalidArgument,
       OptionParser::InvalidOption
  puts Gtin2atc::Options.help
  exit
end

opts = options.opts
start_time = Time.now
# A single argument naming an existing file is treated as a list of GTINs,
# one per line. File.exist? is used because File.exists? was removed in
# Ruby 3.2; File.readlines avoids IO.readlines' special handling of "|cmd".
args = if ARGV.size == 1 && File.exist?(ARGV[0])
         File.readlines(ARGV[0]).map { |line| line.chomp }
       else
         ARGV.clone
       end
Gtin2atc::Builder.new(opts).run(args)
diff = (Time.now - start_time).to_i
Gtin2atc::Util.debug_msg "#{File.basename(__FILE__)} done. Took #{diff} seconds"
data/gtin2atc.gemspec ADDED
@@ -0,0 +1,38 @@
1
# coding: utf-8
# Gem specification for gtin2atc.
lib = File.expand_path('../lib', __FILE__)
$LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
require 'gtin2atc/version'

Gem::Specification.new do |spec|
  spec.name          = "gtin2atc"
  spec.version       = Gtin2atc::VERSION
  # One entry per person instead of a single comma-separated string, so the
  # gem metadata lists two authors / two addresses.
  spec.authors       = ["Niklaus Giger", "Zeno R.R. Davatz"]
  spec.email         = ["ngiger@ywesee.com", "zdavatz@ywesee.com"]
  spec.description   = "gtin2atc file with gtin, atc_code, pharmanr from input file with gtin"
  spec.summary       = "gtin2atc creates csv files with GTIN and ATC."
  spec.homepage      = "https://github.com/zdavatz/gtin2atc"
  # NOTE(review): "GPL-v2" is not an SPDX identifier; confirm with the authors
  # whether "GPL-2.0-only" or "GPL-2.0-or-later" is intended before changing it.
  spec.license       = "GPL-v2"
  spec.files         = `git ls-files -z`.split("\x0")
  spec.executables   = spec.files.grep(%r{^bin/}) { |f| File.basename(f) }
  spec.test_files    = spec.files.grep(%r{^(test|spec|features)/})
  spec.require_paths = ["lib"]

  # We fix the version of the spec to newer versions only in the third position
  # hoping that these version fix only security/severe bugs
  # Consulted the Gemfile.lock to get
  spec.add_dependency 'rubyzip', '~> 1.1.3'
  # spec.add_dependency 'archive-tar-minitar', '~> 0.5.2'
  spec.add_dependency 'mechanize', '~> 2.5.1'
  spec.add_dependency 'nokogiri', '~> 1.5.10'
  spec.add_dependency 'savon'#, '~> 2.4.0'
  # spec.add_dependency 'spreadsheet', '~> 1.0.0'
  spec.add_dependency 'rubyXL'
  spec.add_dependency 'sax-machine' #, '~> 0.1.0'

  spec.add_development_dependency "bundler"
  spec.add_development_dependency "rake"
  spec.add_development_dependency "rspec"
  spec.add_development_dependency "webmock"
  spec.add_development_dependency "rdoc"
end
38
+
@@ -0,0 +1,376 @@
1
+ require 'csv'
2
+ require 'rubyXL'
3
+ require "gtin2atc/options"
4
+ require "gtin2atc/downloader"
5
+ require "gtin2atc/xml_definitions"
6
+ require 'mechanize'
7
+
8
+ module Gtin2atc
9
+ class Builder
10
+ Strip_For_Sax_Machine = '<?xml version="1.0" encoding="utf-8"?>'+"\n"
11
# Prepares a builder.
# opts - options hash; opts[:log] is passed to Util.set_logging and
#        opts[:compare] is remembered as @do_compare.
# All three per-source lookup tables start out empty.
def initialize(opts)
  Util.set_logging(opts[:log])
  @bag_entries_without_gtin = 0
  @data_swissmedic, @data_bag, @data_swissindex = {}, {}, {}
  @do_compare = opts[:compare]
  Util.debug_msg "Builder: opts are #{opts} @do_compare is #{@do_compare}"
end
20
# Computes the check digit for the first 12 digits of a GTIN/EAN-13.
# str - the 12 leading digits, as a String or anything whose #to_s yields
#       them (e.g. an Integer); surrounding whitespace is ignored.
# Digits at even indexes are weighted 1, digits at odd indexes 3. Missing or
# non-numeric positions count as 0.
# Returns the check digit as a one-character String.
def calc_checksum(str)
  digits = str.to_s.strip.split(//u).first(12).map { |char| char.to_i }
  sum = digits.each_with_index.inject(0) do |total, (digit, idx)|
    total + digit * (idx.even? ? 1 : 3)
  end
  ((10 - (sum % 10)) % 10).to_s
end
30
# Downloads the Swissmedic package list (via SwissmedicDownloader) and reads
# its first worksheet with RubyXL.
# Returns a Hash mapping the GTIN (Integer) to
#   { :gtin => Integer, :atc_code => String, :name => String }.
# The GTIN is built from "7680", the 5-digit number in column 0, the 3-digit
# number in column 10 and the check digit from calc_checksum.
def swissmedic_xls_extractor
  @swissmedic = SwissmedicDownloader.new
  filename = @swissmedic.download
  Util.debug_msg "swissmedic_xls_extractor xml is #{filename}"
  data = {}
  @sheet = RubyXL::Parser.parse(File.expand_path(filename)).worksheets[0]
  i_5, i_3 = 0, 10 # :swissmedic_numbers
  atc = 5          # :atc_code
  name = 2
  @sheet.each_with_index do |row, i|
    next if i <= 1 # the first two rows are skipped (presumably headers)
    # Rows or cells may be missing; skip instead of raising NoMethodError.
    next unless row && row[i_5] && row[i_3]
    no8 = sprintf('%05d%03d', row[i_5].value.to_i, row[i_3].value.to_i)
    next if no8.to_i == 0
    ean_base12 = "7680#{no8}"
    gtin = (ean_base12 + calc_checksum(ean_base12)).to_i
    data[gtin] = {
      :gtin     => gtin,
      :atc_code => row[atc] ? row[atc].value.to_s : '',
      :name     => row[name] ? row[name].value.to_s : '',
    }
  end
  Util.debug_msg "swissmedic_xls_extractor extracted #{data.size} items"
  data
end
57
# Downloads the SwissIndex pharma XML (via SwissIndexDownloader) and parses
# it with PharmaEntry.
# Returns a Hash mapping the GTIN (Integer) to
#   { :gtin, :pharmacode, :atc_code, :description }.
# Entries without a usable GTIN (missing or 0) are skipped: they cannot be
# keyed, and a nil key would break the sorted output written by #run.
def swissindex_xml_extractor
  @swissindex = SwissIndexDownloader.new
  xml = @swissindex.download
  Util.debug_msg "swissindex_xml_extractor xml is #{xml.size} bytes long"
  data = {}
  # The XML declaration is stripped before parsing (see Strip_For_Sax_Machine).
  result = PharmaEntry.parse(xml.sub(Strip_For_Sax_Machine, ''), :lazy => true)
  result.PHARMA.ITEM.each do |pac|
    gtin = pac.GTIN ? pac.GTIN.to_i : nil
    # The former guard tested `nil.to_i`, which is 0 and therefore truthy,
    # so it never skipped anything.
    next if gtin.nil? || gtin.zero?
    pharmacode = pac.PHAR
    atc_code = pac.ATC
    data[gtin] = {
      :gtin        => gtin,
      :pharmacode  => pharmacode ? pharmacode : '',
      :atc_code    => atc_code ? atc_code.to_s : '',
      :description => pac.DSCR,
    }
  end
  Util.debug_msg "swissindex_xml_extractor extracted #{data.size} items"
  data
end
77
# Downloads the BAG preparations XML (via BagXmlDownloader) and parses it
# with PreparationsEntry.
# Returns a Hash mapping each pack's GTIN (Integer) to
#   { :atc_code, :gtin, :name }.
# Packs without a GTIN are counted in @bag_entries_without_gtin.
def bag_xml_extractor
  data = {}
  @bag = BagXmlDownloader.new
  xml = @bag.download
  Util.debug_msg "bag_xml_extractor xml is #{xml.size} bytes long"

  # The XML declaration is stripped before parsing (see Strip_For_Sax_Machine).
  result = PreparationsEntry.parse(xml.sub(Strip_For_Sax_Machine, ''), :lazy => true)
  @bag_entries_without_gtin = 0
  result.Preparations.Preparation.each do |seq|
    atc_code = seq.AtcCode || ''
    seq.Packs.Pack.each do |pac|
      gtin = pac.GTIN
      if gtin
        gtin = gtin.to_i
        # Build a fresh hash per pack. Sharing one hash across all packs of a
        # preparation made every GTIN point at the last pack's data.
        item = {
          :atc_code => atc_code,
          :gtin     => gtin,
          :name     => seq.NameDe + " " + pac.DescriptionDe,
        }
        data[gtin] = item
        Util.debug_msg "run_bag_extractor add #{item}" if $VERBOSE
      else
        @bag_entries_without_gtin += 1
        Util.debug_msg "run_bag_extractor skip phar #{seq.NameDe}: #{seq.DescriptionDe} without gtin."
      end
    end
  end
  Util.debug_msg "bag_xml_extractor extracted #{data.size} items. Skipped #{@bag_entries_without_gtin} entries without gtin"
  data
end
105
# Extracts the SwissIndex data and writes it as CSV into Util.get_archive.
# gtins_to_parse - Array of Strings (GTINs or pharmacodes). When empty, or
#                  when @do_compare is set, every SwissIndex item is written.
# Without @do_compare only 'gtin2atc.csv' is produced. With @do_compare the
# SwissIndex data goes to 'gtin2atc_swissindex.csv', the BAG and SwissMedic
# data are written to their own CSV files, and the three consistency reports
# (check_bag, check_swissmedic, compare) are run.
def run(gtins_to_parse=[])
  Util.debug_msg("run #{gtins_to_parse}")
  Util.debug_msg("@use_swissindex true")
  @data_swissindex = swissindex_xml_extractor
  output_name = File.join(Util.get_archive, @do_compare ? 'gtin2atc_swissindex.csv' : 'gtin2atc.csv')
  written = 0
  CSV.open(output_name,'w+') do |csvfile|
    csvfile << ["gtin", "ATC", 'pharmacode', 'description']
    @data_swissindex.sort.each do |gtin, item|
      wanted = @do_compare || gtins_to_parse.empty? ||
               gtins_to_parse.include?(gtin.to_s) ||
               gtins_to_parse.include?(item[:pharmacode])
      next unless wanted
      csvfile << [gtin, item[:atc_code], item[:pharmacode], item[:description]]
      written += 1
    end
  end
  # Report the number of rows actually written, not the size of the request.
  msg = "SwissIndex: Extracted #{written} of #{@data_swissindex.size} items into #{output_name} for #{gtins_to_parse}"
  Util.debug_msg(msg)
  return unless @do_compare
  @data_bag = bag_xml_extractor
  output_name = File.join(Util.get_archive, 'gtin2atc_bag.csv')
  write_atc_csv(output_name, @data_bag)
  Util.debug_msg "BAG: Extracted #{@data_bag.size} items into #{output_name}"
  @data_swissmedic = swissmedic_xls_extractor
  output_name = File.join(Util.get_archive, 'gtin2atc_swissmedic.csv')
  write_atc_csv(output_name, @data_swissmedic)
  Util.debug_msg "SwissMedic: Extracted #{@data_swissmedic.size} items into #{output_name}"
  check_bag
  check_swissmedic
  compare
end

# Internal helper for #run: writes gtin / ATC / description rows sorted by GTIN.
# The BAG and SwissMedic extractors store the product text under :name (not
# :description) and have no :pharmacode, so :name fills the description
# column and each row has exactly the three header columns.
def write_atc_csv(output_name, data)
  CSV.open(output_name, 'w+') do |csvfile|
    csvfile << ["gtin", "ATC", 'description']
    data.sort.each do |gtin, item|
      csvfile << [gtin, item[:atc_code], item[:name]]
    end
  end
end
private :write_atc_csv
145
+ # require 'pry';
146
# Compares the ATC code of every BAG entry with the SwissMedic and SwissIndex
# entries for the same GTIN and reports the tallies through Util.info.
# Entries whose ATC code is identical in all three sources are counted once
# as matching and are not part of the per-source tallies.
def check_bag
  matching_atc_codes = 0
  swissindex = Hash.new(0) # tallies keyed by :missing/:match/:longer/:shorter/:different
  swissmedic = Hash.new(0)
  @data_bag.each_with_index do |(gtin, item), idx|
    atc_code = item[:atc_code]
    Util.debug_msg "#{gtin}: j #{idx + 1} checking #{atc_code} in #{item}"
    medic_item = @data_swissmedic[gtin]
    index_item = @data_swissindex[gtin]
    if medic_item && index_item &&
       atc_code == medic_item[:atc_code] && atc_code == index_item[:atc_code]
      Util.debug_msg "#{gtin}: matching_atc_codes SwissIndex #{item} #{medic_item[:atc_code]} and #{index_item[:atc_code]}"
      matching_atc_codes += 1
      next
    end
    swissindex[classify_bag_atc('SwissIndex', gtin, item, index_item)] += 1
    swissmedic[classify_bag_atc('SwissMedic', gtin, item, medic_item)] += 1
  end
  # @bag_entries_without_gtin is itself the count (Integer#size would print
  # the byte width of the integer). Each report line uses the tally of the
  # source it names.
  report = [
    "Result of verifing data from BAG (SL):",
    "BAG-data fetched from #{@bag.origin}.",
    "BAG had #{@data_bag.size} entries",
    "#{@bag_entries_without_gtin} entries had no GTIN field",
    "Not in SwissMedic #{swissmedic[:missing]}",
    "Not in SwissIndex #{swissindex[:missing]}",
    "Comparing ATC-Codes between BAG and Swissmedic",
    "#{sprintf("%6d", matching_atc_codes)} items had the same ATC code in BAG, SwissIndex and SwissMedic",
    "#{sprintf("%6d", swissmedic[:match])} are the same in SwissMedic and BAG",
    "#{sprintf("%6d", swissmedic[:different])} are different in SwissMedic and BAG",
    "#{sprintf("%6d", swissmedic[:shorter])} are shorter in SwissMedic than in BAG",
    "#{sprintf("%6d", swissmedic[:longer])} are longer in SwissMedic than in BAG",
    "Comparing ATC-Codes between BAG and Swissindex",
    "#{sprintf("%6d", matching_atc_codes)} items had the same ATC code in BAG, SwissIndex and SwissMedic",
    "#{sprintf("%6d", swissindex[:match])} are the same in SwissIndex and BAG",
    "#{sprintf("%6d", swissindex[:different])} are different in SwissIndex and BAG",
    "#{sprintf("%6d", swissindex[:shorter])} are shorter in SwissIndex than in BAG",
    "#{sprintf("%6d", swissindex[:longer])} are longer in SwissIndex than in BAG",
  ]
  Util.info(report.join("\n") + "\n")
end

# Internal helper for #check_bag: classifies how the BAG ATC code of +item+
# relates to the entry +other+ found in +source+ ('SwissIndex'/'SwissMedic').
# Returns :missing when +other+ is nil, :match for equal codes, :longer or
# :shorter when the other source's code is longer or shorter than the BAG
# code, and :different for codes of equal length that differ.
def classify_bag_atc(source, gtin, item, other)
  unless other
    Util.debug_msg "#{gtin}: Not in #{source} #{item}"
    return :missing
  end
  atc_code = item[:atc_code]
  other_code = other[:atc_code]
  lower = source.downcase
  if atc_code == other_code
    Util.debug_msg "#{source} #{gtin}: ATC code #{atc_code} matches #{lower} #{other_code}"
    :match
  elsif atc_code.length < other_code.length
    Util.debug_msg "#{source} #{gtin}: ATC code #{atc_code} longer in #{lower} #{other_code}"
    :longer
  elsif atc_code.length > other_code.length
    Util.debug_msg "#{source} #{gtin}: ATC code #{atc_code} shorter in #{lower} #{other_code}"
    :shorter
  else
    Util.debug_msg "#{source} #{gtin}: ATC code #{atc_code} differs from #{lower} #{other_code}"
    :different
  end
end
private :classify_bag_atc
233
+
234
# Compares the ATC code of every SwissMedic entry with the SwissIndex entry
# for the same GTIN and reports the tallies through Util.info.
# An entry counts as "matching" when it is present in BAG and SwissIndex and
# all three sources carry the same ATC code; such entries are not tallied any
# further. Entries missing from SwissIndex are only counted as such.
def check_swissmedic
  matching = 0
  not_in_bag = 0
  not_in_swissindex = 0
  matching_atc_codes = 0
  shorter_in_swissindex = 0
  longer_in_swissindex = 0
  different_atc = 0
  @data_swissmedic.each do |gtin, item|
    atc_code = item[:atc_code]
    bag_item = @data_bag[gtin]
    index_item = @data_swissindex[gtin]
    # The items are hashes keyed by symbols: comparing `[1]` compared nil
    # with nil and therefore always succeeded.
    if bag_item && index_item &&
       bag_item[:atc_code] == index_item[:atc_code] && atc_code == index_item[:atc_code]
      matching += 1
      next
    end
    unless index_item
      Util.debug_msg "#{gtin}: Not in SwissIndex #{item}"
      not_in_swissindex += 1
      next
    end
    index_code = index_item[:atc_code]
    if atc_code == index_code
      Util.debug_msg "SwissIndex #{gtin}: ATC code #{atc_code} matches swissindex #{index_code}"
      matching_atc_codes += 1
    elsif atc_code.length < index_code.length
      longer_in_swissindex += 1
      Util.debug_msg "SwissIndex #{gtin}: ATC code #{atc_code} longer in swissindex #{index_code}"
    elsif atc_code.length > index_code.length
      shorter_in_swissindex += 1
      Util.debug_msg "SwissIndex #{gtin}: ATC code #{atc_code} shorter in swissindex #{index_code}"
    else
      different_atc += 1
      Util.debug_msg "SwissIndex #{gtin}: ATC code #{atc_code} differs from swissindex #{index_code}"
    end
    unless bag_item
      Util.debug_msg "#{gtin}: Not in BAG #{item}"
      not_in_bag += 1
    end
  end
  # @bag_entries_without_gtin is itself the count (Integer#size would print
  # the byte width of the integer).
  report = [
    "Result of verifing data from swissmedic:",
    "SwissMedic had #{@data_swissmedic.size} entries. Fetched from #{@swissmedic.origin}",
    "SwissIndex #{@data_swissindex.size} entries. Fetched from #{@swissindex.origin}",
    "BAG #{@data_bag.size} entries. #{@bag_entries_without_gtin} entries had no GTIN field. Fetched from #{@bag.origin}",
    "Matching #{matching} items.",
    "Not in BAG #{not_in_bag}",
    "Not in SwissIndex #{not_in_swissindex}",
    "Comparing ATC-Codes between Swissmedic and Swissindex",
    "#{sprintf("%6d", matching_atc_codes)} match",
    "#{sprintf("%6d", different_atc)} are different",
    "#{sprintf("%6d", matching_atc_codes)} are the same in SwissIndex and SwissMedic",
    "#{sprintf("%6d", shorter_in_swissindex)} are shorter in SwissIndex",
    "#{sprintf("%6d", longer_in_swissindex)} are longer in SwissIndex",
  ]
  Util.info(report.join("\n") + "\n")
end
287
+
288
# Walks over every GTIN known to any of the three sources (BAG, SwissIndex,
# SwissMedic) and reports through Util.info how many entries agree on the
# ATC code, how many are missing from a source and how many differ.
# A GTIN missing from several sources is counted only for the first missing
# source checked (SwissMedic, then SwissIndex, then BAG).
def compare
  all_gtin = @data_bag.merge(@data_swissindex).merge(@data_swissmedic).sort
  matching_atc_codes = 0
  not_in_bag = 0
  not_in_swissmedic = 0
  not_in_swissindex = 0
  different_atc = 0
  all_gtin.each do |gtin, item|
    bag_item = @data_bag[gtin]
    index_item = @data_swissindex[gtin]
    medic_item = @data_swissmedic[gtin]
    # Compare BAG against both other sources; the second comparison used to
    # repeat the SwissIndex check, so SwissMedic was never compared.
    if bag_item && index_item && medic_item &&
       bag_item[:atc_code] == index_item[:atc_code] &&
       bag_item[:atc_code] == medic_item[:atc_code]
      matching_atc_codes += 1
      next
    end
    unless medic_item
      Util.debug_msg "#{gtin}: Not in SwissMedic #{item}"
      not_in_swissmedic += 1
      next
    end
    unless index_item
      Util.debug_msg "#{gtin}: Not in SwissIndex #{item}"
      not_in_swissindex += 1
      next
    end
    unless bag_item
      Util.debug_msg "#{gtin}: Not in BAG #{item}"
      not_in_bag += 1
      next
    end
    different_atc += 1
    Util.debug_msg "#{gtin}: ATC code differs BAG #{bag_item[:atc_code]} swissindex #{index_item[:atc_code]} swissmedic #{medic_item[:atc_code]}"
  end
  # @bag_entries_without_gtin is itself the count (Integer#size would print
  # the byte width of the integer).
  report = [
    "Comparing all GTIN-codes:",
    "Found infos about #{all_gtin.size} entries",
    "BAG #{@data_bag.size} entries. #{@bag_entries_without_gtin} entries had no GTIN field. Fetched from #{@bag.origin}",
    "SwissIndex #{@data_swissindex.size} entries. Fetched from #{@swissindex.origin}",
    "SwissMedic #{@data_swissmedic.size} entries. Fetched from #{@swissmedic.origin}",
    "#{sprintf("%6d", matching_atc_codes)} items had the same ATC code in BAG, SwissIndex and SwissMedic",
    "#{sprintf("%6d", not_in_bag)} not in BAG",
    "#{sprintf("%6d", not_in_swissindex)} not in SwissIndex",
    "#{sprintf("%6d", not_in_swissmedic)} not in SwissMedic",
    "#{sprintf("%6d", different_atc)} ATC-Codes differed",
  ]
  Util.info(report.join("\n") + "\n")
end
333
+ end
334
# Fetches the Swissmedic "Packungen" spreadsheet.
class Swissmedic
  # Returns the path of a local Packungen.xlsx.
  # File names come from Util.get_latest_and_dated_name. If the dated target
  # or the "latest" file already exists its path is returned without any
  # network access. Otherwise the file linked from the Swissmedic index page
  # is downloaded with Mechanize and written to the dated target.
  # Raises RuntimeError when no matching link is found on the index page.
  # Returns nil when the download has the same size as an existing latest file.
  def Swissmedic.get_latest
    @index_url = 'https://www.swissmedic.ch/arzneimittel/00156/00221/00222/00230/index.html?lang=de'
    Util.debug_msg("SwissmedicPlugin @index_url #{@index_url}")
    latest_name, target = Util.get_latest_and_dated_name('Packungen', '.xlsx')
    if File.exist?(target)
      Util.debug_msg "#{__FILE__}: #{__LINE__} skip writing #{target} as it already exists and is #{File.size(target)} bytes."
      return target
    end
    Util.debug_msg "target #{target} #{latest_name}"
    # The cached file is returned as-is; there is no need to read it first.
    return latest_name if File.exist?(latest_name)

    agent = Mechanize.new
    page = agent.get @index_url
    links = page.links.select do |link|
      /Packungen/iu.match link.attributes['title']
    end
    link = links.first || raise("could not identify url to Packungen.xlsx")
    download = agent.get(link.href).body

    # NOTE(review): a trailing newline is appended as before so that stored
    # file sizes stay comparable with earlier downloads; confirm it is wanted
    # for a binary xlsx.
    download << "\n" if download[-1] != ?\n
    # File.exist? replaces File.exists?, which was removed in Ruby 3.2.
    if !File.exist?(latest_name) || download.size != File.size(latest_name)
      # Binary mode keeps the spreadsheet bytes intact on every platform.
      File.open(target, 'wb') { |fh| fh.write(download) }
      msg = "#{__FILE__}: #{__LINE__} updated download.size is #{download.size} -> #{target} #{File.size(target)}"
      msg += "#{target} now #{File.size(target)} bytes != #{latest_name} #{File.size(latest_name)}" if File.exist?(latest_name)
      Util.debug_msg(msg)
      target
    else
      Util.debug_msg "#{__FILE__}: #{__LINE__} skip writing #{target} as #{latest_name} is #{File.size(latest_name)} bytes. Returning latest"
      nil
    end
  end

end
376
+ end