oddb2xml 2.5.0 → 2.5.1

Sign up to get free protection for your applications and to get access to all the features.
Files changed (61) hide show
  1. checksums.yaml +4 -4
  2. data/.travis.yml +1 -1
  3. data/Elexis_Artikelstamm_v003.xsd +387 -0
  4. data/Elexis_Artikelstamm_v5.xsd +513 -0
  5. data/Gemfile +2 -6
  6. data/History.txt +11 -0
  7. data/README.md +35 -27
  8. data/artikelstamm.md +68 -0
  9. data/bin/compare_v5 +41 -0
  10. data/bin/oddb2xml +3 -15
  11. data/data/article_overrides.yaml +51859 -0
  12. data/data/gtin2ignore.yaml +30510 -0
  13. data/data/product_overrides.yaml +4 -0
  14. data/lib/oddb2xml/builder.rb +543 -192
  15. data/lib/oddb2xml/cli.rb +82 -62
  16. data/lib/oddb2xml/compare.rb +189 -0
  17. data/lib/oddb2xml/compressor.rb +6 -3
  18. data/lib/oddb2xml/downloader.rb +79 -64
  19. data/lib/oddb2xml/extractor.rb +67 -40
  20. data/lib/oddb2xml/options.rb +76 -77
  21. data/lib/oddb2xml/parslet_compositions.rb +18 -1
  22. data/lib/oddb2xml/util.rb +25 -3
  23. data/lib/oddb2xml/version.rb +1 -1
  24. data/oddb2xml.gemspec +8 -5
  25. data/oddb2xml.xsd +1 -0
  26. data/spec/artikelstamm_spec.rb +383 -0
  27. data/spec/builder_spec.rb +147 -118
  28. data/spec/calc_spec.rb +3 -15
  29. data/spec/cli_spec.rb +24 -35
  30. data/spec/compare_spec.rb +24 -0
  31. data/spec/compressor_spec.rb +1 -3
  32. data/spec/data/Elexis_Artikelstamm_v5.xsd +513 -0
  33. data/spec/data/Preparations.xml +2200 -0
  34. data/spec/data/Publications.xls +0 -0
  35. data/spec/data/artikelstamm_N_010917.xml +39 -0
  36. data/spec/data/artikelstamm_N_011217.xml +17 -0
  37. data/spec/data/artikelstamm_P_010917.xml +86 -0
  38. data/spec/data/artikelstamm_P_011217.xml +63 -0
  39. data/spec/data/oddb2xml_files_lppv.txt +2 -0
  40. data/spec/data/refdata_NonPharma.xml +38 -0
  41. data/spec/data/refdata_Pharma.xml +220 -0
  42. data/spec/data/swissmedic_orphan.xlsx +0 -0
  43. data/spec/data/swissmedic_package.xlsx +0 -0
  44. data/spec/data/transfer.dat +59 -19
  45. data/spec/data/v5_first.xml +102 -0
  46. data/spec/data/v5_second.xml +184 -0
  47. data/spec/data_helper.rb +72 -0
  48. data/spec/downloader_spec.rb +19 -27
  49. data/spec/extractor_spec.rb +27 -33
  50. data/spec/fixtures/vcr_cassettes/artikelstamm.json +1 -0
  51. data/spec/options_spec.rb +73 -66
  52. data/spec/spec_helper.rb +73 -24
  53. data/test_options.rb +4 -2
  54. metadata +100 -21
  55. data/spec/data/XMLPublications.zip +0 -0
  56. data/spec/data/compressor/oddb_article.xml +0 -0
  57. data/spec/data/compressor/oddb_fi.xml +0 -0
  58. data/spec/data/compressor/oddb_fi_product.xml +0 -0
  59. data/spec/data/compressor/oddb_limitation.xml +0 -0
  60. data/spec/data/compressor/oddb_product.xml +0 -0
  61. data/spec/data/compressor/oddb_substance.xml +0 -0
@@ -11,7 +11,7 @@ require 'rubyXL'
11
11
  require 'date' # for today
12
12
 
13
13
  module Oddb2xml
14
-
14
+
15
15
  class Cli
16
16
  attr_reader :options
17
17
  SUBJECTS = %w[product article]
@@ -19,6 +19,7 @@ module Oddb2xml
19
19
  OPTIONALS = %w[fi fi_product]
20
20
  def initialize(args)
21
21
  @options = args
22
+ STDOUT.puts "\nStarting cli with from #{caller[1]} using #{@options}" if defined?(RSpec)
22
23
  Oddb2xml.save_options(@options)
23
24
  @mutex = Mutex.new
24
25
  # product
@@ -38,6 +39,7 @@ module Oddb2xml
38
39
  end
39
40
  def run
40
41
  threads = []
42
+ startTime = Time.now
41
43
  files2rm = Dir.glob(File.join(Downloads, '*'))
42
44
  FileUtils.rm_f(files2rm, :verbose => @options[:log]) if files2rm.size > 0 and not Oddb2xml.skip_download?
43
45
  if @options[:calc] and not @options[:extended]
@@ -79,46 +81,56 @@ module Oddb2xml
79
81
  exit
80
82
  end
81
83
  build
84
+ if @options[:artikelstamm]
85
+ elexis_v5_xsd = File.expand_path(File.join(__FILE__, '..', '..', '..', 'Elexis_Artikelstamm_v5.xsd'))
86
+ cmd = "xmllint --noout --schema #{elexis_v5_xsd} #{@_files[:artikelstamm]}"
87
+ if system(cmd)
88
+ puts "Validatied #{@_files[:artikelstamm]}"
89
+ else
90
+ puts "Validating failed using #{cmd}"
91
+ exit(2)
92
+ end
93
+ end
82
94
  compress if @options[:compress_ext]
83
- report
95
+ res = report
96
+ nrSecs = (Time.now - startTime).to_i
97
+ if defined?(RSpec) && (nrSecs).to_i > 10 then require 'pry'; binding.pry ; end
98
+ res
84
99
  end
85
100
  private
86
101
  def build
87
- Oddb2xml.log("Start build")
88
102
  begin
89
- @_files = {"calc"=>"oddb_calc.xml"} if @options[:calc] and not @options[:extended]
90
- files.each_pair do |sbj, file|
91
- builder = Builder.new(@options) do |builder|
92
- if @options[:calc] and not @options[:extended]
93
- builder.packs = @packs
94
- builder.subject = sbj
95
- elsif @options[:address]
96
- builder.subject = sbj
97
- builder.companies = @companies
98
- builder.people = @people
99
- else # product
100
- if @options[:format] != :dat
101
- refdata = {}
102
- types.each do |type|
103
- refdata.merge!(@refdata_types[type]) if @refdata_types[type]
104
- end
105
- builder.refdata = refdata
106
- builder.subject = sbj
107
- end
108
- # common sources
109
- builder.items = @items
110
- builder.flags = @flags
111
- builder.lppvs = @lppvs
112
- # optional sources
113
- builder.infos = @infos
114
- builder.packs = @packs
115
- # additional sources
116
- %w[actions orphan migel infos_zur_rose].each do |addition|
117
- builder.send("#{addition}=".intern, self.instance_variable_get("@#{addition}"))
103
+ @_files = {"calc"=>"oddb_calc.xml"} if @options[:calc] and not (@options[:extended] || @options[:artikelstamm])
104
+ builder = Builder.new(@options) do |builder|
105
+ if @options[:calc] and not (@options[:extended] || @options[:artikelstamm])
106
+ builder.packs = @packs
107
+ elsif @options[:address]
108
+ builder.companies = @companies
109
+ builder.people = @people
110
+ else # product
111
+ if @options[:format] != :dat
112
+ refdata = {}
113
+ types.each do |type|
114
+ refdata.merge!(@refdata_types[type]) if @refdata_types[type]
118
115
  end
116
+ builder.refdata = refdata
117
+ end
118
+ # common sources
119
+ builder.items = @items
120
+ builder.flags = @flags
121
+ builder.lppvs = @lppvs
122
+ # optional sources
123
+ builder.infos = @infos
124
+ builder.packs = @packs
125
+ # additional sources
126
+ %w[actions orphan migel infos_zur_rose].each do |addition|
127
+ builder.send("#{addition}=".intern, self.instance_variable_get("@#{addition}"))
119
128
  end
120
- builder.tag_suffix = @options[:tag_suffix]
121
129
  end
130
+ builder.tag_suffix = @options[:tag_suffix]
131
+ end
132
+ files.each_pair do |sbj, file|
133
+ builder.subject = sbj
122
134
  output = ''
123
135
  if !@options[:address] and (@options[:format] == :dat)
124
136
  types.each do |type|
@@ -187,14 +199,14 @@ module Oddb2xml
187
199
  when :orphan
188
200
  var = what.to_s
189
201
  begin # instead of Thread.new do
190
- downloader = SwissmedicDownloader.new(what)
202
+ downloader = SwissmedicDownloader.new(what, @options)
191
203
  bin = downloader.download
192
204
  Oddb2xml.log("SwissmedicDownloader #{var} #{bin} #{File.size(bin)} bytes")
193
205
  self.instance_variable_set(
194
206
  "@#{var}",
195
207
  items = SwissmedicExtractor.new(bin, what).to_arry
196
208
  )
197
- Oddb2xml.log("SwissmedicExtractor added #{items.size} #{var}. File #{bin} was #{File.size(bin)} bytes")
209
+ Oddb2xml.log("SwissmedicExtractor added #{items.size}")
198
210
  items
199
211
  end
200
212
  when :interaction
@@ -259,7 +271,7 @@ module Oddb2xml
259
271
  xml = downloader.download
260
272
  Oddb2xml.log("ZurroseDownloader xml #{xml.size} bytes")
261
273
  @mutex.synchronize do
262
- hsh = ZurroseExtractor.new(xml, @options[:extended]).to_hash
274
+ hsh = ZurroseExtractor.new(xml, @options[:extended], @options[:artikelstamm]).to_hash
263
275
  Oddb2xml.log("ZurroseExtractor added #{hsh.size} items from xml with #{xml.size} bytes")
264
276
  @infos_zur_rose = hsh
265
277
  end
@@ -302,7 +314,9 @@ module Oddb2xml
302
314
  unless @_files
303
315
  @_files = {}
304
316
  @_files[:calc] = "oddb_calc.xml" if @options[:calc]
305
- if @options[:address]
317
+ if @options[:artikelstamm]
318
+ @_files[:artikelstamm] = "artikelstamm_#{Date.today.strftime('%d%m%Y')}_v5.xml"
319
+ elsif @options[:address]
306
320
  @_files[:company] = "#{prefix}_betrieb.xml"
307
321
  @_files[:person] = "#{prefix}_medizinalperson.xml"
308
322
  elsif @options[:format] == :dat
@@ -336,41 +350,47 @@ module Oddb2xml
336
350
  lines << Calc.report_conversion
337
351
  lines << ParseComposition.report
338
352
  end
339
- unless @options[:address]
340
- types.each do |type|
341
- if @refdata_types[type]
342
- indices = @refdata_types[type].values.flatten.length
343
- if type == :nonpharma
344
- nonpharmas = @refdata_types[type].keys
345
- if SkipMigelDownloader
346
- indices + nonpharmas.length
353
+ if @options[:artikelstamm]
354
+ lines << "Generated artikelstamm.xml for Elexis"
355
+ lines += Builder.articlestamm_v5_info_lines
356
+ else
357
+ unless @options[:address]
358
+ types.each do |type|
359
+ if @refdata_types[type]
360
+ indices = @refdata_types[type].values.flatten.length
361
+
362
+ if type == :nonpharma
363
+ nonpharmas = @refdata_types[type].keys
364
+ if SkipMigelDownloader
365
+ indices + nonpharmas.length
366
+ else
367
+ migel_xls = @migel.values.compact.select{|m| !m[:pharmacode]}.map{|m| m[:pharmacode] }
368
+ indices += (migel_xls - nonpharmas).length # ignore duplicates, null
369
+ end
370
+ lines << sprintf("\tNonPharma products: %i", indices)
347
371
  else
348
- migel_xls = @migel.values.compact.select{|m| !m[:pharmacode]}.map{|m| m[:pharmacode] }
349
- indices += (migel_xls - nonpharmas).length # ignore duplicates, null
372
+ lines << sprintf("\tPharma products: %i", indices)
350
373
  end
351
- lines << sprintf("\tNonPharma products: %i", indices)
352
- else
353
- lines << sprintf("\tPharma products: %i", indices)
354
374
  end
355
375
  end
356
- end
357
- if @options[:extended]
358
- lines << sprintf("\tInformation items zur Rose: %i", @infos_zur_rose.length)
359
- end
360
- else
361
- {
362
- 'Betrieb' => :@companies,
363
- 'Person' => :@people
364
- }.each do |type, var|
365
- lines << sprintf(
366
- "#{type} addresses: %i", self.instance_variable_get(var).length)
376
+ if (@options[:extended] || @options[:artikelstamm])
377
+ lines << sprintf("\tInformation items zur Rose: %i", @infos_zur_rose.length)
378
+ end
379
+ else
380
+ {
381
+ 'Betrieb' => :@companies,
382
+ 'Person' => :@people
383
+ }.each do |type, var|
384
+ lines << sprintf(
385
+ "#{type} addresses: %i", self.instance_variable_get(var).length)
386
+ end
367
387
  end
368
388
  end
369
389
  puts lines.join("\n")
370
390
  end
371
391
  def types # RefData
372
392
  @_types ||=
373
- if @options[:nonpharma]
393
+ if @options[:nonpharma] || @options[:artikelstamm]
374
394
  [:pharma, :nonpharma]
375
395
  else
376
396
  [:pharma]
@@ -0,0 +1,189 @@
1
+ # encoding: utf-8
2
+ require 'xmlsimple'
3
+
4
+ module Oddb2xml
5
+ def self.log_timestamp(msg)
6
+ full_msg = "#{Time.now.strftime("%H:%M:%S")}: #{msg}"
7
+ puts full_msg
8
+ STDOUT.flush
9
+ full_msg
10
+ end
11
+ class StammXML
12
+ V3_NAME_REG = /_([N,P])_/
13
+ attr_accessor :components
14
+ attr_reader :keys, :sub_key_names, :filename, :basename, :version, :hash
15
+ def initialize(filename, components = ['ITEMS'])
16
+ raise "File #{filename} must exist" unless File.exist?(filename)
17
+ @filename = filename
18
+ @basename = File.basename(filename)
19
+ @version = V3_NAME_REG.match(filename) ? 3 : 5
20
+ @components = components
21
+ if @version == 5
22
+ @hash = load_file(@filename)
23
+ else
24
+ raise "Unsupported version #{@version}"
25
+ end
26
+ end
27
+ def self.get_component_key_name(component_name)
28
+ return 'LIMNAMEBAG' if /LIMITATION/i.match(component_name)
29
+ return 'PRODNO' if /PRODUCT/i.match(component_name)
30
+ return 'GTIN' if /ITEM/i.match(component_name)
31
+ raise "Cannot determine keyname for component #{component_name}"
32
+ end
33
+ def get_limitation_from_v5(item)
34
+ get_item('PRODUCTS', item['PRODNO'].first.to_i)['LIMNAMEBAG'] ? ['true'] : nil
35
+ end
36
+ def get_field_from_v5_product(item, field_name)
37
+ get_item('PRODUCTS', item['PRODNO'].first.to_i)[field_name]
38
+ end
39
+ def get_items(component_name)
40
+ if @version == 3
41
+ items = @hash[component_name]
42
+ else
43
+ items = @hash[component_name].first.values.first
44
+ end
45
+ items
46
+ end
47
+ def get_item(component_name, id)
48
+ keyname = StammXML.get_component_key_name(component_name)
49
+ get_items(component_name).find{|item| item[keyname].first.to_i == id}
50
+ end
51
+ def load_file(name)
52
+ Oddb2xml.log_timestamp "Reading #{name} #{(File.size(name)/1024/1024).to_i} MB. This may take some time"
53
+ XmlSimple.xml_in(IO.read(name))
54
+ end
55
+ end
56
+ class CompareV5
57
+ DEFAULTS = {
58
+ :components => ["PRODUCTS", "LIMITATIONS", "ITEMS",],
59
+ :fields_to_ignore => ['COMP', 'DOSAGE_FORMF', 'MEASUREF'],
60
+ :fields_as_floats => [ 'PEXT', 'PEXF', 'PPUB' ],
61
+ :min_diff_for_floats => 0.01,
62
+ }
63
+ def initialize(left, right, options = DEFAULTS.clone)
64
+ @options = options
65
+ @left = StammXML.new(left, @options[:components])
66
+ @right = StammXML.new(right, @options[:components])
67
+ @diff_stat = {}
68
+ @occurrences = {}
69
+ @report = []
70
+ end
71
+ def get_keys(items, key='GTIN')
72
+ items.collect{|item| item[key].first.to_i }
73
+ end
74
+ def get_names(items)
75
+ items.collect{|item| item.keys}.flatten.uniq.sort
76
+ end
77
+ def compare
78
+ show_header("Start comparing #{@left.filename} with #{@right.filename}")
79
+ (@left.components & @right.components).each do |name|
80
+ begin
81
+ puts "\n#{Time.now.strftime("%H:%M:%S")}: Comparing #{name} in #{@left.basename} with #{@right.basename}"
82
+ key = StammXML.get_component_key_name(name)
83
+ left_items = @left.get_items(name)
84
+ next unless left_items
85
+ right_items = @right.get_items(name)
86
+ next unless right_items
87
+ @diff_stat[name] = {}
88
+ @occurrences[name] = {}
89
+ @diff_stat[name][NR_COMPARED] = 0
90
+ l_names = get_names(left_items)
91
+ r_names = get_names(right_items)
92
+ compare_names = l_names & r_names
93
+ l_keys = get_keys(left_items, key)
94
+ r_keys = get_keys(right_items, key)
95
+ (l_keys & r_keys).each do |id|
96
+ compare_details(name, compare_names, id)
97
+ end
98
+ key_results_details(name, compare_names, l_keys, r_keys)
99
+ rescue => error
100
+ puts "Execution failed with #{error}"
101
+ end
102
+ end
103
+ show_header("Summary comparing #{@left.filename} with #{@right.filename}")
104
+ puts "Ignored differences in #{@options[:fields_to_ignore]}. Signaled when differences in #{@options[:fields_as_floats]} were bigger than #{@options[:min_diff_for_floats]}"
105
+ puts @report.join("\n")
106
+ @diff_stat.each do |component, stats|
107
+ puts "\nFor #{stats[NR_COMPARED]} #{component} we have the following number of differences per field"
108
+ stats.each do |name, nr|
109
+ next if name.eql?(NR_COMPARED)
110
+ next if @options[:fields_to_ignore].index(name)
111
+ puts " #{name.ljust(20)} #{nr} of #{@occurrences[component][name]}"
112
+ end
113
+ end
114
+ @diff_stat
115
+ rescue => error
116
+ puts "Execution failed with #{error}"
117
+ raise error
118
+ end
119
+ private
120
+ NR_COMPARED = 'NR_COMPARED'
121
+ COUNT = '_count'
122
+ def show_header(header)
123
+ text = Oddb2xml.log_timestamp(header)
124
+ pad = 5
125
+ puts
126
+ puts '-'*(text.length+2*pad)
127
+ puts ''.ljust(pad) + text
128
+ puts '-'*(text.length+2*pad)
129
+ puts
130
+ end
131
+ def compare_details(component_name, compare_names, id)
132
+ l_item = @left.get_item(component_name, id)
133
+ r_item = @right.get_item(component_name, id)
134
+ found_one = false
135
+ length = 32
136
+ found = false
137
+ detail_name = l_item['DSCR'] ? l_item['DSCR'].first[0..length-1].rjust(length) : ''.rjust(length)
138
+ details = "Diff in #{id.to_s.ljust(15)} #{detail_name}"
139
+ diff_name = component_name
140
+ diff_name += 'S' unless /S$/.match(diff_name)
141
+ @diff_stat[diff_name] ||= {}
142
+ @occurrences[diff_name] ||= {}
143
+ @diff_stat[diff_name][NR_COMPARED] ||= 0
144
+ @diff_stat[diff_name][NR_COMPARED] += 1
145
+ l_item.keys.each do |sub_key|
146
+ next if @options[:fields_to_ignore].index(sub_key)
147
+ @diff_stat[diff_name][sub_key] ||= 0
148
+ @occurrences[diff_name][sub_key] ||= 0
149
+ @occurrences[diff_name][sub_key] += 1
150
+ r_value = r_item[sub_key]
151
+ l_value = l_item[sub_key]
152
+ if @options[:fields_as_floats].index(sub_key)
153
+ l_float = l_value ? l_value.first.to_f : 0.0
154
+ r_float = r_value ? r_value.first.to_f : 0.0
155
+ next if (l_float - r_float).abs < @options[:min_diff_for_floats]
156
+ end
157
+ next if (r_value.is_a?(Array) && '--missing--'.eql?(r_value.first)) || (l_value.is_a?(Array) && '--missing--'.eql?(l_value.first))
158
+ # TODO: get_field_from_v5_product
159
+ next if r_value.to_s.eql?(l_value.to_s)
160
+ next if r_value.to_s.upcase.eql?(l_value.to_s.upcase) && @options[:case_insensitive]
161
+ details += " #{sub_key}: '#{l_value}' != '#{r_value}'"
162
+ found = found_one = true
163
+ @diff_stat[diff_name][sub_key] += 1
164
+ end
165
+ puts details.gsub(/[\[\]]/,'') if found
166
+ end
167
+
168
+ def show_keys(keys, batch_size = 20)
169
+ 0.upto(keys.size) do |idx|
170
+ next unless idx % batch_size == 0
171
+ puts ' ' + keys[idx..(idx + batch_size-1)].join(' ')
172
+ end
173
+ end
174
+ def key_results_details(component_name, compare_names, l_keys, r_keys)
175
+ component_name += 'S' unless /S$/.match(component_name)
176
+ @report << "#{component_name}: Found #{l_keys.size} items only in #{@left.basename} #{r_keys.size} items only in #{@right.basename}, compared #{@diff_stat[component_name][NR_COMPARED]} items"
177
+ keys = r_keys - l_keys
178
+ head = "#{component_name}: #{(keys).size} keys only in #{@right.basename}"
179
+ puts "#{head}: Keys were #{keys.size}"
180
+ show_keys(keys)
181
+ @report << head
182
+ keys = l_keys - r_keys
183
+ head = "#{component_name}: #{(keys).size} keys only in #{@left.basename}"
184
+ puts "#{head}: Keys were #{keys.size}"
185
+ show_keys(keys)
186
+ @report << head
187
+ end
188
+ end
189
+ end
@@ -1,7 +1,7 @@
1
1
  # encoding: utf-8
2
2
 
3
3
  require 'zlib'
4
- require 'archive/tar/minitar'
4
+ require 'minitar'
5
5
  require 'zip'
6
6
 
7
7
  module Oddb2xml
@@ -35,9 +35,12 @@ module Oddb2xml
35
35
  end
36
36
  end
37
37
  end
38
- if File.exists? @compress_file and not defined?(Rspec)
38
+ if File.exists? @compress_file
39
+ puts "#{__LINE__}: @compress_file"
39
40
  @contents.each do |file|
40
- FileUtils.rm(file)
41
+ @tmpfile = file
42
+ puts "#{__LINE__}: @tmpfile"
43
+ FileUtils.rm(file) if file && File.exists?(file)
41
44
  end
42
45
  end
43
46
  rescue Errno::ENOENT, StandardError => e
@@ -12,24 +12,21 @@ SkipMigelDownloader = true # https://github.com/zdavatz/oddb2xml_files/raw/mast
12
12
  module Oddb2xml
13
13
  module DownloadMethod
14
14
  private
15
- def download_as(file, option='r')
15
+ def download_as(file, option='w+')
16
16
  tempFile = File.join(WorkDir, File.basename(file))
17
- file2save = File.join(Downloads, File.basename(file))
18
- Oddb2xml.log "download_as file #{file2save} via #{tempFile} from #{@url}"
17
+ @file2save = File.join(Downloads, File.basename(file))
18
+ report_download(@url, @file2save)
19
19
  data = nil
20
- FileUtils.rm_f(tempFile, :verbose => false)
21
20
  if Oddb2xml.skip_download(file)
22
21
  io = File.open(file, option)
23
22
  data = io.read
24
23
  else
25
24
  begin
26
- response = @agent.get(@url)
27
- response.save_as(file)
28
- response = nil # win
29
25
  io = File.open(file, option)
30
- data = io.read
31
- rescue Timeout::Error, Errno::ETIMEDOUT
32
- retrievable? ? retry : raise
26
+ data = open(@url).read
27
+ io.write(data)
28
+ rescue => error
29
+ puts "error #{error} while fetching #{@url}"
33
30
  ensure
34
31
  io.close if io and !io.closed? # win
35
32
  Oddb2xml.download_finished(tempFile)
@@ -39,7 +36,7 @@ module Oddb2xml
39
36
  end
40
37
  end
41
38
  class Downloader
42
- attr_reader :type, :agent
39
+ attr_reader :type, :agent, :url; :file2save
43
40
  def initialize(options={}, url=nil)
44
41
  @options = options
45
42
  @url = url
@@ -48,6 +45,12 @@ module Oddb2xml
48
45
  Oddb2xml.log "Downloader from #{@url} for #{self.class}"
49
46
  init
50
47
  end
48
+ def report_download(url, file)
49
+ Oddb2xml.log sprintf("%-20s: download_as %-24s from %s",
50
+ self.class.to_s.split('::').last,
51
+ File.basename(file),
52
+ url)
53
+ end
51
54
  def init
52
55
  @agent = Mechanize.new
53
56
  @agent.user_agent = 'Mozilla/5.0 (X11; Linux x86_64; rv:16.0) Gecko/20100101 Firefox/16.0'
@@ -79,6 +82,7 @@ module Oddb2xml
79
82
  Dir.glob(File.join(Downloads, '*')).each { |name| if target.match(name) then entry = name; break end }
80
83
  if entry
81
84
  dest = "#{Downloads}/#{File.basename(entry)}"
85
+ @file2save = dest
82
86
  if File.exists?(dest)
83
87
  Oddb2xml.log "read_xml_from_zip return content of #{dest} #{File.size(dest)} bytes "
84
88
  return IO.read(dest)
@@ -133,8 +137,9 @@ module Oddb2xml
133
137
  include DownloadMethod
134
138
  def download
135
139
  @url ||= 'https://download.epha.ch/cleaned/matrix.csv'
136
- content = download_as('epha_interactions.csv', 'r')
137
- FileUtils.rm_f('epha_interactions.csv', :verbose => true)
140
+ file = 'epha_interactions.csv'
141
+ content = download_as(file, 'w+')
142
+ FileUtils.rm_f(file, :verbose => false)
138
143
  content
139
144
  end
140
145
  end
@@ -142,36 +147,27 @@ module Oddb2xml
142
147
  include DownloadMethod
143
148
  def download
144
149
  @url ||= 'https://raw.githubusercontent.com/zdavatz/oddb2xml_files/master/LPPV.txt'
145
- download_as('oddb2xml_files_lppv.txt', 'r')
150
+ download_as('oddb2xml_files_lppv.txt', 'w+')
146
151
  end
147
152
  end
148
153
  class ZurroseDownloader < Downloader
149
154
  include DownloadMethod
150
155
  def download
151
156
  @url ||= 'http://pillbox.oddb.org/TRANSFER.ZIP'
152
- unless @url =~ /^http/
153
- io = File.open(@url, 'r:iso-8859-1:utf-8')
154
- content = io.read
155
- Oddb2xml.log("ZurroseDownloader #{__LINE__} download #{@url} @url returns #{content.bytes}")
156
- content
157
- else
158
- file = File.join(Downloads, 'transfer.zip')
159
- unless Oddb2xml.skip_download(file)
160
- Oddb2xml.log "ZurroseDownloader #{__LINE__}: #{file}"
161
- begin
162
- response = @agent.get(@url)
163
- response.save_as(file)
164
- response = nil # win
165
- rescue Timeout::Error, Errno::ETIMEDOUT
166
- retrievable? ? retry : raise
167
- ensure
168
- Oddb2xml.download_finished(file)
169
- end
170
- end
171
- read_xml_from_zip(/transfer.dat/, file)
172
- dest = File.join(Downloads, 'transfer.dat')
173
- File.open(dest, 'r:iso-8859-1:utf-8').read
157
+ zipfile = File.join(WorkDir, 'transfer.zip')
158
+ download_as(zipfile)
159
+ dest = File.join(Downloads, 'transfer.dat')
160
+ cmd = "unzip -o '#{zipfile}' -d '#{Downloads}'"
161
+ system(cmd)
162
+ if @options[:artikelstamm]
163
+ cmd = "iconv -f ISO8859-1 -t utf-8 -o #{dest.sub('.dat','.utf8')} #{dest}"
164
+ Oddb2xml.log(cmd)
165
+ system(cmd)
174
166
  end
167
+ # read file and convert it to utf-8
168
+ File.open(dest, 'r:iso-8859-1:utf-8').read
169
+ ensure
170
+ FileUtils.rm(zipfile) if File.exist?(dest) && File.exist?(zipfile)
175
171
  end
176
172
  end
177
173
  class MedregbmDownloader < Downloader
@@ -190,30 +186,35 @@ module Oddb2xml
190
186
  super({}, url)
191
187
  end
192
188
  def download
193
- download_as("medregbm_#{@type.to_s}.txt", 'r:iso-8859-1:utf-8')
189
+ file = "medregbm_#{@type.to_s}.txt"
190
+ download_as(file, 'w+:iso-8859-1:utf-8')
191
+ report_download(@url, file)
192
+ FileUtils.rm_f(file, :verbose => false) # we need it only in the download
193
+ file
194
194
  end
195
195
  end
196
196
  class BagXmlDownloader < Downloader
197
+ include DownloadMethod
197
198
  def init
198
199
  super
199
200
  @url ||= 'http://bag.e-mediat.net/SL2007.Web.External/File.axd?file=XMLPublications.zip'
200
201
  end
201
202
  def download
202
203
  file = File.join(WorkDir, 'XMLPublications.zip')
203
- Oddb2xml.log "BagXmlDownloader #{__LINE__}: #{file}"
204
- unless Oddb2xml.skip_download(file)
205
- Oddb2xml.log "BagXmlDownloader #{__LINE__}: #{file}"
206
- begin
207
- response = @agent.get(@url)
208
- response.save_as(file)
209
- response = nil # win
210
- rescue Timeout::Error, Errno::ETIMEDOUT
211
- retrievable? ? retry : raise
212
- ensure
213
- Oddb2xml.download_finished(file)
214
- end
204
+ download_as(file)
205
+ report_download(@url, file)
206
+ if defined?(RSpec)
207
+ src = File.join(Oddb2xml::SpecData, 'Preparations.xml')
208
+ content = File.read(src)
209
+ FileUtils.cp(src, File.join(Downloads, File.basename(file)))
210
+ else
211
+ content = read_xml_from_zip(/Preparations.xml/, File.join(Downloads, File.basename(file)))
212
+ end
213
+ if @options[:artikelstamm]
214
+ cmd = "xmllint --format --output Preparations.xml Preparations.xml"
215
+ Oddb2xml.log(cmd)
216
+ system(cmd)
215
217
  end
216
- content = read_xml_from_zip(/Preparations.xml/, File.join(Downloads, File.basename(file)))
217
218
  FileUtils.rm_f(file, :verbose => false) unless defined?(RSpec)
218
219
  content
219
220
  end
@@ -236,7 +237,7 @@ module Oddb2xml
236
237
  def download
237
238
  begin
238
239
  filename = "refdata_#{@type}.xml"
239
- file2save = File.join(Downloads, "refdata_#{@type}.xml")
240
+ @file2save = File.join(Downloads, "refdata_#{@type}.xml")
240
241
  soap = %(<?xml version="1.0" encoding="UTF-8"?>
241
242
  <SOAP-ENV:Envelope xmlns:SOAP-ENV="http://schemas.xmlsoap.org/soap/envelope/" xmlns:ns1="http://refdatabase.refdata.ch/Article_in" xmlns:ns2="http://refdatabase.refdata.ch/">
242
243
  <SOAP-ENV:Body>
@@ -247,14 +248,21 @@ module Oddb2xml
247
248
  </SOAP-ENV:Envelope>
248
249
  </ns1:ATYPE></ns2:DownloadArticleInput></SOAP-ENV:Body>
249
250
  )
250
- return IO.read(file2save) if Oddb2xml.skip_download? and File.exists?(file2save)
251
- FileUtils.rm_f(file2save, :verbose => false)
251
+ report_download(@url, @file2save)
252
+ return IO.read(@file2save) if Oddb2xml.skip_download? and File.exists?(@file2save)
253
+ FileUtils.rm_f(@file2save, :verbose => false)
252
254
  response = @client.call(:download, :xml => soap)
253
255
  if response.success?
254
256
  if xml = response.to_xml
257
+ xml = File.read(File.join(Oddb2xml::SpecData, File.basename(@file2save))) if defined?(RSpec)
255
258
  response = nil # win
256
259
  FileUtils.makedirs(Downloads)
257
- File.open(file2save, 'w+') { |file| file.write xml }
260
+ File.open(@file2save, 'w+') { |file| file.write xml }
261
+ if @options[:artikelstamm]
262
+ cmd = "xmllint --format --output #{@file2save} #{@file2save}"
263
+ Oddb2xml.log(cmd)
264
+ system(cmd)
265
+ end
258
266
  else
259
267
  # received broken data or internal error
260
268
  raise StandardError
@@ -271,6 +279,7 @@ module Oddb2xml
271
279
  end
272
280
  end
273
281
  class SwissmedicDownloader < Downloader
282
+ include DownloadMethod
274
283
  def initialize(type=:orphan, options = {})
275
284
  @type = type
276
285
  @options = options
@@ -282,23 +291,28 @@ module Oddb2xml
282
291
  end
283
292
  end
284
293
  def download
285
- @type == file = File.join(Oddb2xml::WorkDir, "swissmedic_#{@type}.xlsx")
286
- if @options[:calc] and @options[:skip_download] and File.exists?(file) and (Time.now-File.ctime(file)).to_i < 24*60*60
287
- Oddb2xml.log "SwissmedicDownloader #{__LINE__}: Skip downloading #{file} #{File.size(file)} bytes"
288
- return File.expand_path(file)
294
+ @file2save = File.join(Oddb2xml::WorkDir, "swissmedic_#{@type}.xlsx")
295
+ report_download(@url, @file2save)
296
+ if @options[:calc] and @options[:skip_download] and File.exists?(@file2save) and (Time.now-File.ctime(@file2save)).to_i < 24*60*60
297
+ Oddb2xml.log "SwissmedicDownloader #{__LINE__}: Skip downloading #{@file2save} #{File.size(@file2save)} bytes"
298
+ return File.expand_path(@file2save)
289
299
  end
290
300
  begin
291
- FileUtils.rm(File.expand_path(file), :verbose => !defined?(RSpec)) if File.exists?(File.expand_path(file))
292
- File.open(file, 'w+') do |output|
293
- output.write open(@direct_url_link).read
301
+ FileUtils.rm(File.expand_path(@file2save), :verbose => !defined?(RSpec)) if File.exists?(File.expand_path(@file2save))
302
+ @url = @direct_url_link
303
+ download_as(@file2save, 'w+')
304
+ if @options[:artikelstamm]
305
+ cmd = "ssconvert '#{@file2save}' '#{File.join(Downloads, File.basename(@file2save).sub(/\.xls.*/, '.csv'))}' 2> /dev/null"
306
+ Oddb2xml.log(cmd)
307
+ system(cmd)
294
308
  end
295
- return File.expand_path(file)
309
+ return File.expand_path(@file2save)
296
310
  rescue Timeout::Error, Errno::ETIMEDOUT
297
311
  retrievable? ? retry : raise
298
312
  ensure
299
- Oddb2xml.download_finished(file, false)
313
+ Oddb2xml.download_finished(@file2save, false)
300
314
  end
301
- return File.expand_path(file)
315
+ return File.expand_path(@file2save)
302
316
  end
303
317
  end
304
318
  class SwissmedicInfoDownloader < Downloader
@@ -309,6 +323,7 @@ module Oddb2xml
309
323
  end
310
324
  def download
311
325
  file = File.join(Downloads, "swissmedic_info.zip")
326
+ report_download(@url, file)
312
327
  FileUtils.rm_f(file, :verbose => false) unless Oddb2xml.skip_download?
313
328
  begin
314
329
  response = nil