oddb2xml 2.5.0 → 2.5.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (61)
  1. checksums.yaml +4 -4
  2. data/.travis.yml +1 -1
  3. data/Elexis_Artikelstamm_v003.xsd +387 -0
  4. data/Elexis_Artikelstamm_v5.xsd +513 -0
  5. data/Gemfile +2 -6
  6. data/History.txt +11 -0
  7. data/README.md +35 -27
  8. data/artikelstamm.md +68 -0
  9. data/bin/compare_v5 +41 -0
  10. data/bin/oddb2xml +3 -15
  11. data/data/article_overrides.yaml +51859 -0
  12. data/data/gtin2ignore.yaml +30510 -0
  13. data/data/product_overrides.yaml +4 -0
  14. data/lib/oddb2xml/builder.rb +543 -192
  15. data/lib/oddb2xml/cli.rb +82 -62
  16. data/lib/oddb2xml/compare.rb +189 -0
  17. data/lib/oddb2xml/compressor.rb +6 -3
  18. data/lib/oddb2xml/downloader.rb +79 -64
  19. data/lib/oddb2xml/extractor.rb +67 -40
  20. data/lib/oddb2xml/options.rb +76 -77
  21. data/lib/oddb2xml/parslet_compositions.rb +18 -1
  22. data/lib/oddb2xml/util.rb +25 -3
  23. data/lib/oddb2xml/version.rb +1 -1
  24. data/oddb2xml.gemspec +8 -5
  25. data/oddb2xml.xsd +1 -0
  26. data/spec/artikelstamm_spec.rb +383 -0
  27. data/spec/builder_spec.rb +147 -118
  28. data/spec/calc_spec.rb +3 -15
  29. data/spec/cli_spec.rb +24 -35
  30. data/spec/compare_spec.rb +24 -0
  31. data/spec/compressor_spec.rb +1 -3
  32. data/spec/data/Elexis_Artikelstamm_v5.xsd +513 -0
  33. data/spec/data/Preparations.xml +2200 -0
  34. data/spec/data/Publications.xls +0 -0
  35. data/spec/data/artikelstamm_N_010917.xml +39 -0
  36. data/spec/data/artikelstamm_N_011217.xml +17 -0
  37. data/spec/data/artikelstamm_P_010917.xml +86 -0
  38. data/spec/data/artikelstamm_P_011217.xml +63 -0
  39. data/spec/data/oddb2xml_files_lppv.txt +2 -0
  40. data/spec/data/refdata_NonPharma.xml +38 -0
  41. data/spec/data/refdata_Pharma.xml +220 -0
  42. data/spec/data/swissmedic_orphan.xlsx +0 -0
  43. data/spec/data/swissmedic_package.xlsx +0 -0
  44. data/spec/data/transfer.dat +59 -19
  45. data/spec/data/v5_first.xml +102 -0
  46. data/spec/data/v5_second.xml +184 -0
  47. data/spec/data_helper.rb +72 -0
  48. data/spec/downloader_spec.rb +19 -27
  49. data/spec/extractor_spec.rb +27 -33
  50. data/spec/fixtures/vcr_cassettes/artikelstamm.json +1 -0
  51. data/spec/options_spec.rb +73 -66
  52. data/spec/spec_helper.rb +73 -24
  53. data/test_options.rb +4 -2
  54. metadata +100 -21
  55. data/spec/data/XMLPublications.zip +0 -0
  56. data/spec/data/compressor/oddb_article.xml +0 -0
  57. data/spec/data/compressor/oddb_fi.xml +0 -0
  58. data/spec/data/compressor/oddb_fi_product.xml +0 -0
  59. data/spec/data/compressor/oddb_limitation.xml +0 -0
  60. data/spec/data/compressor/oddb_product.xml +0 -0
  61. data/spec/data/compressor/oddb_substance.xml +0 -0
@@ -11,7 +11,7 @@ require 'rubyXL'
11
11
  require 'date' # for today
12
12
 
13
13
  module Oddb2xml
14
-
14
+
15
15
  class Cli
16
16
  attr_reader :options
17
17
  SUBJECTS = %w[product article]
@@ -19,6 +19,7 @@ module Oddb2xml
19
19
  OPTIONALS = %w[fi fi_product]
20
20
  def initialize(args)
21
21
  @options = args
22
+ STDOUT.puts "\nStarting cli with from #{caller[1]} using #{@options}" if defined?(RSpec)
22
23
  Oddb2xml.save_options(@options)
23
24
  @mutex = Mutex.new
24
25
  # product
@@ -38,6 +39,7 @@ module Oddb2xml
38
39
  end
39
40
  def run
40
41
  threads = []
42
+ startTime = Time.now
41
43
  files2rm = Dir.glob(File.join(Downloads, '*'))
42
44
  FileUtils.rm_f(files2rm, :verbose => @options[:log]) if files2rm.size > 0 and not Oddb2xml.skip_download?
43
45
  if @options[:calc] and not @options[:extended]
@@ -79,46 +81,56 @@ module Oddb2xml
79
81
  exit
80
82
  end
81
83
  build
84
+ if @options[:artikelstamm]
85
+ elexis_v5_xsd = File.expand_path(File.join(__FILE__, '..', '..', '..', 'Elexis_Artikelstamm_v5.xsd'))
86
+ cmd = "xmllint --noout --schema #{elexis_v5_xsd} #{@_files[:artikelstamm]}"
87
+ if system(cmd)
88
+ puts "Validatied #{@_files[:artikelstamm]}"
89
+ else
90
+ puts "Validating failed using #{cmd}"
91
+ exit(2)
92
+ end
93
+ end
82
94
  compress if @options[:compress_ext]
83
- report
95
+ res = report
96
+ nrSecs = (Time.now - startTime).to_i
97
+ if defined?(RSpec) && (nrSecs).to_i > 10 then require 'pry'; binding.pry ; end
98
+ res
84
99
  end
85
100
  private
86
101
  def build
87
- Oddb2xml.log("Start build")
88
102
  begin
89
- @_files = {"calc"=>"oddb_calc.xml"} if @options[:calc] and not @options[:extended]
90
- files.each_pair do |sbj, file|
91
- builder = Builder.new(@options) do |builder|
92
- if @options[:calc] and not @options[:extended]
93
- builder.packs = @packs
94
- builder.subject = sbj
95
- elsif @options[:address]
96
- builder.subject = sbj
97
- builder.companies = @companies
98
- builder.people = @people
99
- else # product
100
- if @options[:format] != :dat
101
- refdata = {}
102
- types.each do |type|
103
- refdata.merge!(@refdata_types[type]) if @refdata_types[type]
104
- end
105
- builder.refdata = refdata
106
- builder.subject = sbj
107
- end
108
- # common sources
109
- builder.items = @items
110
- builder.flags = @flags
111
- builder.lppvs = @lppvs
112
- # optional sources
113
- builder.infos = @infos
114
- builder.packs = @packs
115
- # additional sources
116
- %w[actions orphan migel infos_zur_rose].each do |addition|
117
- builder.send("#{addition}=".intern, self.instance_variable_get("@#{addition}"))
103
+ @_files = {"calc"=>"oddb_calc.xml"} if @options[:calc] and not (@options[:extended] || @options[:artikelstamm])
104
+ builder = Builder.new(@options) do |builder|
105
+ if @options[:calc] and not (@options[:extended] || @options[:artikelstamm])
106
+ builder.packs = @packs
107
+ elsif @options[:address]
108
+ builder.companies = @companies
109
+ builder.people = @people
110
+ else # product
111
+ if @options[:format] != :dat
112
+ refdata = {}
113
+ types.each do |type|
114
+ refdata.merge!(@refdata_types[type]) if @refdata_types[type]
118
115
  end
116
+ builder.refdata = refdata
117
+ end
118
+ # common sources
119
+ builder.items = @items
120
+ builder.flags = @flags
121
+ builder.lppvs = @lppvs
122
+ # optional sources
123
+ builder.infos = @infos
124
+ builder.packs = @packs
125
+ # additional sources
126
+ %w[actions orphan migel infos_zur_rose].each do |addition|
127
+ builder.send("#{addition}=".intern, self.instance_variable_get("@#{addition}"))
119
128
  end
120
- builder.tag_suffix = @options[:tag_suffix]
121
129
  end
130
+ builder.tag_suffix = @options[:tag_suffix]
131
+ end
132
+ files.each_pair do |sbj, file|
133
+ builder.subject = sbj
122
134
  output = ''
123
135
  if !@options[:address] and (@options[:format] == :dat)
124
136
  types.each do |type|
@@ -187,14 +199,14 @@ module Oddb2xml
187
199
  when :orphan
188
200
  var = what.to_s
189
201
  begin # instead of Thread.new do
190
- downloader = SwissmedicDownloader.new(what)
202
+ downloader = SwissmedicDownloader.new(what, @options)
191
203
  bin = downloader.download
192
204
  Oddb2xml.log("SwissmedicDownloader #{var} #{bin} #{File.size(bin)} bytes")
193
205
  self.instance_variable_set(
194
206
  "@#{var}",
195
207
  items = SwissmedicExtractor.new(bin, what).to_arry
196
208
  )
197
- Oddb2xml.log("SwissmedicExtractor added #{items.size} #{var}. File #{bin} was #{File.size(bin)} bytes")
209
+ Oddb2xml.log("SwissmedicExtractor added #{items.size}")
198
210
  items
199
211
  end
200
212
  when :interaction
@@ -259,7 +271,7 @@ module Oddb2xml
259
271
  xml = downloader.download
260
272
  Oddb2xml.log("ZurroseDownloader xml #{xml.size} bytes")
261
273
  @mutex.synchronize do
262
- hsh = ZurroseExtractor.new(xml, @options[:extended]).to_hash
274
+ hsh = ZurroseExtractor.new(xml, @options[:extended], @options[:artikelstamm]).to_hash
263
275
  Oddb2xml.log("ZurroseExtractor added #{hsh.size} items from xml with #{xml.size} bytes")
264
276
  @infos_zur_rose = hsh
265
277
  end
@@ -302,7 +314,9 @@ module Oddb2xml
302
314
  unless @_files
303
315
  @_files = {}
304
316
  @_files[:calc] = "oddb_calc.xml" if @options[:calc]
305
- if @options[:address]
317
+ if @options[:artikelstamm]
318
+ @_files[:artikelstamm] = "artikelstamm_#{Date.today.strftime('%d%m%Y')}_v5.xml"
319
+ elsif @options[:address]
306
320
  @_files[:company] = "#{prefix}_betrieb.xml"
307
321
  @_files[:person] = "#{prefix}_medizinalperson.xml"
308
322
  elsif @options[:format] == :dat
@@ -336,41 +350,47 @@ module Oddb2xml
336
350
  lines << Calc.report_conversion
337
351
  lines << ParseComposition.report
338
352
  end
339
- unless @options[:address]
340
- types.each do |type|
341
- if @refdata_types[type]
342
- indices = @refdata_types[type].values.flatten.length
343
- if type == :nonpharma
344
- nonpharmas = @refdata_types[type].keys
345
- if SkipMigelDownloader
346
- indices + nonpharmas.length
353
+ if @options[:artikelstamm]
354
+ lines << "Generated artikelstamm.xml for Elexis"
355
+ lines += Builder.articlestamm_v5_info_lines
356
+ else
357
+ unless @options[:address]
358
+ types.each do |type|
359
+ if @refdata_types[type]
360
+ indices = @refdata_types[type].values.flatten.length
361
+
362
+ if type == :nonpharma
363
+ nonpharmas = @refdata_types[type].keys
364
+ if SkipMigelDownloader
365
+ indices + nonpharmas.length
366
+ else
367
+ migel_xls = @migel.values.compact.select{|m| !m[:pharmacode]}.map{|m| m[:pharmacode] }
368
+ indices += (migel_xls - nonpharmas).length # ignore duplicates, null
369
+ end
370
+ lines << sprintf("\tNonPharma products: %i", indices)
347
371
  else
348
- migel_xls = @migel.values.compact.select{|m| !m[:pharmacode]}.map{|m| m[:pharmacode] }
349
- indices += (migel_xls - nonpharmas).length # ignore duplicates, null
372
+ lines << sprintf("\tPharma products: %i", indices)
350
373
  end
351
- lines << sprintf("\tNonPharma products: %i", indices)
352
- else
353
- lines << sprintf("\tPharma products: %i", indices)
354
374
  end
355
375
  end
356
- end
357
- if @options[:extended]
358
- lines << sprintf("\tInformation items zur Rose: %i", @infos_zur_rose.length)
359
- end
360
- else
361
- {
362
- 'Betrieb' => :@companies,
363
- 'Person' => :@people
364
- }.each do |type, var|
365
- lines << sprintf(
366
- "#{type} addresses: %i", self.instance_variable_get(var).length)
376
+ if (@options[:extended] || @options[:artikelstamm])
377
+ lines << sprintf("\tInformation items zur Rose: %i", @infos_zur_rose.length)
378
+ end
379
+ else
380
+ {
381
+ 'Betrieb' => :@companies,
382
+ 'Person' => :@people
383
+ }.each do |type, var|
384
+ lines << sprintf(
385
+ "#{type} addresses: %i", self.instance_variable_get(var).length)
386
+ end
367
387
  end
368
388
  end
369
389
  puts lines.join("\n")
370
390
  end
371
391
  def types # RefData
372
392
  @_types ||=
373
- if @options[:nonpharma]
393
+ if @options[:nonpharma] || @options[:artikelstamm]
374
394
  [:pharma, :nonpharma]
375
395
  else
376
396
  [:pharma]
@@ -0,0 +1,189 @@
1
+ # encoding: utf-8
2
+ require 'xmlsimple'
3
+
4
+ module Oddb2xml
5
+ def self.log_timestamp(msg)
6
+ full_msg = "#{Time.now.strftime("%H:%M:%S")}: #{msg}"
7
+ puts full_msg
8
+ STDOUT.flush
9
+ full_msg
10
+ end
11
+ class StammXML
12
+ V3_NAME_REG = /_([N,P])_/
13
+ attr_accessor :components
14
+ attr_reader :keys, :sub_key_names, :filename, :basename, :version, :hash
15
+ def initialize(filename, components = ['ITEMS'])
16
+ raise "File #{filename} must exist" unless File.exist?(filename)
17
+ @filename = filename
18
+ @basename = File.basename(filename)
19
+ @version = V3_NAME_REG.match(filename) ? 3 : 5
20
+ @components = components
21
+ if @version == 5
22
+ @hash = load_file(@filename)
23
+ else
24
+ raise "Unsupported version #{@version}"
25
+ end
26
+ end
27
+ def self.get_component_key_name(component_name)
28
+ return 'LIMNAMEBAG' if /LIMITATION/i.match(component_name)
29
+ return 'PRODNO' if /PRODUCT/i.match(component_name)
30
+ return 'GTIN' if /ITEM/i.match(component_name)
31
+ raise "Cannot determine keyname for component #{component_name}"
32
+ end
33
+ def get_limitation_from_v5(item)
34
+ get_item('PRODUCTS', item['PRODNO'].first.to_i)['LIMNAMEBAG'] ? ['true'] : nil
35
+ end
36
+ def get_field_from_v5_product(item, field_name)
37
+ get_item('PRODUCTS', item['PRODNO'].first.to_i)[field_name]
38
+ end
39
+ def get_items(component_name)
40
+ if @version == 3
41
+ items = @hash[component_name]
42
+ else
43
+ items = @hash[component_name].first.values.first
44
+ end
45
+ items
46
+ end
47
+ def get_item(component_name, id)
48
+ keyname = StammXML.get_component_key_name(component_name)
49
+ get_items(component_name).find{|item| item[keyname].first.to_i == id}
50
+ end
51
+ def load_file(name)
52
+ Oddb2xml.log_timestamp "Reading #{name} #{(File.size(name)/1024/1024).to_i} MB. This may take some time"
53
+ XmlSimple.xml_in(IO.read(name))
54
+ end
55
+ end
56
+ class CompareV5
57
+ DEFAULTS = {
58
+ :components => ["PRODUCTS", "LIMITATIONS", "ITEMS",],
59
+ :fields_to_ignore => ['COMP', 'DOSAGE_FORMF', 'MEASUREF'],
60
+ :fields_as_floats => [ 'PEXT', 'PEXF', 'PPUB' ],
61
+ :min_diff_for_floats => 0.01,
62
+ }
63
+ def initialize(left, right, options = DEFAULTS.clone)
64
+ @options = options
65
+ @left = StammXML.new(left, @options[:components])
66
+ @right = StammXML.new(right, @options[:components])
67
+ @diff_stat = {}
68
+ @occurrences = {}
69
+ @report = []
70
+ end
71
+ def get_keys(items, key='GTIN')
72
+ items.collect{|item| item[key].first.to_i }
73
+ end
74
+ def get_names(items)
75
+ items.collect{|item| item.keys}.flatten.uniq.sort
76
+ end
77
+ def compare
78
+ show_header("Start comparing #{@left.filename} with #{@right.filename}")
79
+ (@left.components & @right.components).each do |name|
80
+ begin
81
+ puts "\n#{Time.now.strftime("%H:%M:%S")}: Comparing #{name} in #{@left.basename} with #{@right.basename}"
82
+ key = StammXML.get_component_key_name(name)
83
+ left_items = @left.get_items(name)
84
+ next unless left_items
85
+ right_items = @right.get_items(name)
86
+ next unless right_items
87
+ @diff_stat[name] = {}
88
+ @occurrences[name] = {}
89
+ @diff_stat[name][NR_COMPARED] = 0
90
+ l_names = get_names(left_items)
91
+ r_names = get_names(right_items)
92
+ compare_names = l_names & r_names
93
+ l_keys = get_keys(left_items, key)
94
+ r_keys = get_keys(right_items, key)
95
+ (l_keys & r_keys).each do |id|
96
+ compare_details(name, compare_names, id)
97
+ end
98
+ key_results_details(name, compare_names, l_keys, r_keys)
99
+ rescue => error
100
+ puts "Execution failed with #{error}"
101
+ end
102
+ end
103
+ show_header("Summary comparing #{@left.filename} with #{@right.filename}")
104
+ puts "Ignored differences in #{@options[:fields_to_ignore]}. Signaled when differences in #{@options[:fields_as_floats]} were bigger than #{@options[:min_diff_for_floats]}"
105
+ puts @report.join("\n")
106
+ @diff_stat.each do |component, stats|
107
+ puts "\nFor #{stats[NR_COMPARED]} #{component} we have the following number of differences per field"
108
+ stats.each do |name, nr|
109
+ next if name.eql?(NR_COMPARED)
110
+ next if @options[:fields_to_ignore].index(name)
111
+ puts " #{name.ljust(20)} #{nr} of #{@occurrences[component][name]}"
112
+ end
113
+ end
114
+ @diff_stat
115
+ rescue => error
116
+ puts "Execution failed with #{error}"
117
+ raise error
118
+ end
119
+ private
120
+ NR_COMPARED = 'NR_COMPARED'
121
+ COUNT = '_count'
122
+ def show_header(header)
123
+ text = Oddb2xml.log_timestamp(header)
124
+ pad = 5
125
+ puts
126
+ puts '-'*(text.length+2*pad)
127
+ puts ''.ljust(pad) + text
128
+ puts '-'*(text.length+2*pad)
129
+ puts
130
+ end
131
+ def compare_details(component_name, compare_names, id)
132
+ l_item = @left.get_item(component_name, id)
133
+ r_item = @right.get_item(component_name, id)
134
+ found_one = false
135
+ length = 32
136
+ found = false
137
+ detail_name = l_item['DSCR'] ? l_item['DSCR'].first[0..length-1].rjust(length) : ''.rjust(length)
138
+ details = "Diff in #{id.to_s.ljust(15)} #{detail_name}"
139
+ diff_name = component_name
140
+ diff_name += 'S' unless /S$/.match(diff_name)
141
+ @diff_stat[diff_name] ||= {}
142
+ @occurrences[diff_name] ||= {}
143
+ @diff_stat[diff_name][NR_COMPARED] ||= 0
144
+ @diff_stat[diff_name][NR_COMPARED] += 1
145
+ l_item.keys.each do |sub_key|
146
+ next if @options[:fields_to_ignore].index(sub_key)
147
+ @diff_stat[diff_name][sub_key] ||= 0
148
+ @occurrences[diff_name][sub_key] ||= 0
149
+ @occurrences[diff_name][sub_key] += 1
150
+ r_value = r_item[sub_key]
151
+ l_value = l_item[sub_key]
152
+ if @options[:fields_as_floats].index(sub_key)
153
+ l_float = l_value ? l_value.first.to_f : 0.0
154
+ r_float = r_value ? r_value.first.to_f : 0.0
155
+ next if (l_float - r_float).abs < @options[:min_diff_for_floats]
156
+ end
157
+ next if (r_value.is_a?(Array) && '--missing--'.eql?(r_value.first)) || (l_value.is_a?(Array) && '--missing--'.eql?(l_value.first))
158
+ # TODO: get_field_from_v5_product
159
+ next if r_value.to_s.eql?(l_value.to_s)
160
+ next if r_value.to_s.upcase.eql?(l_value.to_s.upcase) && @options[:case_insensitive]
161
+ details += " #{sub_key}: '#{l_value}' != '#{r_value}'"
162
+ found = found_one = true
163
+ @diff_stat[diff_name][sub_key] += 1
164
+ end
165
+ puts details.gsub(/[\[\]]/,'') if found
166
+ end
167
+
168
+ def show_keys(keys, batch_size = 20)
169
+ 0.upto(keys.size) do |idx|
170
+ next unless idx % batch_size == 0
171
+ puts ' ' + keys[idx..(idx + batch_size-1)].join(' ')
172
+ end
173
+ end
174
+ def key_results_details(component_name, compare_names, l_keys, r_keys)
175
+ component_name += 'S' unless /S$/.match(component_name)
176
+ @report << "#{component_name}: Found #{l_keys.size} items only in #{@left.basename} #{r_keys.size} items only in #{@right.basename}, compared #{@diff_stat[component_name][NR_COMPARED]} items"
177
+ keys = r_keys - l_keys
178
+ head = "#{component_name}: #{(keys).size} keys only in #{@right.basename}"
179
+ puts "#{head}: Keys were #{keys.size}"
180
+ show_keys(keys)
181
+ @report << head
182
+ keys = l_keys - r_keys
183
+ head = "#{component_name}: #{(keys).size} keys only in #{@left.basename}"
184
+ puts "#{head}: Keys were #{keys.size}"
185
+ show_keys(keys)
186
+ @report << head
187
+ end
188
+ end
189
+ end
@@ -1,7 +1,7 @@
1
1
  # encoding: utf-8
2
2
 
3
3
  require 'zlib'
4
- require 'archive/tar/minitar'
4
+ require 'minitar'
5
5
  require 'zip'
6
6
 
7
7
  module Oddb2xml
@@ -35,9 +35,12 @@ module Oddb2xml
35
35
  end
36
36
  end
37
37
  end
38
- if File.exists? @compress_file and not defined?(Rspec)
38
+ if File.exists? @compress_file
39
+ puts "#{__LINE__}: @compress_file"
39
40
  @contents.each do |file|
40
- FileUtils.rm(file)
41
+ @tmpfile = file
42
+ puts "#{__LINE__}: @tmpfile"
43
+ FileUtils.rm(file) if file && File.exists?(file)
41
44
  end
42
45
  end
43
46
  rescue Errno::ENOENT, StandardError => e
@@ -12,24 +12,21 @@ SkipMigelDownloader = true # https://github.com/zdavatz/oddb2xml_files/raw/mast
12
12
  module Oddb2xml
13
13
  module DownloadMethod
14
14
  private
15
- def download_as(file, option='r')
15
+ def download_as(file, option='w+')
16
16
  tempFile = File.join(WorkDir, File.basename(file))
17
- file2save = File.join(Downloads, File.basename(file))
18
- Oddb2xml.log "download_as file #{file2save} via #{tempFile} from #{@url}"
17
+ @file2save = File.join(Downloads, File.basename(file))
18
+ report_download(@url, @file2save)
19
19
  data = nil
20
- FileUtils.rm_f(tempFile, :verbose => false)
21
20
  if Oddb2xml.skip_download(file)
22
21
  io = File.open(file, option)
23
22
  data = io.read
24
23
  else
25
24
  begin
26
- response = @agent.get(@url)
27
- response.save_as(file)
28
- response = nil # win
29
25
  io = File.open(file, option)
30
- data = io.read
31
- rescue Timeout::Error, Errno::ETIMEDOUT
32
- retrievable? ? retry : raise
26
+ data = open(@url).read
27
+ io.write(data)
28
+ rescue => error
29
+ puts "error #{error} while fetching #{@url}"
33
30
  ensure
34
31
  io.close if io and !io.closed? # win
35
32
  Oddb2xml.download_finished(tempFile)
@@ -39,7 +36,7 @@ module Oddb2xml
39
36
  end
40
37
  end
41
38
  class Downloader
42
- attr_reader :type, :agent
39
+ attr_reader :type, :agent, :url; :file2save
43
40
  def initialize(options={}, url=nil)
44
41
  @options = options
45
42
  @url = url
@@ -48,6 +45,12 @@ module Oddb2xml
48
45
  Oddb2xml.log "Downloader from #{@url} for #{self.class}"
49
46
  init
50
47
  end
48
+ def report_download(url, file)
49
+ Oddb2xml.log sprintf("%-20s: download_as %-24s from %s",
50
+ self.class.to_s.split('::').last,
51
+ File.basename(file),
52
+ url)
53
+ end
51
54
  def init
52
55
  @agent = Mechanize.new
53
56
  @agent.user_agent = 'Mozilla/5.0 (X11; Linux x86_64; rv:16.0) Gecko/20100101 Firefox/16.0'
@@ -79,6 +82,7 @@ module Oddb2xml
79
82
  Dir.glob(File.join(Downloads, '*')).each { |name| if target.match(name) then entry = name; break end }
80
83
  if entry
81
84
  dest = "#{Downloads}/#{File.basename(entry)}"
85
+ @file2save = dest
82
86
  if File.exists?(dest)
83
87
  Oddb2xml.log "read_xml_from_zip return content of #{dest} #{File.size(dest)} bytes "
84
88
  return IO.read(dest)
@@ -133,8 +137,9 @@ module Oddb2xml
133
137
  include DownloadMethod
134
138
  def download
135
139
  @url ||= 'https://download.epha.ch/cleaned/matrix.csv'
136
- content = download_as('epha_interactions.csv', 'r')
137
- FileUtils.rm_f('epha_interactions.csv', :verbose => true)
140
+ file = 'epha_interactions.csv'
141
+ content = download_as(file, 'w+')
142
+ FileUtils.rm_f(file, :verbose => false)
138
143
  content
139
144
  end
140
145
  end
@@ -142,36 +147,27 @@ module Oddb2xml
142
147
  include DownloadMethod
143
148
  def download
144
149
  @url ||= 'https://raw.githubusercontent.com/zdavatz/oddb2xml_files/master/LPPV.txt'
145
- download_as('oddb2xml_files_lppv.txt', 'r')
150
+ download_as('oddb2xml_files_lppv.txt', 'w+')
146
151
  end
147
152
  end
148
153
  class ZurroseDownloader < Downloader
149
154
  include DownloadMethod
150
155
  def download
151
156
  @url ||= 'http://pillbox.oddb.org/TRANSFER.ZIP'
152
- unless @url =~ /^http/
153
- io = File.open(@url, 'r:iso-8859-1:utf-8')
154
- content = io.read
155
- Oddb2xml.log("ZurroseDownloader #{__LINE__} download #{@url} @url returns #{content.bytes}")
156
- content
157
- else
158
- file = File.join(Downloads, 'transfer.zip')
159
- unless Oddb2xml.skip_download(file)
160
- Oddb2xml.log "ZurroseDownloader #{__LINE__}: #{file}"
161
- begin
162
- response = @agent.get(@url)
163
- response.save_as(file)
164
- response = nil # win
165
- rescue Timeout::Error, Errno::ETIMEDOUT
166
- retrievable? ? retry : raise
167
- ensure
168
- Oddb2xml.download_finished(file)
169
- end
170
- end
171
- read_xml_from_zip(/transfer.dat/, file)
172
- dest = File.join(Downloads, 'transfer.dat')
173
- File.open(dest, 'r:iso-8859-1:utf-8').read
157
+ zipfile = File.join(WorkDir, 'transfer.zip')
158
+ download_as(zipfile)
159
+ dest = File.join(Downloads, 'transfer.dat')
160
+ cmd = "unzip -o '#{zipfile}' -d '#{Downloads}'"
161
+ system(cmd)
162
+ if @options[:artikelstamm]
163
+ cmd = "iconv -f ISO8859-1 -t utf-8 -o #{dest.sub('.dat','.utf8')} #{dest}"
164
+ Oddb2xml.log(cmd)
165
+ system(cmd)
174
166
  end
167
+ # read file and convert it to utf-8
168
+ File.open(dest, 'r:iso-8859-1:utf-8').read
169
+ ensure
170
+ FileUtils.rm(zipfile) if File.exist?(dest) && File.exist?(zipfile)
175
171
  end
176
172
  end
177
173
  class MedregbmDownloader < Downloader
@@ -190,30 +186,35 @@ module Oddb2xml
190
186
  super({}, url)
191
187
  end
192
188
  def download
193
- download_as("medregbm_#{@type.to_s}.txt", 'r:iso-8859-1:utf-8')
189
+ file = "medregbm_#{@type.to_s}.txt"
190
+ download_as(file, 'w+:iso-8859-1:utf-8')
191
+ report_download(@url, file)
192
+ FileUtils.rm_f(file, :verbose => false) # we need it only in the download
193
+ file
194
194
  end
195
195
  end
196
196
  class BagXmlDownloader < Downloader
197
+ include DownloadMethod
197
198
  def init
198
199
  super
199
200
  @url ||= 'http://bag.e-mediat.net/SL2007.Web.External/File.axd?file=XMLPublications.zip'
200
201
  end
201
202
  def download
202
203
  file = File.join(WorkDir, 'XMLPublications.zip')
203
- Oddb2xml.log "BagXmlDownloader #{__LINE__}: #{file}"
204
- unless Oddb2xml.skip_download(file)
205
- Oddb2xml.log "BagXmlDownloader #{__LINE__}: #{file}"
206
- begin
207
- response = @agent.get(@url)
208
- response.save_as(file)
209
- response = nil # win
210
- rescue Timeout::Error, Errno::ETIMEDOUT
211
- retrievable? ? retry : raise
212
- ensure
213
- Oddb2xml.download_finished(file)
214
- end
204
+ download_as(file)
205
+ report_download(@url, file)
206
+ if defined?(RSpec)
207
+ src = File.join(Oddb2xml::SpecData, 'Preparations.xml')
208
+ content = File.read(src)
209
+ FileUtils.cp(src, File.join(Downloads, File.basename(file)))
210
+ else
211
+ content = read_xml_from_zip(/Preparations.xml/, File.join(Downloads, File.basename(file)))
212
+ end
213
+ if @options[:artikelstamm]
214
+ cmd = "xmllint --format --output Preparations.xml Preparations.xml"
215
+ Oddb2xml.log(cmd)
216
+ system(cmd)
215
217
  end
216
- content = read_xml_from_zip(/Preparations.xml/, File.join(Downloads, File.basename(file)))
217
218
  FileUtils.rm_f(file, :verbose => false) unless defined?(RSpec)
218
219
  content
219
220
  end
@@ -236,7 +237,7 @@ module Oddb2xml
236
237
  def download
237
238
  begin
238
239
  filename = "refdata_#{@type}.xml"
239
- file2save = File.join(Downloads, "refdata_#{@type}.xml")
240
+ @file2save = File.join(Downloads, "refdata_#{@type}.xml")
240
241
  soap = %(<?xml version="1.0" encoding="UTF-8"?>
241
242
  <SOAP-ENV:Envelope xmlns:SOAP-ENV="http://schemas.xmlsoap.org/soap/envelope/" xmlns:ns1="http://refdatabase.refdata.ch/Article_in" xmlns:ns2="http://refdatabase.refdata.ch/">
242
243
  <SOAP-ENV:Body>
@@ -247,14 +248,21 @@ module Oddb2xml
247
248
  </SOAP-ENV:Envelope>
248
249
  </ns1:ATYPE></ns2:DownloadArticleInput></SOAP-ENV:Body>
249
250
  )
250
- return IO.read(file2save) if Oddb2xml.skip_download? and File.exists?(file2save)
251
- FileUtils.rm_f(file2save, :verbose => false)
251
+ report_download(@url, @file2save)
252
+ return IO.read(@file2save) if Oddb2xml.skip_download? and File.exists?(@file2save)
253
+ FileUtils.rm_f(@file2save, :verbose => false)
252
254
  response = @client.call(:download, :xml => soap)
253
255
  if response.success?
254
256
  if xml = response.to_xml
257
+ xml = File.read(File.join(Oddb2xml::SpecData, File.basename(@file2save))) if defined?(RSpec)
255
258
  response = nil # win
256
259
  FileUtils.makedirs(Downloads)
257
- File.open(file2save, 'w+') { |file| file.write xml }
260
+ File.open(@file2save, 'w+') { |file| file.write xml }
261
+ if @options[:artikelstamm]
262
+ cmd = "xmllint --format --output #{@file2save} #{@file2save}"
263
+ Oddb2xml.log(cmd)
264
+ system(cmd)
265
+ end
258
266
  else
259
267
  # received broken data or internal error
260
268
  raise StandardError
@@ -271,6 +279,7 @@ module Oddb2xml
271
279
  end
272
280
  end
273
281
  class SwissmedicDownloader < Downloader
282
+ include DownloadMethod
274
283
  def initialize(type=:orphan, options = {})
275
284
  @type = type
276
285
  @options = options
@@ -282,23 +291,28 @@ module Oddb2xml
282
291
  end
283
292
  end
284
293
  def download
285
- @type == file = File.join(Oddb2xml::WorkDir, "swissmedic_#{@type}.xlsx")
286
- if @options[:calc] and @options[:skip_download] and File.exists?(file) and (Time.now-File.ctime(file)).to_i < 24*60*60
287
- Oddb2xml.log "SwissmedicDownloader #{__LINE__}: Skip downloading #{file} #{File.size(file)} bytes"
288
- return File.expand_path(file)
294
+ @file2save = File.join(Oddb2xml::WorkDir, "swissmedic_#{@type}.xlsx")
295
+ report_download(@url, @file2save)
296
+ if @options[:calc] and @options[:skip_download] and File.exists?(@file2save) and (Time.now-File.ctime(@file2save)).to_i < 24*60*60
297
+ Oddb2xml.log "SwissmedicDownloader #{__LINE__}: Skip downloading #{@file2save} #{File.size(@file2save)} bytes"
298
+ return File.expand_path(@file2save)
289
299
  end
290
300
  begin
291
- FileUtils.rm(File.expand_path(file), :verbose => !defined?(RSpec)) if File.exists?(File.expand_path(file))
292
- File.open(file, 'w+') do |output|
293
- output.write open(@direct_url_link).read
301
+ FileUtils.rm(File.expand_path(@file2save), :verbose => !defined?(RSpec)) if File.exists?(File.expand_path(@file2save))
302
+ @url = @direct_url_link
303
+ download_as(@file2save, 'w+')
304
+ if @options[:artikelstamm]
305
+ cmd = "ssconvert '#{@file2save}' '#{File.join(Downloads, File.basename(@file2save).sub(/\.xls.*/, '.csv'))}' 2> /dev/null"
306
+ Oddb2xml.log(cmd)
307
+ system(cmd)
294
308
  end
295
- return File.expand_path(file)
309
+ return File.expand_path(@file2save)
296
310
  rescue Timeout::Error, Errno::ETIMEDOUT
297
311
  retrievable? ? retry : raise
298
312
  ensure
299
- Oddb2xml.download_finished(file, false)
313
+ Oddb2xml.download_finished(@file2save, false)
300
314
  end
301
- return File.expand_path(file)
315
+ return File.expand_path(@file2save)
302
316
  end
303
317
  end
304
318
  class SwissmedicInfoDownloader < Downloader
@@ -309,6 +323,7 @@ module Oddb2xml
309
323
  end
310
324
  def download
311
325
  file = File.join(Downloads, "swissmedic_info.zip")
326
+ report_download(@url, file)
312
327
  FileUtils.rm_f(file, :verbose => false) unless Oddb2xml.skip_download?
313
328
  begin
314
329
  response = nil