oddb2xml 2.5.0 → 2.5.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.travis.yml +1 -1
- data/Elexis_Artikelstamm_v003.xsd +387 -0
- data/Elexis_Artikelstamm_v5.xsd +513 -0
- data/Gemfile +2 -6
- data/History.txt +11 -0
- data/README.md +35 -27
- data/artikelstamm.md +68 -0
- data/bin/compare_v5 +41 -0
- data/bin/oddb2xml +3 -15
- data/data/article_overrides.yaml +51859 -0
- data/data/gtin2ignore.yaml +30510 -0
- data/data/product_overrides.yaml +4 -0
- data/lib/oddb2xml/builder.rb +543 -192
- data/lib/oddb2xml/cli.rb +82 -62
- data/lib/oddb2xml/compare.rb +189 -0
- data/lib/oddb2xml/compressor.rb +6 -3
- data/lib/oddb2xml/downloader.rb +79 -64
- data/lib/oddb2xml/extractor.rb +67 -40
- data/lib/oddb2xml/options.rb +76 -77
- data/lib/oddb2xml/parslet_compositions.rb +18 -1
- data/lib/oddb2xml/util.rb +25 -3
- data/lib/oddb2xml/version.rb +1 -1
- data/oddb2xml.gemspec +8 -5
- data/oddb2xml.xsd +1 -0
- data/spec/artikelstamm_spec.rb +383 -0
- data/spec/builder_spec.rb +147 -118
- data/spec/calc_spec.rb +3 -15
- data/spec/cli_spec.rb +24 -35
- data/spec/compare_spec.rb +24 -0
- data/spec/compressor_spec.rb +1 -3
- data/spec/data/Elexis_Artikelstamm_v5.xsd +513 -0
- data/spec/data/Preparations.xml +2200 -0
- data/spec/data/Publications.xls +0 -0
- data/spec/data/artikelstamm_N_010917.xml +39 -0
- data/spec/data/artikelstamm_N_011217.xml +17 -0
- data/spec/data/artikelstamm_P_010917.xml +86 -0
- data/spec/data/artikelstamm_P_011217.xml +63 -0
- data/spec/data/oddb2xml_files_lppv.txt +2 -0
- data/spec/data/refdata_NonPharma.xml +38 -0
- data/spec/data/refdata_Pharma.xml +220 -0
- data/spec/data/swissmedic_orphan.xlsx +0 -0
- data/spec/data/swissmedic_package.xlsx +0 -0
- data/spec/data/transfer.dat +59 -19
- data/spec/data/v5_first.xml +102 -0
- data/spec/data/v5_second.xml +184 -0
- data/spec/data_helper.rb +72 -0
- data/spec/downloader_spec.rb +19 -27
- data/spec/extractor_spec.rb +27 -33
- data/spec/fixtures/vcr_cassettes/artikelstamm.json +1 -0
- data/spec/options_spec.rb +73 -66
- data/spec/spec_helper.rb +73 -24
- data/test_options.rb +4 -2
- metadata +100 -21
- data/spec/data/XMLPublications.zip +0 -0
- data/spec/data/compressor/oddb_article.xml +0 -0
- data/spec/data/compressor/oddb_fi.xml +0 -0
- data/spec/data/compressor/oddb_fi_product.xml +0 -0
- data/spec/data/compressor/oddb_limitation.xml +0 -0
- data/spec/data/compressor/oddb_product.xml +0 -0
- data/spec/data/compressor/oddb_substance.xml +0 -0
data/lib/oddb2xml/cli.rb
CHANGED
@@ -11,7 +11,7 @@ require 'rubyXL'
|
|
11
11
|
require 'date' # for today
|
12
12
|
|
13
13
|
module Oddb2xml
|
14
|
-
|
14
|
+
|
15
15
|
class Cli
|
16
16
|
attr_reader :options
|
17
17
|
SUBJECTS = %w[product article]
|
@@ -19,6 +19,7 @@ module Oddb2xml
|
|
19
19
|
OPTIONALS = %w[fi fi_product]
|
20
20
|
def initialize(args)
|
21
21
|
@options = args
|
22
|
+
STDOUT.puts "\nStarting cli with from #{caller[1]} using #{@options}" if defined?(RSpec)
|
22
23
|
Oddb2xml.save_options(@options)
|
23
24
|
@mutex = Mutex.new
|
24
25
|
# product
|
@@ -38,6 +39,7 @@ module Oddb2xml
|
|
38
39
|
end
|
39
40
|
def run
|
40
41
|
threads = []
|
42
|
+
startTime = Time.now
|
41
43
|
files2rm = Dir.glob(File.join(Downloads, '*'))
|
42
44
|
FileUtils.rm_f(files2rm, :verbose => @options[:log]) if files2rm.size > 0 and not Oddb2xml.skip_download?
|
43
45
|
if @options[:calc] and not @options[:extended]
|
@@ -79,46 +81,56 @@ module Oddb2xml
|
|
79
81
|
exit
|
80
82
|
end
|
81
83
|
build
|
84
|
+
if @options[:artikelstamm]
|
85
|
+
elexis_v5_xsd = File.expand_path(File.join(__FILE__, '..', '..', '..', 'Elexis_Artikelstamm_v5.xsd'))
|
86
|
+
cmd = "xmllint --noout --schema #{elexis_v5_xsd} #{@_files[:artikelstamm]}"
|
87
|
+
if system(cmd)
|
88
|
+
puts "Validatied #{@_files[:artikelstamm]}"
|
89
|
+
else
|
90
|
+
puts "Validating failed using #{cmd}"
|
91
|
+
exit(2)
|
92
|
+
end
|
93
|
+
end
|
82
94
|
compress if @options[:compress_ext]
|
83
|
-
report
|
95
|
+
res = report
|
96
|
+
nrSecs = (Time.now - startTime).to_i
|
97
|
+
if defined?(RSpec) && (nrSecs).to_i > 10 then require 'pry'; binding.pry ; end
|
98
|
+
res
|
84
99
|
end
|
85
100
|
private
|
86
101
|
def build
|
87
|
-
Oddb2xml.log("Start build")
|
88
102
|
begin
|
89
|
-
@_files = {"calc"=>"oddb_calc.xml"} if @options[:calc] and not @options[:extended]
|
90
|
-
|
91
|
-
|
92
|
-
|
93
|
-
|
94
|
-
|
95
|
-
|
96
|
-
|
97
|
-
|
98
|
-
|
99
|
-
|
100
|
-
|
101
|
-
refdata = {}
|
102
|
-
types.each do |type|
|
103
|
-
refdata.merge!(@refdata_types[type]) if @refdata_types[type]
|
104
|
-
end
|
105
|
-
builder.refdata = refdata
|
106
|
-
builder.subject = sbj
|
107
|
-
end
|
108
|
-
# common sources
|
109
|
-
builder.items = @items
|
110
|
-
builder.flags = @flags
|
111
|
-
builder.lppvs = @lppvs
|
112
|
-
# optional sources
|
113
|
-
builder.infos = @infos
|
114
|
-
builder.packs = @packs
|
115
|
-
# additional sources
|
116
|
-
%w[actions orphan migel infos_zur_rose].each do |addition|
|
117
|
-
builder.send("#{addition}=".intern, self.instance_variable_get("@#{addition}"))
|
103
|
+
@_files = {"calc"=>"oddb_calc.xml"} if @options[:calc] and not (@options[:extended] || @options[:artikelstamm])
|
104
|
+
builder = Builder.new(@options) do |builder|
|
105
|
+
if @options[:calc] and not (@options[:extended] || @options[:artikelstamm])
|
106
|
+
builder.packs = @packs
|
107
|
+
elsif @options[:address]
|
108
|
+
builder.companies = @companies
|
109
|
+
builder.people = @people
|
110
|
+
else # product
|
111
|
+
if @options[:format] != :dat
|
112
|
+
refdata = {}
|
113
|
+
types.each do |type|
|
114
|
+
refdata.merge!(@refdata_types[type]) if @refdata_types[type]
|
118
115
|
end
|
116
|
+
builder.refdata = refdata
|
117
|
+
end
|
118
|
+
# common sources
|
119
|
+
builder.items = @items
|
120
|
+
builder.flags = @flags
|
121
|
+
builder.lppvs = @lppvs
|
122
|
+
# optional sources
|
123
|
+
builder.infos = @infos
|
124
|
+
builder.packs = @packs
|
125
|
+
# additional sources
|
126
|
+
%w[actions orphan migel infos_zur_rose].each do |addition|
|
127
|
+
builder.send("#{addition}=".intern, self.instance_variable_get("@#{addition}"))
|
119
128
|
end
|
120
|
-
builder.tag_suffix = @options[:tag_suffix]
|
121
129
|
end
|
130
|
+
builder.tag_suffix = @options[:tag_suffix]
|
131
|
+
end
|
132
|
+
files.each_pair do |sbj, file|
|
133
|
+
builder.subject = sbj
|
122
134
|
output = ''
|
123
135
|
if !@options[:address] and (@options[:format] == :dat)
|
124
136
|
types.each do |type|
|
@@ -187,14 +199,14 @@ module Oddb2xml
|
|
187
199
|
when :orphan
|
188
200
|
var = what.to_s
|
189
201
|
begin # instead of Thread.new do
|
190
|
-
downloader = SwissmedicDownloader.new(what)
|
202
|
+
downloader = SwissmedicDownloader.new(what, @options)
|
191
203
|
bin = downloader.download
|
192
204
|
Oddb2xml.log("SwissmedicDownloader #{var} #{bin} #{File.size(bin)} bytes")
|
193
205
|
self.instance_variable_set(
|
194
206
|
"@#{var}",
|
195
207
|
items = SwissmedicExtractor.new(bin, what).to_arry
|
196
208
|
)
|
197
|
-
Oddb2xml.log("SwissmedicExtractor added #{items.size}
|
209
|
+
Oddb2xml.log("SwissmedicExtractor added #{items.size}")
|
198
210
|
items
|
199
211
|
end
|
200
212
|
when :interaction
|
@@ -259,7 +271,7 @@ module Oddb2xml
|
|
259
271
|
xml = downloader.download
|
260
272
|
Oddb2xml.log("ZurroseDownloader xml #{xml.size} bytes")
|
261
273
|
@mutex.synchronize do
|
262
|
-
hsh = ZurroseExtractor.new(xml, @options[:extended]).to_hash
|
274
|
+
hsh = ZurroseExtractor.new(xml, @options[:extended], @options[:artikelstamm]).to_hash
|
263
275
|
Oddb2xml.log("ZurroseExtractor added #{hsh.size} items from xml with #{xml.size} bytes")
|
264
276
|
@infos_zur_rose = hsh
|
265
277
|
end
|
@@ -302,7 +314,9 @@ module Oddb2xml
|
|
302
314
|
unless @_files
|
303
315
|
@_files = {}
|
304
316
|
@_files[:calc] = "oddb_calc.xml" if @options[:calc]
|
305
|
-
if @options[:
|
317
|
+
if @options[:artikelstamm]
|
318
|
+
@_files[:artikelstamm] = "artikelstamm_#{Date.today.strftime('%d%m%Y')}_v5.xml"
|
319
|
+
elsif @options[:address]
|
306
320
|
@_files[:company] = "#{prefix}_betrieb.xml"
|
307
321
|
@_files[:person] = "#{prefix}_medizinalperson.xml"
|
308
322
|
elsif @options[:format] == :dat
|
@@ -336,41 +350,47 @@ module Oddb2xml
|
|
336
350
|
lines << Calc.report_conversion
|
337
351
|
lines << ParseComposition.report
|
338
352
|
end
|
339
|
-
|
340
|
-
|
341
|
-
|
342
|
-
|
343
|
-
|
344
|
-
|
345
|
-
|
346
|
-
|
353
|
+
if @options[:artikelstamm]
|
354
|
+
lines << "Generated artikelstamm.xml for Elexis"
|
355
|
+
lines += Builder.articlestamm_v5_info_lines
|
356
|
+
else
|
357
|
+
unless @options[:address]
|
358
|
+
types.each do |type|
|
359
|
+
if @refdata_types[type]
|
360
|
+
indices = @refdata_types[type].values.flatten.length
|
361
|
+
|
362
|
+
if type == :nonpharma
|
363
|
+
nonpharmas = @refdata_types[type].keys
|
364
|
+
if SkipMigelDownloader
|
365
|
+
indices + nonpharmas.length
|
366
|
+
else
|
367
|
+
migel_xls = @migel.values.compact.select{|m| !m[:pharmacode]}.map{|m| m[:pharmacode] }
|
368
|
+
indices += (migel_xls - nonpharmas).length # ignore duplicates, null
|
369
|
+
end
|
370
|
+
lines << sprintf("\tNonPharma products: %i", indices)
|
347
371
|
else
|
348
|
-
|
349
|
-
indices += (migel_xls - nonpharmas).length # ignore duplicates, null
|
372
|
+
lines << sprintf("\tPharma products: %i", indices)
|
350
373
|
end
|
351
|
-
lines << sprintf("\tNonPharma products: %i", indices)
|
352
|
-
else
|
353
|
-
lines << sprintf("\tPharma products: %i", indices)
|
354
374
|
end
|
355
375
|
end
|
356
|
-
|
357
|
-
|
358
|
-
|
359
|
-
|
360
|
-
|
361
|
-
|
362
|
-
|
363
|
-
|
364
|
-
|
365
|
-
|
366
|
-
|
376
|
+
if (@options[:extended] || @options[:artikelstamm])
|
377
|
+
lines << sprintf("\tInformation items zur Rose: %i", @infos_zur_rose.length)
|
378
|
+
end
|
379
|
+
else
|
380
|
+
{
|
381
|
+
'Betrieb' => :@companies,
|
382
|
+
'Person' => :@people
|
383
|
+
}.each do |type, var|
|
384
|
+
lines << sprintf(
|
385
|
+
"#{type} addresses: %i", self.instance_variable_get(var).length)
|
386
|
+
end
|
367
387
|
end
|
368
388
|
end
|
369
389
|
puts lines.join("\n")
|
370
390
|
end
|
371
391
|
def types # RefData
|
372
392
|
@_types ||=
|
373
|
-
if @options[:nonpharma]
|
393
|
+
if @options[:nonpharma] || @options[:artikelstamm]
|
374
394
|
[:pharma, :nonpharma]
|
375
395
|
else
|
376
396
|
[:pharma]
|
@@ -0,0 +1,189 @@
|
|
1
|
+
# encoding: utf-8
|
2
|
+
require 'xmlsimple'
|
3
|
+
|
4
|
+
module Oddb2xml
|
5
|
+
def self.log_timestamp(msg)
|
6
|
+
full_msg = "#{Time.now.strftime("%H:%M:%S")}: #{msg}"
|
7
|
+
puts full_msg
|
8
|
+
STDOUT.flush
|
9
|
+
full_msg
|
10
|
+
end
|
11
|
+
class StammXML
|
12
|
+
V3_NAME_REG = /_([N,P])_/
|
13
|
+
attr_accessor :components
|
14
|
+
attr_reader :keys, :sub_key_names, :filename, :basename, :version, :hash
|
15
|
+
def initialize(filename, components = ['ITEMS'])
|
16
|
+
raise "File #{filename} must exist" unless File.exist?(filename)
|
17
|
+
@filename = filename
|
18
|
+
@basename = File.basename(filename)
|
19
|
+
@version = V3_NAME_REG.match(filename) ? 3 : 5
|
20
|
+
@components = components
|
21
|
+
if @version == 5
|
22
|
+
@hash = load_file(@filename)
|
23
|
+
else
|
24
|
+
raise "Unsupported version #{@version}"
|
25
|
+
end
|
26
|
+
end
|
27
|
+
def self.get_component_key_name(component_name)
|
28
|
+
return 'LIMNAMEBAG' if /LIMITATION/i.match(component_name)
|
29
|
+
return 'PRODNO' if /PRODUCT/i.match(component_name)
|
30
|
+
return 'GTIN' if /ITEM/i.match(component_name)
|
31
|
+
raise "Cannot determine keyname for component #{component_name}"
|
32
|
+
end
|
33
|
+
def get_limitation_from_v5(item)
|
34
|
+
get_item('PRODUCTS', item['PRODNO'].first.to_i)['LIMNAMEBAG'] ? ['true'] : nil
|
35
|
+
end
|
36
|
+
def get_field_from_v5_product(item, field_name)
|
37
|
+
get_item('PRODUCTS', item['PRODNO'].first.to_i)[field_name]
|
38
|
+
end
|
39
|
+
def get_items(component_name)
|
40
|
+
if @version == 3
|
41
|
+
items = @hash[component_name]
|
42
|
+
else
|
43
|
+
items = @hash[component_name].first.values.first
|
44
|
+
end
|
45
|
+
items
|
46
|
+
end
|
47
|
+
def get_item(component_name, id)
|
48
|
+
keyname = StammXML.get_component_key_name(component_name)
|
49
|
+
get_items(component_name).find{|item| item[keyname].first.to_i == id}
|
50
|
+
end
|
51
|
+
def load_file(name)
|
52
|
+
Oddb2xml.log_timestamp "Reading #{name} #{(File.size(name)/1024/1024).to_i} MB. This may take some time"
|
53
|
+
XmlSimple.xml_in(IO.read(name))
|
54
|
+
end
|
55
|
+
end
|
56
|
+
class CompareV5
|
57
|
+
DEFAULTS = {
|
58
|
+
:components => ["PRODUCTS", "LIMITATIONS", "ITEMS",],
|
59
|
+
:fields_to_ignore => ['COMP', 'DOSAGE_FORMF', 'MEASUREF'],
|
60
|
+
:fields_as_floats => [ 'PEXT', 'PEXF', 'PPUB' ],
|
61
|
+
:min_diff_for_floats => 0.01,
|
62
|
+
}
|
63
|
+
def initialize(left, right, options = DEFAULTS.clone)
|
64
|
+
@options = options
|
65
|
+
@left = StammXML.new(left, @options[:components])
|
66
|
+
@right = StammXML.new(right, @options[:components])
|
67
|
+
@diff_stat = {}
|
68
|
+
@occurrences = {}
|
69
|
+
@report = []
|
70
|
+
end
|
71
|
+
def get_keys(items, key='GTIN')
|
72
|
+
items.collect{|item| item[key].first.to_i }
|
73
|
+
end
|
74
|
+
def get_names(items)
|
75
|
+
items.collect{|item| item.keys}.flatten.uniq.sort
|
76
|
+
end
|
77
|
+
def compare
|
78
|
+
show_header("Start comparing #{@left.filename} with #{@right.filename}")
|
79
|
+
(@left.components & @right.components).each do |name|
|
80
|
+
begin
|
81
|
+
puts "\n#{Time.now.strftime("%H:%M:%S")}: Comparing #{name} in #{@left.basename} with #{@right.basename}"
|
82
|
+
key = StammXML.get_component_key_name(name)
|
83
|
+
left_items = @left.get_items(name)
|
84
|
+
next unless left_items
|
85
|
+
right_items = @right.get_items(name)
|
86
|
+
next unless right_items
|
87
|
+
@diff_stat[name] = {}
|
88
|
+
@occurrences[name] = {}
|
89
|
+
@diff_stat[name][NR_COMPARED] = 0
|
90
|
+
l_names = get_names(left_items)
|
91
|
+
r_names = get_names(right_items)
|
92
|
+
compare_names = l_names & r_names
|
93
|
+
l_keys = get_keys(left_items, key)
|
94
|
+
r_keys = get_keys(right_items, key)
|
95
|
+
(l_keys & r_keys).each do |id|
|
96
|
+
compare_details(name, compare_names, id)
|
97
|
+
end
|
98
|
+
key_results_details(name, compare_names, l_keys, r_keys)
|
99
|
+
rescue => error
|
100
|
+
puts "Execution failed with #{error}"
|
101
|
+
end
|
102
|
+
end
|
103
|
+
show_header("Summary comparing #{@left.filename} with #{@right.filename}")
|
104
|
+
puts "Ignored differences in #{@options[:fields_to_ignore]}. Signaled when differences in #{@options[:fields_as_floats]} were bigger than #{@options[:min_diff_for_floats]}"
|
105
|
+
puts @report.join("\n")
|
106
|
+
@diff_stat.each do |component, stats|
|
107
|
+
puts "\nFor #{stats[NR_COMPARED]} #{component} we have the following number of differences per field"
|
108
|
+
stats.each do |name, nr|
|
109
|
+
next if name.eql?(NR_COMPARED)
|
110
|
+
next if @options[:fields_to_ignore].index(name)
|
111
|
+
puts " #{name.ljust(20)} #{nr} of #{@occurrences[component][name]}"
|
112
|
+
end
|
113
|
+
end
|
114
|
+
@diff_stat
|
115
|
+
rescue => error
|
116
|
+
puts "Execution failed with #{error}"
|
117
|
+
raise error
|
118
|
+
end
|
119
|
+
private
|
120
|
+
NR_COMPARED = 'NR_COMPARED'
|
121
|
+
COUNT = '_count'
|
122
|
+
def show_header(header)
|
123
|
+
text = Oddb2xml.log_timestamp(header)
|
124
|
+
pad = 5
|
125
|
+
puts
|
126
|
+
puts '-'*(text.length+2*pad)
|
127
|
+
puts ''.ljust(pad) + text
|
128
|
+
puts '-'*(text.length+2*pad)
|
129
|
+
puts
|
130
|
+
end
|
131
|
+
def compare_details(component_name, compare_names, id)
|
132
|
+
l_item = @left.get_item(component_name, id)
|
133
|
+
r_item = @right.get_item(component_name, id)
|
134
|
+
found_one = false
|
135
|
+
length = 32
|
136
|
+
found = false
|
137
|
+
detail_name = l_item['DSCR'] ? l_item['DSCR'].first[0..length-1].rjust(length) : ''.rjust(length)
|
138
|
+
details = "Diff in #{id.to_s.ljust(15)} #{detail_name}"
|
139
|
+
diff_name = component_name
|
140
|
+
diff_name += 'S' unless /S$/.match(diff_name)
|
141
|
+
@diff_stat[diff_name] ||= {}
|
142
|
+
@occurrences[diff_name] ||= {}
|
143
|
+
@diff_stat[diff_name][NR_COMPARED] ||= 0
|
144
|
+
@diff_stat[diff_name][NR_COMPARED] += 1
|
145
|
+
l_item.keys.each do |sub_key|
|
146
|
+
next if @options[:fields_to_ignore].index(sub_key)
|
147
|
+
@diff_stat[diff_name][sub_key] ||= 0
|
148
|
+
@occurrences[diff_name][sub_key] ||= 0
|
149
|
+
@occurrences[diff_name][sub_key] += 1
|
150
|
+
r_value = r_item[sub_key]
|
151
|
+
l_value = l_item[sub_key]
|
152
|
+
if @options[:fields_as_floats].index(sub_key)
|
153
|
+
l_float = l_value ? l_value.first.to_f : 0.0
|
154
|
+
r_float = r_value ? r_value.first.to_f : 0.0
|
155
|
+
next if (l_float - r_float).abs < @options[:min_diff_for_floats]
|
156
|
+
end
|
157
|
+
next if (r_value.is_a?(Array) && '--missing--'.eql?(r_value.first)) || (l_value.is_a?(Array) && '--missing--'.eql?(l_value.first))
|
158
|
+
# TODO: get_field_from_v5_product
|
159
|
+
next if r_value.to_s.eql?(l_value.to_s)
|
160
|
+
next if r_value.to_s.upcase.eql?(l_value.to_s.upcase) && @options[:case_insensitive]
|
161
|
+
details += " #{sub_key}: '#{l_value}' != '#{r_value}'"
|
162
|
+
found = found_one = true
|
163
|
+
@diff_stat[diff_name][sub_key] += 1
|
164
|
+
end
|
165
|
+
puts details.gsub(/[\[\]]/,'') if found
|
166
|
+
end
|
167
|
+
|
168
|
+
def show_keys(keys, batch_size = 20)
|
169
|
+
0.upto(keys.size) do |idx|
|
170
|
+
next unless idx % batch_size == 0
|
171
|
+
puts ' ' + keys[idx..(idx + batch_size-1)].join(' ')
|
172
|
+
end
|
173
|
+
end
|
174
|
+
def key_results_details(component_name, compare_names, l_keys, r_keys)
|
175
|
+
component_name += 'S' unless /S$/.match(component_name)
|
176
|
+
@report << "#{component_name}: Found #{l_keys.size} items only in #{@left.basename} #{r_keys.size} items only in #{@right.basename}, compared #{@diff_stat[component_name][NR_COMPARED]} items"
|
177
|
+
keys = r_keys - l_keys
|
178
|
+
head = "#{component_name}: #{(keys).size} keys only in #{@right.basename}"
|
179
|
+
puts "#{head}: Keys were #{keys.size}"
|
180
|
+
show_keys(keys)
|
181
|
+
@report << head
|
182
|
+
keys = l_keys - r_keys
|
183
|
+
head = "#{component_name}: #{(keys).size} keys only in #{@left.basename}"
|
184
|
+
puts "#{head}: Keys were #{keys.size}"
|
185
|
+
show_keys(keys)
|
186
|
+
@report << head
|
187
|
+
end
|
188
|
+
end
|
189
|
+
end
|
data/lib/oddb2xml/compressor.rb
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
# encoding: utf-8
|
2
2
|
|
3
3
|
require 'zlib'
|
4
|
-
require '
|
4
|
+
require 'minitar'
|
5
5
|
require 'zip'
|
6
6
|
|
7
7
|
module Oddb2xml
|
@@ -35,9 +35,12 @@ module Oddb2xml
|
|
35
35
|
end
|
36
36
|
end
|
37
37
|
end
|
38
|
-
if File.exists? @compress_file
|
38
|
+
if File.exists? @compress_file
|
39
|
+
puts "#{__LINE__}: @compress_file"
|
39
40
|
@contents.each do |file|
|
40
|
-
|
41
|
+
@tmpfile = file
|
42
|
+
puts "#{__LINE__}: @tmpfile"
|
43
|
+
FileUtils.rm(file) if file && File.exists?(file)
|
41
44
|
end
|
42
45
|
end
|
43
46
|
rescue Errno::ENOENT, StandardError => e
|
data/lib/oddb2xml/downloader.rb
CHANGED
@@ -12,24 +12,21 @@ SkipMigelDownloader = true # https://github.com/zdavatz/oddb2xml_files/raw/mast
|
|
12
12
|
module Oddb2xml
|
13
13
|
module DownloadMethod
|
14
14
|
private
|
15
|
-
def download_as(file, option='
|
15
|
+
def download_as(file, option='w+')
|
16
16
|
tempFile = File.join(WorkDir, File.basename(file))
|
17
|
-
file2save = File.join(Downloads, File.basename(file))
|
18
|
-
|
17
|
+
@file2save = File.join(Downloads, File.basename(file))
|
18
|
+
report_download(@url, @file2save)
|
19
19
|
data = nil
|
20
|
-
FileUtils.rm_f(tempFile, :verbose => false)
|
21
20
|
if Oddb2xml.skip_download(file)
|
22
21
|
io = File.open(file, option)
|
23
22
|
data = io.read
|
24
23
|
else
|
25
24
|
begin
|
26
|
-
response = @agent.get(@url)
|
27
|
-
response.save_as(file)
|
28
|
-
response = nil # win
|
29
25
|
io = File.open(file, option)
|
30
|
-
data =
|
31
|
-
|
32
|
-
|
26
|
+
data = open(@url).read
|
27
|
+
io.write(data)
|
28
|
+
rescue => error
|
29
|
+
puts "error #{error} while fetching #{@url}"
|
33
30
|
ensure
|
34
31
|
io.close if io and !io.closed? # win
|
35
32
|
Oddb2xml.download_finished(tempFile)
|
@@ -39,7 +36,7 @@ module Oddb2xml
|
|
39
36
|
end
|
40
37
|
end
|
41
38
|
class Downloader
|
42
|
-
attr_reader :type, :agent
|
39
|
+
attr_reader :type, :agent, :url; :file2save
|
43
40
|
def initialize(options={}, url=nil)
|
44
41
|
@options = options
|
45
42
|
@url = url
|
@@ -48,6 +45,12 @@ module Oddb2xml
|
|
48
45
|
Oddb2xml.log "Downloader from #{@url} for #{self.class}"
|
49
46
|
init
|
50
47
|
end
|
48
|
+
def report_download(url, file)
|
49
|
+
Oddb2xml.log sprintf("%-20s: download_as %-24s from %s",
|
50
|
+
self.class.to_s.split('::').last,
|
51
|
+
File.basename(file),
|
52
|
+
url)
|
53
|
+
end
|
51
54
|
def init
|
52
55
|
@agent = Mechanize.new
|
53
56
|
@agent.user_agent = 'Mozilla/5.0 (X11; Linux x86_64; rv:16.0) Gecko/20100101 Firefox/16.0'
|
@@ -79,6 +82,7 @@ module Oddb2xml
|
|
79
82
|
Dir.glob(File.join(Downloads, '*')).each { |name| if target.match(name) then entry = name; break end }
|
80
83
|
if entry
|
81
84
|
dest = "#{Downloads}/#{File.basename(entry)}"
|
85
|
+
@file2save = dest
|
82
86
|
if File.exists?(dest)
|
83
87
|
Oddb2xml.log "read_xml_from_zip return content of #{dest} #{File.size(dest)} bytes "
|
84
88
|
return IO.read(dest)
|
@@ -133,8 +137,9 @@ module Oddb2xml
|
|
133
137
|
include DownloadMethod
|
134
138
|
def download
|
135
139
|
@url ||= 'https://download.epha.ch/cleaned/matrix.csv'
|
136
|
-
|
137
|
-
|
140
|
+
file = 'epha_interactions.csv'
|
141
|
+
content = download_as(file, 'w+')
|
142
|
+
FileUtils.rm_f(file, :verbose => false)
|
138
143
|
content
|
139
144
|
end
|
140
145
|
end
|
@@ -142,36 +147,27 @@ module Oddb2xml
|
|
142
147
|
include DownloadMethod
|
143
148
|
def download
|
144
149
|
@url ||= 'https://raw.githubusercontent.com/zdavatz/oddb2xml_files/master/LPPV.txt'
|
145
|
-
download_as('oddb2xml_files_lppv.txt', '
|
150
|
+
download_as('oddb2xml_files_lppv.txt', 'w+')
|
146
151
|
end
|
147
152
|
end
|
148
153
|
class ZurroseDownloader < Downloader
|
149
154
|
include DownloadMethod
|
150
155
|
def download
|
151
156
|
@url ||= 'http://pillbox.oddb.org/TRANSFER.ZIP'
|
152
|
-
|
153
|
-
|
154
|
-
|
155
|
-
|
156
|
-
|
157
|
-
|
158
|
-
|
159
|
-
|
160
|
-
|
161
|
-
begin
|
162
|
-
response = @agent.get(@url)
|
163
|
-
response.save_as(file)
|
164
|
-
response = nil # win
|
165
|
-
rescue Timeout::Error, Errno::ETIMEDOUT
|
166
|
-
retrievable? ? retry : raise
|
167
|
-
ensure
|
168
|
-
Oddb2xml.download_finished(file)
|
169
|
-
end
|
170
|
-
end
|
171
|
-
read_xml_from_zip(/transfer.dat/, file)
|
172
|
-
dest = File.join(Downloads, 'transfer.dat')
|
173
|
-
File.open(dest, 'r:iso-8859-1:utf-8').read
|
157
|
+
zipfile = File.join(WorkDir, 'transfer.zip')
|
158
|
+
download_as(zipfile)
|
159
|
+
dest = File.join(Downloads, 'transfer.dat')
|
160
|
+
cmd = "unzip -o '#{zipfile}' -d '#{Downloads}'"
|
161
|
+
system(cmd)
|
162
|
+
if @options[:artikelstamm]
|
163
|
+
cmd = "iconv -f ISO8859-1 -t utf-8 -o #{dest.sub('.dat','.utf8')} #{dest}"
|
164
|
+
Oddb2xml.log(cmd)
|
165
|
+
system(cmd)
|
174
166
|
end
|
167
|
+
# read file and convert it to utf-8
|
168
|
+
File.open(dest, 'r:iso-8859-1:utf-8').read
|
169
|
+
ensure
|
170
|
+
FileUtils.rm(zipfile) if File.exist?(dest) && File.exist?(zipfile)
|
175
171
|
end
|
176
172
|
end
|
177
173
|
class MedregbmDownloader < Downloader
|
@@ -190,30 +186,35 @@ module Oddb2xml
|
|
190
186
|
super({}, url)
|
191
187
|
end
|
192
188
|
def download
|
193
|
-
|
189
|
+
file = "medregbm_#{@type.to_s}.txt"
|
190
|
+
download_as(file, 'w+:iso-8859-1:utf-8')
|
191
|
+
report_download(@url, file)
|
192
|
+
FileUtils.rm_f(file, :verbose => false) # we need it only in the download
|
193
|
+
file
|
194
194
|
end
|
195
195
|
end
|
196
196
|
class BagXmlDownloader < Downloader
|
197
|
+
include DownloadMethod
|
197
198
|
def init
|
198
199
|
super
|
199
200
|
@url ||= 'http://bag.e-mediat.net/SL2007.Web.External/File.axd?file=XMLPublications.zip'
|
200
201
|
end
|
201
202
|
def download
|
202
203
|
file = File.join(WorkDir, 'XMLPublications.zip')
|
203
|
-
|
204
|
-
|
205
|
-
|
206
|
-
|
207
|
-
|
208
|
-
|
209
|
-
|
210
|
-
|
211
|
-
|
212
|
-
|
213
|
-
|
214
|
-
|
204
|
+
download_as(file)
|
205
|
+
report_download(@url, file)
|
206
|
+
if defined?(RSpec)
|
207
|
+
src = File.join(Oddb2xml::SpecData, 'Preparations.xml')
|
208
|
+
content = File.read(src)
|
209
|
+
FileUtils.cp(src, File.join(Downloads, File.basename(file)))
|
210
|
+
else
|
211
|
+
content = read_xml_from_zip(/Preparations.xml/, File.join(Downloads, File.basename(file)))
|
212
|
+
end
|
213
|
+
if @options[:artikelstamm]
|
214
|
+
cmd = "xmllint --format --output Preparations.xml Preparations.xml"
|
215
|
+
Oddb2xml.log(cmd)
|
216
|
+
system(cmd)
|
215
217
|
end
|
216
|
-
content = read_xml_from_zip(/Preparations.xml/, File.join(Downloads, File.basename(file)))
|
217
218
|
FileUtils.rm_f(file, :verbose => false) unless defined?(RSpec)
|
218
219
|
content
|
219
220
|
end
|
@@ -236,7 +237,7 @@ module Oddb2xml
|
|
236
237
|
def download
|
237
238
|
begin
|
238
239
|
filename = "refdata_#{@type}.xml"
|
239
|
-
file2save = File.join(Downloads, "refdata_#{@type}.xml")
|
240
|
+
@file2save = File.join(Downloads, "refdata_#{@type}.xml")
|
240
241
|
soap = %(<?xml version="1.0" encoding="UTF-8"?>
|
241
242
|
<SOAP-ENV:Envelope xmlns:SOAP-ENV="http://schemas.xmlsoap.org/soap/envelope/" xmlns:ns1="http://refdatabase.refdata.ch/Article_in" xmlns:ns2="http://refdatabase.refdata.ch/">
|
242
243
|
<SOAP-ENV:Body>
|
@@ -247,14 +248,21 @@ module Oddb2xml
|
|
247
248
|
</SOAP-ENV:Envelope>
|
248
249
|
</ns1:ATYPE></ns2:DownloadArticleInput></SOAP-ENV:Body>
|
249
250
|
)
|
250
|
-
|
251
|
-
|
251
|
+
report_download(@url, @file2save)
|
252
|
+
return IO.read(@file2save) if Oddb2xml.skip_download? and File.exists?(@file2save)
|
253
|
+
FileUtils.rm_f(@file2save, :verbose => false)
|
252
254
|
response = @client.call(:download, :xml => soap)
|
253
255
|
if response.success?
|
254
256
|
if xml = response.to_xml
|
257
|
+
xml = File.read(File.join(Oddb2xml::SpecData, File.basename(@file2save))) if defined?(RSpec)
|
255
258
|
response = nil # win
|
256
259
|
FileUtils.makedirs(Downloads)
|
257
|
-
File.open(file2save, 'w+') { |file| file.write xml }
|
260
|
+
File.open(@file2save, 'w+') { |file| file.write xml }
|
261
|
+
if @options[:artikelstamm]
|
262
|
+
cmd = "xmllint --format --output #{@file2save} #{@file2save}"
|
263
|
+
Oddb2xml.log(cmd)
|
264
|
+
system(cmd)
|
265
|
+
end
|
258
266
|
else
|
259
267
|
# received broken data or internal error
|
260
268
|
raise StandardError
|
@@ -271,6 +279,7 @@ module Oddb2xml
|
|
271
279
|
end
|
272
280
|
end
|
273
281
|
class SwissmedicDownloader < Downloader
|
282
|
+
include DownloadMethod
|
274
283
|
def initialize(type=:orphan, options = {})
|
275
284
|
@type = type
|
276
285
|
@options = options
|
@@ -282,23 +291,28 @@ module Oddb2xml
|
|
282
291
|
end
|
283
292
|
end
|
284
293
|
def download
|
285
|
-
@
|
286
|
-
|
287
|
-
|
288
|
-
|
294
|
+
@file2save = File.join(Oddb2xml::WorkDir, "swissmedic_#{@type}.xlsx")
|
295
|
+
report_download(@url, @file2save)
|
296
|
+
if @options[:calc] and @options[:skip_download] and File.exists?(@file2save) and (Time.now-File.ctime(@file2save)).to_i < 24*60*60
|
297
|
+
Oddb2xml.log "SwissmedicDownloader #{__LINE__}: Skip downloading #{@file2save} #{File.size(@file2save)} bytes"
|
298
|
+
return File.expand_path(@file2save)
|
289
299
|
end
|
290
300
|
begin
|
291
|
-
FileUtils.rm(File.expand_path(
|
292
|
-
|
293
|
-
|
301
|
+
FileUtils.rm(File.expand_path(@file2save), :verbose => !defined?(RSpec)) if File.exists?(File.expand_path(@file2save))
|
302
|
+
@url = @direct_url_link
|
303
|
+
download_as(@file2save, 'w+')
|
304
|
+
if @options[:artikelstamm]
|
305
|
+
cmd = "ssconvert '#{@file2save}' '#{File.join(Downloads, File.basename(@file2save).sub(/\.xls.*/, '.csv'))}' 2> /dev/null"
|
306
|
+
Oddb2xml.log(cmd)
|
307
|
+
system(cmd)
|
294
308
|
end
|
295
|
-
return File.expand_path(
|
309
|
+
return File.expand_path(@file2save)
|
296
310
|
rescue Timeout::Error, Errno::ETIMEDOUT
|
297
311
|
retrievable? ? retry : raise
|
298
312
|
ensure
|
299
|
-
Oddb2xml.download_finished(
|
313
|
+
Oddb2xml.download_finished(@file2save, false)
|
300
314
|
end
|
301
|
-
return File.expand_path(
|
315
|
+
return File.expand_path(@file2save)
|
302
316
|
end
|
303
317
|
end
|
304
318
|
class SwissmedicInfoDownloader < Downloader
|
@@ -309,6 +323,7 @@ module Oddb2xml
|
|
309
323
|
end
|
310
324
|
def download
|
311
325
|
file = File.join(Downloads, "swissmedic_info.zip")
|
326
|
+
report_download(@url, file)
|
312
327
|
FileUtils.rm_f(file, :verbose => false) unless Oddb2xml.skip_download?
|
313
328
|
begin
|
314
329
|
response = nil
|