oddb2xml 2.5.0 → 2.5.1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.travis.yml +1 -1
- data/Elexis_Artikelstamm_v003.xsd +387 -0
- data/Elexis_Artikelstamm_v5.xsd +513 -0
- data/Gemfile +2 -6
- data/History.txt +11 -0
- data/README.md +35 -27
- data/artikelstamm.md +68 -0
- data/bin/compare_v5 +41 -0
- data/bin/oddb2xml +3 -15
- data/data/article_overrides.yaml +51859 -0
- data/data/gtin2ignore.yaml +30510 -0
- data/data/product_overrides.yaml +4 -0
- data/lib/oddb2xml/builder.rb +543 -192
- data/lib/oddb2xml/cli.rb +82 -62
- data/lib/oddb2xml/compare.rb +189 -0
- data/lib/oddb2xml/compressor.rb +6 -3
- data/lib/oddb2xml/downloader.rb +79 -64
- data/lib/oddb2xml/extractor.rb +67 -40
- data/lib/oddb2xml/options.rb +76 -77
- data/lib/oddb2xml/parslet_compositions.rb +18 -1
- data/lib/oddb2xml/util.rb +25 -3
- data/lib/oddb2xml/version.rb +1 -1
- data/oddb2xml.gemspec +8 -5
- data/oddb2xml.xsd +1 -0
- data/spec/artikelstamm_spec.rb +383 -0
- data/spec/builder_spec.rb +147 -118
- data/spec/calc_spec.rb +3 -15
- data/spec/cli_spec.rb +24 -35
- data/spec/compare_spec.rb +24 -0
- data/spec/compressor_spec.rb +1 -3
- data/spec/data/Elexis_Artikelstamm_v5.xsd +513 -0
- data/spec/data/Preparations.xml +2200 -0
- data/spec/data/Publications.xls +0 -0
- data/spec/data/artikelstamm_N_010917.xml +39 -0
- data/spec/data/artikelstamm_N_011217.xml +17 -0
- data/spec/data/artikelstamm_P_010917.xml +86 -0
- data/spec/data/artikelstamm_P_011217.xml +63 -0
- data/spec/data/oddb2xml_files_lppv.txt +2 -0
- data/spec/data/refdata_NonPharma.xml +38 -0
- data/spec/data/refdata_Pharma.xml +220 -0
- data/spec/data/swissmedic_orphan.xlsx +0 -0
- data/spec/data/swissmedic_package.xlsx +0 -0
- data/spec/data/transfer.dat +59 -19
- data/spec/data/v5_first.xml +102 -0
- data/spec/data/v5_second.xml +184 -0
- data/spec/data_helper.rb +72 -0
- data/spec/downloader_spec.rb +19 -27
- data/spec/extractor_spec.rb +27 -33
- data/spec/fixtures/vcr_cassettes/artikelstamm.json +1 -0
- data/spec/options_spec.rb +73 -66
- data/spec/spec_helper.rb +73 -24
- data/test_options.rb +4 -2
- metadata +100 -21
- data/spec/data/XMLPublications.zip +0 -0
- data/spec/data/compressor/oddb_article.xml +0 -0
- data/spec/data/compressor/oddb_fi.xml +0 -0
- data/spec/data/compressor/oddb_fi_product.xml +0 -0
- data/spec/data/compressor/oddb_limitation.xml +0 -0
- data/spec/data/compressor/oddb_product.xml +0 -0
- data/spec/data/compressor/oddb_substance.xml +0 -0
data/lib/oddb2xml/cli.rb
CHANGED
@@ -11,7 +11,7 @@ require 'rubyXL'
|
|
11
11
|
require 'date' # for today
|
12
12
|
|
13
13
|
module Oddb2xml
|
14
|
-
|
14
|
+
|
15
15
|
class Cli
|
16
16
|
attr_reader :options
|
17
17
|
SUBJECTS = %w[product article]
|
@@ -19,6 +19,7 @@ module Oddb2xml
|
|
19
19
|
OPTIONALS = %w[fi fi_product]
|
20
20
|
def initialize(args)
|
21
21
|
@options = args
|
22
|
+
STDOUT.puts "\nStarting cli with from #{caller[1]} using #{@options}" if defined?(RSpec)
|
22
23
|
Oddb2xml.save_options(@options)
|
23
24
|
@mutex = Mutex.new
|
24
25
|
# product
|
@@ -38,6 +39,7 @@ module Oddb2xml
|
|
38
39
|
end
|
39
40
|
def run
|
40
41
|
threads = []
|
42
|
+
startTime = Time.now
|
41
43
|
files2rm = Dir.glob(File.join(Downloads, '*'))
|
42
44
|
FileUtils.rm_f(files2rm, :verbose => @options[:log]) if files2rm.size > 0 and not Oddb2xml.skip_download?
|
43
45
|
if @options[:calc] and not @options[:extended]
|
@@ -79,46 +81,56 @@ module Oddb2xml
|
|
79
81
|
exit
|
80
82
|
end
|
81
83
|
build
|
84
|
+
if @options[:artikelstamm]
|
85
|
+
elexis_v5_xsd = File.expand_path(File.join(__FILE__, '..', '..', '..', 'Elexis_Artikelstamm_v5.xsd'))
|
86
|
+
cmd = "xmllint --noout --schema #{elexis_v5_xsd} #{@_files[:artikelstamm]}"
|
87
|
+
if system(cmd)
|
88
|
+
puts "Validatied #{@_files[:artikelstamm]}"
|
89
|
+
else
|
90
|
+
puts "Validating failed using #{cmd}"
|
91
|
+
exit(2)
|
92
|
+
end
|
93
|
+
end
|
82
94
|
compress if @options[:compress_ext]
|
83
|
-
report
|
95
|
+
res = report
|
96
|
+
nrSecs = (Time.now - startTime).to_i
|
97
|
+
if defined?(RSpec) && (nrSecs).to_i > 10 then require 'pry'; binding.pry ; end
|
98
|
+
res
|
84
99
|
end
|
85
100
|
private
|
86
101
|
def build
|
87
|
-
Oddb2xml.log("Start build")
|
88
102
|
begin
|
89
|
-
@_files = {"calc"=>"oddb_calc.xml"} if @options[:calc] and not @options[:extended]
|
90
|
-
|
91
|
-
|
92
|
-
|
93
|
-
|
94
|
-
|
95
|
-
|
96
|
-
|
97
|
-
|
98
|
-
|
99
|
-
|
100
|
-
|
101
|
-
refdata = {}
|
102
|
-
types.each do |type|
|
103
|
-
refdata.merge!(@refdata_types[type]) if @refdata_types[type]
|
104
|
-
end
|
105
|
-
builder.refdata = refdata
|
106
|
-
builder.subject = sbj
|
107
|
-
end
|
108
|
-
# common sources
|
109
|
-
builder.items = @items
|
110
|
-
builder.flags = @flags
|
111
|
-
builder.lppvs = @lppvs
|
112
|
-
# optional sources
|
113
|
-
builder.infos = @infos
|
114
|
-
builder.packs = @packs
|
115
|
-
# additional sources
|
116
|
-
%w[actions orphan migel infos_zur_rose].each do |addition|
|
117
|
-
builder.send("#{addition}=".intern, self.instance_variable_get("@#{addition}"))
|
103
|
+
@_files = {"calc"=>"oddb_calc.xml"} if @options[:calc] and not (@options[:extended] || @options[:artikelstamm])
|
104
|
+
builder = Builder.new(@options) do |builder|
|
105
|
+
if @options[:calc] and not (@options[:extended] || @options[:artikelstamm])
|
106
|
+
builder.packs = @packs
|
107
|
+
elsif @options[:address]
|
108
|
+
builder.companies = @companies
|
109
|
+
builder.people = @people
|
110
|
+
else # product
|
111
|
+
if @options[:format] != :dat
|
112
|
+
refdata = {}
|
113
|
+
types.each do |type|
|
114
|
+
refdata.merge!(@refdata_types[type]) if @refdata_types[type]
|
118
115
|
end
|
116
|
+
builder.refdata = refdata
|
117
|
+
end
|
118
|
+
# common sources
|
119
|
+
builder.items = @items
|
120
|
+
builder.flags = @flags
|
121
|
+
builder.lppvs = @lppvs
|
122
|
+
# optional sources
|
123
|
+
builder.infos = @infos
|
124
|
+
builder.packs = @packs
|
125
|
+
# additional sources
|
126
|
+
%w[actions orphan migel infos_zur_rose].each do |addition|
|
127
|
+
builder.send("#{addition}=".intern, self.instance_variable_get("@#{addition}"))
|
119
128
|
end
|
120
|
-
builder.tag_suffix = @options[:tag_suffix]
|
121
129
|
end
|
130
|
+
builder.tag_suffix = @options[:tag_suffix]
|
131
|
+
end
|
132
|
+
files.each_pair do |sbj, file|
|
133
|
+
builder.subject = sbj
|
122
134
|
output = ''
|
123
135
|
if !@options[:address] and (@options[:format] == :dat)
|
124
136
|
types.each do |type|
|
@@ -187,14 +199,14 @@ module Oddb2xml
|
|
187
199
|
when :orphan
|
188
200
|
var = what.to_s
|
189
201
|
begin # instead of Thread.new do
|
190
|
-
downloader = SwissmedicDownloader.new(what)
|
202
|
+
downloader = SwissmedicDownloader.new(what, @options)
|
191
203
|
bin = downloader.download
|
192
204
|
Oddb2xml.log("SwissmedicDownloader #{var} #{bin} #{File.size(bin)} bytes")
|
193
205
|
self.instance_variable_set(
|
194
206
|
"@#{var}",
|
195
207
|
items = SwissmedicExtractor.new(bin, what).to_arry
|
196
208
|
)
|
197
|
-
Oddb2xml.log("SwissmedicExtractor added #{items.size}
|
209
|
+
Oddb2xml.log("SwissmedicExtractor added #{items.size}")
|
198
210
|
items
|
199
211
|
end
|
200
212
|
when :interaction
|
@@ -259,7 +271,7 @@ module Oddb2xml
|
|
259
271
|
xml = downloader.download
|
260
272
|
Oddb2xml.log("ZurroseDownloader xml #{xml.size} bytes")
|
261
273
|
@mutex.synchronize do
|
262
|
-
hsh = ZurroseExtractor.new(xml, @options[:extended]).to_hash
|
274
|
+
hsh = ZurroseExtractor.new(xml, @options[:extended], @options[:artikelstamm]).to_hash
|
263
275
|
Oddb2xml.log("ZurroseExtractor added #{hsh.size} items from xml with #{xml.size} bytes")
|
264
276
|
@infos_zur_rose = hsh
|
265
277
|
end
|
@@ -302,7 +314,9 @@ module Oddb2xml
|
|
302
314
|
unless @_files
|
303
315
|
@_files = {}
|
304
316
|
@_files[:calc] = "oddb_calc.xml" if @options[:calc]
|
305
|
-
if @options[:
|
317
|
+
if @options[:artikelstamm]
|
318
|
+
@_files[:artikelstamm] = "artikelstamm_#{Date.today.strftime('%d%m%Y')}_v5.xml"
|
319
|
+
elsif @options[:address]
|
306
320
|
@_files[:company] = "#{prefix}_betrieb.xml"
|
307
321
|
@_files[:person] = "#{prefix}_medizinalperson.xml"
|
308
322
|
elsif @options[:format] == :dat
|
@@ -336,41 +350,47 @@ module Oddb2xml
|
|
336
350
|
lines << Calc.report_conversion
|
337
351
|
lines << ParseComposition.report
|
338
352
|
end
|
339
|
-
|
340
|
-
|
341
|
-
|
342
|
-
|
343
|
-
|
344
|
-
|
345
|
-
|
346
|
-
|
353
|
+
if @options[:artikelstamm]
|
354
|
+
lines << "Generated artikelstamm.xml for Elexis"
|
355
|
+
lines += Builder.articlestamm_v5_info_lines
|
356
|
+
else
|
357
|
+
unless @options[:address]
|
358
|
+
types.each do |type|
|
359
|
+
if @refdata_types[type]
|
360
|
+
indices = @refdata_types[type].values.flatten.length
|
361
|
+
|
362
|
+
if type == :nonpharma
|
363
|
+
nonpharmas = @refdata_types[type].keys
|
364
|
+
if SkipMigelDownloader
|
365
|
+
indices + nonpharmas.length
|
366
|
+
else
|
367
|
+
migel_xls = @migel.values.compact.select{|m| !m[:pharmacode]}.map{|m| m[:pharmacode] }
|
368
|
+
indices += (migel_xls - nonpharmas).length # ignore duplicates, null
|
369
|
+
end
|
370
|
+
lines << sprintf("\tNonPharma products: %i", indices)
|
347
371
|
else
|
348
|
-
|
349
|
-
indices += (migel_xls - nonpharmas).length # ignore duplicates, null
|
372
|
+
lines << sprintf("\tPharma products: %i", indices)
|
350
373
|
end
|
351
|
-
lines << sprintf("\tNonPharma products: %i", indices)
|
352
|
-
else
|
353
|
-
lines << sprintf("\tPharma products: %i", indices)
|
354
374
|
end
|
355
375
|
end
|
356
|
-
|
357
|
-
|
358
|
-
|
359
|
-
|
360
|
-
|
361
|
-
|
362
|
-
|
363
|
-
|
364
|
-
|
365
|
-
|
366
|
-
|
376
|
+
if (@options[:extended] || @options[:artikelstamm])
|
377
|
+
lines << sprintf("\tInformation items zur Rose: %i", @infos_zur_rose.length)
|
378
|
+
end
|
379
|
+
else
|
380
|
+
{
|
381
|
+
'Betrieb' => :@companies,
|
382
|
+
'Person' => :@people
|
383
|
+
}.each do |type, var|
|
384
|
+
lines << sprintf(
|
385
|
+
"#{type} addresses: %i", self.instance_variable_get(var).length)
|
386
|
+
end
|
367
387
|
end
|
368
388
|
end
|
369
389
|
puts lines.join("\n")
|
370
390
|
end
|
371
391
|
def types # RefData
|
372
392
|
@_types ||=
|
373
|
-
if @options[:nonpharma]
|
393
|
+
if @options[:nonpharma] || @options[:artikelstamm]
|
374
394
|
[:pharma, :nonpharma]
|
375
395
|
else
|
376
396
|
[:pharma]
|
@@ -0,0 +1,189 @@
|
|
1
|
+
# encoding: utf-8
|
2
|
+
require 'xmlsimple'
|
3
|
+
|
4
|
+
module Oddb2xml
|
5
|
+
def self.log_timestamp(msg)
|
6
|
+
full_msg = "#{Time.now.strftime("%H:%M:%S")}: #{msg}"
|
7
|
+
puts full_msg
|
8
|
+
STDOUT.flush
|
9
|
+
full_msg
|
10
|
+
end
|
11
|
+
class StammXML
|
12
|
+
V3_NAME_REG = /_([N,P])_/
|
13
|
+
attr_accessor :components
|
14
|
+
attr_reader :keys, :sub_key_names, :filename, :basename, :version, :hash
|
15
|
+
def initialize(filename, components = ['ITEMS'])
|
16
|
+
raise "File #{filename} must exist" unless File.exist?(filename)
|
17
|
+
@filename = filename
|
18
|
+
@basename = File.basename(filename)
|
19
|
+
@version = V3_NAME_REG.match(filename) ? 3 : 5
|
20
|
+
@components = components
|
21
|
+
if @version == 5
|
22
|
+
@hash = load_file(@filename)
|
23
|
+
else
|
24
|
+
raise "Unsupported version #{@version}"
|
25
|
+
end
|
26
|
+
end
|
27
|
+
def self.get_component_key_name(component_name)
|
28
|
+
return 'LIMNAMEBAG' if /LIMITATION/i.match(component_name)
|
29
|
+
return 'PRODNO' if /PRODUCT/i.match(component_name)
|
30
|
+
return 'GTIN' if /ITEM/i.match(component_name)
|
31
|
+
raise "Cannot determine keyname for component #{component_name}"
|
32
|
+
end
|
33
|
+
def get_limitation_from_v5(item)
|
34
|
+
get_item('PRODUCTS', item['PRODNO'].first.to_i)['LIMNAMEBAG'] ? ['true'] : nil
|
35
|
+
end
|
36
|
+
def get_field_from_v5_product(item, field_name)
|
37
|
+
get_item('PRODUCTS', item['PRODNO'].first.to_i)[field_name]
|
38
|
+
end
|
39
|
+
def get_items(component_name)
|
40
|
+
if @version == 3
|
41
|
+
items = @hash[component_name]
|
42
|
+
else
|
43
|
+
items = @hash[component_name].first.values.first
|
44
|
+
end
|
45
|
+
items
|
46
|
+
end
|
47
|
+
def get_item(component_name, id)
|
48
|
+
keyname = StammXML.get_component_key_name(component_name)
|
49
|
+
get_items(component_name).find{|item| item[keyname].first.to_i == id}
|
50
|
+
end
|
51
|
+
def load_file(name)
|
52
|
+
Oddb2xml.log_timestamp "Reading #{name} #{(File.size(name)/1024/1024).to_i} MB. This may take some time"
|
53
|
+
XmlSimple.xml_in(IO.read(name))
|
54
|
+
end
|
55
|
+
end
|
56
|
+
class CompareV5
|
57
|
+
DEFAULTS = {
|
58
|
+
:components => ["PRODUCTS", "LIMITATIONS", "ITEMS",],
|
59
|
+
:fields_to_ignore => ['COMP', 'DOSAGE_FORMF', 'MEASUREF'],
|
60
|
+
:fields_as_floats => [ 'PEXT', 'PEXF', 'PPUB' ],
|
61
|
+
:min_diff_for_floats => 0.01,
|
62
|
+
}
|
63
|
+
def initialize(left, right, options = DEFAULTS.clone)
|
64
|
+
@options = options
|
65
|
+
@left = StammXML.new(left, @options[:components])
|
66
|
+
@right = StammXML.new(right, @options[:components])
|
67
|
+
@diff_stat = {}
|
68
|
+
@occurrences = {}
|
69
|
+
@report = []
|
70
|
+
end
|
71
|
+
def get_keys(items, key='GTIN')
|
72
|
+
items.collect{|item| item[key].first.to_i }
|
73
|
+
end
|
74
|
+
def get_names(items)
|
75
|
+
items.collect{|item| item.keys}.flatten.uniq.sort
|
76
|
+
end
|
77
|
+
def compare
|
78
|
+
show_header("Start comparing #{@left.filename} with #{@right.filename}")
|
79
|
+
(@left.components & @right.components).each do |name|
|
80
|
+
begin
|
81
|
+
puts "\n#{Time.now.strftime("%H:%M:%S")}: Comparing #{name} in #{@left.basename} with #{@right.basename}"
|
82
|
+
key = StammXML.get_component_key_name(name)
|
83
|
+
left_items = @left.get_items(name)
|
84
|
+
next unless left_items
|
85
|
+
right_items = @right.get_items(name)
|
86
|
+
next unless right_items
|
87
|
+
@diff_stat[name] = {}
|
88
|
+
@occurrences[name] = {}
|
89
|
+
@diff_stat[name][NR_COMPARED] = 0
|
90
|
+
l_names = get_names(left_items)
|
91
|
+
r_names = get_names(right_items)
|
92
|
+
compare_names = l_names & r_names
|
93
|
+
l_keys = get_keys(left_items, key)
|
94
|
+
r_keys = get_keys(right_items, key)
|
95
|
+
(l_keys & r_keys).each do |id|
|
96
|
+
compare_details(name, compare_names, id)
|
97
|
+
end
|
98
|
+
key_results_details(name, compare_names, l_keys, r_keys)
|
99
|
+
rescue => error
|
100
|
+
puts "Execution failed with #{error}"
|
101
|
+
end
|
102
|
+
end
|
103
|
+
show_header("Summary comparing #{@left.filename} with #{@right.filename}")
|
104
|
+
puts "Ignored differences in #{@options[:fields_to_ignore]}. Signaled when differences in #{@options[:fields_as_floats]} were bigger than #{@options[:min_diff_for_floats]}"
|
105
|
+
puts @report.join("\n")
|
106
|
+
@diff_stat.each do |component, stats|
|
107
|
+
puts "\nFor #{stats[NR_COMPARED]} #{component} we have the following number of differences per field"
|
108
|
+
stats.each do |name, nr|
|
109
|
+
next if name.eql?(NR_COMPARED)
|
110
|
+
next if @options[:fields_to_ignore].index(name)
|
111
|
+
puts " #{name.ljust(20)} #{nr} of #{@occurrences[component][name]}"
|
112
|
+
end
|
113
|
+
end
|
114
|
+
@diff_stat
|
115
|
+
rescue => error
|
116
|
+
puts "Execution failed with #{error}"
|
117
|
+
raise error
|
118
|
+
end
|
119
|
+
private
|
120
|
+
NR_COMPARED = 'NR_COMPARED'
|
121
|
+
COUNT = '_count'
|
122
|
+
def show_header(header)
|
123
|
+
text = Oddb2xml.log_timestamp(header)
|
124
|
+
pad = 5
|
125
|
+
puts
|
126
|
+
puts '-'*(text.length+2*pad)
|
127
|
+
puts ''.ljust(pad) + text
|
128
|
+
puts '-'*(text.length+2*pad)
|
129
|
+
puts
|
130
|
+
end
|
131
|
+
def compare_details(component_name, compare_names, id)
|
132
|
+
l_item = @left.get_item(component_name, id)
|
133
|
+
r_item = @right.get_item(component_name, id)
|
134
|
+
found_one = false
|
135
|
+
length = 32
|
136
|
+
found = false
|
137
|
+
detail_name = l_item['DSCR'] ? l_item['DSCR'].first[0..length-1].rjust(length) : ''.rjust(length)
|
138
|
+
details = "Diff in #{id.to_s.ljust(15)} #{detail_name}"
|
139
|
+
diff_name = component_name
|
140
|
+
diff_name += 'S' unless /S$/.match(diff_name)
|
141
|
+
@diff_stat[diff_name] ||= {}
|
142
|
+
@occurrences[diff_name] ||= {}
|
143
|
+
@diff_stat[diff_name][NR_COMPARED] ||= 0
|
144
|
+
@diff_stat[diff_name][NR_COMPARED] += 1
|
145
|
+
l_item.keys.each do |sub_key|
|
146
|
+
next if @options[:fields_to_ignore].index(sub_key)
|
147
|
+
@diff_stat[diff_name][sub_key] ||= 0
|
148
|
+
@occurrences[diff_name][sub_key] ||= 0
|
149
|
+
@occurrences[diff_name][sub_key] += 1
|
150
|
+
r_value = r_item[sub_key]
|
151
|
+
l_value = l_item[sub_key]
|
152
|
+
if @options[:fields_as_floats].index(sub_key)
|
153
|
+
l_float = l_value ? l_value.first.to_f : 0.0
|
154
|
+
r_float = r_value ? r_value.first.to_f : 0.0
|
155
|
+
next if (l_float - r_float).abs < @options[:min_diff_for_floats]
|
156
|
+
end
|
157
|
+
next if (r_value.is_a?(Array) && '--missing--'.eql?(r_value.first)) || (l_value.is_a?(Array) && '--missing--'.eql?(l_value.first))
|
158
|
+
# TODO: get_field_from_v5_product
|
159
|
+
next if r_value.to_s.eql?(l_value.to_s)
|
160
|
+
next if r_value.to_s.upcase.eql?(l_value.to_s.upcase) && @options[:case_insensitive]
|
161
|
+
details += " #{sub_key}: '#{l_value}' != '#{r_value}'"
|
162
|
+
found = found_one = true
|
163
|
+
@diff_stat[diff_name][sub_key] += 1
|
164
|
+
end
|
165
|
+
puts details.gsub(/[\[\]]/,'') if found
|
166
|
+
end
|
167
|
+
|
168
|
+
def show_keys(keys, batch_size = 20)
|
169
|
+
0.upto(keys.size) do |idx|
|
170
|
+
next unless idx % batch_size == 0
|
171
|
+
puts ' ' + keys[idx..(idx + batch_size-1)].join(' ')
|
172
|
+
end
|
173
|
+
end
|
174
|
+
def key_results_details(component_name, compare_names, l_keys, r_keys)
|
175
|
+
component_name += 'S' unless /S$/.match(component_name)
|
176
|
+
@report << "#{component_name}: Found #{l_keys.size} items only in #{@left.basename} #{r_keys.size} items only in #{@right.basename}, compared #{@diff_stat[component_name][NR_COMPARED]} items"
|
177
|
+
keys = r_keys - l_keys
|
178
|
+
head = "#{component_name}: #{(keys).size} keys only in #{@right.basename}"
|
179
|
+
puts "#{head}: Keys were #{keys.size}"
|
180
|
+
show_keys(keys)
|
181
|
+
@report << head
|
182
|
+
keys = l_keys - r_keys
|
183
|
+
head = "#{component_name}: #{(keys).size} keys only in #{@left.basename}"
|
184
|
+
puts "#{head}: Keys were #{keys.size}"
|
185
|
+
show_keys(keys)
|
186
|
+
@report << head
|
187
|
+
end
|
188
|
+
end
|
189
|
+
end
|
data/lib/oddb2xml/compressor.rb
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
# encoding: utf-8
|
2
2
|
|
3
3
|
require 'zlib'
|
4
|
-
require '
|
4
|
+
require 'minitar'
|
5
5
|
require 'zip'
|
6
6
|
|
7
7
|
module Oddb2xml
|
@@ -35,9 +35,12 @@ module Oddb2xml
|
|
35
35
|
end
|
36
36
|
end
|
37
37
|
end
|
38
|
-
if File.exists? @compress_file
|
38
|
+
if File.exists? @compress_file
|
39
|
+
puts "#{__LINE__}: @compress_file"
|
39
40
|
@contents.each do |file|
|
40
|
-
|
41
|
+
@tmpfile = file
|
42
|
+
puts "#{__LINE__}: @tmpfile"
|
43
|
+
FileUtils.rm(file) if file && File.exists?(file)
|
41
44
|
end
|
42
45
|
end
|
43
46
|
rescue Errno::ENOENT, StandardError => e
|
data/lib/oddb2xml/downloader.rb
CHANGED
@@ -12,24 +12,21 @@ SkipMigelDownloader = true # https://github.com/zdavatz/oddb2xml_files/raw/mast
|
|
12
12
|
module Oddb2xml
|
13
13
|
module DownloadMethod
|
14
14
|
private
|
15
|
-
def download_as(file, option='
|
15
|
+
def download_as(file, option='w+')
|
16
16
|
tempFile = File.join(WorkDir, File.basename(file))
|
17
|
-
file2save = File.join(Downloads, File.basename(file))
|
18
|
-
|
17
|
+
@file2save = File.join(Downloads, File.basename(file))
|
18
|
+
report_download(@url, @file2save)
|
19
19
|
data = nil
|
20
|
-
FileUtils.rm_f(tempFile, :verbose => false)
|
21
20
|
if Oddb2xml.skip_download(file)
|
22
21
|
io = File.open(file, option)
|
23
22
|
data = io.read
|
24
23
|
else
|
25
24
|
begin
|
26
|
-
response = @agent.get(@url)
|
27
|
-
response.save_as(file)
|
28
|
-
response = nil # win
|
29
25
|
io = File.open(file, option)
|
30
|
-
data =
|
31
|
-
|
32
|
-
|
26
|
+
data = open(@url).read
|
27
|
+
io.write(data)
|
28
|
+
rescue => error
|
29
|
+
puts "error #{error} while fetching #{@url}"
|
33
30
|
ensure
|
34
31
|
io.close if io and !io.closed? # win
|
35
32
|
Oddb2xml.download_finished(tempFile)
|
@@ -39,7 +36,7 @@ module Oddb2xml
|
|
39
36
|
end
|
40
37
|
end
|
41
38
|
class Downloader
|
42
|
-
attr_reader :type, :agent
|
39
|
+
attr_reader :type, :agent, :url; :file2save
|
43
40
|
def initialize(options={}, url=nil)
|
44
41
|
@options = options
|
45
42
|
@url = url
|
@@ -48,6 +45,12 @@ module Oddb2xml
|
|
48
45
|
Oddb2xml.log "Downloader from #{@url} for #{self.class}"
|
49
46
|
init
|
50
47
|
end
|
48
|
+
def report_download(url, file)
|
49
|
+
Oddb2xml.log sprintf("%-20s: download_as %-24s from %s",
|
50
|
+
self.class.to_s.split('::').last,
|
51
|
+
File.basename(file),
|
52
|
+
url)
|
53
|
+
end
|
51
54
|
def init
|
52
55
|
@agent = Mechanize.new
|
53
56
|
@agent.user_agent = 'Mozilla/5.0 (X11; Linux x86_64; rv:16.0) Gecko/20100101 Firefox/16.0'
|
@@ -79,6 +82,7 @@ module Oddb2xml
|
|
79
82
|
Dir.glob(File.join(Downloads, '*')).each { |name| if target.match(name) then entry = name; break end }
|
80
83
|
if entry
|
81
84
|
dest = "#{Downloads}/#{File.basename(entry)}"
|
85
|
+
@file2save = dest
|
82
86
|
if File.exists?(dest)
|
83
87
|
Oddb2xml.log "read_xml_from_zip return content of #{dest} #{File.size(dest)} bytes "
|
84
88
|
return IO.read(dest)
|
@@ -133,8 +137,9 @@ module Oddb2xml
|
|
133
137
|
include DownloadMethod
|
134
138
|
def download
|
135
139
|
@url ||= 'https://download.epha.ch/cleaned/matrix.csv'
|
136
|
-
|
137
|
-
|
140
|
+
file = 'epha_interactions.csv'
|
141
|
+
content = download_as(file, 'w+')
|
142
|
+
FileUtils.rm_f(file, :verbose => false)
|
138
143
|
content
|
139
144
|
end
|
140
145
|
end
|
@@ -142,36 +147,27 @@ module Oddb2xml
|
|
142
147
|
include DownloadMethod
|
143
148
|
def download
|
144
149
|
@url ||= 'https://raw.githubusercontent.com/zdavatz/oddb2xml_files/master/LPPV.txt'
|
145
|
-
download_as('oddb2xml_files_lppv.txt', '
|
150
|
+
download_as('oddb2xml_files_lppv.txt', 'w+')
|
146
151
|
end
|
147
152
|
end
|
148
153
|
class ZurroseDownloader < Downloader
|
149
154
|
include DownloadMethod
|
150
155
|
def download
|
151
156
|
@url ||= 'http://pillbox.oddb.org/TRANSFER.ZIP'
|
152
|
-
|
153
|
-
|
154
|
-
|
155
|
-
|
156
|
-
|
157
|
-
|
158
|
-
|
159
|
-
|
160
|
-
|
161
|
-
begin
|
162
|
-
response = @agent.get(@url)
|
163
|
-
response.save_as(file)
|
164
|
-
response = nil # win
|
165
|
-
rescue Timeout::Error, Errno::ETIMEDOUT
|
166
|
-
retrievable? ? retry : raise
|
167
|
-
ensure
|
168
|
-
Oddb2xml.download_finished(file)
|
169
|
-
end
|
170
|
-
end
|
171
|
-
read_xml_from_zip(/transfer.dat/, file)
|
172
|
-
dest = File.join(Downloads, 'transfer.dat')
|
173
|
-
File.open(dest, 'r:iso-8859-1:utf-8').read
|
157
|
+
zipfile = File.join(WorkDir, 'transfer.zip')
|
158
|
+
download_as(zipfile)
|
159
|
+
dest = File.join(Downloads, 'transfer.dat')
|
160
|
+
cmd = "unzip -o '#{zipfile}' -d '#{Downloads}'"
|
161
|
+
system(cmd)
|
162
|
+
if @options[:artikelstamm]
|
163
|
+
cmd = "iconv -f ISO8859-1 -t utf-8 -o #{dest.sub('.dat','.utf8')} #{dest}"
|
164
|
+
Oddb2xml.log(cmd)
|
165
|
+
system(cmd)
|
174
166
|
end
|
167
|
+
# read file and convert it to utf-8
|
168
|
+
File.open(dest, 'r:iso-8859-1:utf-8').read
|
169
|
+
ensure
|
170
|
+
FileUtils.rm(zipfile) if File.exist?(dest) && File.exist?(zipfile)
|
175
171
|
end
|
176
172
|
end
|
177
173
|
class MedregbmDownloader < Downloader
|
@@ -190,30 +186,35 @@ module Oddb2xml
|
|
190
186
|
super({}, url)
|
191
187
|
end
|
192
188
|
def download
|
193
|
-
|
189
|
+
file = "medregbm_#{@type.to_s}.txt"
|
190
|
+
download_as(file, 'w+:iso-8859-1:utf-8')
|
191
|
+
report_download(@url, file)
|
192
|
+
FileUtils.rm_f(file, :verbose => false) # we need it only in the download
|
193
|
+
file
|
194
194
|
end
|
195
195
|
end
|
196
196
|
class BagXmlDownloader < Downloader
|
197
|
+
include DownloadMethod
|
197
198
|
def init
|
198
199
|
super
|
199
200
|
@url ||= 'http://bag.e-mediat.net/SL2007.Web.External/File.axd?file=XMLPublications.zip'
|
200
201
|
end
|
201
202
|
def download
|
202
203
|
file = File.join(WorkDir, 'XMLPublications.zip')
|
203
|
-
|
204
|
-
|
205
|
-
|
206
|
-
|
207
|
-
|
208
|
-
|
209
|
-
|
210
|
-
|
211
|
-
|
212
|
-
|
213
|
-
|
214
|
-
|
204
|
+
download_as(file)
|
205
|
+
report_download(@url, file)
|
206
|
+
if defined?(RSpec)
|
207
|
+
src = File.join(Oddb2xml::SpecData, 'Preparations.xml')
|
208
|
+
content = File.read(src)
|
209
|
+
FileUtils.cp(src, File.join(Downloads, File.basename(file)))
|
210
|
+
else
|
211
|
+
content = read_xml_from_zip(/Preparations.xml/, File.join(Downloads, File.basename(file)))
|
212
|
+
end
|
213
|
+
if @options[:artikelstamm]
|
214
|
+
cmd = "xmllint --format --output Preparations.xml Preparations.xml"
|
215
|
+
Oddb2xml.log(cmd)
|
216
|
+
system(cmd)
|
215
217
|
end
|
216
|
-
content = read_xml_from_zip(/Preparations.xml/, File.join(Downloads, File.basename(file)))
|
217
218
|
FileUtils.rm_f(file, :verbose => false) unless defined?(RSpec)
|
218
219
|
content
|
219
220
|
end
|
@@ -236,7 +237,7 @@ module Oddb2xml
|
|
236
237
|
def download
|
237
238
|
begin
|
238
239
|
filename = "refdata_#{@type}.xml"
|
239
|
-
file2save = File.join(Downloads, "refdata_#{@type}.xml")
|
240
|
+
@file2save = File.join(Downloads, "refdata_#{@type}.xml")
|
240
241
|
soap = %(<?xml version="1.0" encoding="UTF-8"?>
|
241
242
|
<SOAP-ENV:Envelope xmlns:SOAP-ENV="http://schemas.xmlsoap.org/soap/envelope/" xmlns:ns1="http://refdatabase.refdata.ch/Article_in" xmlns:ns2="http://refdatabase.refdata.ch/">
|
242
243
|
<SOAP-ENV:Body>
|
@@ -247,14 +248,21 @@ module Oddb2xml
|
|
247
248
|
</SOAP-ENV:Envelope>
|
248
249
|
</ns1:ATYPE></ns2:DownloadArticleInput></SOAP-ENV:Body>
|
249
250
|
)
|
250
|
-
|
251
|
-
|
251
|
+
report_download(@url, @file2save)
|
252
|
+
return IO.read(@file2save) if Oddb2xml.skip_download? and File.exists?(@file2save)
|
253
|
+
FileUtils.rm_f(@file2save, :verbose => false)
|
252
254
|
response = @client.call(:download, :xml => soap)
|
253
255
|
if response.success?
|
254
256
|
if xml = response.to_xml
|
257
|
+
xml = File.read(File.join(Oddb2xml::SpecData, File.basename(@file2save))) if defined?(RSpec)
|
255
258
|
response = nil # win
|
256
259
|
FileUtils.makedirs(Downloads)
|
257
|
-
File.open(file2save, 'w+') { |file| file.write xml }
|
260
|
+
File.open(@file2save, 'w+') { |file| file.write xml }
|
261
|
+
if @options[:artikelstamm]
|
262
|
+
cmd = "xmllint --format --output #{@file2save} #{@file2save}"
|
263
|
+
Oddb2xml.log(cmd)
|
264
|
+
system(cmd)
|
265
|
+
end
|
258
266
|
else
|
259
267
|
# received broken data or internal error
|
260
268
|
raise StandardError
|
@@ -271,6 +279,7 @@ module Oddb2xml
|
|
271
279
|
end
|
272
280
|
end
|
273
281
|
class SwissmedicDownloader < Downloader
|
282
|
+
include DownloadMethod
|
274
283
|
def initialize(type=:orphan, options = {})
|
275
284
|
@type = type
|
276
285
|
@options = options
|
@@ -282,23 +291,28 @@ module Oddb2xml
|
|
282
291
|
end
|
283
292
|
end
|
284
293
|
def download
|
285
|
-
@
|
286
|
-
|
287
|
-
|
288
|
-
|
294
|
+
@file2save = File.join(Oddb2xml::WorkDir, "swissmedic_#{@type}.xlsx")
|
295
|
+
report_download(@url, @file2save)
|
296
|
+
if @options[:calc] and @options[:skip_download] and File.exists?(@file2save) and (Time.now-File.ctime(@file2save)).to_i < 24*60*60
|
297
|
+
Oddb2xml.log "SwissmedicDownloader #{__LINE__}: Skip downloading #{@file2save} #{File.size(@file2save)} bytes"
|
298
|
+
return File.expand_path(@file2save)
|
289
299
|
end
|
290
300
|
begin
|
291
|
-
FileUtils.rm(File.expand_path(
|
292
|
-
|
293
|
-
|
301
|
+
FileUtils.rm(File.expand_path(@file2save), :verbose => !defined?(RSpec)) if File.exists?(File.expand_path(@file2save))
|
302
|
+
@url = @direct_url_link
|
303
|
+
download_as(@file2save, 'w+')
|
304
|
+
if @options[:artikelstamm]
|
305
|
+
cmd = "ssconvert '#{@file2save}' '#{File.join(Downloads, File.basename(@file2save).sub(/\.xls.*/, '.csv'))}' 2> /dev/null"
|
306
|
+
Oddb2xml.log(cmd)
|
307
|
+
system(cmd)
|
294
308
|
end
|
295
|
-
return File.expand_path(
|
309
|
+
return File.expand_path(@file2save)
|
296
310
|
rescue Timeout::Error, Errno::ETIMEDOUT
|
297
311
|
retrievable? ? retry : raise
|
298
312
|
ensure
|
299
|
-
Oddb2xml.download_finished(
|
313
|
+
Oddb2xml.download_finished(@file2save, false)
|
300
314
|
end
|
301
|
-
return File.expand_path(
|
315
|
+
return File.expand_path(@file2save)
|
302
316
|
end
|
303
317
|
end
|
304
318
|
class SwissmedicInfoDownloader < Downloader
|
@@ -309,6 +323,7 @@ module Oddb2xml
|
|
309
323
|
end
|
310
324
|
def download
|
311
325
|
file = File.join(Downloads, "swissmedic_info.zip")
|
326
|
+
report_download(@url, file)
|
312
327
|
FileUtils.rm_f(file, :verbose => false) unless Oddb2xml.skip_download?
|
313
328
|
begin
|
314
329
|
response = nil
|