oddb2xml 2.2.3 → 2.2.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -3,6 +3,55 @@ require 'spec_helper'
3
3
 
4
4
  VCR.eject_cassette # we use insert/eject around each example
5
5
 
6
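+ # Not used at the moment, but kept because I still don't know how to generate the reduced test data (presumably the shortened AipsDownload XML) automatically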
7
+ def filter_aips_xml(filename='AipsDownload_ng.xml', ids_to_keep = [55558, 61848])
8
+ puts "File #{filename} exists? #{File.exists?(filename)}"
9
+ tst = %(<?xml version="1.0" encoding="utf-8"?>
10
+ <medicalInformations>
11
+ <medicalInformation type="fi" version="5" lang="de" safetyRelevant="false" informationUpdate="07.2008">
12
+ <title>Zyvoxid®</title>
13
+ <authHolder>Pfizer AG</authHolder>
14
+ <atcCode>J01XX08</atcCode>
15
+ <substances>Linezolid</substances>
16
+ <authNrs>55558, 55559, 55560</authNrs>
17
+ )
18
+ @xml = IO.read(filename)
19
+ ausgabe = File.open('tst.out', 'w+')
20
+ data = {}
21
+ result = MedicalInformationsContent.parse(@xml.sub(Strip_For_Sax_Machine, ''), :lazy => true)
22
+ result.medicalInformation.each do |pac|
23
+ lang = pac.lang.to_s
24
+ next unless lang =~ /de|fr/
25
+ item = {}
26
+ keepIt = false
27
+ pac.authNrs.split(/[, ]+/).each{
28
+ |id|
29
+ if ids_to_keep.index(id.to_i)
30
+ data[ [lang, id.to_i] ] = pac
31
+ keepIt = true;
32
+ ausgabe.puts
33
+ break
34
+ end
35
+ }
36
+ html = Nokogiri::HTML.fragment(pac.content.force_encoding('UTF-8'))
37
+ item[:paragraph] = html
38
+ numbers = /(\d{5})[,\s]*(\d{5})?|(\d{5})[,\s]*(\d{5})?[,\s]*(\d{5})?/.match(html)
39
+ if numbers
40
+ [$1, $2, $3].compact.each {
41
+ |id|
42
+ if ids_to_keep.index(id.to_i)
43
+ data[ [lang, id.to_i] ] = pac
44
+ keepIt = true;
45
+ break
46
+ end
47
+ }
48
+ puts "Must keep #{keepIt} #{pac.authNrs}"
49
+ end
50
+ end
51
+ puts data.size
52
+ puts data.keys
53
+ end
54
+
6
55
  XML_VERSION_1_0 = /xml\sversion=["']1.0["']/
7
56
  PREP_XML = 'Preparations.xml'
8
57
  shared_examples_for 'any downloader' do
@@ -29,6 +78,12 @@ end
29
78
  def common_after
30
79
  Dir.chdir(@savedDir) if @savedDir and File.directory?(@savedDir)
31
80
  VCR.eject_cassette
81
+ vcr_file = File.expand_path(File.join(Oddb2xml::SpecData, '..', 'fixtures', 'vcr_cassettes', 'oddb2xml.json'))
82
+ puts "Pretty-printing #{vcr_file} exists? #{File.exists?(vcr_file)}"
83
+ vcr_file_new = vcr_file.sub('.json', '.new')
84
+ cmd = "cat #{vcr_file} | python -mjson.tool > #{vcr_file_new}"
85
+ res = system(cmd)
86
+ FileUtils.mv(vcr_file_new, vcr_file)
32
87
  end
33
88
 
34
89
  # Zips input_filenames (using the basename)
@@ -43,7 +98,7 @@ def zip_files(zipfile_name, input_filenames)
43
98
  end
44
99
 
45
100
  # Unzips into a specific directory
46
- def unzip_files(zipfile_name, directory)
101
+ def unzip_files(zipfile_name, directory=Dir.pwd)
47
102
  savedDir = Dir.pwd
48
103
  FileUtils.makedirs(directory)
49
104
  Dir.chdir(directory)
@@ -51,7 +106,8 @@ def unzip_files(zipfile_name, directory)
51
106
  # Handle entries one by one
52
107
  zip_file.each do |entry|
53
108
  # Extract to file/directory/symlink
54
- puts "Extracting #{entry.name} into #{directory}"
109
+ puts "downloader_spec.rb: Extracting #{entry.name} exists? #{File.exists?(entry.name)} into #{directory}"
110
+ FileUtils.rm_f(entry.name, :verbose => true) if File.exists?(entry.name)
55
111
  entry.extract(entry.name)
56
112
  end
57
113
  end
@@ -59,6 +115,7 @@ ensure
59
115
  Dir.chdir(savedDir)
60
116
  end
61
117
 
118
+
62
119
  describe Oddb2xml::RefdataDownloader do
63
120
  include ServerMockHelper
64
121
  before(:all) do
@@ -66,7 +123,7 @@ describe Oddb2xml::RefdataDownloader do
66
123
  VCR.configure do |c|
67
124
  c.before_record(:Refdata_DE) do |i|
68
125
  if not /WSDL$/.match(i.request.uri) and /refdatabase.refdata.ch\/Service/.match(i.request.uri) and i.response.body.size > 1024*1024
69
- puts "#{Time.now}: #{__LINE__}: Parsing response.body (#{i.response.body.size} bytes) will take some time. URI was #{i.request.uri}"
126
+ puts "#{Time.now}: #{__LINE__}: Parsing response.body (#{i.response.body.size/(1024*1024)} MB ) will take some time. URI was #{i.request.uri}"
70
127
  doc = REXML::Document.new(i.response.body)
71
128
  items = doc.root.children.first.elements.first
72
129
  nrItems = doc.root.children.first.elements.first.elements.size
@@ -79,7 +136,7 @@ describe Oddb2xml::RefdataDownloader do
79
136
  items.delete x unless x.elements['GTIN'] and Oddb2xml::GTINS_DRUGS.index(x.elements['GTIN'].text)
80
137
  }
81
138
  i.response.body = doc.to_s
82
- puts "#{Time.now}: response.body is now #{i.response.body.size} bytes long"
139
+ puts "#{Time.now}: response.body is now #{i.response.body.size/(1024*1024)} MB long"
83
140
  i.response.headers['Content-Length'] = i.response.body.size
84
141
  end
85
142
  end
@@ -129,25 +186,25 @@ describe Oddb2xml::RefdataDownloader do
129
186
  end
130
187
  end
131
188
 
132
- if true
133
189
  describe Oddb2xml::SwissmedicDownloader do
134
190
  include ServerMockHelper
135
- before(:all) do VCR.eject_cassette end
136
191
  before(:each) do
137
192
  VCR.configure do |c|
138
193
  c.before_record(:swissmedic) do |i|
139
- if i.response.headers['Content-Disposition'] and /www.swissmedic.ch/.match(i.request.uri)
140
- puts "#{Time.now}: URI was #{i.request.uri}"
194
+ if i.response.headers['Content-Disposition'] and /www.swissmedic.ch/.match(i.request.uri) and i.response.body.size > 1024*1024
195
+ puts "#{Time.now}: #{__LINE__} URI was #{i.request.uri}"
141
196
  m = /filename=.([^\d]+)/.match(i.response.headers['Content-Disposition'][0])
142
- puts "#{Time.now}: SwissmedicDownloader #{m[1]} (#{i.response.body.size} bytes)."
197
+ puts "#{Time.now}: #{__LINE__} SwissmedicDownloader #{m[1]} (#{i.response.body.size/(1024*1024)} MB )."
143
198
  if m and true
144
199
  name = m[1].chomp('_')
145
200
  swissmedic_dir = File.join(Oddb2xml::WorkDir, 'swissmedic')
146
201
  FileUtils.makedirs(swissmedic_dir)
147
202
  xlsx_name = File.join(swissmedic_dir, name + '.xlsx')
148
203
  if /Packungen/i.match(xlsx_name)
204
+ FileUtils.rm(xlsx_name, :verbose => true) if File.exists?(xlsx_name)
149
205
  File.open(xlsx_name, 'wb+') { |f| f.write(i.response.body) }
150
- puts "#{Time.now}: Openening saved #{xlsx_name} (#{File.size(xlsx_name)} bytes) will take some time. URI was #{i.request.uri}"
206
+ FileUtils.cp(xlsx_name, File.join(Oddb2xml::SpecData, 'swissmedic_package_downloaded.xlsx'), :verbose => true, :preserve => true)
207
+ puts "#{Time.now}: #{__LINE__}: Openening saved #{xlsx_name} (#{File.size(xlsx_name)} bytes) will take some time. URI was #{i.request.uri}"
151
208
  workbook = RubyXL::Parser.parse(xlsx_name)
152
209
  worksheet = workbook[0]
153
210
  drugs = []
@@ -156,18 +213,19 @@ if true
156
213
  puts "#{Time.now}: Finding items to delete will take some time"
157
214
  while (worksheet.sheet_data[idx])
158
215
  idx += 1
159
- next unless worksheet.sheet_data[idx-1][0]
160
- to_delete << (idx-1) unless drugs.find{ |x| x[0]== worksheet.sheet_data[idx-1][0].value.to_i and
161
- x[1]== worksheet.sheet_data[idx-1][10].value.to_i
216
+ next unless worksheet.sheet_data[idx-1][Oddb2xml::COLUMNS_JULY_2015.keys.index(:iksnr)]
217
+ to_delete << (idx-1) unless drugs.find{ |x| x[0]== worksheet.sheet_data[idx-1][Oddb2xml::COLUMNS_JULY_2015.keys.index(:iksnr)].value.to_i and
218
+ x[1]== worksheet.sheet_data[idx-1][Oddb2xml::COLUMNS_JULY_2015.keys.index(:ikscd)].value.to_i
162
219
  }
163
220
  end
164
221
  if to_delete.size > 0
165
222
  puts "#{Time.now}: Deleting #{to_delete.size} of the #{idx} items will take some time"
166
223
  to_delete.reverse.each{ |row_id| worksheet.delete_row(row_id) }
167
224
  workbook.write(xlsx_name)
225
+ FileUtils.cp(xlsx_name, File.join(Oddb2xml::SpecData, 'swissmedic_package_shortened.xlsx'), :verbose => true, :preserve => true)
168
226
  i.response.body = IO.binread(xlsx_name)
169
227
  i.response.headers['Content-Length'] = i.response.body.size
170
- puts "#{Time.now}: response.body is now #{i.response.body.size} bytes long. #{xlsx_name} was #{File.size(xlsx_name)}"
228
+ puts "#{Time.now}: response.body is now #{i.response.body.size/(1024*1024)} MB long. #{xlsx_name} was #{File.size(xlsx_name)}"
171
229
  end
172
230
  end
173
231
  end
@@ -179,6 +237,7 @@ if true
179
237
 
180
238
  context 'orphan' do
181
239
  before(:each) do
240
+ VCR.eject_cassette
182
241
  VCR.insert_cassette('oddb2xml', :tag => :swissmedic, :exclusive => false)
183
242
  common_before
184
243
  @downloader = Oddb2xml::SwissmedicDownloader.new(:orphan)
@@ -205,6 +264,7 @@ if true
205
264
  end
206
265
  context 'fridge' do
207
266
  before(:each) do
267
+ VCR.eject_cassette
208
268
  VCR.insert_cassette('oddb2xml', :tag => :swissmedic, :exclusive => false)
209
269
  common_before
210
270
  @downloader = Oddb2xml::SwissmedicDownloader.new(:fridge)
@@ -222,8 +282,8 @@ if true
222
282
  end
223
283
  context 'package' do
224
284
  before(:each) do
285
+ VCR.eject_cassette
225
286
  VCR.insert_cassette('oddb2xml', :tag => :swissmedic, :exclusive => false)
226
- # VCR.insert_cassette('oddb2xml', :tag => :swissmedic, :record => :all)
227
287
  common_before
228
288
  @downloader = Oddb2xml::SwissmedicDownloader.new(:package)
229
289
  end
@@ -242,8 +302,7 @@ end
242
302
 
243
303
  describe Oddb2xml::EphaDownloader do
244
304
  include ServerMockHelper
245
- before(:all) do VCR.eject_cassette end
246
- before(:each) do
305
+ before(:all) do
247
306
  VCR.configure do |c|
248
307
  c.before_record(:epha) do |i|
249
308
  if /epha/.match(i.request.uri)
@@ -258,11 +317,12 @@ describe Oddb2xml::EphaDownloader do
258
317
  end
259
318
  end
260
319
  end
320
+ VCR.eject_cassette
261
321
  VCR.insert_cassette('oddb2xml', :tag => :epha)
262
322
  @downloader = Oddb2xml::EphaDownloader.new
263
323
  common_before
264
324
  end
265
- after(:each) do
325
+ after(:all) do
266
326
  common_after
267
327
  end
268
328
  it_behaves_like 'any downloader'
@@ -335,7 +395,7 @@ describe Oddb2xml::BagXmlDownloader do
335
395
  puts "Saved #{tmp_zip} (#{File.size(tmp_zip)} bytes)"
336
396
  i.response.body = IO.binread(tmp_zip)
337
397
  i.response.headers['Content-Length'] = i.response.body.size
338
- puts "#{Time.now}: response.body is now #{i.response.body.size} bytes long. #{tmp_zip} was #{File.size(tmp_zip)}"
398
+ puts "#{Time.now}: response.body is now #{i.response.body.size/(1024*1024)} MB long. #{tmp_zip} was #{File.size(tmp_zip)}"
339
399
  end
340
400
  end
341
401
  end
@@ -456,7 +516,7 @@ describe Oddb2xml::MedregbmDownloader do
456
516
  VCR.configure do |c|
457
517
  c.before_record(:medreg) do |i|
458
518
  if /medregbm.admin.ch/i.match(i.request.uri)
459
- puts "#{Time.now}: #{__LINE__}: URI was #{i.request.uri} containing #{i.response.body.size} bytes"
519
+ puts "#{Time.now}: #{__LINE__}: URI was #{i.request.uri} containing #{i.response.body.size/(1024*1024)} MB "
460
520
  medreg_dir = File.join(Oddb2xml::WorkDir, 'medreg')
461
521
  FileUtils.makedirs(medreg_dir)
462
522
  xlsx_name = File.join(medreg_dir, /ListBetrieb/.match(i.request.uri) ? 'Betriebe.xlsx' : 'Personen.xlsx')
@@ -476,7 +536,7 @@ describe Oddb2xml::MedregbmDownloader do
476
536
  workbook.write(xlsx_name)
477
537
  i.response.body = IO.binread(xlsx_name)
478
538
  i.response.headers['Content-Length'] = i.response.body.size
479
- puts "#{Time.now}: response.body is now #{i.response.body.size} bytes long. #{xlsx_name} was #{File.size(xlsx_name)}"
539
+ puts "#{Time.now}: response.body is now #{i.response.body.size/(1024*1024)} MB long. #{xlsx_name} was #{File.size(xlsx_name)}"
480
540
  end
481
541
  end
482
542
  end
@@ -532,28 +592,21 @@ end
532
592
 
533
593
  describe Oddb2xml::SwissmedicInfoDownloader do
534
594
  include ServerMockHelper
535
- before(:all) do VCR.eject_cassette end
536
- before(:each) do
595
+ before(:all) do
537
596
  VCR.configure do |c|
538
597
  c.before_record(:swissmedicInfo) do |i|
539
- puts "#{Time.now}: #{__LINE__}: URI was #{i.request.uri} returning #{i.response.body.size} bytes"
598
+ puts "#{Time.now}: #{__LINE__}: URI was #{i.request.uri} returning #{i.response.body.size/(1024*1024)} MB "
540
599
  if i.response.headers['Content-Disposition']
541
600
  m = /filename=([^\d]+)/.match(i.response.headers['Content-Disposition'][0])
542
601
  if m
543
602
  name = m[1].chomp('_')
544
603
  if /AipsDownload/i.match(name)
545
- swissmedic_dir = File.join(Oddb2xml::WorkDir, 'swissmedicInfo')
546
- # as reading the unzipped xml takes over 15 minutes using rexml,
547
- # we read the xml from the spec/data
548
- spec_xml = Dir.glob("#{Oddb2xml::SpecData}/AipsDownload.xml")[0]
549
- tmp_zip = File.join(Oddb2xml::WorkDir, 'AipsDownload.zip')
550
- File.open(tmp_zip, 'wb+') { |f| f.write(i.response.body) }
551
- unzip_files(tmp_zip, swissmedic_dir)
552
- FileUtils.cp(spec_xml, Dir.glob("#{swissmedic_dir}/*.xml")[0], :verbose => true)
553
- zip_files(tmp_zip, Dir.glob("#{swissmedic_dir}/*.x??"))
604
+ # We replace the response body with a manually reduced XML file from spec/data.
605
+ # As we only use it to create the fachinfo, we don't need many elements.
606
+ tmp_zip = File.join(Oddb2xml::SpecData, 'AipsDownload.zip')
554
607
  i.response.body = IO.binread(tmp_zip)
555
608
  i.response.headers['Content-Length'] = i.response.body.size
556
- puts "#{Time.now}: #{__LINE__}: response.body is now #{i.response.body.size} bytes long. #{tmp_zip} was #{File.size(tmp_zip)}"
609
+ puts "#{Time.now}: #{__LINE__}: response.body is now #{i.response.body.size/(1024*1024)} MB long. #{tmp_zip} was #{File.size(tmp_zip)}"
557
610
  end
558
611
  end
559
612
  end
@@ -564,7 +617,7 @@ describe Oddb2xml::SwissmedicInfoDownloader do
564
617
  common_before
565
618
  @downloader = Oddb2xml::SwissmedicInfoDownloader.new
566
619
  end
567
- after(:each) do common_after end
620
+ after(:all) do common_after end
568
621
  it_behaves_like 'any downloader'
569
622
  context 'when download is called' do
570
623
  let(:xml) { @downloader.download }
@@ -583,4 +636,4 @@ describe Oddb2xml::SwissmedicInfoDownloader do
583
636
  end
584
637
  end
585
638
  end
586
- end
639
+
@@ -3,7 +3,7 @@
3
3
  require 'spec_helper'
4
4
  require "#{Dir.pwd}/lib/oddb2xml/downloader"
5
5
  ENV['TZ'] = 'UTC' # needed for last_change
6
-
6
+ LAST_CHANGE = "2015-07-03 00:00:00 +0000"
7
7
  describe Oddb2xml::BMUpdateExtractor do
8
8
  before(:all) { VCR.eject_cassette; VCR.insert_cassette('oddb2xml') }
9
9
  after(:all) { VCR.eject_cassette }
@@ -51,6 +51,8 @@ end
51
51
  describe Oddb2xml::RefdataExtractor do
52
52
  before(:all) { VCR.eject_cassette; VCR.insert_cassette('oddb2xml') }
53
53
  after(:all) { VCR.eject_cassette }
54
+ @@last_change = '2015-09-09 00:00:00 +0000'
55
+
54
56
  context 'should handle pharma articles' do
55
57
  subject do
56
58
  @downloader = Oddb2xml::RefdataDownloader.new({}, :pharma)
@@ -67,7 +69,7 @@ describe Oddb2xml::RefdataExtractor do
67
69
  :_type=>:pharma,
68
70
  :ean=> Oddb2xml::LEVETIRACETAM_GTIN.to_i,
69
71
  :pharmacode=> pharma_code_LEVETIRACETAM,
70
- :last_change => "2015-06-04 00:00:00 +0000",
72
+ :last_change => @@last_change,
71
73
  :desc_de=>"LEVETIRACETAM DESITIN Mini Filmtab 250 mg 30 Stk",
72
74
  :desc_fr=>"LEVETIRACETAM DESITIN mini cpr pel 250 mg 30 pce",
73
75
  :atc_code=>"N03AX14",
@@ -93,7 +95,7 @@ describe Oddb2xml::RefdataExtractor do
93
95
  :_type=>:nonpharma,
94
96
  :ean=>7611600441020,
95
97
  :pharmacode=>pharma_code_TUBEGAZE,
96
- :last_change => "2015-06-04 00:00:00 +0000",
98
+ :last_change => @@last_change,
97
99
  :desc_de=>"TUBEGAZE Verband weiss Nr 12 20m Finger gross",
98
100
  :desc_fr=>"TUBEGAZE pans tubul blanc Nr 12 20m doigts grands",
99
101
  :atc_code=>"",
@@ -149,9 +151,8 @@ describe Oddb2xml::SwissmedicInfoExtractor do
149
151
  it {
150
152
  xml = @downloader.download
151
153
  @infos = Oddb2xml::SwissmedicInfoExtractor.new(xml).to_hash
152
- expect(@infos.keys).to eq ['de', 'fr']
153
- expect(@infos['de'].size).to eq 5
154
- expect(@infos['fr'].size).to eq 2
154
+ expect(@infos.keys).to eq ['de']
155
+ expect(@infos['de'].size).to eq 2
155
156
  levetiracetam = nil
156
157
  @infos['de'].each{|info|
157
158
  levetiracetam = info if /Levetiracetam/.match(info[:name])
@@ -175,7 +176,7 @@ describe Oddb2xml::SwissmedicExtractor do
175
176
  cleanup_directories_before_run
176
177
  filename = File.join(Oddb2xml::SpecData, 'swissmedic_package.xlsx')
177
178
  @packs = Oddb2xml::SwissmedicExtractor.new(filename, :package).to_hash
178
- expect(@packs.size).to eq(17)
179
+ expect(@packs.size).to eq(15)
179
180
  serocytol = nil
180
181
  @packs.each{|pack|
181
182
  serocytol = pack[1] if pack[1][:ean] == '7680620690084'
@@ -331,5 +332,4 @@ describe Oddb2xml::ZurroseExtractor do
331
332
  }
332
333
 
333
334
  end
334
-
335
- end
335
+ end