oddb2xml 3.0.19 → 3.0.20

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: '081c86c751d4c29fa2ce616abdb6043707e50fab79e3b74a67aab568b2084519'
4
- data.tar.gz: e3c91770aa8ccd4de0714c45f5a20eb7f87fc989ea84932a8e4cf1a5a3813a7c
3
+ metadata.gz: fe41f3e5cfb0e100f111a3071f32ae8dd4b4103d3f5726bf4eab061f80389323
4
+ data.tar.gz: 785dee3549e33031069aceac4f9be31cc844de603d3b183d94b55f976b0c482c
5
5
  SHA512:
6
- metadata.gz: b8a928d127496fb06cba79dc630c19bd5edc3b09dd3df0c0dda6ca058cd1897643818d4a8aa9e2ab125023d0ebb319dddee2ab1be0fcdabc8e4708299a136dee
7
- data.tar.gz: 27c6ef5bcb9fa0df5b7861496468a88b416209a8b83e7c08e3a995407a75944f0d81849d75399254b7ee404787128ed9e4742ef5f6a78107d90f895845afe707
6
+ metadata.gz: b385af5fc0643374e878da0cd6e1b262e4d7521c5ac5dd3a3670971f8f130ca3492b4aa296410c5e9e6627021bcbb86b508dbd3e80bf1fdd044c61851df69664
7
+ data.tar.gz: 40a8baa557cdb31905f8711702bbe52f8d968b67488ae88f293233ea3bd1db689c9ea75d27b4b17019cf2369fb593eed3268c3867277daf5cb73c763b6a74791
data/History.txt CHANGED
@@ -1,3 +1,6 @@
1
+ === 3.0.20 / 09.06.2026
2
+ * Bugfix/robustness: handle truncated or failed Swissmedic .xlsx downloads instead of crashing later in the parser (issue #121). A download through a scanning/allow-list proxy can return an incomplete .xlsx -- a valid ZIP/Excel header (so `file` still reports "Microsoft Excel 2007+") but a missing end-of-central-directory record -- which made RubyXL die deep in rubyzip with the cryptic "Zip end of central directory signature not found". New Oddb2xml.valid_zip? verifies an archive is complete (PK header + EOCD signature in the tail). SwissmedicDownloader now validates the downloaded file and automatically re-fetches it (up to the normal retry count) when it is empty or truncated, and no longer reuses a cached broken file under --skip-download. As a last resort SwissmedicExtractor raises a clear, actionable error (pointing at connectivity / "oddb2xml --proxy-check") instead of the rubyzip backtrace.
3
+
1
4
  === 3.0.19 / 09.06.2026
2
5
  * New option --proxy-check: probe connectivity/proxy reachability for every host oddb2xml could need, print a full OK/BLOCKED/UNREACHABLE report (honouring http(s)_proxy) and exit without downloading or building. Exits 0 if all hosts are reachable, 1 otherwise — handy for cron/deploy preflight on allow-list proxies (e.g. "oddb2xml --proxy-check"). Reuses the 3.0.18 proxy checker; checks run concurrently.
3
6
 
@@ -279,13 +279,21 @@ module Oddb2xml
279
279
  def download
280
280
  @file2save = File.join(DOWNLOADS, "swissmedic_#{@type}.xlsx")
281
281
  report_download(@url, @file2save)
282
- if @options[:calc] && @options[:skip_download] && File.exist?(@file2save) && ((Time.now - File.ctime(@file2save)).to_i < 24 * 60 * 60)
282
+ if @options[:calc] && @options[:skip_download] && File.exist?(@file2save) && ((Time.now - File.ctime(@file2save)).to_i < 24 * 60 * 60) && Oddb2xml.valid_zip?(@file2save)
283
283
  Oddb2xml.log "SwissmedicDownloader #{__LINE__}: Skip downloading #{@file2save} #{File.size(@file2save)} bytes"
284
284
  return File.expand_path(@file2save)
285
285
  end
286
286
  begin
287
287
  @url = @direct_url_link
288
288
  download_as(@file2save, "w+")
289
+ # The Swissmedic file is an .xlsx (a ZIP). Downloads through scanning
290
+ # proxies are sometimes truncated (valid header, missing EOCD), which
291
+ # would later crash RubyXL with a cryptic rubyzip error. Verify the
292
+ # archive is complete and just fetch it again if not. (issue #121)
293
+ unless Oddb2xml.valid_zip?(@file2save)
294
+ raise Oddb2xml::IncompleteDownloadError,
295
+ "Swissmedic #{@type} xlsx is empty or truncated (#{File.size(@file2save)} bytes)"
296
+ end
289
297
  if @options[:artikelstamm]
290
298
  # ssconvert is in the package gnumeric (Debian)
291
299
  cmd = "ssconvert '#{@file2save}' '#{File.join(DOWNLOADS, File.basename(@file2save).sub(/\.xls.*/, ".csv"))}' 2> /dev/null"
@@ -293,8 +301,12 @@ module Oddb2xml
293
301
  system(cmd)
294
302
  end
295
303
  return File.expand_path(@file2save)
296
- rescue Timeout::Error, Errno::ETIMEDOUT
297
- retrievable? ? retry : raise
304
+ rescue Timeout::Error, Errno::ETIMEDOUT, Oddb2xml::IncompleteDownloadError => error
305
+ if retrievable?
306
+ Oddb2xml.log("Retrying Swissmedic #{@type} download: #{error.message}")
307
+ retry
308
+ end
309
+ raise
298
310
  ensure
299
311
  Oddb2xml.download_finished(@file2save, false)
300
312
  end
@@ -260,6 +260,14 @@ module Oddb2xml
260
260
  @type = type
261
261
  Oddb2xml.log("SwissmedicExtractor #{@filename} #{File.size(@filename)} bytes")
262
262
  return unless File.exist?(@filename)
263
+ unless Oddb2xml.valid_zip?(@filename)
264
+ raise "SwissmedicExtractor: '#{@filename}' is not a usable .xlsx " \
265
+ "(#{File.size(@filename)} bytes). The Swissmedic '#{@type}' download failed or was " \
266
+ "truncated -- the file is empty, an HTML error/proxy page, or an incomplete ZIP " \
267
+ "(valid header but missing end-of-central-directory). Check connectivity to " \
268
+ "www.swissmedic.ch (run 'oddb2xml --proxy-check'); a too-small file usually means " \
269
+ "the proxy cut off a large download."
270
+ end
263
271
  @sheet = RubyXL::Parser.parse(File.expand_path(@filename)).worksheets[0]
264
272
  end
265
273
 
data/lib/oddb2xml/util.rb CHANGED
@@ -3,10 +3,29 @@ require "htmlentities"
3
3
 
4
4
  module Oddb2xml
5
5
  FAKE_GTIN_START = "999999"
6
+
7
+ # Raised when a downloaded archive (zip/xlsx) is empty or truncated, so the
8
+ # caller can retry the download instead of crashing later in the parser.
9
+ class IncompleteDownloadError < StandardError; end
10
+
6
11
  def self.gen_prodno(iksnr, seqnr)
7
12
  sprintf("%05d", iksnr) + sprintf("%02d", seqnr)
8
13
  end
9
14
 
15
+ # True only for a *complete* ZIP/xlsx: the file starts with the "PK" magic bytes and the
16
+ # End Of Central Directory record (PK\x05\x06) occurs in the last 66,000 bytes. A truncated
17
+ # download keeps the header (so `file` still says "Microsoft Excel 2007+") but
18
+ # loses the EOCD, which makes rubyzip fail with the cryptic "end of central
19
+ # directory signature not found". See issue #121.
20
+ def self.valid_zip?(file)
21
+ return false unless file && File.exist?(file)
22
+ size = File.size(file)
23
+ return false unless size > 100
24
+ return false unless File.binread(file, 2) == "PK"
25
+ offset = [size - 66_000, 0].max
26
+ File.binread(file, size - offset, offset).include?("PK\x05\x06".b)
27
+ end
28
+
10
29
  def self.uri_open(url, max_retries: 3)
11
30
  retries = 0
12
31
  begin
@@ -1,3 +1,3 @@
1
1
  module Oddb2xml
2
- VERSION = "3.0.19"
2
+ VERSION = "3.0.20"
3
3
  end