oddb2xml 3.0.19 → 3.0.20
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/History.txt +3 -0
- data/lib/oddb2xml/downloader.rb +15 -3
- data/lib/oddb2xml/extractor.rb +8 -0
- data/lib/oddb2xml/util.rb +19 -0
- data/lib/oddb2xml/version.rb +1 -1
- data/spec/fixtures/vcr_cassettes/oddb2xml.json +2092 -2092
- metadata +1 -1
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA256:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: fe41f3e5cfb0e100f111a3071f32ae8dd4b4103d3f5726bf4eab061f80389323
|
|
4
|
+
data.tar.gz: 785dee3549e33031069aceac4f9be31cc844de603d3b183d94b55f976b0c482c
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: b385af5fc0643374e878da0cd6e1b262e4d7521c5ac5dd3a3670971f8f130ca3492b4aa296410c5e9e6627021bcbb86b508dbd3e80bf1fdd044c61851df69664
|
|
7
|
+
data.tar.gz: 40a8baa557cdb31905f8711702bbe52f8d968b67488ae88f293233ea3bd1db689c9ea75d27b4b17019cf2369fb593eed3268c3867277daf5cb73c763b6a74791
|
data/History.txt
CHANGED
|
@@ -1,3 +1,6 @@
|
|
|
1
|
+
=== 3.0.20 / 09.06.2026
|
|
2
|
+
* Bugfix/robustness: handle truncated or failed Swissmedic .xlsx downloads instead of crashing later in the parser (issue #121). A download through a scanning/allow-list proxy can return an incomplete .xlsx -- a valid ZIP/Excel header (so `file` still reports "Microsoft Excel 2007+") but a missing end-of-central-directory record -- which made RubyXL die deep in rubyzip with the cryptic "Zip end of central directory signature not found". New Oddb2xml.valid_zip? verifies an archive is complete (PK header + EOCD signature in the tail). SwissmedicDownloader now validates the downloaded file and automatically re-fetches it (up to the normal retry count) when it is empty or truncated, and no longer reuses a cached broken file under --skip-download. As a last resort SwissmedicExtractor raises a clear, actionable error (pointing at connectivity / "oddb2xml --proxy-check") instead of the rubyzip backtrace.
|
|
3
|
+
|
|
1
4
|
=== 3.0.19 / 09.06.2026
|
|
2
5
|
* New option --proxy-check: probe connectivity/proxy reachability for every host oddb2xml could need, print a full OK/BLOCKED/UNREACHABLE report (honouring http(s)_proxy) and exit without downloading or building. Exits 0 if all hosts are reachable, 1 otherwise — handy for cron/deploy preflight on allow-list proxies (e.g. "oddb2xml --proxy-check"). Reuses the 3.0.18 proxy checker; checks run concurrently.
|
|
3
6
|
|
data/lib/oddb2xml/downloader.rb
CHANGED
|
@@ -279,13 +279,21 @@ module Oddb2xml
|
|
|
279
279
|
def download
|
|
280
280
|
@file2save = File.join(DOWNLOADS, "swissmedic_#{@type}.xlsx")
|
|
281
281
|
report_download(@url, @file2save)
|
|
282
|
-
if @options[:calc] && @options[:skip_download] && File.exist?(@file2save) && ((Time.now - File.ctime(@file2save)).to_i < 24 * 60 * 60)
|
|
282
|
+
if @options[:calc] && @options[:skip_download] && File.exist?(@file2save) && ((Time.now - File.ctime(@file2save)).to_i < 24 * 60 * 60) && Oddb2xml.valid_zip?(@file2save)
|
|
283
283
|
Oddb2xml.log "SwissmedicDownloader #{__LINE__}: Skip downloading #{@file2save} #{File.size(@file2save)} bytes"
|
|
284
284
|
return File.expand_path(@file2save)
|
|
285
285
|
end
|
|
286
286
|
begin
|
|
287
287
|
@url = @direct_url_link
|
|
288
288
|
download_as(@file2save, "w+")
|
|
289
|
+
# The Swissmedic file is an .xlsx (a ZIP). Downloads through scanning
|
|
290
|
+
# proxies are sometimes truncated (valid header, missing EOCD), which
|
|
291
|
+
# would later crash RubyXL with a cryptic rubyzip error. Verify the
|
|
292
|
+
# archive is complete and just fetch it again if not. (issue #121)
|
|
293
|
+
unless Oddb2xml.valid_zip?(@file2save)
|
|
294
|
+
raise Oddb2xml::IncompleteDownloadError,
|
|
295
|
+
"Swissmedic #{@type} xlsx is empty or truncated (#{File.size(@file2save)} bytes)"
|
|
296
|
+
end
|
|
289
297
|
if @options[:artikelstamm]
|
|
290
298
|
# ssconvert is in the package gnumeric (Debian)
|
|
291
299
|
cmd = "ssconvert '#{@file2save}' '#{File.join(DOWNLOADS, File.basename(@file2save).sub(/\.xls.*/, ".csv"))}' 2> /dev/null"
|
|
@@ -293,8 +301,12 @@ module Oddb2xml
|
|
|
293
301
|
system(cmd)
|
|
294
302
|
end
|
|
295
303
|
return File.expand_path(@file2save)
|
|
296
|
-
rescue Timeout::Error, Errno::ETIMEDOUT
|
|
297
|
-
retrievable?
|
|
304
|
+
rescue Timeout::Error, Errno::ETIMEDOUT, Oddb2xml::IncompleteDownloadError => error
|
|
305
|
+
if retrievable?
|
|
306
|
+
Oddb2xml.log("Retrying Swissmedic #{@type} download: #{error.message}")
|
|
307
|
+
retry
|
|
308
|
+
end
|
|
309
|
+
raise
|
|
298
310
|
ensure
|
|
299
311
|
Oddb2xml.download_finished(@file2save, false)
|
|
300
312
|
end
|
data/lib/oddb2xml/extractor.rb
CHANGED
|
@@ -260,6 +260,14 @@ module Oddb2xml
|
|
|
260
260
|
@type = type
|
|
261
261
|
Oddb2xml.log("SwissmedicExtractor #{@filename} #{File.size(@filename)} bytes")
|
|
262
262
|
return unless File.exist?(@filename)
|
|
263
|
+
unless Oddb2xml.valid_zip?(@filename)
|
|
264
|
+
raise "SwissmedicExtractor: '#{@filename}' is not a usable .xlsx " \
|
|
265
|
+
"(#{File.size(@filename)} bytes). The Swissmedic '#{@type}' download failed or was " \
|
|
266
|
+
"truncated -- the file is empty, an HTML error/proxy page, or an incomplete ZIP " \
|
|
267
|
+
"(valid header but missing end-of-central-directory). Check connectivity to " \
|
|
268
|
+
"www.swissmedic.ch (run 'oddb2xml --proxy-check'); a too-small file usually means " \
|
|
269
|
+
"the proxy cut off a large download."
|
|
270
|
+
end
|
|
263
271
|
@sheet = RubyXL::Parser.parse(File.expand_path(@filename)).worksheets[0]
|
|
264
272
|
end
|
|
265
273
|
|
data/lib/oddb2xml/util.rb
CHANGED
|
@@ -3,10 +3,29 @@ require "htmlentities"
|
|
|
3
3
|
|
|
4
4
|
module Oddb2xml
|
|
5
5
|
FAKE_GTIN_START = "999999"
|
|
6
|
+
|
|
7
|
+
# Raised when a downloaded archive (zip/xlsx) is empty or truncated, so the
|
|
8
|
+
# caller can retry the download instead of crashing later in the parser.
|
|
9
|
+
class IncompleteDownloadError < StandardError; end
|
|
10
|
+
|
|
6
11
|
def self.gen_prodno(iksnr, seqnr)
|
|
7
12
|
sprintf("%05d", iksnr) + sprintf("%02d", seqnr)
|
|
8
13
|
end
|
|
9
14
|
|
|
15
|
+
# True only for a *complete* ZIP/xlsx: the leading "PK" magic bytes plus the
|
|
16
|
+
# End Of Central Directory record (PK\x05\x06) near the end. A truncated
|
|
17
|
+
# download keeps the header (so `file` still says "Microsoft Excel 2007+") but
|
|
18
|
+
# loses the EOCD, which makes rubyzip fail with the cryptic "end of central
|
|
19
|
+
# directory signature not found". See issue #121.
|
|
20
|
+
def self.valid_zip?(file)
|
|
21
|
+
return false unless file && File.exist?(file)
|
|
22
|
+
size = File.size(file)
|
|
23
|
+
return false unless size > 100
|
|
24
|
+
return false unless File.binread(file, 2) == "PK"
|
|
25
|
+
offset = [size - 66_000, 0].max
|
|
26
|
+
File.binread(file, size - offset, offset).include?("PK\x05\x06".b)
|
|
27
|
+
end
|
|
28
|
+
|
|
10
29
|
def self.uri_open(url, max_retries: 3)
|
|
11
30
|
retries = 0
|
|
12
31
|
begin
|
data/lib/oddb2xml/version.rb
CHANGED