oddb2xml 3.0.23 → 3.0.25
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.ruby-version +1 -1
- data/Gemfile.lock +1 -1
- data/History.txt +6 -0
- data/lib/oddb2xml/downloader.rb +7 -1
- data/lib/oddb2xml/extractor.rb +4 -1
- data/lib/oddb2xml/version.rb +1 -1
- data/scripts/run_oddb2xml.sh +99 -0
- data/scripts/transfer.sh +45 -0
- data/spec/fixtures/vcr_cassettes/oddb2xml.json +2092 -2092
- data/spec/proxy_check_spec.rb +74 -0
- metadata +5 -1
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA256:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: 1c80413abe4bd24c2c3555aa6eb4407c7464e381275a118c7929c686fe73f494
|
|
4
|
+
data.tar.gz: 7bf43ba8cf901d4d2de42d0e21e95bf2e25e4f1a88360b5cc4451427c52adccb
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: b20195b2d9b1d6ddd0e51b7e8062f71e2bc8c2c7b2d74c817b058c1c56f18ef203b119a39cf80e55310bd9c6d06204f15bb3c0e7d2d988837967fa1381a10735
|
|
7
|
+
data.tar.gz: 26b6d78eddd658b8dbed3949b0df277639636a66b8ed86a3a54f9478a19dfbdc81874b88afdafc3221114a41f59f7184c271206f14b6c1f5135f93b895ef3fa3
|
data/.ruby-version
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
3.
|
|
1
|
+
3.3.6
|
data/Gemfile.lock
CHANGED
data/History.txt
CHANGED
|
@@ -1,3 +1,9 @@
|
|
|
1
|
+
=== 3.0.25 / 17.06.2026
|
|
2
|
+
* Bugfix (-b/--firstbase): the GS1 Switzerland firstbase CSV (id.gs1.ch) is served with a UTF-8 BOM. FirstbaseExtractor read it with encoding "UTF-8", so the BOM glued onto the first header ("Gtin") and row["Gtin"] returned nil for every row — every line was skipped as having an empty GTIN. The result: @firstbase came out empty and all NONPHARMA articles from the firstbase feed were silently missing, so oddb_article.xml shipped only the ~17k Refdata/Swissmedic base instead of the ~190k GS1 set. Now read with encoding "bom|utf-8", which strips the BOM (live file: all 192'807 rows parse).
|
|
3
|
+
|
|
4
|
+
=== 3.0.24 / 16.06.2026
|
|
5
|
+
* Bugfix (--skip-download): cached files fetched with a write mode were silently emptied on every --skip-download run. DownloadMethod#download_as restored the file from the ./downloads cache and then re-opened it with the caller's mode (e.g. "w+"), which truncated it to zero bytes before the read — so the returned data was empty. This blanked epha_interactions.csv (oddb_interaction.xml came out with NBR_RECORD=0) and any other source pulled with a "w+"-style mode (LPPV, Weleda/WALA SL, BAG SL group prices) whenever its file was present in the cache. It surfaced in deploy scripts that download once and rebuild several price increments from the shared cache (e.g. -b -I 45/50/55): the first build was correct, every --skip-download rebuild lost the interactions. The skip branch now opens the restored file read-only, preserving any encoding suffix ("w+:iso-8859-1:utf-8" -> "r:iso-8859-1:utf-8").
|
|
6
|
+
|
|
1
7
|
=== 3.0.23 / 12.06.2026
|
|
2
8
|
* Bugfix (--proxy-check): the connectivity check now follows HTTP redirects to other hosts and reports the real "forwarder" target an allow-list proxy must permit, instead of stopping at the first hop. GS1 Switzerland turned id.gs1.ch into a 301 redirect to the global resolver id.gs1.org, which 307-redirects again to apitools.gs1.ch — so allowing only id.gs1.ch is no longer enough, and the firstbase download dies on the blocked target. Previously any 3xx answer was reported as "OK", so the check was falsely green; it now shows e.g. "[BLOCKED] id.gs1.ch -> apitools.gs1.ch" plus a "must be on the proxy allow-list too" note for every cross-host redirect. id.gs1.org is also probed explicitly (added to --firstbase's host set and to the full --proxy-check report).
|
|
3
9
|
* Improvement (--proxy-check): each host is now probed with the actual resource path the downloader fetches (e.g. raw.githubusercontent.com/zdavatz/…, www.spezialitaetenliste.ch/File.axd, files.refdata.ch/…/Refdata.Articles.zip) rather than "/". Probing "/" produced misleading host redirects (raw.githubusercontent.com/ -> github.com) for hosts whose real download path returns 200 directly; the genuine paths also reveal real forwarders such as www.spezialitaetenliste.ch -> sl.bag.admin.ch.
|
data/lib/oddb2xml/downloader.rb
CHANGED
|
@@ -18,7 +18,13 @@ module Oddb2xml
|
|
|
18
18
|
data = nil
|
|
19
19
|
FileUtils.makedirs(File.dirname(file), verbose: true)
|
|
20
20
|
if Oddb2xml.skip_download(file)
|
|
21
|
-
|
|
21
|
+
# The file has just been restored from the download cache. Open it
|
|
22
|
+
# read-only: a write mode like "w+" would truncate the cached file to
|
|
23
|
+
# zero bytes before the read, silently emptying it (e.g. it blanked
|
|
24
|
+
# epha_interactions.csv on every --skip-download run). Preserve any
|
|
25
|
+
# encoding suffix (e.g. "w+:iso-8859-1:utf-8" -> "r:iso-8859-1:utf-8").
|
|
26
|
+
read_option = option.sub(/\A[wa]\+?/, "r")
|
|
27
|
+
io = File.open(file, read_option)
|
|
22
28
|
data = io.read
|
|
23
29
|
else
|
|
24
30
|
begin
|
data/lib/oddb2xml/extractor.rb
CHANGED
|
@@ -622,7 +622,10 @@ module Oddb2xml
|
|
|
622
622
|
def to_hash
|
|
623
623
|
data = {}
|
|
624
624
|
return data unless @file && File.exist?(@file)
|
|
625
|
-
CSV
|
|
625
|
+
# The GS1 firstbase CSV is served with a UTF-8 BOM. Without "bom|" the BOM
|
|
626
|
+
# glues onto the first header ("Gtin"), so row["Gtin"] is nil and every
|
|
627
|
+
# row is skipped — dropping all -b/firstbase NONPHARMA articles.
|
|
628
|
+
CSV.foreach(@file, headers: true, encoding: "bom|utf-8") do |row|
|
|
626
629
|
gtin = row["Gtin"].to_s.gsub(/^0+/, "")
|
|
627
630
|
next if gtin.empty?
|
|
628
631
|
data[gtin] = {
|
data/lib/oddb2xml/version.rb
CHANGED
data/scripts/run_oddb2xml.sh
ADDED
|
@@ -0,0 +1,99 @@
|
|
|
1
|
+
#!/usr/bin/env bash
|
|
2
|
+
#
|
|
3
|
+
# run_oddb2xml — build the firstbase (-b) oddb2xml feed at several price
|
|
4
|
+
# increments and stage the results for transfer.
|
|
5
|
+
#
|
|
6
|
+
# The upstream sources are downloaded ONCE: the first build fetches them, and
|
|
7
|
+
# every subsequent increment re-uses the cached ./downloads via --skip-download.
|
|
8
|
+
# All builds therefore run in a single shared working directory ($BUILD_DIR) —
|
|
9
|
+
# the original deploy script cd'd into a separate dir per increment, where
|
|
10
|
+
# --skip-download could not find downloads/ (DOWNLOADS is cwd-relative) and
|
|
11
|
+
# silently re-downloaded everything each time.
|
|
12
|
+
#
|
|
13
|
+
# Output layout (under $OUT_DIR, default /home/zdavatz/oddb2xml):
|
|
14
|
+
# <OUT_DIR>/45/ oddb_*.xml built with +45 %
|
|
15
|
+
# <OUT_DIR>/50/ oddb_*.xml built with +50 %
|
|
16
|
+
# <OUT_DIR>/55/ oddb_*.xml built with +55 %
|
|
17
|
+
# <OUT_DIR>/default/ oddb_*.xml built with no increment
|
|
18
|
+
# Each destination dir also keeps the source archive as oddb2xml.zip.
|
|
19
|
+
# The working dir ($BUILD_DIR, default <OUT_DIR>-build) holds the shared
|
|
20
|
+
# downloads/ cache and the transient zip; it lives OUTSIDE $OUT_DIR so the
|
|
21
|
+
# transfer's `scp -r $OUT_DIR/*` never uploads the multi-hundred-MB cache.
|
|
22
|
+
#
|
|
23
|
+
# Configurable via environment:
|
|
24
|
+
# OUT_DIR destination root (default /home/zdavatz/oddb2xml)
|
|
25
|
+
# BUILD_DIR working dir (default <OUT_DIR>-build)
|
|
26
|
+
# INCREMENTS space-separated percents (default "45 50 55")
|
|
27
|
+
# ODDB2XML_BIN oddb2xml executable (default oddb2xml)
|
|
28
|
+
# SKIP_GEM_INSTALL set to 1 to skip `gem install oddb2xml`
|
|
29
|
+
# RUN_TRANSFER set to 1 to run the transfer (scripts/transfer.sh) at the end
|
|
30
|
+
# TRANSFER_CMD transfer command (default: scripts/transfer.sh next to
|
|
31
|
+
# this file, run without sudo; ODDB2XML_TRANSFER_DIR is exported to it)
|
|
32
|
+
#
|
|
33
|
+
set -euo pipefail
|
|
34
|
+
|
|
35
|
+
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
|
|
36
|
+
OUT_DIR="${OUT_DIR:-/home/zdavatz/oddb2xml}"
|
|
37
|
+
BUILD_DIR="${BUILD_DIR:-${OUT_DIR%/}-build}"
|
|
38
|
+
INCREMENTS="${INCREMENTS:-45 50 55}"
|
|
39
|
+
ODDB2XML_BIN="${ODDB2XML_BIN:-oddb2xml}"
|
|
40
|
+
TRANSFER_CMD="${TRANSFER_CMD:-$SCRIPT_DIR/transfer.sh}"
|
|
41
|
+
|
|
42
|
+
log() { printf '%s %s\n' "$(date '+%Y-%m-%d %H:%M:%S')" "$*"; }
|
|
43
|
+
|
|
44
|
+
# 1. Install / update the published gem unless told otherwise.
|
|
45
|
+
if [[ "${SKIP_GEM_INSTALL:-0}" != "1" ]]; then
|
|
46
|
+
log "Installing oddb2xml gem"
|
|
47
|
+
gem install oddb2xml
|
|
48
|
+
fi
|
|
49
|
+
|
|
50
|
+
# 2. Fresh working dir (keeps a shared downloads/ cache across increments).
|
|
51
|
+
log "Preparing build dir $BUILD_DIR"
|
|
52
|
+
rm -rf "$BUILD_DIR"
|
|
53
|
+
mkdir -p "$BUILD_DIR"
|
|
54
|
+
cd "$BUILD_DIR"
|
|
55
|
+
|
|
56
|
+
first=1
|
|
57
|
+
|
|
58
|
+
# build_one <increment-percent|""> <destination-subdir>
|
|
59
|
+
build_one() {
|
|
60
|
+
local inc="$1" name="$2" dest="$OUT_DIR/$2"
|
|
61
|
+
local inc_opt=() dl_opt=()
|
|
62
|
+
[[ -n "$inc" ]] && inc_opt=(-I "$inc")
|
|
63
|
+
|
|
64
|
+
if [[ $first -eq 1 ]]; then
|
|
65
|
+
first=0 # first build downloads the sources
|
|
66
|
+
else
|
|
67
|
+
dl_opt=(--skip-download) # the rest re-use the cached downloads/
|
|
68
|
+
fi
|
|
69
|
+
|
|
70
|
+
log "Building increment '${inc:-none}' -> $dest"
|
|
71
|
+
rm -f oddb*.zip
|
|
72
|
+
"$ODDB2XML_BIN" "${dl_opt[@]}" -b "${inc_opt[@]}" -c zip
|
|
73
|
+
|
|
74
|
+
shopt -s nullglob
|
|
75
|
+
local zips=(oddb*.zip)
|
|
76
|
+
shopt -u nullglob
|
|
77
|
+
[[ ${#zips[@]} -ge 1 ]] || { log "ERROR: no zip produced for increment '${inc:-none}'"; exit 1; }
|
|
78
|
+
local zip="${zips[0]}"
|
|
79
|
+
|
|
80
|
+
rm -rf "$dest"
|
|
81
|
+
mkdir -p "$dest"
|
|
82
|
+
unzip -o -q -d "$dest" "$zip"
|
|
83
|
+
mv "$zip" "$dest/oddb2xml.zip"
|
|
84
|
+
log "Staged $dest"
|
|
85
|
+
}
|
|
86
|
+
|
|
87
|
+
for inc in $INCREMENTS; do
|
|
88
|
+
build_one "$inc" "$inc"
|
|
89
|
+
done
|
|
90
|
+
build_one "" "default" # final run with no increment
|
|
91
|
+
|
|
92
|
+
# 3. Optional hand-off to the transfer step (scripts/transfer.sh).
|
|
93
|
+
if [[ "${RUN_TRANSFER:-0}" == "1" ]]; then
|
|
94
|
+
log "Running transfer: $TRANSFER_CMD"
|
|
95
|
+
export ODDB2XML_TRANSFER_DIR="$OUT_DIR" # keep transfer.sh in sync with OUT_DIR
|
|
96
|
+
$TRANSFER_CMD
|
|
97
|
+
fi
|
|
98
|
+
|
|
99
|
+
log "Done. Output under $OUT_DIR"
|
data/scripts/transfer.sh
ADDED
|
@@ -0,0 +1,45 @@
|
|
|
1
|
+
#!/bin/bash
|
|
2
|
+
#
|
|
3
|
+
# transfer.sh — push the generated oddb2xml feeds (plus the aips2sqlite
|
|
4
|
+
# Fachinformation XML and the swissmedic-sequences CSV) to the HIN download
|
|
5
|
+
# server via scp. Runs on the ywesee host; everything is user-owned, so no sudo.
|
|
6
|
+
#
|
|
7
|
+
# Paths default to this host's layout and can be overridden via environment:
|
|
8
|
+
# ODDB2XML_TRANSFER_DIR dir whose contents go to .../download/oddb2xml/
|
|
9
|
+
# (default /home/zdavatz/oddb2xml)
|
|
10
|
+
# AIPS2SQLITE_DIR aips2sqlite output dir
|
|
11
|
+
# (default /home/zdavatz/software/aips2sqlite/jars/output)
|
|
12
|
+
# SSH_KEY scp identity file (default ~/.ssh/id_ed25519)
|
|
13
|
+
# SCP_DEST scp destination base, e.g. user@host:/path/download
|
|
14
|
+
# (REQUIRED — no default yet; set the new download server)
|
|
15
|
+
|
|
16
|
+
ODDB2XML_TRANSFER_DIR="${ODDB2XML_TRANSFER_DIR:-/home/zdavatz/oddb2xml}"
|
|
17
|
+
AIPS2SQLITE_DIR="${AIPS2SQLITE_DIR:-/home/zdavatz/software/aips2sqlite/jars/output}"
|
|
18
|
+
SSH_KEY="${SSH_KEY:-$HOME/.ssh/id_ed25519}"
|
|
19
|
+
# TODO: set the new download-server destination.
|
|
20
|
+
SCP_DEST="${SCP_DEST:?set SCP_DEST to the scp target, e.g. user@host:/var/www/.../download}"
|
|
21
|
+
|
|
22
|
+
###
|
|
23
|
+
### ODDB2XML
|
|
24
|
+
###
|
|
25
|
+
|
|
26
|
+
find "$ODDB2XML_TRANSFER_DIR/" -type d -exec chmod 755 {} \;
|
|
27
|
+
find "$ODDB2XML_TRANSFER_DIR/" -type f -exec chmod 644 {} \;
|
|
28
|
+
|
|
29
|
+
scp -r -i "$SSH_KEY" "$ODDB2XML_TRANSFER_DIR"/* "$SCP_DEST/oddb2xml/"
|
|
30
|
+
|
|
31
|
+
###
|
|
32
|
+
### aips2sqlite
|
|
33
|
+
###
|
|
34
|
+
|
|
35
|
+
if [ -d "$AIPS2SQLITE_DIR/fis" ]; then
|
|
36
|
+
find "$AIPS2SQLITE_DIR/fis" -name '*.xml' -type f -exec chmod 644 {} \;
|
|
37
|
+
scp -r -i "$SSH_KEY" "$AIPS2SQLITE_DIR"/fis/*.xml "$SCP_DEST/mediupdate-xml/"
|
|
38
|
+
fi
|
|
39
|
+
|
|
40
|
+
if [ -f "$AIPS2SQLITE_DIR/oddb2xml_swissmedic_sequences.csv" ]; then
|
|
41
|
+
chmod 644 "$AIPS2SQLITE_DIR/oddb2xml_swissmedic_sequences.csv"
|
|
42
|
+
scp -r -i "$SSH_KEY" "$AIPS2SQLITE_DIR/oddb2xml_swissmedic_sequences.csv" "$SCP_DEST/oddb2xml/"
|
|
43
|
+
fi
|
|
44
|
+
|
|
45
|
+
exit 0
|