oddb2xml 3.0.22 → 3.0.23
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/Gemfile.lock +1 -1
- data/History.txt +4 -0
- data/lib/oddb2xml/proxy_check.rb +73 -17
- data/lib/oddb2xml/version.rb +1 -1
- metadata +1 -1
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA256:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: b99346243a08ae356f8e123953b8aa8c0475918e79e184d9de028a74b6a572a3
|
|
4
|
+
data.tar.gz: d72e4c0d0f9e66576d6488425d91113f0842035fc7e94ccfa6a1fe392dbb3dee
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: 721a6e109a22dbd57951ccf250b675ad0da0318157dd1ccd3a419a153a926690a5590552856d8e59ab9e2683ff6677a877543109a1cc71d80bb5e4a91c25647b
|
|
7
|
+
data.tar.gz: 46d8484d674fa5f7e7b81bc9f5de81490eceeb0657d5b23f50d2cb97028901b24a9030c1d790a252f0a342ec7931c8f5251bcc1e1bc015d76821a38d2911d47f
|
data/Gemfile.lock
CHANGED
data/History.txt
CHANGED
|
@@ -1,3 +1,7 @@
|
|
|
1
|
+
=== 3.0.23 / 12.06.2026
|
|
2
|
+
* Bugfix (--proxy-check): the connectivity check now follows HTTP redirects to other hosts and reports the real "forwarder" target an allow-list proxy must permit, instead of stopping at the first hop. GS1 Switzerland turned id.gs1.ch into a 301 redirect to the global resolver id.gs1.org, which 307-redirects again to apitools.gs1.ch — so allowing only id.gs1.ch is no longer enough, and the firstbase download dies on the blocked target. Previously any 3xx answer was reported as "OK", so the check was falsely green; it now shows e.g. "[BLOCKED] id.gs1.ch -> apitools.gs1.ch" plus a "must be on the proxy allow-list too" note for every cross-host redirect (in the full report these notes are printed only when at least one host is not reachable). id.gs1.org is also probed explicitly (added to --firstbase's host set and to the full --proxy-check report).
|
|
3
|
+
* Improvement (--proxy-check): each host is now probed with the actual resource path the downloader fetches (e.g. raw.githubusercontent.com/zdavatz/…, www.spezialitaetenliste.ch/File.axd, files.refdata.ch/…/Refdata.Articles.zip) rather than "/". Probing "/" produced misleading host redirects (raw.githubusercontent.com/ -> github.com) for hosts whose real download path returns 200 directly; the genuine paths also reveal real forwarders such as www.spezialitaetenliste.ch -> sl.bag.admin.ch.
|
|
4
|
+
|
|
1
5
|
=== 3.0.22 / 11.06.2026
|
|
2
6
|
* New (Artikelstamm / -e / -b): extend the 3.0.21 "Kapitel 70" SL recovery to WALA products (GTIN prefix 7640187…), in addition to Weleda. A third runtime CSV in github.com/zdavatz/oddb2xml_files, wala_arzneimittel.csv (bundled fallback under data/), is mapped exactly like the Weleda file: for any GTIN absent from the FHIR NDJSON, oddb2xml fills <SL_ENTRY>true</SL_ENTRY> and the BAG SL public price (<PPUB> in Artikelstamm, the standard <ARTPRI><PTYP>PPUB</PTYP> entry in oddb_article.xml for -e/-b). The WALA layout differs from Weleda: it is ";"-separated with a BOM, has no "/ SL" column (a row is SL when it carries a CSL-Code = Kapitel-70.01 group code), and the public package price is given inline in the "CSL 70.01." column -- already multiplied for the pack size (the multiplier appears only in the galenic-form text, e.g. "Solutio ad inj. 10 x 1 ml"), so it is taken verbatim instead of being re-joined against bag_sl_group_prices.csv (which would yield the 1/10 per-unit price). The FHIR/ZurRose price still always wins; this only fills a gap. Live file: 320 WALA SL products with prices.
|
|
3
7
|
|
data/lib/oddb2xml/proxy_check.rb
CHANGED
|
@@ -25,6 +25,25 @@ module Oddb2xml
|
|
|
25
25
|
|
|
26
26
|
TIMEOUT = 6 # seconds, per host (open + read); checks run concurrently
|
|
27
27
|
|
|
28
|
+
# Representative resource path per host -- the actual file the downloader
|
|
29
|
+
# fetches, NOT "/". Probing "/" gives misleading host redirects (e.g.
|
|
30
|
+
# raw.githubusercontent.com/ -> github.com, while the real raw file path
|
|
31
|
+
# returns 200), whereas the genuine download paths reveal the real
|
|
32
|
+
# forwarder chain the proxy must allow (id.gs1.ch -> id.gs1.org ->
|
|
33
|
+
# apitools.gs1.ch; www.spezialitaetenliste.ch/File.axd -> sl.bag.admin.ch).
|
|
34
|
+
PROBE_PATHS = {
|
|
35
|
+
"files.refdata.ch" => "/simis-public-prod/Articles/1.0/Refdata.Articles.zip",
|
|
36
|
+
"raw.githubusercontent.com" => "/zdavatz/oddb2xml_files/master/LPPV.txt",
|
|
37
|
+
"id.gs1.ch" => "/01/07612345000961",
|
|
38
|
+
"id.gs1.org" => "/01/07612345000961",
|
|
39
|
+
"www.spezialitaetenliste.ch" => "/File.axd?file=XMLPublications.zip",
|
|
40
|
+
"www.medregbm.admin.ch" => "/Publikation/"
|
|
41
|
+
}.freeze
|
|
42
|
+
|
|
43
|
+
def probe_path(host)
|
|
44
|
+
PROBE_PATHS[host] || "/"
|
|
45
|
+
end
|
|
46
|
+
|
|
28
47
|
def proxy_uri
|
|
29
48
|
env = ENV["https_proxy"] || ENV["HTTPS_PROXY"] || ENV["http_proxy"] || ENV["HTTP_PROXY"]
|
|
30
49
|
return nil if env.nil? || env.empty?
|
|
@@ -34,10 +53,25 @@ module Oddb2xml
|
|
|
34
53
|
nil
|
|
35
54
|
end
|
|
36
55
|
|
|
56
|
+
# Redirect targets ("forwarders") that an allow-list proxy must permit in
|
|
57
|
+
# addition to the host we actually request. id.gs1.ch 301-redirects every
|
|
58
|
+
# path to the global resolver id.gs1.org, so allowing only id.gs1.ch is not
|
|
59
|
+
# enough -- the firstbase download follows the redirect and dies on the
|
|
60
|
+
# blocked target. The real firstbase chain is id.gs1.ch -> id.gs1.org ->
|
|
61
|
+
# apitools.gs1.ch, so the redirect is followed dynamically too (see
|
|
62
|
+
# check_host); this list just guarantees the known target is probed even
|
|
63
|
+
# when the redirect probe is short-circuited.
|
|
64
|
+
FORWARDERS = {
|
|
65
|
+
"id.gs1.org" => "GS1 global resolver (id.gs1.ch redirect target, --firstbase / -b)"
|
|
66
|
+
}.freeze
|
|
67
|
+
|
|
37
68
|
def hosts_for(options = {})
|
|
38
69
|
hosts = BASE_HOSTS.dup
|
|
39
70
|
hosts["epl.bag.admin.ch"] = "BAG FHIR data (--fhir)" if options[:fhir]
|
|
40
|
-
|
|
71
|
+
if options[:firstbase]
|
|
72
|
+
hosts["id.gs1.ch"] = "GS1 NONPHARMA (--firstbase / -b)"
|
|
73
|
+
hosts["id.gs1.org"] = FORWARDERS["id.gs1.org"]
|
|
74
|
+
end
|
|
41
75
|
hosts["www.spezialitaetenliste.ch"] = "BAG Spezialitätenliste" unless options[:fhir]
|
|
42
76
|
hosts["www.medregbm.admin.ch"] = "Medizinalberuferegister (-x address)" if options[:address]
|
|
43
77
|
hosts
|
|
@@ -51,7 +85,7 @@ module Oddb2xml
|
|
|
51
85
|
"id.gs1.ch" => "GS1 NONPHARMA (--firstbase / -b)",
|
|
52
86
|
"www.spezialitaetenliste.ch" => "BAG Spezialitätenliste",
|
|
53
87
|
"www.medregbm.admin.ch" => "Medizinalberuferegister (-x address)"
|
|
54
|
-
)
|
|
88
|
+
).merge(FORWARDERS)
|
|
55
89
|
end
|
|
56
90
|
|
|
57
91
|
# Probe every host and print a full OK/BLOCKED/UNREACHABLE table.
|
|
@@ -59,32 +93,40 @@ module Oddb2xml
|
|
|
59
93
|
def report(_options = {})
|
|
60
94
|
proxy = proxy_uri
|
|
61
95
|
results = all_hosts.map do |host, desc|
|
|
62
|
-
Thread.new { [host, desc, check_host(host, proxy)] }
|
|
96
|
+
Thread.new { [host, desc, check_host(host, proxy, probe_path(host))] }
|
|
63
97
|
end.map(&:value).sort_by { |(host, _desc, _status)| host }
|
|
64
98
|
|
|
65
99
|
header = "oddb2xml connectivity check"
|
|
66
100
|
header += proxy ? " (via proxy #{proxy.host}:#{proxy.port})" : " (no proxy configured)"
|
|
67
101
|
puts header
|
|
68
102
|
results.each do |(host, desc, status)|
|
|
69
|
-
tag = case status
|
|
103
|
+
tag = case status[:result]
|
|
70
104
|
when :ok then "OK "
|
|
71
105
|
when :blocked then "BLOCKED" # proxy returned 407
|
|
72
106
|
else "UNREACH"
|
|
73
107
|
end
|
|
74
|
-
|
|
108
|
+
label = status[:via] ? "#{host} -> #{status[:via]}" : host
|
|
109
|
+
puts format(" [%s] %-36s %s", tag, label, desc)
|
|
75
110
|
end
|
|
76
|
-
unreachable = results.reject { |(_host, _desc, status)| status == :ok }
|
|
111
|
+
unreachable = results.reject { |(_host, _desc, status)| status[:result] == :ok }
|
|
77
112
|
if unreachable.empty?
|
|
78
113
|
puts "All #{results.size} hosts reachable."
|
|
79
114
|
true
|
|
80
115
|
else
|
|
81
116
|
puts "#{unreachable.size} of #{results.size} host(s) NOT reachable -- downloads using them will fail."
|
|
117
|
+
results.select { |(_host, _desc, status)| status[:via] }.each do |(host, _desc, status)|
|
|
118
|
+
puts " note: #{host} redirects to #{status[:via]} -- that host must be on the proxy allow-list too."
|
|
119
|
+
end
|
|
82
120
|
false
|
|
83
121
|
end
|
|
84
122
|
end
|
|
85
123
|
|
|
86
|
-
#
|
|
87
|
-
|
|
124
|
+
# Probe a host (following HTTP redirects to other hosts) and return a Hash:
|
|
125
|
+
# { result: :ok | :blocked | :unreachable, via: "final.host" | nil }
|
|
126
|
+
# `:via` is set only when the host redirected to a *different* host, so the
|
|
127
|
+
# caller can surface that the redirect target (e.g. id.gs1.ch -> id.gs1.org)
|
|
128
|
+
# must be reachable too -- a 301 to a blocked host used to be reported as OK.
|
|
129
|
+
def check_host(host, proxy, path = "/", hops = 4, origin = nil)
|
|
88
130
|
http =
|
|
89
131
|
if proxy
|
|
90
132
|
Net::HTTP.new(host, 443, proxy.host, proxy.port, proxy.user, proxy.password)
|
|
@@ -96,14 +138,27 @@ module Oddb2xml
|
|
|
96
138
|
http.open_timeout = TIMEOUT
|
|
97
139
|
http.read_timeout = TIMEOUT
|
|
98
140
|
http.start do |h|
|
|
99
|
-
res = h.head(
|
|
100
|
-
return :blocked if res.code.to_s == "407"
|
|
101
|
-
|
|
141
|
+
res = h.head(path)
|
|
142
|
+
return {result: :blocked, via: via_for(origin, host)} if res.code.to_s == "407"
|
|
143
|
+
if res.code.to_s.start_with?("3") && res["location"] && hops > 0
|
|
144
|
+
loc = URI.parse(res["location"])
|
|
145
|
+
if loc.host && loc.host != host
|
|
146
|
+
next_path = (loc.respond_to?(:request_uri) && loc.request_uri) ? loc.request_uri : "/"
|
|
147
|
+
return check_host(loc.host, proxy, next_path, hops - 1, origin || host)
|
|
148
|
+
end
|
|
149
|
+
end
|
|
150
|
+
# any other HTTP answer (200/403/404/...) means this host is reachable
|
|
151
|
+
return {result: :ok, via: via_for(origin, host)}
|
|
102
152
|
end
|
|
103
153
|
rescue => error
|
|
104
154
|
msg = error.message.to_s.downcase
|
|
105
|
-
|
|
106
|
-
:unreachable
|
|
155
|
+
blocked = msg.include?("407") || msg.include?("authenticationrequired") || msg.include?("proxy")
|
|
156
|
+
{result: blocked ? :blocked : :unreachable, via: via_for(origin, host)}
|
|
157
|
+
end
|
|
158
|
+
|
|
159
|
+
# The final host reached, but only when it differs from where we started.
|
|
160
|
+
def via_for(origin, host)
|
|
161
|
+
(origin && origin != host) ? host : nil
|
|
107
162
|
end
|
|
108
163
|
|
|
109
164
|
# Probe all relevant hosts concurrently and warn about any that fail.
|
|
@@ -114,10 +169,10 @@ module Oddb2xml
|
|
|
114
169
|
proxy = proxy_uri
|
|
115
170
|
hosts = hosts_for(options)
|
|
116
171
|
results = hosts.map do |host, desc|
|
|
117
|
-
Thread.new { [host, desc, check_host(host, proxy)] }
|
|
172
|
+
Thread.new { [host, desc, check_host(host, proxy, probe_path(host))] }
|
|
118
173
|
end.map(&:value)
|
|
119
174
|
|
|
120
|
-
problems = results.reject { |(_host, _desc, status)| status == :ok }
|
|
175
|
+
problems = results.reject { |(_host, _desc, status)| status[:result] == :ok }
|
|
121
176
|
return if problems.empty?
|
|
122
177
|
|
|
123
178
|
warn_about(problems, proxy)
|
|
@@ -130,8 +185,9 @@ module Oddb2xml
|
|
|
130
185
|
warn " The following hosts could not be reached -- the corresponding"
|
|
131
186
|
warn " downloads will FAIL or produce incomplete data:"
|
|
132
187
|
problems.each do |(host, desc, status)|
|
|
133
|
-
tag = (status == :blocked) ? "BLOCKED by proxy (407)" : "UNREACHABLE "
|
|
134
|
-
|
|
188
|
+
tag = (status[:result] == :blocked) ? "BLOCKED by proxy (407)" : "UNREACHABLE "
|
|
189
|
+
label = status[:via] ? "#{host} -> #{status[:via]}" : host
|
|
190
|
+
warn format(" [%s] %-34s %s", tag, label, desc)
|
|
135
191
|
end
|
|
136
192
|
if proxy
|
|
137
193
|
warn ""
|
data/lib/oddb2xml/version.rb
CHANGED