oddb2xml 3.0.4 → 3.0.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.ruby-version +1 -1
- data/CLAUDE.md +3 -1
- data/Gemfile.lock +89 -68
- data/README.md +23 -2
- data/lib/oddb2xml/builder.rb +30 -0
- data/lib/oddb2xml/refdata_cleanup.rb +34 -0
- data/lib/oddb2xml/version.rb +1 -1
- data/lib/oddb2xml.rb +1 -0
- data/spec/refdata_cleanup_spec.rb +151 -0
- metadata +6 -6
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA256:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: ad986fc08f7abf1826b696378727e91dcb94cc53d5cdcdc2e0d009b755b92410
|
|
4
|
+
data.tar.gz: 7df4f478dc0cb3cfbd2dfcb796b87ad047aadfdbc37bc9eab80a58b9943078b1
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: 9b7a3266179a31992706cd2d3c999cc73b94cacf293617935475d801c1cc00ed5d23ed91daec53cf2c4f4a234d7057dfe82390980465f9b5e7a19bbff6bcca6c
|
|
7
|
+
data.tar.gz: d223b395fa978c6361c3f6c76ab2111558b029f97e9fad09b2c53d379f24f42f46c9b667da531c7505bcf7c121b5bd21cdd588ec64b530e80c718654c605a27f
|
data/.ruby-version
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
3.
|
|
1
|
+
3.4.5
|
data/CLAUDE.md
CHANGED
|
@@ -49,6 +49,8 @@ The system follows a **download → extract → build → compress** pipeline:
|
|
|
49
49
|
|
|
50
50
|
7. **FHIR support** (`lib/oddb2xml/fhir_support.rb`) — Self-contained module providing `FhirDownloader` and FHIR NDJSON parsing. Activated via `--fhir` (or `--fhir-url=<URL>`). Downloads per-language NDJSON files (`foph-sl-export-latest-{de,fr,it}.ndjson`) from `epl.bag.admin.ch` to populate French and Italian product names/descriptions. Maps legal status codes `756005022007` and `756005022008` to Swissmedic category D.
|
|
51
51
|
|
|
52
|
+
8. **Refdata cleanup** (`lib/oddb2xml/refdata_cleanup.rb`) — Compensates for known data-quality issues in upstream Refdata.Articles.xml before they reach the output. Each fix is guarded by a Swissmedic-side heuristic (e.g. comma in `substance_swissmedic` to distinguish mono products from real combinations). Currently fixes the doubled-dose template bug (`X mg / X mg / Stk`). Called from `Builder#apply_refdata_description_cleanups!` at the start of `prepare_articles`. See GitHub issue #112 for the catalogue.
|
|
53
|
+
|
|
52
54
|
### Key data identifiers
|
|
53
55
|
- **GTIN/EAN13**: Primary article identifier (13-digit barcode)
|
|
54
56
|
- **Pharmacode**: Swiss pharmacy code
|
|
@@ -68,4 +70,4 @@ YAML files in `data/` provide manual overrides and mappings: `article_overrides.
|
|
|
68
70
|
## Ruby Version
|
|
69
71
|
|
|
70
72
|
- Minimum: Ruby >= 2.5.0 (gemspec)
|
|
71
|
-
- Current development: Ruby 3.
|
|
73
|
+
- Current development: Ruby 3.4.5 (`.ruby-version`)
|
data/Gemfile.lock
CHANGED
|
@@ -28,32 +28,37 @@ GEM
|
|
|
28
28
|
specs:
|
|
29
29
|
addressable (2.8.8)
|
|
30
30
|
public_suffix (>= 2.0.2, < 8.0)
|
|
31
|
-
akami (1.3.
|
|
31
|
+
akami (1.3.3)
|
|
32
|
+
base64
|
|
32
33
|
gyoku (>= 0.4.0)
|
|
33
34
|
nokogiri
|
|
34
|
-
ast (2.4.
|
|
35
|
+
ast (2.4.3)
|
|
35
36
|
base64 (0.3.0)
|
|
36
|
-
|
|
37
|
-
|
|
37
|
+
bigdecimal (4.0.1)
|
|
38
|
+
builder (3.3.0)
|
|
39
|
+
byebug (13.0.0)
|
|
40
|
+
reline (>= 0.6.0)
|
|
38
41
|
coderay (1.1.3)
|
|
39
42
|
connection_pool (3.0.2)
|
|
40
|
-
crack (0.
|
|
43
|
+
crack (1.0.1)
|
|
44
|
+
bigdecimal
|
|
41
45
|
rexml
|
|
42
|
-
diff-lcs (1.
|
|
46
|
+
diff-lcs (1.6.2)
|
|
43
47
|
domain_name (0.6.20240107)
|
|
44
|
-
flexmock (
|
|
48
|
+
flexmock (3.0.2)
|
|
45
49
|
gyoku (1.4.0)
|
|
46
50
|
builder (>= 2.1.2)
|
|
47
51
|
rexml (~> 3.0)
|
|
48
|
-
hashdiff (1.
|
|
49
|
-
htmlentities (4.
|
|
52
|
+
hashdiff (1.2.1)
|
|
53
|
+
htmlentities (4.4.2)
|
|
50
54
|
http-cookie (1.1.0)
|
|
51
55
|
domain_name (~> 0.5)
|
|
52
56
|
httpi (2.5.0)
|
|
53
57
|
rack
|
|
54
58
|
socksify
|
|
55
|
-
|
|
56
|
-
|
|
59
|
+
io-console (0.8.2)
|
|
60
|
+
json (2.18.1)
|
|
61
|
+
language_server-protocol (3.17.0.5)
|
|
57
62
|
lint_roller (1.1.0)
|
|
58
63
|
logger (1.7.0)
|
|
59
64
|
mechanize (2.14.0)
|
|
@@ -69,84 +74,95 @@ GEM
|
|
|
69
74
|
rubyntlm (~> 0.6, >= 0.6.3)
|
|
70
75
|
webrick (~> 1.7)
|
|
71
76
|
webrobots (~> 0.1.2)
|
|
72
|
-
method_source (1.
|
|
77
|
+
method_source (1.1.0)
|
|
73
78
|
mime-types (3.7.0)
|
|
74
79
|
logger
|
|
75
80
|
mime-types-data (~> 3.2025, >= 3.2025.0507)
|
|
76
81
|
mime-types-data (3.2026.0203)
|
|
77
82
|
mini_portile2 (2.8.9)
|
|
78
|
-
minitar (0
|
|
79
|
-
multi_json (1.
|
|
83
|
+
minitar (1.1.0)
|
|
84
|
+
multi_json (1.19.1)
|
|
80
85
|
mutex_m (0.3.0)
|
|
81
86
|
net-http-digest_auth (1.4.1)
|
|
82
87
|
net-http-persistent (4.0.8)
|
|
83
88
|
connection_pool (>= 2.2.4, < 4)
|
|
84
89
|
nkf (0.2.0)
|
|
85
|
-
nokogiri (1.19.
|
|
90
|
+
nokogiri (1.19.2)
|
|
86
91
|
mini_portile2 (~> 2.8.2)
|
|
87
92
|
racc (~> 1.4)
|
|
88
|
-
|
|
89
|
-
|
|
90
|
-
|
|
91
|
-
|
|
92
|
-
|
|
93
|
+
nokogiri (1.19.2-arm64-darwin)
|
|
94
|
+
racc (~> 1.4)
|
|
95
|
+
nori (2.7.1)
|
|
96
|
+
bigdecimal
|
|
97
|
+
optimist (3.2.1)
|
|
98
|
+
ox (2.14.23)
|
|
99
|
+
bigdecimal (>= 3.0)
|
|
100
|
+
parallel (1.27.0)
|
|
101
|
+
parser (3.3.10.2)
|
|
93
102
|
ast (~> 2.4.1)
|
|
94
103
|
racc
|
|
95
104
|
parslet (2.0.0)
|
|
96
|
-
|
|
105
|
+
prism (1.9.0)
|
|
106
|
+
pry (0.16.0)
|
|
97
107
|
coderay (~> 1.1)
|
|
98
108
|
method_source (~> 1.0)
|
|
99
|
-
|
|
100
|
-
|
|
101
|
-
|
|
102
|
-
|
|
109
|
+
reline (>= 0.6.0)
|
|
110
|
+
pry-byebug (3.12.0)
|
|
111
|
+
byebug (~> 13.0)
|
|
112
|
+
pry (>= 0.13, < 0.17)
|
|
113
|
+
pry-doc (1.7.0)
|
|
103
114
|
pry (~> 0.11)
|
|
104
|
-
yard (~> 0.9.
|
|
115
|
+
yard (~> 0.9.21)
|
|
105
116
|
psych (3.3.4)
|
|
106
117
|
public_suffix (7.0.2)
|
|
107
118
|
racc (1.8.1)
|
|
108
|
-
rack (3.2.
|
|
119
|
+
rack (3.2.6)
|
|
109
120
|
rainbow (3.1.1)
|
|
110
|
-
rake (13.
|
|
121
|
+
rake (13.3.1)
|
|
111
122
|
rdoc (6.3.4.1)
|
|
112
|
-
regexp_parser (2.
|
|
123
|
+
regexp_parser (2.11.3)
|
|
124
|
+
reline (0.6.3)
|
|
125
|
+
io-console (~> 0.5)
|
|
113
126
|
rexml (3.4.4)
|
|
114
|
-
rspec (3.
|
|
115
|
-
rspec-core (~> 3.
|
|
116
|
-
rspec-expectations (~> 3.
|
|
117
|
-
rspec-mocks (~> 3.
|
|
118
|
-
rspec-core (3.
|
|
119
|
-
rspec-support (~> 3.
|
|
120
|
-
rspec-expectations (3.
|
|
127
|
+
rspec (3.13.2)
|
|
128
|
+
rspec-core (~> 3.13.0)
|
|
129
|
+
rspec-expectations (~> 3.13.0)
|
|
130
|
+
rspec-mocks (~> 3.13.0)
|
|
131
|
+
rspec-core (3.13.6)
|
|
132
|
+
rspec-support (~> 3.13.0)
|
|
133
|
+
rspec-expectations (3.13.5)
|
|
121
134
|
diff-lcs (>= 1.2.0, < 2.0)
|
|
122
|
-
rspec-support (~> 3.
|
|
123
|
-
rspec-mocks (3.
|
|
135
|
+
rspec-support (~> 3.13.0)
|
|
136
|
+
rspec-mocks (3.13.7)
|
|
124
137
|
diff-lcs (>= 1.2.0, < 2.0)
|
|
125
|
-
rspec-support (~> 3.
|
|
126
|
-
rspec-support (3.
|
|
127
|
-
rubocop (1.
|
|
138
|
+
rspec-support (~> 3.13.0)
|
|
139
|
+
rspec-support (3.13.7)
|
|
140
|
+
rubocop (1.84.2)
|
|
128
141
|
json (~> 2.3)
|
|
142
|
+
language_server-protocol (~> 3.17.0.2)
|
|
143
|
+
lint_roller (~> 1.1.0)
|
|
129
144
|
parallel (~> 1.10)
|
|
130
|
-
parser (>= 3.
|
|
145
|
+
parser (>= 3.3.0.2)
|
|
131
146
|
rainbow (>= 2.2.2, < 4.0)
|
|
132
|
-
regexp_parser (>=
|
|
133
|
-
|
|
134
|
-
rubocop-ast (>= 1.28.0, < 2.0)
|
|
147
|
+
regexp_parser (>= 2.9.3, < 3.0)
|
|
148
|
+
rubocop-ast (>= 1.49.0, < 2.0)
|
|
135
149
|
ruby-progressbar (~> 1.7)
|
|
136
|
-
unicode-display_width (>= 2.4.0, <
|
|
137
|
-
rubocop-ast (1.
|
|
138
|
-
parser (>= 3.
|
|
139
|
-
|
|
140
|
-
|
|
141
|
-
|
|
142
|
-
|
|
150
|
+
unicode-display_width (>= 2.4.0, < 4.0)
|
|
151
|
+
rubocop-ast (1.49.0)
|
|
152
|
+
parser (>= 3.3.7.2)
|
|
153
|
+
prism (~> 1.7)
|
|
154
|
+
rubocop-performance (1.26.1)
|
|
155
|
+
lint_roller (~> 1.1)
|
|
156
|
+
rubocop (>= 1.75.0, < 2.0)
|
|
157
|
+
rubocop-ast (>= 1.47.1, < 2.0)
|
|
158
|
+
ruby-ole (1.2.13.1)
|
|
143
159
|
ruby-progressbar (1.13.0)
|
|
144
|
-
rubyXL (3.4.
|
|
160
|
+
rubyXL (3.4.33)
|
|
145
161
|
nokogiri (>= 1.10.8)
|
|
146
162
|
rubyzip (>= 1.3.0)
|
|
147
163
|
rubyntlm (0.6.5)
|
|
148
164
|
base64
|
|
149
|
-
rubyzip (3.0.
|
|
165
|
+
rubyzip (3.0.2)
|
|
150
166
|
savon (2.12.1)
|
|
151
167
|
akami (~> 1.2)
|
|
152
168
|
builder (>= 2.1.2)
|
|
@@ -156,31 +172,35 @@ GEM
|
|
|
156
172
|
nori (~> 2.4)
|
|
157
173
|
wasabi (~> 3.4)
|
|
158
174
|
sax-machine (1.3.2)
|
|
159
|
-
socksify (1.
|
|
160
|
-
spreadsheet (1.3.
|
|
175
|
+
socksify (1.8.1)
|
|
176
|
+
spreadsheet (1.3.4)
|
|
177
|
+
bigdecimal
|
|
178
|
+
logger
|
|
161
179
|
ruby-ole
|
|
162
|
-
standard (1.
|
|
180
|
+
standard (1.54.0)
|
|
163
181
|
language_server-protocol (~> 3.17.0.2)
|
|
164
182
|
lint_roller (~> 1.0)
|
|
165
|
-
rubocop (~> 1.
|
|
183
|
+
rubocop (~> 1.84.0)
|
|
166
184
|
standard-custom (~> 1.0.0)
|
|
167
|
-
standard-performance (~> 1.
|
|
185
|
+
standard-performance (~> 1.8)
|
|
168
186
|
standard-custom (1.0.2)
|
|
169
187
|
lint_roller (~> 1.0)
|
|
170
188
|
rubocop (~> 1.50)
|
|
171
|
-
standard-performance (1.0
|
|
172
|
-
lint_roller (~> 1.
|
|
173
|
-
rubocop-performance (~> 1.
|
|
189
|
+
standard-performance (1.9.0)
|
|
190
|
+
lint_roller (~> 1.1)
|
|
191
|
+
rubocop-performance (~> 1.26.0)
|
|
174
192
|
standardrb (1.0.1)
|
|
175
193
|
standard
|
|
176
|
-
timecop (0.9.
|
|
177
|
-
unicode-display_width (2.
|
|
178
|
-
|
|
194
|
+
timecop (0.9.10)
|
|
195
|
+
unicode-display_width (3.2.0)
|
|
196
|
+
unicode-emoji (~> 4.1)
|
|
197
|
+
unicode-emoji (4.2.0)
|
|
198
|
+
vcr (6.4.0)
|
|
179
199
|
wasabi (3.7.0)
|
|
180
200
|
addressable
|
|
181
201
|
httpi (~> 2.0)
|
|
182
202
|
nokogiri (>= 1.4.2)
|
|
183
|
-
webmock (3.
|
|
203
|
+
webmock (3.26.1)
|
|
184
204
|
addressable (>= 2.8.0)
|
|
185
205
|
crack (>= 0.3.2)
|
|
186
206
|
hashdiff (>= 0.4.0, < 2.0.0)
|
|
@@ -188,9 +208,10 @@ GEM
|
|
|
188
208
|
webrobots (0.1.2)
|
|
189
209
|
xml-simple (1.1.9)
|
|
190
210
|
rexml
|
|
191
|
-
yard (0.9.
|
|
211
|
+
yard (0.9.38)
|
|
192
212
|
|
|
193
213
|
PLATFORMS
|
|
214
|
+
arm64-darwin-25
|
|
194
215
|
ruby
|
|
195
216
|
|
|
196
217
|
DEPENDENCIES
|
|
@@ -211,4 +232,4 @@ DEPENDENCIES
|
|
|
211
232
|
webmock
|
|
212
233
|
|
|
213
234
|
BUNDLED WITH
|
|
214
|
-
2.
|
|
235
|
+
2.5.22
|
data/README.md
CHANGED
|
@@ -51,7 +51,7 @@ HIN (http://hin.ch) creates daily the actual file. They can be downloaded from `
|
|
|
51
51
|
see `--help`.
|
|
52
52
|
|
|
53
53
|
```
|
|
54
|
-
/opt/src/oddb2xml/bin/oddb2xml version 3.0.
|
|
54
|
+
/opt/src/oddb2xml/bin/oddb2xml version 3.0.5
|
|
55
55
|
Usage:
|
|
56
56
|
oddb2xml [option]
|
|
57
57
|
produced files are found under data
|
|
@@ -112,7 +112,7 @@ FR
|
|
|
112
112
|
|
|
113
113
|
## Supported ruby version
|
|
114
114
|
|
|
115
|
-
You will need ruby >= 2.5 to work correctly. Current development happens on Ruby 3.
|
|
115
|
+
You will need ruby >= 2.5 to work correctly. Current development happens on Ruby 3.4 (`.ruby-version`).
|
|
116
116
|
CI runs on Ruby 3.0, 3.1 and 3.2 via GitHub Actions — see the badge above for the latest spec results.
|
|
117
117
|
|
|
118
118
|
|
|
@@ -292,6 +292,27 @@ We use the following files:
|
|
|
292
292
|
* https://epl.bag.admin.ch/static/fhir/foph-sl-export-latest-{de,fr,it}.ndjson (FHIR NDJSON, used with `--fhir`)
|
|
293
293
|
* https://id.gs1.ch/01/07612345000961 (GS1 Switzerland firstbase CSV — full barcode registry, used with `-b`/`--firstbase`)
|
|
294
294
|
|
|
295
|
+
## Refdata data-quality compensation
|
|
296
|
+
|
|
297
|
+
Refdata.Articles.xml from `files.refdata.ch` ships with a number of recurring
|
|
298
|
+
data-quality issues that propagate into downstream systems unchanged. oddb2xml
|
|
299
|
+
applies a small set of conservative cleanups before emitting any output. See
|
|
300
|
+
GitHub issue [#112](https://github.com/zdavatz/oddb2xml/issues/112) for the
|
|
301
|
+
full catalogue and the parallel report sent to Refdata.
|
|
302
|
+
|
|
303
|
+
Currently active fixes (`lib/oddb2xml/refdata_cleanup.rb`):
|
|
304
|
+
|
|
305
|
+
* **Doubled dose token** — Refdata sometimes emits the strength twice in
|
|
306
|
+
`<FullName>`, e.g. `MIRTAZAPIN Sandoz eco 30 mg / 30 mg / 100 Tablette`.
|
|
307
|
+
When the matching Swissmedic entry shows a single active substance, the
|
|
308
|
+
duplicate token is collapsed to a single occurrence. Real combination
|
|
309
|
+
products (e.g. PHESGO 600 mg / 600 mg / 10 ml — pertuzumab + trastuzumab)
|
|
310
|
+
are detected via the comma in `substance_swissmedic` and left untouched.
|
|
311
|
+
|
|
312
|
+
The cleanup runs at the start of `prepare_articles` in `Builder` and is
|
|
313
|
+
idempotent. Each rule is guarded by a Swissmedic-side heuristic so genuine
|
|
314
|
+
data is never altered.
|
|
315
|
+
|
|
295
316
|
## Rules for matching GTIN (aka EAN13), product number and IKSNR
|
|
296
317
|
|
|
297
318
|
For drugs which appear in Packungen.xlsx file published by Swissmedic the following rule is used to create the GTIN
|
data/lib/oddb2xml/builder.rb
CHANGED
|
@@ -88,12 +88,42 @@ module Oddb2xml
|
|
|
88
88
|
end
|
|
89
89
|
end
|
|
90
90
|
|
|
91
|
+
# Mutates @refdata in place to compensate for known Refdata.Articles.xml
# data-quality issues (see GitHub issue #112).
#
# Every :desc_de / :desc_fr / :desc_it of a refdata item whose :no8 has an
# entry in @packs is passed through RefdataCleanup.fix_double_dose together
# with that pack's :substance_swissmedic.
#
# Runs at most once per Builder instance: after a pass over the data,
# further calls are no-ops. The "done" flag is only set once there is
# something to work on, so a call made while @refdata or @packs is still
# nil/empty does not disable a later call made with data present.
def apply_refdata_description_cleanups!
  return if @refdata_descriptions_cleaned
  return if @refdata.nil? || @refdata.empty?
  # Without pack data no item can be matched to a substance, so there is
  # nothing to decide yet (this also avoids calling [] on a nil @packs).
  return if @packs.nil? || @packs.empty?

  @refdata_descriptions_cleaned = true
  double_dose_fixed = 0
  @refdata.each_value do |item|
    next unless item.is_a?(Hash)
    no8 = item[:no8]
    next if no8.nil? || no8.empty?
    pack = @packs[no8]
    # Items without a matching pack are left alone: the substance is unknown.
    next unless pack
    substance = pack[:substance_swissmedic]
    [:desc_de, :desc_fr, :desc_it].each do |key|
      original = item[key]
      cleaned = RefdataCleanup.fix_double_dose(original, substance)
      if cleaned != original
        item[key] = cleaned
        double_dose_fixed += 1
      end
    end
  end
  if double_dose_fixed > 0
    Oddb2xml.log("Refdata cleanup: fixed double-dose pattern in #{double_dose_fixed} description(s)")
  end
end
|
|
119
|
+
|
|
91
120
|
private_class_method
|
|
92
121
|
|
|
93
122
|
def prepare_articles(reset = false)
|
|
94
123
|
@articles = nil if reset
|
|
95
124
|
unless @articles
|
|
96
125
|
Oddb2xml.log("prepare_articles starting with #{@articles ? @articles.size : "no"} articles.")
|
|
126
|
+
apply_refdata_description_cleanups!
|
|
97
127
|
@articles = []
|
|
98
128
|
@refdata.each do |ean13, obj|
|
|
99
129
|
unless SKIP_MIGEL_DOWNLOADER
|
|
@@ -0,0 +1,34 @@
|
|
|
1
|
+
module Oddb2xml
  # Compensates for known data-quality issues in upstream Refdata.Articles.xml
  # before they reach the generated output. Each fix is guarded by a heuristic
  # against Swissmedic data so that genuine combination products are not
  # altered. See GitHub issue #112 for the catalogue of upstream problems.
  module RefdataCleanup
    # One strength token: an integer or decimal number (dot or comma as the
    # separator), optional whitespace, and a unit. Case-insensitive.
    DOSE_TOKEN = /\d+(?:[.,]\d+)?\s*(?:mg|µg|mcg|g|ml|UI|U\.I\.|IE|%)/i

    # Matches "<dose> / <same dose> / " - the templating bug where Refdata
    # repeats the strength once. The backreference \1 requires the second
    # token to be textually identical to the first.
    #
    # The leading lookbehind stops a match from starting in the middle of a
    # number; without it "130 mg / 30 mg /" or "2.5 mg / 5 mg /" would match
    # from the trailing "30 mg" / "5 mg" and lose a real strength.
    #
    # The pattern cannot tell a real combination with equal strengths
    # (e.g. PHESGO 600 mg / 600 mg / 10 ml) from the bug; that distinction
    # is made only by single_substance?.
    DOUBLE_DOSE_RE = /(?<![\d.,])(#{DOSE_TOKEN})\s*\/\s*\1\s*\/\s*/

    # Tells whether a Swissmedic substance cell names exactly one substance.
    #
    # A cell like "mirtazapinum" indicates a mono product;
    # "atovaquonum, proguanili hydrochloridum" or
    # "pertuzumabum, trastuzumabum" indicates a real combination.
    #
    # Returns false for nil, empty or whitespace-only input, and for any
    # value containing a comma; true otherwise.
    def self.single_substance?(swissmedic_substance)
      return false if swissmedic_substance.nil?
      str = swissmedic_substance.to_s.strip
      return false if str.empty?
      !str.include?(",")
    end

    # Removes the duplicated dose token in mono products.
    #
    # desc                 - article description (may be nil or empty)
    # swissmedic_substance - substance cell of the matching Swissmedic pack
    #
    # Returns the cleaned description, or desc itself when it is nil/empty,
    # does not contain the doubled-dose pattern, or the substance is not a
    # known single substance. Only the first occurrence is collapsed.
    def self.fix_double_dose(desc, swissmedic_substance)
      return desc if desc.nil? || desc.empty?
      return desc unless DOUBLE_DOSE_RE.match?(desc)
      return desc unless single_substance?(swissmedic_substance)
      desc.sub(DOUBLE_DOSE_RE, '\1 / ')
    end
  end
end
|
data/lib/oddb2xml/version.rb
CHANGED
data/lib/oddb2xml.rb
CHANGED
|
@@ -0,0 +1,151 @@
|
|
|
1
|
+
require "spec_helper"
|
|
2
|
+
require "oddb2xml/refdata_cleanup"
|
|
3
|
+
|
|
4
|
+
# Unit specs for the pure helper functions in Oddb2xml::RefdataCleanup.
describe Oddb2xml::RefdataCleanup do
  describe ".single_substance?" do
    it "returns true for a single Swissmedic substance" do
      expect(described_class.single_substance?("mirtazapinum")).to be true
      expect(described_class.single_substance?("methotrexatum")).to be true
    end

    it "returns false when multiple substances are listed (combo)" do
      expect(described_class.single_substance?("pertuzumabum, trastuzumabum")).to be false
      expect(described_class.single_substance?("atovaquonum, proguanili hydrochloridum")).to be false
    end

    it "returns false when input is nil or empty" do
      expect(described_class.single_substance?(nil)).to be false
      expect(described_class.single_substance?("")).to be false
      expect(described_class.single_substance?(" ")).to be false
    end
  end

  describe ".fix_double_dose" do
    let(:mono) { "mirtazapinum" }
    let(:combo) { "pertuzumabum, trastuzumabum" }

    it "removes the duplicate dose for a mono product" do
      input = "MIRTAZAPIN Sandoz eco 30 mg / 30 mg / 100 Tablette"
      expected = "MIRTAZAPIN Sandoz eco 30 mg / 100 Tablette"
      expect(described_class.fix_double_dose(input, mono)).to eq expected
    end

    it "handles ICATIBANT-style spacing" do
      input = "ICATIBANT Spirig HC 30 mg / 30 mg / 1 x 3 ml"
      expected = "ICATIBANT Spirig HC 30 mg / 1 x 3 ml"
      expect(described_class.fix_double_dose(input, mono)).to eq expected
    end

    it "leaves real combinations untouched (PHESGO 600 mg / 600 mg / 10 ml)" do
      input = "PHESGO Inj Lös 600 mg/600 mg/10 ml Durchstf"
      expect(described_class.fix_double_dose(input, combo)).to eq input
    end

    it "leaves descriptions without the double-dose pattern untouched" do
      input = "LEVOCETIRIZIN Spirig HC Filmtabl 5 mg 10 Stk"
      expect(described_class.fix_double_dose(input, mono)).to eq input
    end

    it "leaves the description untouched when Swissmedic substance is unknown" do
      input = "MIRTAZAPIN Sandoz eco 30 mg / 30 mg / 100 Tablette"
      expect(described_class.fix_double_dose(input, nil)).to eq input
      expect(described_class.fix_double_dose(input, "")).to eq input
    end

    it "is a no-op for nil or empty descriptions" do
      expect(described_class.fix_double_dose(nil, mono)).to be_nil
      expect(described_class.fix_double_dose("", mono)).to eq ""
    end

    it "does not collapse different doses (X mg / Y mg)" do
      input = "FOO 250 mg / 100 mg / 12 Stk"
      # The mono substance is the meaningful case: with a combination
      # substance the input comes back unchanged whether or not the pattern
      # matched, so a false-positive match would go unnoticed.
      expect(described_class.fix_double_dose(input, mono)).to eq input
      expect(described_class.fix_double_dose(input, combo)).to eq input
    end
  end
end
|
|
66
|
+
|
|
67
|
+
# Specs for Builder#apply_refdata_description_cleanups!, driven through the
# builder's packs / refdata accessors with hand-built fixture hashes.
describe Oddb2xml::Builder do
  describe "#apply_refdata_description_cleanups!" do
    let(:builder) { Oddb2xml::Builder.new }

    # Builds a one-entry refdata hash keyed by GTIN, shaped like the
    # fixtures these examples need.
    def refdata_for(gtin, pack_no8, german, french = "", italian = "")
      {
        gtin => {
          ean13: gtin,
          no8: pack_no8,
          desc_de: german,
          desc_fr: french,
          desc_it: italian
        }
      }
    end

    it "fixes double-dose entries on mono products" do
      builder.packs = {"69475006" => {substance_swissmedic: "mirtazapinum"}}
      builder.refdata = refdata_for(
        "7680694750066",
        "69475006",
        "MIRTAZAPIN Sandoz eco 30 mg / 30 mg / 100 Tablette",
        "MIRTAZAPIN Sandoz eco 30 mg / 30 mg / 100 comprimé("
      )

      builder.apply_refdata_description_cleanups!

      entry = builder.refdata["7680694750066"]
      expect(entry[:desc_de]).to eq "MIRTAZAPIN Sandoz eco 30 mg / 100 Tablette"
      expect(entry[:desc_fr]).to eq "MIRTAZAPIN Sandoz eco 30 mg / 100 comprimé("
    end

    it "leaves combo products untouched" do
      untouched = "PHESGO Inj Lös 600 mg/600 mg/10 ml Durchstf"
      builder.packs = {"67828001" => {substance_swissmedic: "pertuzumabum, trastuzumabum"}}
      builder.refdata = refdata_for("7680678280013", "67828001", untouched)

      builder.apply_refdata_description_cleanups!

      expect(builder.refdata["7680678280013"][:desc_de]).to eq untouched
    end

    it "is idempotent" do
      builder.packs = {"69475006" => {substance_swissmedic: "mirtazapinum"}}
      builder.refdata = refdata_for(
        "7680694750066",
        "69475006",
        "MIRTAZAPIN Sandoz eco 30 mg / 30 mg / 100 Tablette"
      )

      # Calling twice must give the same result as calling once.
      2.times { builder.apply_refdata_description_cleanups! }

      expect(builder.refdata["7680694750066"][:desc_de])
        .to eq "MIRTAZAPIN Sandoz eco 30 mg / 100 Tablette"
    end

    it "skips entries without a Swissmedic match" do
      unmatched = "MIRTAZAPIN Sandoz eco 30 mg / 30 mg / 100 Tablette"
      builder.packs = {}
      builder.refdata = refdata_for("7680694750066", "69475006", unmatched)

      builder.apply_refdata_description_cleanups!

      expect(builder.refdata["7680694750066"][:desc_de]).to eq unmatched
    end
  end
end
|
metadata
CHANGED
|
@@ -1,14 +1,13 @@
|
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
|
2
2
|
name: oddb2xml
|
|
3
3
|
version: !ruby/object:Gem::Version
|
|
4
|
-
version: 3.0.
|
|
4
|
+
version: 3.0.5
|
|
5
5
|
platform: ruby
|
|
6
6
|
authors:
|
|
7
7
|
- Yasuhiro Asaka, Zeno R.R. Davatz, Niklaus Giger
|
|
8
|
-
autorequire:
|
|
9
8
|
bindir: bin
|
|
10
9
|
cert_chain: []
|
|
11
|
-
date:
|
|
10
|
+
date: 1980-01-02 00:00:00.000000000 Z
|
|
12
11
|
dependencies:
|
|
13
12
|
- !ruby/object:Gem::Dependency
|
|
14
13
|
name: rubyzip
|
|
@@ -470,6 +469,7 @@ files:
|
|
|
470
469
|
- lib/oddb2xml/fhir_support.rb
|
|
471
470
|
- lib/oddb2xml/options.rb
|
|
472
471
|
- lib/oddb2xml/parslet_compositions.rb
|
|
472
|
+
- lib/oddb2xml/refdata_cleanup.rb
|
|
473
473
|
- lib/oddb2xml/semantic_check.rb
|
|
474
474
|
- lib/oddb2xml/util.rb
|
|
475
475
|
- lib/oddb2xml/version.rb
|
|
@@ -544,6 +544,7 @@ files:
|
|
|
544
544
|
- spec/galenic_spec.rb
|
|
545
545
|
- spec/options_spec.rb
|
|
546
546
|
- spec/parslet_spec.rb
|
|
547
|
+
- spec/refdata_cleanup_spec.rb
|
|
547
548
|
- spec/spec_helper.rb
|
|
548
549
|
- test_options.rb
|
|
549
550
|
- tools/cacert.pem
|
|
@@ -553,7 +554,6 @@ homepage: https://github.com/zdavatz/oddb2xml
|
|
|
553
554
|
licenses:
|
|
554
555
|
- GPL-3.0-only
|
|
555
556
|
metadata: {}
|
|
556
|
-
post_install_message:
|
|
557
557
|
rdoc_options: []
|
|
558
558
|
require_paths:
|
|
559
559
|
- lib
|
|
@@ -568,8 +568,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
|
568
568
|
- !ruby/object:Gem::Version
|
|
569
569
|
version: '0'
|
|
570
570
|
requirements: []
|
|
571
|
-
rubygems_version: 3.
|
|
572
|
-
signing_key:
|
|
571
|
+
rubygems_version: 3.6.9
|
|
573
572
|
specification_version: 4
|
|
574
573
|
summary: oddb2xml creates xml files.
|
|
575
574
|
test_files:
|
|
@@ -640,4 +639,5 @@ test_files:
|
|
|
640
639
|
- spec/galenic_spec.rb
|
|
641
640
|
- spec/options_spec.rb
|
|
642
641
|
- spec/parslet_spec.rb
|
|
642
|
+
- spec/refdata_cleanup_spec.rb
|
|
643
643
|
- spec/spec_helper.rb
|