oddb2xml 2.0.0 → 2.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/History.txt +4 -0
- data/lib/oddb2xml/builder.rb +1 -0
- data/lib/oddb2xml/calc.rb +4 -73
- data/lib/oddb2xml/parse_compositions.rb +80 -0
- data/lib/oddb2xml/version.rb +1 -1
- data/spec/calc_spec.rb +2 -0
- metadata +2 -1
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 82754e27fde87282a1643f1acb81a8e30f2bee99
|
4
|
+
data.tar.gz: 13ae8919c8572bf3d8bf3261802a096bbab9ead2
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 74e386dab573b605b60e3e4fd92740f1ab482b546e41998bd51e60105361bb8c2164723098a24bec2c64e43ff4c338612c6212806c22748c94077788bca59967
|
7
|
+
data.tar.gz: 785ab82c497388b1278c5dba170e9318fdec62fe223fa4fa4a7aa22be3100b3094339d74d29bf7f9bb03f8fa758c8e2d9131d6e2d08ea09d3ae9873ff4a34572
|
data/History.txt
CHANGED
data/lib/oddb2xml/builder.rb
CHANGED
@@ -681,6 +681,7 @@ module Oddb2xml
|
|
681
681
|
xml.COMPOSITION {
|
682
682
|
# xml.SOURCE composition.source # emit this if you want to debug the results
|
683
683
|
xml.LABEL composition.label if composition.label
|
684
|
+
xml.LABEL_DESCRIPTION composition.label_description if composition.label_description
|
684
685
|
xml.SUBSTANCES {
|
685
686
|
composition.substances.each { |substance|
|
686
687
|
xml.SUBSTANCE {
|
data/lib/oddb2xml/calc.rb
CHANGED
@@ -1,6 +1,7 @@
|
|
1
1
|
# encoding: utf-8
|
2
2
|
|
3
3
|
require 'oddb2xml/util'
|
4
|
+
require 'oddb2xml/parse_compositions'
|
4
5
|
require 'yaml'
|
5
6
|
|
6
7
|
module Oddb2xml
|
@@ -150,78 +151,11 @@ module Oddb2xml
|
|
150
151
|
"\n\n\nColumn Präparateliste has everywhere a name\n"
|
151
152
|
end
|
152
153
|
end
|
153
|
-
public
|
154
|
-
SCALE_P = %r{pro\s+(?<scale>(?<qty>[\d.,]+)\s*(?<unit>[kcmuµn]?[glh]))}u
|
155
154
|
private
|
156
155
|
def remove_duplicated_spaces(string)
|
157
156
|
string ? string.to_s.gsub(/\s\s+/, ' ') : nil
|
158
157
|
end
|
159
158
|
public
|
160
|
-
# Update of active substances, etc picked up from oddb.org/src/plugin/swissmedic.rb update_compositions
|
161
|
-
Composition = Struct.new("Composition", :source, :label, :substances, :galenic_form, :route_of_administration)
|
162
|
-
Substance = Struct.new("Substance", :name, :qty, :unit, :chemical_substance, :chemical_dose)
|
163
|
-
def update_compositions(active_substance)
|
164
|
-
rep_1 = '----'; to_1 = '('
|
165
|
-
rep_2 = '-----'; to_2 = ')'
|
166
|
-
rep_3 = '------'; to_3 = ','
|
167
|
-
|
168
|
-
comps = []
|
169
|
-
label_pattern = /^(?<label>A|I|B|II|C|III|D|IV|E|V|F|VI)[)]\s*(?<designation>[^)]+):/
|
170
|
-
composition_text = composition.gsub(/\r\n?/u, "\n")
|
171
|
-
puts "composition_text for #{name}: #{composition_text}" if composition_text.split(/\n/u).size > 1 and $VERBOSE
|
172
|
-
lines = composition_text.split(/\n/u)
|
173
|
-
idx = 0
|
174
|
-
compositions = lines.select do |line|
|
175
|
-
if match = label_pattern.match(line)
|
176
|
-
label = match[:label]
|
177
|
-
else
|
178
|
-
label = nil
|
179
|
-
end
|
180
|
-
idx += 1
|
181
|
-
next if idx > 1 and not label # avoid lines like 'I) et II)'
|
182
|
-
substances = []
|
183
|
-
filler = line.split(',')[-1].sub(/\.$/, '')
|
184
|
-
filler_match = /^(?<name>[^,\d]+)\s*(?<dose>[\d\-.]+(\s*(?:(Mio\.?\s*)?(U\.\s*Ph\.\s*Eur\.|[^\s,]+))))/.match(filler)
|
185
|
-
components = line.split(/([^\(]+\([^)]+\)[^,]+|),/).each {
|
186
|
-
|component|
|
187
|
-
next unless component.size > 0
|
188
|
-
to_consider = component.strip.split(':')[-1] # remove label
|
189
|
-
# very ugly hack to ignore ,()
|
190
|
-
m = /^(?<name>[^,\d()]+)\s*(?<dose>[\d\-.]+(\s*(?:(Mio\.?\s*)?(U\.\s*Ph\.\s*Eur\.|[^\s,]+))))/.match(to_consider
|
191
|
-
.gsub(to_1, rep_1).gsub(to_2, rep_2).gsub(to_3, rep_3))
|
192
|
-
if m2 = /^(|[^:]+:\s)(E\s+\d+)$/.match(component.strip)
|
193
|
-
to_add = Substance.new(m2[2], '', '')
|
194
|
-
substances << to_add
|
195
|
-
elsif m
|
196
|
-
ptrn = /(\s*(?:ut|corresp\.?)\s+(?<chemical>[^\d,]+)\s*(?<cdose>[\d\-.]+(\s*(?:(Mio\.?\s*)?(U\.\s*Ph\.\s*Eur\.|[^\s,]+))(\s*[mv]\/[mv])?))?)/
|
197
|
-
m3 = ptrn.match(component.strip)
|
198
|
-
dose = nil
|
199
|
-
unit = nil
|
200
|
-
name = m[:name].split(/\s/).collect{ |x| x.capitalize }.join(' ').strip.gsub(rep_3, to_3).gsub(rep_2, to_2).gsub(rep_1, to_1)
|
201
|
-
dose = m[:dose].split(/\b\s*(?![.,\d\-]|Mio\.?)/u, 2) if m[:dose]
|
202
|
-
if dose && (scale = SCALE_P.match(filler)) && dose[1] && !dose[1].include?('/')
|
203
|
-
unit = dose[1] << '/'
|
204
|
-
num = scale[:qty].to_f
|
205
|
-
if num <= 1
|
206
|
-
unit << scale[:unit]
|
207
|
-
else
|
208
|
-
unit << scale[:scale]
|
209
|
-
end
|
210
|
-
elsif dose.size == 2
|
211
|
-
unit = dose[1]
|
212
|
-
end
|
213
|
-
next if /\s+pro($|\s+)|emulsion|solution/i.match(name)
|
214
|
-
chemical = m3 ? capitalize(m3[:chemical]) : nil
|
215
|
-
cdose = m3 ? m3[:cdose] : nil
|
216
|
-
substances << Substance.new(name, dose ? dose[0].to_f : nil, unit ? unit.gsub(rep_3, to_3).gsub(rep_2, to_2).gsub(rep_1, to_1) : nil,
|
217
|
-
chemical, cdose)
|
218
|
-
end
|
219
|
-
}
|
220
|
-
comps << Composition.new(line, label, substances) if substances.size > 0
|
221
|
-
end
|
222
|
-
comps
|
223
|
-
end
|
224
|
-
|
225
159
|
def initialize(name = nil, size = nil, unit = nil, active_substance = nil, composition= nil)
|
226
160
|
@name = remove_duplicated_spaces(name)
|
227
161
|
@pkg_size = remove_duplicated_spaces(size)
|
@@ -233,10 +167,10 @@ public
|
|
233
167
|
@measure = @galenic_form.description if @galenic_form and not @measure
|
234
168
|
@galenic_form ||= @@galenic_forms[UnknownGalenicForm]
|
235
169
|
|
236
|
-
unless
|
170
|
+
unless composition
|
237
171
|
@compositions = []
|
238
172
|
else
|
239
|
-
@compositions =
|
173
|
+
@compositions = ParseUtil.parse_compositions(composition)
|
240
174
|
end
|
241
175
|
end
|
242
176
|
|
@@ -263,9 +197,6 @@ public
|
|
263
197
|
@galenic_form.description
|
264
198
|
end
|
265
199
|
private
|
266
|
-
def capitalize(string)
|
267
|
-
string.split(/\s+/u).collect { |word| word.capitalize }.join(' ')
|
268
|
-
end
|
269
200
|
|
270
201
|
def update_rule(rulename)
|
271
202
|
@@rules_counter[rulename] ||= 0
|
@@ -362,7 +293,7 @@ public
|
|
362
293
|
end
|
363
294
|
def search_galenic_info
|
364
295
|
@substances = nil
|
365
|
-
@substances = @composition.split(/\s*,(?!\d|[^(]+\))\s*/u).collect { |name| capitalize(name) }.uniq if @composition
|
296
|
+
@substances = @composition.split(/\s*,(?!\d|[^(]+\))\s*/u).collect { |name| ParseUtil.capitalize(name) }.uniq if @composition
|
366
297
|
|
367
298
|
name = @name ? @name.clone : ''
|
368
299
|
parts = name.split(',')
|
@@ -0,0 +1,80 @@
|
|
1
|
+
# encoding: utf-8
|
2
|
+
|
3
|
+
# This file is shared since oddb2xml 2.0.0 (lib/oddb2xml/parse_compositions.rb)
|
4
|
+
# with oddb.org src/plugin/parse_compositions.rb
|
5
|
+
#
|
6
|
+
# It allows an easy parsing of the column P Zusammensetzung of the swissmedic packages.xlsx file
|
7
|
+
#
|
8
|
+
|
9
|
+
module ParseUtil
|
10
|
+
SCALE_P = %r{pro\s+(?<scale>(?<qty>[\d.,]+)\s*(?<unit>[kcmuµn]?[glh]))}u
|
11
|
+
ParseComposition = Struct.new("ParseComposition", :source, :label, :label_description, :substances, :galenic_form, :route_of_administration)
|
12
|
+
ParseSubstance = Struct.new("ParseSubstance", :name, :qty, :unit, :chemical_substance, :chemical_dose)
|
13
|
+
def ParseUtil.capitalize(string)
|
14
|
+
string.split(/\s+/u).collect { |word| word.capitalize }.join(' ')
|
15
|
+
end
|
16
|
+
|
17
|
+
def ParseUtil.parse_compositions(composition)
|
18
|
+
rep_1 = '----'; to_1 = '('
|
19
|
+
rep_2 = '-----'; to_2 = ')'
|
20
|
+
rep_3 = '------'; to_3 = ','
|
21
|
+
|
22
|
+
comps = []
|
23
|
+
label_pattern = /^(?<label>A|I|B|II|C|III|D|IV|E|V|F|VI)[)]\s*(?<description>[^)]+):/
|
24
|
+
composition_text = composition.gsub(/\r\n?/u, "\n")
|
25
|
+
puts "composition_text for #{name}: #{composition_text}" if composition_text.split(/\n/u).size > 1 and $VERBOSE
|
26
|
+
lines = composition_text.split(/\n/u)
|
27
|
+
idx = 0
|
28
|
+
compositions = lines.select do |line|
|
29
|
+
if match = label_pattern.match(line)
|
30
|
+
label = match[:label]
|
31
|
+
label_description = match[:description]
|
32
|
+
else
|
33
|
+
label = nil
|
34
|
+
label_description = nil
|
35
|
+
end
|
36
|
+
idx += 1
|
37
|
+
next if idx > 1 and not label # avoid lines like 'I) et II)'
|
38
|
+
substances = []
|
39
|
+
filler = line.split(',')[-1].sub(/\.$/, '')
|
40
|
+
filler_match = /^(?<name>[^,\d]+)\s*(?<dose>[\d\-.]+(\s*(?:(Mio\.?\s*)?(U\.\s*Ph\.\s*Eur\.|[^\s,]+))))/.match(filler)
|
41
|
+
components = line.split(/([^\(]+\([^)]+\)[^,]+|),/).each {
|
42
|
+
|component|
|
43
|
+
next unless component.size > 0
|
44
|
+
to_consider = component.strip.split(':')[-1] # remove label
|
45
|
+
# very ugly hack to ignore ,()
|
46
|
+
m = /^(?<name>[^,\d()]+)\s*(?<dose>[\d\-.]+(\s*(?:(Mio\.?\s*)?(U\.\s*Ph\.\s*Eur\.|[^\s,]+))))/.match(to_consider
|
47
|
+
.gsub(to_1, rep_1).gsub(to_2, rep_2).gsub(to_3, rep_3))
|
48
|
+
if m2 = /^(|[^:]+:\s)(E\s+\d+)$/.match(component.strip)
|
49
|
+
to_add = ParseSubstance.new(m2[2], '', '')
|
50
|
+
substances << to_add
|
51
|
+
elsif m
|
52
|
+
ptrn = /(\s*(?:ut|corresp\.?)\s+(?<chemical>[^\d,]+)\s*(?<cdose>[\d\-.]+(\s*(?:(Mio\.?\s*)?(U\.\s*Ph\.\s*Eur\.|[^\s,]+))(\s*[mv]\/[mv])?))?)/
|
53
|
+
m3 = ptrn.match(component.strip)
|
54
|
+
dose = nil
|
55
|
+
unit = nil
|
56
|
+
name = m[:name].split(/\s/).collect{ |x| x.capitalize }.join(' ').strip.gsub(rep_3, to_3).gsub(rep_2, to_2).gsub(rep_1, to_1)
|
57
|
+
dose = m[:dose].split(/\b\s*(?![.,\d\-]|Mio\.?)/u, 2) if m[:dose]
|
58
|
+
if dose && (scale = SCALE_P.match(filler)) && dose[1] && !dose[1].include?('/')
|
59
|
+
unit = dose[1] << '/'
|
60
|
+
num = scale[:qty].to_f
|
61
|
+
if num <= 1
|
62
|
+
unit << scale[:unit]
|
63
|
+
else
|
64
|
+
unit << scale[:scale]
|
65
|
+
end
|
66
|
+
elsif dose.size == 2
|
67
|
+
unit = dose[1]
|
68
|
+
end
|
69
|
+
next if /\s+pro($|\s+)|emulsion|solution/i.match(name)
|
70
|
+
chemical = m3 ? capitalize(m3[:chemical]) : nil
|
71
|
+
cdose = m3 ? m3[:cdose] : nil
|
72
|
+
substances << ParseSubstance.new(name, dose ? dose[0].to_f : nil, unit ? unit.gsub(rep_3, to_3).gsub(rep_2, to_2).gsub(rep_1, to_1) : nil,
|
73
|
+
chemical, cdose)
|
74
|
+
end
|
75
|
+
}
|
76
|
+
comps << ParseComposition.new(line, label, label_description, substances) if substances.size > 0
|
77
|
+
end
|
78
|
+
comps
|
79
|
+
end
|
80
|
+
end
|
data/lib/oddb2xml/version.rb
CHANGED
data/spec/calc_spec.rb
CHANGED
@@ -314,6 +314,7 @@ Corresp. 5300 kJ.",
|
|
314
314
|
XPath.match( doc, "//ARTICLE[GTIN='7680545250363']/COMPOSITIONS/COMPOSITION/SUBSTANCES/SUBSTANCE/SUBSTANCE_NAME").last.text.should eq 'Alprostadilum'
|
315
315
|
XPath.match( doc, "//ARTICLE[GTIN='7680458820202']/NAME").last.text.should eq 'Magnesiumchlorid 0,5 molar B. Braun, Zusatzampulle für Infusionslösungen'
|
316
316
|
XPath.match( doc, "//ARTICLE[GTIN='7680555940018']/COMPOSITIONS/COMPOSITION/LABEL").first.text.should eq 'I'
|
317
|
+
XPath.match( doc, "//ARTICLE[GTIN='7680555940018']/COMPOSITIONS/COMPOSITION/LABEL_DESCRIPTION").first.text.should eq 'Glucoselösung'
|
317
318
|
XPath.match( doc, "//ARTICLE[GTIN='7680555940018']/COMPOSITIONS/COMPOSITION/LABEL").last.text.should eq 'III'
|
318
319
|
end
|
319
320
|
end
|
@@ -423,6 +424,7 @@ Corresp. 5300 kJ.",
|
|
423
424
|
|
424
425
|
specify { expect(result.compositions[0].source).to eq Line_1}
|
425
426
|
specify { expect(result.compositions[0].label).to eq 'I'}
|
427
|
+
specify { expect(result.compositions[0].label_description).to eq 'Glucoselösung'}
|
426
428
|
specify { expect(result.compositions[1].label).to eq 'II' }
|
427
429
|
specify { expect(result.compositions[2].label).to eq 'III' }
|
428
430
|
glucosum = result.compositions.first.substances.first
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: oddb2xml
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 2.0.0
|
4
|
+
version: 2.0.1
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Yasuhiro Asaka, Zeno R.R. Davatz
|
@@ -221,6 +221,7 @@ files:
|
|
221
221
|
- lib/oddb2xml/downloader.rb
|
222
222
|
- lib/oddb2xml/extractor.rb
|
223
223
|
- lib/oddb2xml/options.rb
|
224
|
+
- lib/oddb2xml/parse_compositions.rb
|
224
225
|
- lib/oddb2xml/util.rb
|
225
226
|
- lib/oddb2xml/version.rb
|
226
227
|
- lib/oddb2xml/xml_definitions.rb
|