oddb2xml 2.0.0 → 2.0.1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/History.txt +4 -0
- data/lib/oddb2xml/builder.rb +1 -0
- data/lib/oddb2xml/calc.rb +4 -73
- data/lib/oddb2xml/parse_compositions.rb +80 -0
- data/lib/oddb2xml/version.rb +1 -1
- data/spec/calc_spec.rb +2 -0
- metadata +2 -1
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 82754e27fde87282a1643f1acb81a8e30f2bee99
|
4
|
+
data.tar.gz: 13ae8919c8572bf3d8bf3261802a096bbab9ead2
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 74e386dab573b605b60e3e4fd92740f1ab482b546e41998bd51e60105361bb8c2164723098a24bec2c64e43ff4c338612c6212806c22748c94077788bca59967
|
7
|
+
data.tar.gz: 785ab82c497388b1278c5dba170e9318fdec62fe223fa4fa4a7aa22be3100b3094339d74d29bf7f9bb03f8fa758c8e2d9131d6e2d08ea09d3ae9873ff4a34572
|
data/History.txt
CHANGED
data/lib/oddb2xml/builder.rb
CHANGED
@@ -681,6 +681,7 @@ module Oddb2xml
|
|
681
681
|
xml.COMPOSITION {
|
682
682
|
# xml.SOURCE composition.source # emit this if you want to debug the results
|
683
683
|
xml.LABEL composition.label if composition.label
|
684
|
+
xml.LABEL_DESCRIPTION composition.label_description if composition.label_description
|
684
685
|
xml.SUBSTANCES {
|
685
686
|
composition.substances.each { |substance|
|
686
687
|
xml.SUBSTANCE {
|
data/lib/oddb2xml/calc.rb
CHANGED
@@ -1,6 +1,7 @@
|
|
1
1
|
# encoding: utf-8
|
2
2
|
|
3
3
|
require 'oddb2xml/util'
|
4
|
+
require 'oddb2xml/parse_compositions'
|
4
5
|
require 'yaml'
|
5
6
|
|
6
7
|
module Oddb2xml
|
@@ -150,78 +151,11 @@ module Oddb2xml
|
|
150
151
|
"\n\n\nColumn Präparateliste has everywhere a name\n"
|
151
152
|
end
|
152
153
|
end
|
153
|
-
public
|
154
|
-
SCALE_P = %r{pro\s+(?<scale>(?<qty>[\d.,]+)\s*(?<unit>[kcmuµn]?[glh]))}u
|
155
154
|
private
|
156
155
|
def remove_duplicated_spaces(string)
|
157
156
|
string ? string.to_s.gsub(/\s\s+/, ' ') : nil
|
158
157
|
end
|
159
158
|
public
|
160
|
-
# Update of active substances, etc picked up from oddb.org/src/plugin/swissmedic.rb update_compositions
|
161
|
-
Composition = Struct.new("Composition", :source, :label, :substances, :galenic_form, :route_of_administration)
|
162
|
-
Substance = Struct.new("Substance", :name, :qty, :unit, :chemical_substance, :chemical_dose)
|
163
|
-
def update_compositions(active_substance)
|
164
|
-
rep_1 = '----'; to_1 = '('
|
165
|
-
rep_2 = '-----'; to_2 = ')'
|
166
|
-
rep_3 = '------'; to_3 = ','
|
167
|
-
|
168
|
-
comps = []
|
169
|
-
label_pattern = /^(?<label>A|I|B|II|C|III|D|IV|E|V|F|VI)[)]\s*(?<designation>[^)]+):/
|
170
|
-
composition_text = composition.gsub(/\r\n?/u, "\n")
|
171
|
-
puts "composition_text for #{name}: #{composition_text}" if composition_text.split(/\n/u).size > 1 and $VERBOSE
|
172
|
-
lines = composition_text.split(/\n/u)
|
173
|
-
idx = 0
|
174
|
-
compositions = lines.select do |line|
|
175
|
-
if match = label_pattern.match(line)
|
176
|
-
label = match[:label]
|
177
|
-
else
|
178
|
-
label = nil
|
179
|
-
end
|
180
|
-
idx += 1
|
181
|
-
next if idx > 1 and not label # avoid lines like 'I) et II)'
|
182
|
-
substances = []
|
183
|
-
filler = line.split(',')[-1].sub(/\.$/, '')
|
184
|
-
filler_match = /^(?<name>[^,\d]+)\s*(?<dose>[\d\-.]+(\s*(?:(Mio\.?\s*)?(U\.\s*Ph\.\s*Eur\.|[^\s,]+))))/.match(filler)
|
185
|
-
components = line.split(/([^\(]+\([^)]+\)[^,]+|),/).each {
|
186
|
-
|component|
|
187
|
-
next unless component.size > 0
|
188
|
-
to_consider = component.strip.split(':')[-1] # remove label
|
189
|
-
# very ugly hack to ignore ,()
|
190
|
-
m = /^(?<name>[^,\d()]+)\s*(?<dose>[\d\-.]+(\s*(?:(Mio\.?\s*)?(U\.\s*Ph\.\s*Eur\.|[^\s,]+))))/.match(to_consider
|
191
|
-
.gsub(to_1, rep_1).gsub(to_2, rep_2).gsub(to_3, rep_3))
|
192
|
-
if m2 = /^(|[^:]+:\s)(E\s+\d+)$/.match(component.strip)
|
193
|
-
to_add = Substance.new(m2[2], '', '')
|
194
|
-
substances << to_add
|
195
|
-
elsif m
|
196
|
-
ptrn = /(\s*(?:ut|corresp\.?)\s+(?<chemical>[^\d,]+)\s*(?<cdose>[\d\-.]+(\s*(?:(Mio\.?\s*)?(U\.\s*Ph\.\s*Eur\.|[^\s,]+))(\s*[mv]\/[mv])?))?)/
|
197
|
-
m3 = ptrn.match(component.strip)
|
198
|
-
dose = nil
|
199
|
-
unit = nil
|
200
|
-
name = m[:name].split(/\s/).collect{ |x| x.capitalize }.join(' ').strip.gsub(rep_3, to_3).gsub(rep_2, to_2).gsub(rep_1, to_1)
|
201
|
-
dose = m[:dose].split(/\b\s*(?![.,\d\-]|Mio\.?)/u, 2) if m[:dose]
|
202
|
-
if dose && (scale = SCALE_P.match(filler)) && dose[1] && !dose[1].include?('/')
|
203
|
-
unit = dose[1] << '/'
|
204
|
-
num = scale[:qty].to_f
|
205
|
-
if num <= 1
|
206
|
-
unit << scale[:unit]
|
207
|
-
else
|
208
|
-
unit << scale[:scale]
|
209
|
-
end
|
210
|
-
elsif dose.size == 2
|
211
|
-
unit = dose[1]
|
212
|
-
end
|
213
|
-
next if /\s+pro($|\s+)|emulsion|solution/i.match(name)
|
214
|
-
chemical = m3 ? capitalize(m3[:chemical]) : nil
|
215
|
-
cdose = m3 ? m3[:cdose] : nil
|
216
|
-
substances << Substance.new(name, dose ? dose[0].to_f : nil, unit ? unit.gsub(rep_3, to_3).gsub(rep_2, to_2).gsub(rep_1, to_1) : nil,
|
217
|
-
chemical, cdose)
|
218
|
-
end
|
219
|
-
}
|
220
|
-
comps << Composition.new(line, label, substances) if substances.size > 0
|
221
|
-
end
|
222
|
-
comps
|
223
|
-
end
|
224
|
-
|
225
159
|
def initialize(name = nil, size = nil, unit = nil, active_substance = nil, composition= nil)
|
226
160
|
@name = remove_duplicated_spaces(name)
|
227
161
|
@pkg_size = remove_duplicated_spaces(size)
|
@@ -233,10 +167,10 @@ public
|
|
233
167
|
@measure = @galenic_form.description if @galenic_form and not @measure
|
234
168
|
@galenic_form ||= @@galenic_forms[UnknownGalenicForm]
|
235
169
|
|
236
|
-
unless
|
170
|
+
unless composition
|
237
171
|
@compositions = []
|
238
172
|
else
|
239
|
-
@compositions =
|
173
|
+
@compositions = ParseUtil.parse_compositions(composition)
|
240
174
|
end
|
241
175
|
end
|
242
176
|
|
@@ -263,9 +197,6 @@ public
|
|
263
197
|
@galenic_form.description
|
264
198
|
end
|
265
199
|
private
|
266
|
-
def capitalize(string)
|
267
|
-
string.split(/\s+/u).collect { |word| word.capitalize }.join(' ')
|
268
|
-
end
|
269
200
|
|
270
201
|
def update_rule(rulename)
|
271
202
|
@@rules_counter[rulename] ||= 0
|
@@ -362,7 +293,7 @@ public
|
|
362
293
|
end
|
363
294
|
def search_galenic_info
|
364
295
|
@substances = nil
|
365
|
-
@substances = @composition.split(/\s*,(?!\d|[^(]+\))\s*/u).collect { |name| capitalize(name) }.uniq if @composition
|
296
|
+
@substances = @composition.split(/\s*,(?!\d|[^(]+\))\s*/u).collect { |name| ParseUtil.capitalize(name) }.uniq if @composition
|
366
297
|
|
367
298
|
name = @name ? @name.clone : ''
|
368
299
|
parts = name.split(',')
|
@@ -0,0 +1,80 @@
|
|
1
|
+
# encoding: utf-8
|
2
|
+
|
3
|
+
# This file is shared since oddb2xml 2.0.0 (lib/oddb2xml/parse_compositions.rb)
|
4
|
+
# with oddb.org src/plugin/parse_compositions.rb
|
5
|
+
#
|
6
|
+
# It allows an easy parsing of the column P Zusammensetzung of the swissmedic packages.xlsx file
|
7
|
+
#
|
8
|
+
|
9
|
+
# Helpers for parsing the free-text composition column ("Zusammensetzung")
# into structured compositions and substances.
module ParseUtil
  # Matches a trailing scale such as "pro 1 ml" or "pro 100 g" and exposes
  # the whole scale (:scale), its quantity (:qty) and its unit (:unit).
  SCALE_P = %r{pro\s+(?<scale>(?<qty>[\d.,]+)\s*(?<unit>[kcmuµn]?[glh]))}u

  # One parsed line of the composition text.
  ParseComposition = Struct.new("ParseComposition", :source, :label, :label_description, :substances, :galenic_form, :route_of_administration)

  # One substance found inside a composition line.
  ParseSubstance = Struct.new("ParseSubstance", :name, :qty, :unit, :chemical_substance, :chemical_dose)

  # Capitalizes every whitespace-separated word and joins them with single spaces.
  #
  # @param string [String] text to capitalize
  # @return [String] e.g. "acidum  ascorbicum" => "Acidum Ascorbicum"
  def self.capitalize(string)
    string.split(/\s+/u).collect { |word| word.capitalize }.join(' ')
  end

  # Parses a composition text into a list of ParseComposition structs.
  #
  # The first non-blank line is always parsed. Every following line is parsed
  # only when it starts with a label such as "I) Description:"; unlabelled
  # follow-up lines (e.g. "I) et II) ...") are ignored. Lines that yield no
  # substance are dropped from the result.
  #
  # @param composition [String, nil] raw composition text, lines separated by
  #   "\n", "\r\n" or "\r"
  # @return [Array<ParseComposition>] parsed compositions; empty for nil,
  #   empty or blank input
  def self.parse_compositions(composition)
    return [] if composition.nil?

    # "(", ")" and "," are replaced by dash tokens before the substance regex
    # runs, so that the name part may contain them. They must be restored in
    # reverse order because each token is a prefix of the next one.
    masks  = [['(', '----'], [')', '-----'], [',', '------']]
    mask   = lambda { |text| masks.inject(text) { |acc, (char, token)| acc.gsub(char, token) } }
    unmask = lambda { |text| masks.reverse.inject(text) { |acc, (char, token)| acc.gsub(token, char) } }

    label_pattern     = /^(?<label>A|I|B|II|C|III|D|IV|E|V|F|VI)[)]\s*(?<description>[^)]+):/
    component_split   = /([^\(]+\([^)]+\)[^,]+|),/
    substance_pattern = /^(?<name>[^,\d()]+)\s*(?<dose>[\d\-.]+(\s*(?:(Mio\.?\s*)?(U\.\s*Ph\.\s*Eur\.|[^\s,]+))))/
    e_number_pattern  = /^(|[^:]+:\s)(E\s+\d+)$/
    chemical_pattern  = /(\s*(?:ut|corresp\.?)\s+(?<chemical>[^\d,]+)\s*(?<cdose>[\d\-.]+(\s*(?:(Mio\.?\s*)?(U\.\s*Ph\.\s*Eur\.|[^\s,]+))(\s*[mv]\/[mv])?))?)/
    excipient_pattern = /\s+pro($|\s+)|emulsion|solution/i
    dose_split        = /\b\s*(?![.,\d\-]|Mio\.?)/u

    composition_text = composition.gsub(/\r\n?/u, "\n")
    raw_lines = composition_text.split(/\n/u)
    puts "composition_text: #{composition_text}" if $VERBOSE && raw_lines.size > 1

    # Blank lines carry no substances; skipping them up front also keeps a
    # leading blank line from hiding the real first line.
    lines = raw_lines.reject { |line| line.strip.empty? }

    comps = []
    lines.each_with_index do |line, index|
      label_match = label_pattern.match(line)
      next if index > 0 && label_match.nil? # avoid lines like 'I) et II)'

      label             = label_match && label_match[:label]
      label_description = label_match && label_match[:description]

      # The last comma-separated part may carry the scale ("... pro 1 ml.").
      # A line made only of commas has no parts at all, hence the fallback.
      filler = (line.split(',').last || '').sub(/\.$/, '')
      scale  = SCALE_P.match(filler)

      substances = []
      line.split(component_split).each do |component|
        stripped = component.strip
        next if stripped.empty?

        # E-numbers (e.g. "E 123") are recorded without quantity or unit.
        if (e_number = e_number_pattern.match(stripped))
          substances << ParseSubstance.new(e_number[2], '', '')
          next
        end

        to_consider = stripped.split(':').last # remove label
        next unless to_consider

        m = substance_pattern.match(mask.call(to_consider))
        next unless m

        name = unmask.call(m[:name].split(/\s/).collect { |x| x.capitalize }.join(' ').strip)
        dose = m[:dose].split(dose_split, 2)

        unit = nil
        if scale && dose[1] && !dose[1].include?('/')
          # "50 mg ... pro 1 ml" => "mg/ml"; "50 mg ... pro 100 g" => "mg/100 g"
          unit = "#{dose[1]}/#{scale[:qty].to_f <= 1 ? scale[:unit] : scale[:scale]}"
        elsif dose.size == 2
          unit = dose[1]
        end

        # Skip carrier phrases such as "... ad solutionem pro 1 ml".
        next if excipient_pattern.match(name)

        chemical_match = chemical_pattern.match(stripped)
        chemical = chemical_match ? capitalize(chemical_match[:chemical]) : nil
        cdose    = chemical_match ? chemical_match[:cdose] : nil

        substances << ParseSubstance.new(name, dose[0].to_f, unit ? unmask.call(unit) : nil, chemical, cdose)
      end

      comps << ParseComposition.new(line, label, label_description, substances) if substances.size > 0
    end
    comps
  end
end
|
data/lib/oddb2xml/version.rb
CHANGED
data/spec/calc_spec.rb
CHANGED
@@ -314,6 +314,7 @@ Corresp. 5300 kJ.",
|
|
314
314
|
XPath.match( doc, "//ARTICLE[GTIN='7680545250363']/COMPOSITIONS/COMPOSITION/SUBSTANCES/SUBSTANCE/SUBSTANCE_NAME").last.text.should eq 'Alprostadilum'
|
315
315
|
XPath.match( doc, "//ARTICLE[GTIN='7680458820202']/NAME").last.text.should eq 'Magnesiumchlorid 0,5 molar B. Braun, Zusatzampulle für Infusionslösungen'
|
316
316
|
XPath.match( doc, "//ARTICLE[GTIN='7680555940018']/COMPOSITIONS/COMPOSITION/LABEL").first.text.should eq 'I'
|
317
|
+
XPath.match( doc, "//ARTICLE[GTIN='7680555940018']/COMPOSITIONS/COMPOSITION/LABEL_DESCRIPTION").first.text.should eq 'Glucoselösung'
|
317
318
|
XPath.match( doc, "//ARTICLE[GTIN='7680555940018']/COMPOSITIONS/COMPOSITION/LABEL").last.text.should eq 'III'
|
318
319
|
end
|
319
320
|
end
|
@@ -423,6 +424,7 @@ Corresp. 5300 kJ.",
|
|
423
424
|
|
424
425
|
specify { expect(result.compositions[0].source).to eq Line_1}
|
425
426
|
specify { expect(result.compositions[0].label).to eq 'I'}
|
427
|
+
specify { expect(result.compositions[0].label_description).to eq 'Glucoselösung'}
|
426
428
|
specify { expect(result.compositions[1].label).to eq 'II' }
|
427
429
|
specify { expect(result.compositions[2].label).to eq 'III' }
|
428
430
|
glucosum = result.compositions.first.substances.first
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: oddb2xml
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 2.0.0
|
4
|
+
version: 2.0.1
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Yasuhiro Asaka, Zeno R.R. Davatz
|
@@ -221,6 +221,7 @@ files:
|
|
221
221
|
- lib/oddb2xml/downloader.rb
|
222
222
|
- lib/oddb2xml/extractor.rb
|
223
223
|
- lib/oddb2xml/options.rb
|
224
|
+
- lib/oddb2xml/parse_compositions.rb
|
224
225
|
- lib/oddb2xml/util.rb
|
225
226
|
- lib/oddb2xml/version.rb
|
226
227
|
- lib/oddb2xml/xml_definitions.rb
|