oddb2xml 2.0.0 → 2.0.1

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 3a0dff7ea1897c93913176510675ffb84641763c
4
- data.tar.gz: ea3b174167d1a38862cf5eba377989c3ed8244f2
3
+ metadata.gz: 82754e27fde87282a1643f1acb81a8e30f2bee99
4
+ data.tar.gz: 13ae8919c8572bf3d8bf3261802a096bbab9ead2
5
5
  SHA512:
6
- metadata.gz: ab2bef6593e7fbd5957d97d0e7fe42808527585ad63af64d689630fb29650ce7d0b4fd2cf208e7e81ff9e2bc66a807f4cb2ed6c05831cd80daff0a71427415a8
7
- data.tar.gz: 61757a8be3030715495f361467febf3f991e6669637b5cefe2cb967739d20009f2feca35c0ac12e88a2e9aafda421e4c3c7db654f1f4a085d77f302a8016bc91
6
+ metadata.gz: 74e386dab573b605b60e3e4fd92740f1ab482b546e41998bd51e60105361bb8c2164723098a24bec2c64e43ff4c338612c6212806c22748c94077788bca59967
7
+ data.tar.gz: 785ab82c497388b1278c5dba170e9318fdec62fe223fa4fa4a7aa22be3100b3094339d74d29bf7f9bb03f8fa758c8e2d9131d6e2d08ea09d3ae9873ff4a34572
data/History.txt CHANGED
@@ -1,3 +1,7 @@
1
+ === 2.0.1 / 17.03.2015
2
+
3
+ * Readded description for label
4
+
1
5
  === 2.0.0 / 17.03.2015
2
6
 
3
7
  * Adding CHEMICAL_SUBSTANCE/DOSE for ut|corresponding
@@ -681,6 +681,7 @@ module Oddb2xml
681
681
  xml.COMPOSITION {
682
682
  # xml.SOURCE composition.source # emit this if you want to debug the results
683
683
  xml.LABEL composition.label if composition.label
684
+ xml.LABEL_DESCRIPTION composition.label_description if composition.label_description
684
685
  xml.SUBSTANCES {
685
686
  composition.substances.each { |substance|
686
687
  xml.SUBSTANCE {
data/lib/oddb2xml/calc.rb CHANGED
@@ -1,6 +1,7 @@
1
1
  # encoding: utf-8
2
2
 
3
3
  require 'oddb2xml/util'
4
+ require 'oddb2xml/parse_compositions'
4
5
  require 'yaml'
5
6
 
6
7
  module Oddb2xml
@@ -150,78 +151,11 @@ module Oddb2xml
150
151
  "\n\n\nColumn Präparateliste has everywhere a name\n"
151
152
  end
152
153
  end
153
- public
154
- SCALE_P = %r{pro\s+(?<scale>(?<qty>[\d.,]+)\s*(?<unit>[kcmuµn]?[glh]))}u
155
154
  private
156
155
  def remove_duplicated_spaces(string)
157
156
  string ? string.to_s.gsub(/\s\s+/, ' ') : nil
158
157
  end
159
158
  public
160
- # Update of active substances, etc picked up from oddb.org/src/plugin/swissmedic.rb update_compositions
161
- Composition = Struct.new("Composition", :source, :label, :substances, :galenic_form, :route_of_administration)
162
- Substance = Struct.new("Substance", :name, :qty, :unit, :chemical_substance, :chemical_dose)
163
- def update_compositions(active_substance)
164
- rep_1 = '----'; to_1 = '('
165
- rep_2 = '-----'; to_2 = ')'
166
- rep_3 = '------'; to_3 = ','
167
-
168
- comps = []
169
- label_pattern = /^(?<label>A|I|B|II|C|III|D|IV|E|V|F|VI)[)]\s*(?<designation>[^)]+):/
170
- composition_text = composition.gsub(/\r\n?/u, "\n")
171
- puts "composition_text for #{name}: #{composition_text}" if composition_text.split(/\n/u).size > 1 and $VERBOSE
172
- lines = composition_text.split(/\n/u)
173
- idx = 0
174
- compositions = lines.select do |line|
175
- if match = label_pattern.match(line)
176
- label = match[:label]
177
- else
178
- label = nil
179
- end
180
- idx += 1
181
- next if idx > 1 and not label # avoid lines like 'I) et II)'
182
- substances = []
183
- filler = line.split(',')[-1].sub(/\.$/, '')
184
- filler_match = /^(?<name>[^,\d]+)\s*(?<dose>[\d\-.]+(\s*(?:(Mio\.?\s*)?(U\.\s*Ph\.\s*Eur\.|[^\s,]+))))/.match(filler)
185
- components = line.split(/([^\(]+\([^)]+\)[^,]+|),/).each {
186
- |component|
187
- next unless component.size > 0
188
- to_consider = component.strip.split(':')[-1] # remove label
189
- # very ugly hack to ignore ,()
190
- m = /^(?<name>[^,\d()]+)\s*(?<dose>[\d\-.]+(\s*(?:(Mio\.?\s*)?(U\.\s*Ph\.\s*Eur\.|[^\s,]+))))/.match(to_consider
191
- .gsub(to_1, rep_1).gsub(to_2, rep_2).gsub(to_3, rep_3))
192
- if m2 = /^(|[^:]+:\s)(E\s+\d+)$/.match(component.strip)
193
- to_add = Substance.new(m2[2], '', '')
194
- substances << to_add
195
- elsif m
196
- ptrn = /(\s*(?:ut|corresp\.?)\s+(?<chemical>[^\d,]+)\s*(?<cdose>[\d\-.]+(\s*(?:(Mio\.?\s*)?(U\.\s*Ph\.\s*Eur\.|[^\s,]+))(\s*[mv]\/[mv])?))?)/
197
- m3 = ptrn.match(component.strip)
198
- dose = nil
199
- unit = nil
200
- name = m[:name].split(/\s/).collect{ |x| x.capitalize }.join(' ').strip.gsub(rep_3, to_3).gsub(rep_2, to_2).gsub(rep_1, to_1)
201
- dose = m[:dose].split(/\b\s*(?![.,\d\-]|Mio\.?)/u, 2) if m[:dose]
202
- if dose && (scale = SCALE_P.match(filler)) && dose[1] && !dose[1].include?('/')
203
- unit = dose[1] << '/'
204
- num = scale[:qty].to_f
205
- if num <= 1
206
- unit << scale[:unit]
207
- else
208
- unit << scale[:scale]
209
- end
210
- elsif dose.size == 2
211
- unit = dose[1]
212
- end
213
- next if /\s+pro($|\s+)|emulsion|solution/i.match(name)
214
- chemical = m3 ? capitalize(m3[:chemical]) : nil
215
- cdose = m3 ? m3[:cdose] : nil
216
- substances << Substance.new(name, dose ? dose[0].to_f : nil, unit ? unit.gsub(rep_3, to_3).gsub(rep_2, to_2).gsub(rep_1, to_1) : nil,
217
- chemical, cdose)
218
- end
219
- }
220
- comps << Composition.new(line, label, substances) if substances.size > 0
221
- end
222
- comps
223
- end
224
-
225
159
  def initialize(name = nil, size = nil, unit = nil, active_substance = nil, composition= nil)
226
160
  @name = remove_duplicated_spaces(name)
227
161
  @pkg_size = remove_duplicated_spaces(size)
@@ -233,10 +167,10 @@ public
233
167
  @measure = @galenic_form.description if @galenic_form and not @measure
234
168
  @galenic_form ||= @@galenic_forms[UnknownGalenicForm]
235
169
 
236
- unless active_substance
170
+ unless composition
237
171
  @compositions = []
238
172
  else
239
- @compositions = update_compositions(active_substance)
173
+ @compositions = ParseUtil.parse_compositions(composition)
240
174
  end
241
175
  end
242
176
 
@@ -263,9 +197,6 @@ public
263
197
  @galenic_form.description
264
198
  end
265
199
  private
266
- def capitalize(string)
267
- string.split(/\s+/u).collect { |word| word.capitalize }.join(' ')
268
- end
269
200
 
270
201
  def update_rule(rulename)
271
202
  @@rules_counter[rulename] ||= 0
@@ -362,7 +293,7 @@ public
362
293
  end
363
294
  def search_galenic_info
364
295
  @substances = nil
365
- @substances = @composition.split(/\s*,(?!\d|[^(]+\))\s*/u).collect { |name| capitalize(name) }.uniq if @composition
296
+ @substances = @composition.split(/\s*,(?!\d|[^(]+\))\s*/u).collect { |name| ParseUtil.capitalize(name) }.uniq if @composition
366
297
 
367
298
  name = @name ? @name.clone : ''
368
299
  parts = name.split(',')
data/lib/oddb2xml/parse_compositions.rb ADDED
@@ -0,0 +1,80 @@
1
+ # encoding: utf-8
2
+
3
+ # This file is shared since oddb2xml 2.0.0 (lib/oddb2xml/parse_compositions.rb)
4
+ # with oddb.org src/plugin/parse_compositions.rb
5
+ #
6
+ # It allows an easy parsing of the column P Zusammensetzung of the swissmedic packages.xlsx file
7
+ #
8
+
9
+ module ParseUtil
10
+ SCALE_P = %r{pro\s+(?<scale>(?<qty>[\d.,]+)\s*(?<unit>[kcmuµn]?[glh]))}u
11
+ ParseComposition = Struct.new("ParseComposition", :source, :label, :label_description, :substances, :galenic_form, :route_of_administration)
12
+ ParseSubstance = Struct.new("ParseSubstance", :name, :qty, :unit, :chemical_substance, :chemical_dose)
13
+ def ParseUtil.capitalize(string)
14
+ string.split(/\s+/u).collect { |word| word.capitalize }.join(' ')
15
+ end
16
+
17
+ def ParseUtil.parse_compositions(composition)
18
+ rep_1 = '----'; to_1 = '('
19
+ rep_2 = '-----'; to_2 = ')'
20
+ rep_3 = '------'; to_3 = ','
21
+
22
+ comps = []
23
+ label_pattern = /^(?<label>A|I|B|II|C|III|D|IV|E|V|F|VI)[)]\s*(?<description>[^)]+):/
24
+ composition_text = composition.gsub(/\r\n?/u, "\n")
25
+ puts "composition_text for #{name}: #{composition_text}" if composition_text.split(/\n/u).size > 1 and $VERBOSE
26
+ lines = composition_text.split(/\n/u)
27
+ idx = 0
28
+ compositions = lines.select do |line|
29
+ if match = label_pattern.match(line)
30
+ label = match[:label]
31
+ label_description = match[:description]
32
+ else
33
+ label = nil
34
+ label_description = nil
35
+ end
36
+ idx += 1
37
+ next if idx > 1 and not label # avoid lines like 'I) et II)'
38
+ substances = []
39
+ filler = line.split(',')[-1].sub(/\.$/, '')
40
+ filler_match = /^(?<name>[^,\d]+)\s*(?<dose>[\d\-.]+(\s*(?:(Mio\.?\s*)?(U\.\s*Ph\.\s*Eur\.|[^\s,]+))))/.match(filler)
41
+ components = line.split(/([^\(]+\([^)]+\)[^,]+|),/).each {
42
+ |component|
43
+ next unless component.size > 0
44
+ to_consider = component.strip.split(':')[-1] # remove label
45
+ # very ugly hack to ignore ,()
46
+ m = /^(?<name>[^,\d()]+)\s*(?<dose>[\d\-.]+(\s*(?:(Mio\.?\s*)?(U\.\s*Ph\.\s*Eur\.|[^\s,]+))))/.match(to_consider
47
+ .gsub(to_1, rep_1).gsub(to_2, rep_2).gsub(to_3, rep_3))
48
+ if m2 = /^(|[^:]+:\s)(E\s+\d+)$/.match(component.strip)
49
+ to_add = ParseSubstance.new(m2[2], '', '')
50
+ substances << to_add
51
+ elsif m
52
+ ptrn = /(\s*(?:ut|corresp\.?)\s+(?<chemical>[^\d,]+)\s*(?<cdose>[\d\-.]+(\s*(?:(Mio\.?\s*)?(U\.\s*Ph\.\s*Eur\.|[^\s,]+))(\s*[mv]\/[mv])?))?)/
53
+ m3 = ptrn.match(component.strip)
54
+ dose = nil
55
+ unit = nil
56
+ name = m[:name].split(/\s/).collect{ |x| x.capitalize }.join(' ').strip.gsub(rep_3, to_3).gsub(rep_2, to_2).gsub(rep_1, to_1)
57
+ dose = m[:dose].split(/\b\s*(?![.,\d\-]|Mio\.?)/u, 2) if m[:dose]
58
+ if dose && (scale = SCALE_P.match(filler)) && dose[1] && !dose[1].include?('/')
59
+ unit = dose[1] << '/'
60
+ num = scale[:qty].to_f
61
+ if num <= 1
62
+ unit << scale[:unit]
63
+ else
64
+ unit << scale[:scale]
65
+ end
66
+ elsif dose.size == 2
67
+ unit = dose[1]
68
+ end
69
+ next if /\s+pro($|\s+)|emulsion|solution/i.match(name)
70
+ chemical = m3 ? capitalize(m3[:chemical]) : nil
71
+ cdose = m3 ? m3[:cdose] : nil
72
+ substances << ParseSubstance.new(name, dose ? dose[0].to_f : nil, unit ? unit.gsub(rep_3, to_3).gsub(rep_2, to_2).gsub(rep_1, to_1) : nil,
73
+ chemical, cdose)
74
+ end
75
+ }
76
+ comps << ParseComposition.new(line, label, label_description, substances) if substances.size > 0
77
+ end
78
+ comps
79
+ end
80
+ end
data/lib/oddb2xml/version.rb CHANGED
@@ -1,3 +1,3 @@
1
1
  module Oddb2xml
2
- VERSION = "2.0.0"
2
+ VERSION = "2.0.1"
3
3
  end
data/spec/calc_spec.rb CHANGED
@@ -314,6 +314,7 @@ Corresp. 5300 kJ.",
314
314
  XPath.match( doc, "//ARTICLE[GTIN='7680545250363']/COMPOSITIONS/COMPOSITION/SUBSTANCES/SUBSTANCE/SUBSTANCE_NAME").last.text.should eq 'Alprostadilum'
315
315
  XPath.match( doc, "//ARTICLE[GTIN='7680458820202']/NAME").last.text.should eq 'Magnesiumchlorid 0,5 molar B. Braun, Zusatzampulle für Infusionslösungen'
316
316
  XPath.match( doc, "//ARTICLE[GTIN='7680555940018']/COMPOSITIONS/COMPOSITION/LABEL").first.text.should eq 'I'
317
+ XPath.match( doc, "//ARTICLE[GTIN='7680555940018']/COMPOSITIONS/COMPOSITION/LABEL_DESCRIPTION").first.text.should eq 'Glucoselösung'
317
318
  XPath.match( doc, "//ARTICLE[GTIN='7680555940018']/COMPOSITIONS/COMPOSITION/LABEL").last.text.should eq 'III'
318
319
  end
319
320
  end
@@ -423,6 +424,7 @@ Corresp. 5300 kJ.",
423
424
 
424
425
  specify { expect(result.compositions[0].source).to eq Line_1}
425
426
  specify { expect(result.compositions[0].label).to eq 'I'}
427
+ specify { expect(result.compositions[0].label_description).to eq 'Glucoselösung'}
426
428
  specify { expect(result.compositions[1].label).to eq 'II' }
427
429
  specify { expect(result.compositions[2].label).to eq 'III' }
428
430
  glucosum = result.compositions.first.substances.first
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: oddb2xml
3
3
  version: !ruby/object:Gem::Version
4
- version: 2.0.0
4
+ version: 2.0.1
5
5
  platform: ruby
6
6
  authors:
7
7
  - Yasuhiro Asaka, Zeno R.R. Davatz
@@ -221,6 +221,7 @@ files:
221
221
  - lib/oddb2xml/downloader.rb
222
222
  - lib/oddb2xml/extractor.rb
223
223
  - lib/oddb2xml/options.rb
224
+ - lib/oddb2xml/parse_compositions.rb
224
225
  - lib/oddb2xml/util.rb
225
226
  - lib/oddb2xml/version.rb
226
227
  - lib/oddb2xml/xml_definitions.rb