oddb2xml 2.0.5 → 2.0.6
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +13 -5
- data/Gemfile.lock +26 -19
- data/History.txt +5 -0
- data/dokumentation_calc.textile +50 -0
- data/lib/oddb2xml/builder.rb +9 -4
- data/lib/oddb2xml/calc.rb +1 -1
- data/lib/oddb2xml/cli.rb +1 -0
- data/lib/oddb2xml/compositions_syntax.rb +368 -0
- data/lib/oddb2xml/extractor.rb +13 -4
- data/lib/oddb2xml/parslet_compositions.rb +598 -0
- data/lib/oddb2xml/version.rb +1 -1
- data/oddb2xml.gemspec +1 -0
- data/spec/builder_spec.rb +1 -1
- data/spec/calc_spec.rb +102 -121
- data/spec/composition_syntax_spec.rb +502 -0
- data/spec/data/compositions.txt +8937 -0
- data/spec/data/swissmedic_package-galenic.xlsx +0 -0
- data/spec/data/zurrose_transfer.dat +5 -0
- data/spec/extractor_spec.rb +40 -0
- data/spec/parslet_spec.rb +1268 -0
- data/spec/spec_helper.rb +8 -0
- metadata +56 -34
- data/lib/oddb2xml/parse_compositions.rb +0 -106
@@ -0,0 +1,598 @@
|
|
1
|
+
# encoding: utf-8
|
2
|
+
|
3
|
+
# This file is shared since oddb2xml 2.0.0 (lib/oddb2xml/parse_compositions.rb)
|
4
|
+
# with oddb.org src/plugin/parse_compositions.rb
|
5
|
+
#
|
6
|
+
# It allows easy parsing of column P (Zusammensetzung) of the Swissmedic packages.xlsx file
|
7
|
+
#
|
8
|
+
|
9
|
+
require 'parslet'
|
10
|
+
require 'parslet/convenience'
|
11
|
+
require 'oddb2xml/compositions_syntax'
|
12
|
+
include Parslet
|
13
|
+
VERBOSE_MESSAGES = false
|
14
|
+
|
15
|
+
# Helpers for parsing the composition column of the swissmedic packages file.
module ParseUtil
  # This class is responsible for patching errors in swissmedic entries after
  # oddb.org detected them, as it sometimes takes a few days (or more) until
  # they get corrected upstream.
  # It also counts, per fix, how many processed lines were changed by it.
  class HandleSwissmedicErrors

    # Counter of parsing errors; incremented/decremented by the caller.
    attr_accessor :nrParsingErrors

    # One fix: +pattern+ (String or Regexp) is replaced by +replacement+.
    # +nr_occurrences+ counts the lines that were changed by this fix.
    # Assigned directly (instead of subclassing a named Struct) so that
    # loading this file a second time does not raise a superclass mismatch.
    ErrorEntry = Struct.new(:pattern, :replacement, :nr_occurrences)

    # Drops all registered fixes and zeroes the line and error counters.
    def reset_errors
      @errors = []
      @nrLines = 0
      @nrParsingErrors = 0
    end

    # error_entries should be a hash of pattern => replacement
    def initialize(error_entries)
      reset_errors
      error_entries.each { |pattern, replacement| @errors << ErrorEntry.new(pattern, replacement, 0) }
    end

    # Returns an array of strings: a summary line followed by one line per fix.
    def report
      s = ["Report of changed compositions in #{@nrLines} lines. Had #{@nrParsingErrors} parsing errors" ]
      @errors.each { |entry|
        s << " replaced #{entry.nr_occurrences} times '#{entry.pattern}' by '#{entry.replacement}'"
      }
      s
    end

    # Applies every registered fix, in registration order, to a copy of
    # +string+ and returns the patched copy. The argument is not modified.
    def apply_fixes(string)
      result = string.clone
      @errors.each { |entry|
        fixed = result.gsub(entry.pattern, entry.replacement)
        unless fixed.eql?(result)
          # counted once per changed line, not once per replaced match
          entry.nr_occurrences += 1
          puts "Fixed #{fixed}" if VERBOSE_MESSAGES
        end
        result = fixed
      }
      @nrLines += 1
      result
    end
    # Example of an entry needing a fix:
    #   hepar sulfuris D6 2,2 mg hypericum perforatum D2 0,66 mg
    # lacks a comma and should be
    #   hepar sulfuris D6 2,2 mg, hypericum perforatum D2 0,66 mg
  end

  # Capitalizes every whitespace-separated word and joins them with one space.
  def ParseUtil.capitalize(string)
    string.split(/\s+/u).collect { |word| word.capitalize }.join(' ').strip
  end

  # Parses +composition_text+ line by line via ParseComposition.from_string and
  # returns an array of ParseComposition objects. A substance is flagged as
  # active agent when its name, or the name of its chemical substance, is
  # listed (case-insensitively) in the comma separated +active_agents_string+.
  # When no line could be parsed, a single fallback composition is returned
  # which is built from the text before the first ',', ':' or '('.
  def ParseUtil.parse_compositions(composition_text, active_agents_string = '')
    text = composition_text.to_s # tolerate nil instead of raising NoMethodError
    active_agents = active_agents_string ? active_agents_string.downcase.split(/,\s+/) : []
    comps = []
    text.gsub(/\r\n?/u, "\n").split(/\n/u).each { |line|
      composition = ParseComposition.from_string(line)
      next unless composition.is_a?(ParseComposition)
      composition.substances.each { |substance_item|
        chemical = substance_item.chemical_substance
        is_active = active_agents.include?(substance_item.name.downcase)
        is_active ||= active_agents.include?(chemical.name.downcase) if chemical
        substance_item.is_active_agent = is_active
      }
      comps << composition
    }
    comps << ParseComposition.new(text.split(/,|:|\(/)[0]) if comps.empty?
    comps
  end
end
|
87
|
+
|
88
|
+
# Literal wrapper around the token captured as :int.
# Defined by assigning the Struct (instead of subclassing an anonymous
# Struct) so that loading this file twice does not raise a
# "superclass mismatch" TypeError.
IntLit = Struct.new(:int) do
  # Returns the wrapped token converted with #to_i.
  def eval
    int.to_i
  end
end
|
91
|
+
# Literal wrapper around the token captured as :qty.
# Defined by assigning the Struct (instead of subclassing an anonymous
# Struct) so that loading this file twice does not raise a
# "superclass mismatch" TypeError.
QtyLit = Struct.new(:qty) do
  # Returns the wrapped token converted with #to_i.
  # NOTE(review): #to_i truncates a fractional part - confirm that this is intended.
  def eval
    qty.to_i
  end
end
|
94
|
+
|
95
|
+
# Transforms the tree produced by CompositionParser into ParseDose and
# ParseSubstance objects.
#
# The rules collect their results in class variables, which are therefore
# shared by all instances: call CompositionTransformer.clear_substances before
# each parse and read the results afterwards via the class methods
# substances, excipiens and corresp.
#
# The value of the last expression of a rule block replaces the matched node
# in the tree, so the statement order inside the blocks matters.
class CompositionTransformer < Parslet::Transform
  rule(:int => simple(:int)) { IntLit.new(int) }
  rule(:number => simple(:nb)) {
    nb.match(/[eE\.]/) ? Float(nb) : Integer(nb)
  }
  rule(
    :qty_range => simple(:qty_range),
    :unit => simple(:unit)) {
      ParseDose.new(qty_range, unit)
    }
  rule(
    :qty_range => simple(:qty_range)) {
      ParseDose.new(qty_range)
    }
  rule(
    :qty => simple(:qty),
    :unit => simple(:unit)) {
      ParseDose.new(qty, unit)
    }
  rule(
    :unit => simple(:unit)) { ParseDose.new(nil, unit) }
  rule(
    :qty => simple(:qty)) { ParseDose.new(qty, nil) }

  # Class variables (not class instance variables) are used on purpose: they
  # are resolved lexically and can therefore be reached from the rule blocks.
  @@substances ||= []
  @@excipiens = nil
  @@corresp = nil # initialised here so that the reader works before the first clear_substances

  # Resets everything collected by the rules. Call it before each parse.
  def CompositionTransformer.clear_substances
    @@substances = []
    @@excipiens = nil
    @@corresp = nil
  end
  # Returns a shallow copy of the substances collected so far.
  def CompositionTransformer.substances
    @@substances.clone
  end
  # Returns a copy of the collected excipiens (or nil).
  def CompositionTransformer.excipiens
    @@excipiens ? @@excipiens.clone : nil
  end
  # Returns a copy of the collected corresp string (or nil).
  def CompositionTransformer.corresp
    @@corresp ? @@corresp.clone : nil
  end
  # Returns +value+ unchanged when it already is a ParseDose, otherwise
  # builds a ParseDose from its string representation.
  def CompositionTransformer.dose_from(value)
    value.is_a?(ParseDose) ? value : ParseDose.new(value.to_s)
  end

  rule(:ratio => simple(:ratio) ) {
    |dictionary|
      puts "#{File.basename(__FILE__)}:#{__LINE__}: dictionary #{dictionary}" if VERBOSE_MESSAGES
      @@substances.last.more_info = dictionary[:ratio].to_s if @@substances.last
  }
  rule(:substance => sequence(:substance),
       :ratio => simple(:ratio)) {
    |dictionary|
      puts "#{File.basename(__FILE__)}:#{__LINE__}: dictionary #{dictionary}" if VERBOSE_MESSAGES
      @@substances.last.more_info = dictionary[:ratio].to_s if @@substances.last
  }

  rule(:solvens => simple(:solvens) ) {
    |dictionary|
      puts "#{File.basename(__FILE__)}:#{__LINE__}: dictionary #{dictionary}" if VERBOSE_MESSAGES
      substance = ParseSubstance.new(dictionary[:solvens].to_s)
      substance.more_info = 'Solvens'
      @@substances << substance
  }
  rule(:lebensmittel_zusatz => simple(:lebensmittel_zusatz),
       :more_info => simple(:more_info),
       :digits => simple(:digits)) {
    |dictionary|
      puts "#{File.basename(__FILE__)}:#{__LINE__}: dictionary #{dictionary}" if VERBOSE_MESSAGES
      substance = ParseSubstance.new("#{dictionary[:lebensmittel_zusatz]} #{dictionary[:digits]}")
      substance.more_info = dictionary[:more_info].to_s.sub(/:$/, '')
      @@substances << substance
  }
  rule(:lebensmittel_zusatz => simple(:lebensmittel_zusatz),
       :digits => simple(:digits)) {
    |dictionary|
      puts "#{File.basename(__FILE__)}:#{__LINE__}: dictionary #{dictionary}" if VERBOSE_MESSAGES
      @@substances << ParseSubstance.new("#{dictionary[:lebensmittel_zusatz]} #{dictionary[:digits]}")
      # NOTE(review): :substance is not captured by this pattern, so this is presumably always nil
      dictionary[:substance]
  }
  rule(:substance => simple(:substance)) {
    |dictionary|
      puts "#{File.basename(__FILE__)}:#{__LINE__}: dictionary #{dictionary}" if VERBOSE_MESSAGES
  }
  rule(:substance_name => simple(:substance_name),
       :dose => simple(:dose),
       ) {
    |dictionary|
      puts "#{File.basename(__FILE__)}:#{__LINE__}: dictionary #{dictionary}" if VERBOSE_MESSAGES
      @@substances << ParseSubstance.new(dictionary[:substance_name].to_s, dictionary[:dose])
  }
  rule(:substance_ut => sequence(:substance_ut),
       ) {
    |dictionary|
      puts "#{File.basename(__FILE__)}:#{__LINE__}: dictionary #{dictionary}" if VERBOSE_MESSAGES
      nil
  }
  rule(:for_ut => sequence(:for_ut),
       ) {
    |dictionary|
      puts "#{File.basename(__FILE__)}:#{__LINE__}: dictionary #{dictionary}" if VERBOSE_MESSAGES
      if dictionary[:for_ut].size > 1
        # the last item becomes a salt of the substance collected before it
        @@substances[-2].salts << dictionary[:for_ut].last.clone
        @@substances.delete(dictionary[:for_ut].last)
      end
      nil
  }

  rule(:substance_name => simple(:substance_name),
       :dose => simple(:dose),
       :substance_corresp => sequence(:substance_corresp),
       ) {
    |dictionary|
      puts "#{File.basename(__FILE__)}:#{__LINE__}: dictionary #{dictionary}" if VERBOSE_MESSAGES
      substance = ParseSubstance.new(dictionary[:substance_name].to_s, dictionary[:dose])
      # the substance collected last becomes the chemical substance of the new one
      substance.chemical_substance = @@substances.last
      @@substances.delete_at(-1)
      @@substances << substance
  }

  rule(:mineralia => simple(:mineralia),
       :more_info => simple(:more_info),
       :substance_name => simple(:substance_name),
       :dose => simple(:dose),
       ) {
    |dictionary|
      puts "#{File.basename(__FILE__)}:#{__LINE__}: dictionary #{dictionary}" if VERBOSE_MESSAGES
      substance = ParseSubstance.new(dictionary[:substance_name].to_s, dictionary[:dose])
      substance.more_info = dictionary[:mineralia].to_s + ' ' + dictionary[:more_info].to_s
      # TODO: fix alia
      @@substances << substance
  }
  rule(:substance_name => simple(:substance_name),
       :conserv => simple(:conserv),
       :dose => simple(:dose),
       ) {
    |dictionary|
      puts "#{File.basename(__FILE__)}:#{__LINE__}: dictionary #{dictionary}" if VERBOSE_MESSAGES
      # keep an already built ParseDose: re-parsing its string form would lose the unit
      substance = ParseSubstance.new(dictionary[:substance_name], CompositionTransformer.dose_from(dictionary[:dose]))
      @@substances << substance
      substance.more_info = dictionary[:conserv].to_s.sub(/:$/, '')
  }

  rule(:substance_name => simple(:substance_name),
       :mineralia => simple(:mineralia),
       ) {
    |dictionary|
      puts "#{File.basename(__FILE__)}:#{__LINE__}: dictionary #{dictionary}" if VERBOSE_MESSAGES
      substance = ParseSubstance.new(dictionary[:substance_name])
      substance.more_info = dictionary[:mineralia].to_s.sub(/:$/, '')
      @@substances << substance
  }
  rule(:substance_name => simple(:substance_name),
       :more_info => simple(:more_info),
       ) {
    |dictionary|
      puts "#{File.basename(__FILE__)}:#{__LINE__}: dictionary #{dictionary}" if VERBOSE_MESSAGES
      substance = ParseSubstance.new(dictionary[:substance_name])
      @@substances << substance
      substance.more_info = dictionary[:more_info].to_s.sub(/:$/, '')
  }
  rule(:substance_name => simple(:substance_name),
       :residui => simple(:residui),
       ) {
    |dictionary|
      puts "#{File.basename(__FILE__)}:#{__LINE__}: dictionary #{dictionary}" if VERBOSE_MESSAGES
      substance = ParseSubstance.new(dictionary[:substance_name])
      @@substances << substance
      substance.more_info = dictionary[:residui].to_s.sub(/:$/, '')
  }
  rule(:qty => simple(:qty),
       :unit => simple(:unit),
       :dose_right => simple(:dose_right),
       ) {
    |dictionary|
      puts "#{File.basename(__FILE__)}:#{__LINE__}: dictionary #{dictionary}" if VERBOSE_MESSAGES
      ParseDose.new(dictionary[:qty].to_s, dictionary[:unit].to_s + ' et ' + dictionary[:dose_right].to_s )
  }

  rule(:substance_name => simple(:substance_name),
       :qty => simple(:qty),
       ) {
    |dictionary|
      puts "#{File.basename(__FILE__)}:#{__LINE__}: dictionary #{dictionary}" if VERBOSE_MESSAGES
      @@substances << ParseSubstance.new(dictionary[:substance_name].to_s.strip, ParseDose.new(dictionary[:qty].to_s))
  }

  rule(:substance_name => simple(:substance_name),
       :dose_corresp => simple(:dose_corresp),
       ) {
    |dictionary|
      puts "#{File.basename(__FILE__)}:#{__LINE__}: dictionary #{dictionary}" if VERBOSE_MESSAGES
      @@substances << ParseSubstance.new(dictionary[:substance_name].to_s, dictionary[:dose_corresp])
  }
  rule(:description => simple(:description),
       :substance_name => simple(:substance_name),
       :qty => simple(:qty),
       :more_info => simple(:more_info),
       ) {
    |dictionary|
      puts "#{File.basename(__FILE__)}:#{__LINE__}: dictionary #{dictionary}" if VERBOSE_MESSAGES
      substance = ParseSubstance.new(dictionary[:substance_name], ParseDose.new(dictionary[:qty].to_s))
      @@substances << substance
      substance.more_info = dictionary[:more_info].to_s
      substance.description = dictionary[:description].to_s
      substance
  }
  rule(:der => simple(:der),
       ) {
    |dictionary|
      puts "#{File.basename(__FILE__)}:#{__LINE__}: dictionary #{dictionary}" if VERBOSE_MESSAGES
      @@substances << ParseSubstance.new(dictionary[:der].to_s)
  }
  rule(:der => simple(:der),
       :substance_corresp => sequence(:substance_corresp),
       ) {
    |dictionary|
      puts "#{File.basename(__FILE__)}:#{__LINE__}: dictionary #{dictionary}" if VERBOSE_MESSAGES
      substance = ParseSubstance.new(dictionary[:der].to_s)
      # the substance collected last becomes the chemical substance of the new one
      substance.chemical_substance = @@substances.last
      @@substances.delete_at(-1)
      @@substances << substance
  }
  rule(:histamin => simple(:histamin),
       ) {
    |dictionary|
      puts "#{File.basename(__FILE__)}:#{__LINE__}: histamin dictionary #{dictionary}" if VERBOSE_MESSAGES
      @@substances << ParseSubstance.new(dictionary[:histamin].to_s)
  }
  rule(:substance_name => simple(:substance_name),
       ) {
    |dictionary|
      puts "#{File.basename(__FILE__)}:#{__LINE__}: dictionary #{dictionary}" if VERBOSE_MESSAGES
      @@substances << ParseSubstance.new(dictionary[:substance_name].to_s)
  }
  rule(:one_substance => sequence(:one_substance)) {
    |dictionary|
      puts "#{File.basename(__FILE__)}:#{__LINE__}: dictionary #{dictionary}" if VERBOSE_MESSAGES
      @@substances << ParseSubstance.new(dictionary[:one_substance])
  }

  rule(:substance_name => simple(:substance_name),
       :substance_ut => sequence(:substance_ut),
       :dose => simple(:dose),
       ) {
    |dictionary|
      puts "#{File.basename(__FILE__)}:#{__LINE__}: dictionary #{dictionary}" if VERBOSE_MESSAGES
      @@substances.last.salts << ParseSubstance.new(dictionary[:substance_name].to_s, dictionary[:dose])
      nil
  }

  rule(:mineralia => simple(:mineralia),
       :dose => simple(:dose),
       :substance_name => simple(:substance_name),
       ) {
    |dictionary|
      puts "#{File.basename(__FILE__)}:#{__LINE__}: dictionary #{dictionary}" if VERBOSE_MESSAGES
      dose = CompositionTransformer.dose_from(dictionary[:dose])
      substance = ParseSubstance.new(dictionary[:substance_name], dose)
      substance.more_info = dictionary[:mineralia].to_s
      @@substances << substance
  }

  rule(:mineralia => simple(:mineralia),
       :dose => simple(:dose),
       :substance_ut => simple(:substance_ut),
       ) {
    |dictionary|
      puts "#{File.basename(__FILE__)}:#{__LINE__}: dictionary #{dictionary}" if VERBOSE_MESSAGES
      dose = CompositionTransformer.dose_from(dictionary[:dose])
      substance = ParseSubstance.new(dictionary[:substance_ut], dose)
      substance.more_info = dictionary[:mineralia].to_s
      @@substances << substance
      nil
  }

  rule(:mineralia => simple(:mineralia),
       :substance_ut => simple(:substance_ut),
       ) {
    |dictionary|
      puts "#{File.basename(__FILE__)}:#{__LINE__}: dictionary #{dictionary}" if VERBOSE_MESSAGES
      # NOTE(review): this rule used to read :substance_name and :dose, which its
      # pattern does not capture. It now uses the captured values - confirm the
      # intended result against real compositions.
      salt = ParseSubstance.new(dictionary[:substance_ut].to_s)
      salt.more_info = dictionary[:mineralia].to_s
      @@substances.last.salts << salt if @@substances.last
      nil
  }
  rule( :more_info => simple(:more_info),
        :substance_name => simple(:substance_name),
        :dose => simple(:dose),
        ) {
    |dictionary|
      puts "#{File.basename(__FILE__)}:#{__LINE__}: dictionary #{dictionary}" if VERBOSE_MESSAGES
      dose = CompositionTransformer.dose_from(dictionary[:dose])
      substance = ParseSubstance.new(dictionary[:substance_name], dose)
      substance.more_info = dictionary[:more_info].to_s
      @@substances << substance
  }

  rule(:excipiens => simple(:excipiens),
       ) {
    |dictionary|
      puts "#{File.basename(__FILE__)}:#{__LINE__}: dictionary #{dictionary}" if VERBOSE_MESSAGES
      @@excipiens = dictionary[:excipiens].is_a?(ParseDose) ? ParseSubstance.new('excipiens', dictionary[:excipiens]) : nil
  }

  rule(:substance_name => simple(:substance_name),
       :dose_pro => simple(:dose_pro),
       ) {
    |dictionary|
      puts "#{File.basename(__FILE__)}:#{__LINE__}: dictionary #{dictionary}" if VERBOSE_MESSAGES
      dose = CompositionTransformer.dose_from(dictionary[:dose_pro])
      substance = ParseSubstance.new(dictionary[:substance_name], dose)
      @@excipiens = dose
      @@substances << substance
  }
  rule(:substance_name => simple(:substance_name),
       :dose => simple(:dose),
       :dose_pro => simple(:dose_pro),
       ) {
    |dictionary|
      puts "#{File.basename(__FILE__)}:#{__LINE__}: dictionary #{dictionary}" if VERBOSE_MESSAGES
      # the substance gets its own :dose; :dose_pro only describes the excipiens
      dose = CompositionTransformer.dose_from(dictionary[:dose])
      dose_pro = CompositionTransformer.dose_from(dictionary[:dose_pro])
      substance = ParseSubstance.new(dictionary[:substance_name], dose)
      @@excipiens = dose_pro
      @@substances << substance
  }

  rule(:dose_pro => simple(:dose_pro),
       ) {
    |dictionary|
      puts "#{File.basename(__FILE__)}:#{__LINE__}: dictionary #{dictionary}" if VERBOSE_MESSAGES
      dictionary[:dose_pro]
  }

  rule(:corresp => simple(:corresp),
       ) {
    |dictionary|
      puts "#{File.basename(__FILE__)}:#{__LINE__}: dictionary #{dictionary}" if VERBOSE_MESSAGES
      @@corresp = dictionary[:corresp].to_s
  }
end
|
441
|
+
|
442
|
+
# A dose consisting of a quantity (or a quantity range) and an optional unit.
class ParseDose
  attr_reader :qty, :qty_range
  attr_accessor :unit

  # qty may be
  # * nil:     the quantity defaults to 1
  # * Numeric: used as is
  # * String or Parslet::Slice: apostrophes are removed, then
  #   - a '-' after the first character makes it a range, kept as string in qty_range
  #   - a '^', '*' or '/' keeps the quantity as string
  #   - otherwise it is converted to a Float (if it contains a '.') or an Integer
  # * anything else must respond to #eval (e.g. IntLit), whose result is used
  # unit is stored as string (or nil).
  def initialize(qty=nil, unit=nil)
    puts "ParseDose.new from #{qty.inspect} #{unit.inspect} #{unit.inspect}" if VERBOSE_MESSAGES
    if qty.is_a?(Numeric)
      # Kernel#eval is private: calling qty.eval on a number would raise NoMethodError
      @qty = qty
    elsif qty and (qty.is_a?(String) || qty.is_a?(Parslet::Slice))
      string = qty.to_s.gsub("'", '')
      dash_at = string.index('-')
      if dash_at and dash_at > 0
        @qty_range = string
      elsif string.index(/\^|\*|\//)
        @qty = string
      else
        @qty = string.index('.') ? string.to_f : string.to_i
      end
    elsif qty
      @qty = qty.eval
    else
      @qty = 1
    end
    @unit = unit ? unit.to_s : nil
  end
  # Allows a ParseDose to be used where an object responding to #eval is expected.
  def eval
    self
  end
  # Returns "<qty or range> <unit>"; the unit part is omitted when there is no unit.
  def to_s
    return @unit unless @qty or @qty_range
    res = "#{@qty}#{@qty_range}"
    res = "#{res} #{@unit}" if @unit
    res
  end
end
|
473
|
+
|
474
|
+
# A substance of a composition: a normalized name, an optional dose and
# optional related information (chemical substance, salts, description, ...).
class ParseSubstance
  attr_accessor :name, :chemical_substance, :chemical_qty, :chemical_unit, :is_active_agent, :dose, :cdose, :is_excipiens
  attr_accessor :description, :more_info, :salts
  # only writers: the readers for qty and unit are defined below
  attr_writer :qty, :unit

  # Replacements applied (in this order, first match only) to the capitalized
  # name: they restore the lower case (or upper case for DER) spelling of
  # some terms and finally drop a trailing ' pro'.
  NAME_FIXES = [
    [/\baqua\b/i, 'aqua'],
    [/\bDER\b/i, 'DER'],
    [/\bad pulverem\b/i, 'ad pulverem'],
    [/\bad iniectabilia\b/i, 'ad iniectabilia'],
    [/\bad suspensionem\b/i, 'ad suspensionem'],
    [/\bad solutionem\b/i, 'ad solutionem'],
    [/\bpro compresso\b/i, 'pro compresso'],
    [/\bpro\b/i, 'pro'],
    [/ Q\.S\. /i, ' q.s. '],
    [/\s+\bpro$/i, ''],
  ].freeze

  # name is converted to a string and normalized; dose is optional.
  def initialize(name, dose=nil)
    puts "ParseSubstance.new from #{name.inspect} #{dose.inspect}" if VERBOSE_MESSAGES
    @name = ParseUtil.capitalize(name.to_s)
    NAME_FIXES.each { |pattern, replacement| @name.sub!(pattern, replacement) }
    @dose = dose if dose
    @salts = []
  end
  # Quantity range or quantity of the dose; without a dose the value set via qty=.
  def qty
    return @dose.qty_range if @dose and @dose.qty_range
    @dose ? @dose.qty : @qty
  end
  # The unit set via unit=, else the unit of the dose (if any).
  def unit
    return @unit if @unit
    @dose ? @dose.unit : @unit
  end
  # Returns "<name>:" followed by the explicitly set quantity and unit.
  # The parts are appended; they used to replace the whole string.
  # NOTE(review): only values set via qty=/unit= are shown (not those of the
  # dose) and the chemical substance is appended via #to_s - confirm the intent.
  def to_string
    s = "#{@name}:"
    s += " #{@qty}" if @qty
    s += " #{@unit}" if @unit
    s += @chemical_substance.to_s if chemical_substance
    s
  end
end
|
509
|
+
|
510
|
+
# One composition line: its source text, an optional label with description,
# the parsed substances and an optional corresp string.
class ParseComposition
  attr_accessor :source, :label, :label_description, :substances, :galenic_form, :route_of_administration,
                :corresp

  # Known errors in the swissmedic data: pattern => replacement.
  # They are applied to each line before it is parsed.
  ErrorsToFix = { /(sulfuris D6\s[^\s]+\smg)\s([^,]+)/ => '\1, \2',
                  /(\d+)\s+\-\s*(\d+)/ => '\1-\2',
                  'o.1' => '0.1',
                  'g DER:' => 'g, DER:',
                  /(excipiens ad solutionem pro \d+ ml), corresp\./ => '\1 corresp.',
                  /^(pollinis allergeni extractum[^\:]+\:)/ => 'A): \1',
                  /^(acari allergeni extractum 5000 U\.\:)/ => 'A): \1',
                }
  @@errorHandler = ParseUtil::HandleSwissmedicErrors.new( ErrorsToFix )

  def initialize(source)
    @substances ||= []
    puts "ParseComposition.new from #{source.inspect} @substances #{@substances.inspect}" if VERBOSE_MESSAGES
    @source = source.to_s
  end
  # Starts over with a fresh error handler, i.e. with all counters at zero.
  def ParseComposition.reset
    @@errorHandler = ParseUtil::HandleSwissmedicErrors.new( ErrorsToFix )
  end
  # Returns the report (an array of strings) of the error handler.
  def ParseComposition.report
    @@errorHandler.report
  end
  # Parses one line. Returns nil for nil, '' or '.' and when the parser raises
  # Parslet::ParseFailed, otherwise a ParseComposition. Its source is the
  # unmodified +string+. Substances, corresp and label are only filled in when
  # the transformed tree is neither nil nor a plain Parslet::Slice.
  def ParseComposition.from_string(string)
    return nil if string.nil? || string.eql?('.') || string.eql?('')
    stripped = string.gsub(/^"|["\n]+$/, '')
    # Each line is counted as a parsing error first; the count is taken back
    # at the end, once substances or a corresp were found.
    @@errorHandler.nrParsingErrors += 1
    # trailing dots are removed unless they belong to the units U.I. or U.
    cleaned = /(U\.I\.|U\.)$/.match(stripped) ? stripped : stripped.sub(/[\.]+$/, '')
    puts "ParseComposition.from_string #{string}" if VERBOSE_MESSAGES # /ng-tr/.match(Socket.gethostbyname(Socket.gethostname).first)

    cleaned = @@errorHandler.apply_fixes(cleaned)
    puts "ParseComposition.new cleaned #{cleaned}" if VERBOSE_MESSAGES && !cleaned.eql?(stripped)

    CompositionTransformer.clear_substances
    result = ParseComposition.new(cleaned)
    parser3 = CompositionParser.new
    transf3 = CompositionTransformer.new
    begin
      if defined?(RSpec)
        ast = transf3.apply(parser3.parse_with_debug(cleaned))
        puts "#{File.basename(__FILE__)}:#{__LINE__}: ==> #{ast}" if VERBOSE_MESSAGES
      else
        ast = transf3.apply(parser3.parse(cleaned))
      end
    rescue Parslet::ParseFailed
      puts "#{File.basename(__FILE__)}:#{__LINE__}: failed parsing ==> #{cleaned}"
      return nil
    end
    result.source = string
    return result unless ast
    return result if ast.is_a?(Parslet::Slice)

    result.substances = CompositionTransformer.substances
    excipiens = CompositionTransformer.excipiens
    corresp = CompositionTransformer.corresp
    result.corresp = corresp if corresp
    if excipiens && excipiens.unit
      # the amount of the excipiens is appended to the units, e.g. "mg" => "mg/5 ml";
      # a quantity of 1 is left out ("/1 ml" => "/ml")
      pro_qty = "/#{excipiens.qty} #{excipiens.unit}".sub(/\/1\s+/, '/')
      result.substances.each { |substance|
        chemical = substance.chemical_substance
        chemical.unit = "#{chemical.unit}#{pro_qty}" if chemical
        # a substance may have a unit but no dose (unit set directly): nothing to patch then
        next unless substance.dose && substance.unit
        substance.dose.unit = "#{substance.dose.unit}#{pro_qty}" unless substance.unit.eql?(excipiens.unit)
      }
    end

    # label and description are taken from the tree itself, or from its first item
    label = nil
    label_description = nil
    label_node = ast.is_a?(Array) ? ast.first : ast
    if label_node.is_a?(Hash)
      label = label_node[:label].to_s if label_node[:label]
      label_description = label_node[:label_description].to_s if label_node[:label_description]
    end
    if label
      # combined labels like "A et B" are not kept as label
      unless /((A|B|C|D|E|I|II|III|IV|\)+)\s+et\s+(A|B|C|D|E|I|II|III|IV|\))+)/.match(label)
        result.label = label
      end
      result.label_description = label_description
    end
    @@errorHandler.nrParsingErrors -= 1 if result.substances.size > 0 || result.corresp
    result
  end
end
|
data/lib/oddb2xml/version.rb
CHANGED
data/oddb2xml.gemspec
CHANGED
@@ -28,6 +28,7 @@ Gem::Specification.new do |spec|
|
|
28
28
|
spec.add_dependency 'spreadsheet', '~> 1.0.0'
|
29
29
|
spec.add_dependency 'rubyXL', '~> 3.3.1'
|
30
30
|
spec.add_dependency 'sax-machine', '~> 0.1.0'
|
31
|
+
spec.add_dependency 'parslet', '~> 1.7.0'
|
31
32
|
|
32
33
|
spec.add_development_dependency "bundler"
|
33
34
|
spec.add_development_dependency "rake"
|