oddb2xml 2.0.6 → 2.0.7

Sign up to get free protection for your applications and to get access to all the features.
@@ -38,18 +38,49 @@ class CompositionParser < Parslet::Parser
38
38
  rule(:radio_isotop) { match['a-zA-Z'].repeat(1) >> lparen >> digits >> str('-') >> match['a-zA-Z'].repeat(1-3) >> rparen >>
39
39
  ((space? >> match['a-zA-Z']).repeat(1)).repeat(0)
40
40
  } # e.g. Xenonum (133-Xe) or yttrii(90-Y) chloridum zum Kalibrierungszeitpunkt
41
- rule(:ratio_value) { match['0-9:\-\.'].repeat(1) >> space?} # eg. ratio: 1:1, ratio: 1:1.5-2.4., ratio: 1:0.68-0.95
41
+ rule(:ratio_value) { match['0-9:\-\.,'].repeat(1) >> space?} # eg. ratio: 1:1, ratio: 1:1.5-2.4., ratio: 1:0.68-0.95, 1:4,1
42
42
 
43
43
  # handle stuff like acidum 9,11-linolicum or 2,2'-methylen-bis(6-tert.-butyl-4-methyl-phenolum) specially. it must contain at least one a-z
44
44
  rule(:umlaut) { match(['éàèèçïöäüâ']) }
45
45
  rule(:identifier_D12) { match['a-zA-Z'] >> match['0-9'].repeat(1) }
46
- rule(:identifier) { str('A + B') | str('ethanol.') | str('poloxamerum 238') | str('TM:') | str('&') | # TODO: why do we have to hard code these identifiers?
47
- str('F.E.I.B.A.') | str('LA 25% TM') | str('50/50') | str('polysorbatum ') >> digit >> digit | str('q.s.') |
48
- digit >> digit.maybe >> space >> str('per centum ') >> str('q.s.').maybe| str('1g/9.6 cm²') |
49
- str('9 g/L 5.4 ml') |
50
- str('spag.') | str('spp.') | str('ssp.') | str('deklar.') | # TODO: Sind diese Abkürzung wirklich Teil eines Substanznamens?
51
- str('ca.') | str('var.') | str('spec.') |
52
- identifier_D12 | identifier_without_comma | identifier_with_comma
46
+
47
+ # TODO: why do we have to hard code these identifiers?
48
+ rule(:fix_coded_identifiers) {
49
+ str("2,2'-methylen-bis(6-tert.-butyl-4-methyl-phenolum)") |
50
+ str('A + B') |
51
+ str('CRM 197') |
52
+ str('F.E.I.B.A.') |
53
+ str('LA ') >> digit.repeat(1,2) >> str('% TM') |
54
+ str('TM:') | str('&') |
55
+ str('ethanol.') |
56
+ str('poloxamerum 238') |
57
+ str('polysorbatum ') >> digit >> digit
58
+ }
59
+
60
+ # TODO: Sind diese Abkürzung wirklich Teil eines Substanznamens?
61
+ rule(:identifier_abbrev_with_comma) {
62
+ str('aquos') |
63
+ str('ca.') |
64
+ str('deklar.') |
65
+ str('spag.') |
66
+ str('spec.') |
67
+ str('spp.') |
68
+ str('ssp.') |
69
+ str('var.')
70
+ }
71
+ rule(:fix_coded_doses) {
72
+ digit >> digit.maybe >> space >> str('per centum ') >> str('q.s.').maybe |
73
+ str('50/50') |
74
+ str('1g/9.6 cm²') |
75
+ str('9 g/L 5.4 ml')
76
+ }
77
+ rule(:identifier) { fix_coded_identifiers |
78
+ identifier_abbrev_with_comma |
79
+ fix_coded_doses |
80
+ str('q.s.') |
81
+ identifier_D12 |
82
+ identifier_without_comma |
83
+ identifier_with_comma
53
84
  }
54
85
 
55
86
  rule(:identifier_with_comma) {
@@ -65,64 +96,70 @@ class CompositionParser < Parslet::Parser
65
96
  rule(:words_nested) { one_word.repeat(1) >> in_parent.maybe >> space? >> one_word.repeat(0) }
66
97
  # dose
67
98
  # 150 U.I. hFSH et 150 U.I. hLH
68
- rule(:dose_unit) { (str('cm²') |
69
- str('g/dm²') |
70
- str('g/l') |
71
- str('g/L') |
72
- str('% V/V') |
73
- str('µg/24 h') |
74
- str('µg/g') |
75
- str('µg') |
76
- str('ng') |
77
- str('guttae') |
78
- str('mg/g') |
79
- str('mg/ml') |
80
- str('MBq/ml') |
81
- str('MBq') |
82
- str('CFU') |
83
- str('mg') |
84
- str('Mg') |
85
- str('kJ') |
86
- str('G') |
87
- str('g') |
88
- str('l') |
89
- str('µl') |
90
- str('U. Ph. Eur.') |
91
- str('ml') |
92
- str('µmol') |
93
- str('mmol/l') |
94
- str('mmol') |
95
- str('Mio CFU') |
96
- str('Mio U.I.') |
97
- str('Mio U.') |
98
- str('Mio. U.I.') |
99
- str('Mio. U.') |
100
- str('Mia. U.I.') |
101
- str('Mia. U.') |
102
- str('U. Botox,') | # TODO: Should be U. Botox
103
- str('U.I. hFSH') |
104
- str('U.I. hCG') |
105
- str('U.I. hLH') |
106
- str('U.I.') |
107
- str('U./ml') |
108
- str('U.') |
109
- str('Mia.') |
110
- str('Mrd.') |
111
- str('% m/m') |
112
- str('% m/m') |
113
- str('%')
114
- ).as(:unit) }
99
+ rule(:dose_unit) {
100
+ (
101
+ str('cm²') |
102
+ str('g/dm²') |
103
+ str('g/l') |
104
+ str('g/L') |
105
+ str('% V/V') |
106
+ str('µg/24 h') |
107
+ str('µg/g') |
108
+ str('µg') |
109
+ str('ng') |
110
+ str('guttae') |
111
+ str('mg/g') |
112
+ str('mg/ml') |
113
+ str('MBq/ml') |
114
+ str('MBq') |
115
+ str('CFU') |
116
+ str('mg') |
117
+ str('Mg') |
118
+ str('kJ') |
119
+ str('G') |
120
+ str('g') |
121
+ str('l') |
122
+ str('µl') |
123
+ str('U. Ph. Eur.') |
124
+ str('ml') |
125
+ str('µmol') |
126
+ str('mmol/l') |
127
+ str('mmol') |
128
+ str('Mio CFU') |
129
+ str('Mio U.I.') |
130
+ str('Mio U.') |
131
+ str('Mio. U.I.') |
132
+ str('Mio. U.') |
133
+ str('Mia. U.I.') |
134
+ str('Mia. U.') |
135
+ str('S.U.') |
136
+ str('U. Botox') |
137
+ str('U.I. hFSH') |
138
+ str('U.I. hCG') |
139
+ str('U.I. hLH') |
140
+ str('U.I.') |
141
+ str('U./ml') |
142
+ str('U.K.I.') |
143
+ str('U.') |
144
+ str('Mia.') |
145
+ str('Mrd.') |
146
+ str('% m/m') |
147
+ str('% m/m') |
148
+ str('%')
149
+ ).as(:unit)
150
+ }
115
151
  rule(:qty_range) { (number >> space? >> (str('+/-') | str(' - ') | str(' -') | str('-') | str('±') ) >> space? >> number).as(:qty_range) }
116
152
  rule(:qty_unit) { dose_qty >> (space >> dose_unit).maybe }
117
153
  rule(:dose_qty) { number.as(:qty) }
118
- rule(:min_max) { str('mind.') | (str('min.') | str('max.') | str('ca.') | str('<') ) >> space? } # TODO: swissmedic should replace mind. -> min.
154
+ rule(:min_max) { (str('min.') | str('max.') | str('ca.') | str('<') ) >> space? }
119
155
  # 75 U.I. hFSH et 75 U.I. hLH
120
156
  rule(:dose_fsh) { qty_unit >> space >> str('et') >> space >> qty_unit.as(:dose_right) }
121
157
  rule(:dose_per) { (digits >> str('/') >> digits).as(:qty)}
122
158
  rule(:dose) { dose_fsh |
123
159
  dose_per |
124
160
  ( min_max.maybe >>
125
- ( (qty_range >> (space >> dose_unit).maybe) | (qty_unit | dose_qty |dose_unit)) >> space? )
161
+ ( (qty_range >> (space >> dose_unit).maybe) | (qty_unit | dose_qty |dose_unit)) >> space? ) >>
162
+ str('pro dosi').maybe >> space?
126
163
  }
127
164
  rule(:dose_with_unit) { min_max.maybe >>
128
165
  dose_fsh |
@@ -140,45 +177,53 @@ class CompositionParser < Parslet::Parser
140
177
  str('conserv.:') |
141
178
  str('color.:')
142
179
  }
180
+
181
+ # Match Wirkstoffe like E 270
143
182
  rule(:lebensmittel_zusatz) { str('E').as(:lebensmittel_zusatz) >> space >>
144
183
  (digits >> match['(a-z)'].repeat(0,3)).as(:digits) >>
145
184
  (space >> dose.as(:dose_lebensmittel_zusatz)).maybe >> space?
146
185
 
147
- } # Match Wirkstoffe like E 270
148
- rule(:der) { (str('DER:') >> space >> digit >> match['0-9\.\-:'].repeat).as(:der) >> space?
149
- } # DER: 1:4 or DER: 3.5:1 or DER: 6-8:1 or DER: 4.0-9.0:1'
186
+ }
187
+ # DER: 1:4 or DER: 3.5:1 or DER: 6-8:1 or DER: 4.0-9.0:1'
188
+ rule(:der_identifier) { str('DER:') >> space >> digit >> match['0-9\.\-:'].repeat }
189
+ rule(:der) { (der_identifier).as(:substance_name) >> space? >> dose.maybe.as(:dose) }
150
190
  rule(:forbidden_in_substance_name) {
151
- useage |
152
- min_max |
153
- str('corresp. ca.,') |
154
- str(', corresp.') |
155
- str('corresp.') |
156
- str('ratio:') |
157
- str('Mio ') |
158
- str('et ') |
159
- str('ut ') |
160
- str('Beutel: ') |
161
- str('ut alia: ') |
162
- str('pro dosi') |
163
- str('pro capsula') |
164
- str('pro vitroe') |
191
+ min_max |
192
+ useage |
193
+ excipiens_identifiers |
194
+ pro_identifiers |
195
+ corresp_substance_label |
165
196
  (digits.repeat(1) >> space >> str(':')) | # match 50 %
197
+ str(', corresp.') |
198
+ str('Beutel: ') |
199
+ str('Mio ') |
200
+ str('ad emulsionem') |
166
201
  str('ad globulos') |
167
- str('ana ') |
168
- str('ana partes') |
169
- str('partes') |
170
202
  str('ad pulverem') |
203
+ str('ad q.s. ') |
204
+ str('ad solutionem') |
171
205
  str('ad suspensionem') |
206
+ str('ana partes') |
207
+ str('ana ') |
208
+ str('aqua ad ') |
209
+ str('aqua q.s. ') |
210
+ str('corresp. ca.,') |
211
+ str('et ') |
212
+ str('excipiens') |
213
+ str('partes') |
214
+ str('pro capsula') |
215
+ str('pro dosi') |
216
+ str('pro vitroe') |
172
217
  str('q.s. ad ') |
173
218
  str('q.s. pro ') |
174
- str('ad solutionem') |
175
- str('ad emulsionem') |
176
- str('excipiens')
219
+ str('ratio:') |
220
+ str('ut alia: ') |
221
+ str('ut ')
177
222
  }
178
223
  rule(:name_without_parenthesis) {
179
224
  (
180
225
  (str('(') | forbidden_in_substance_name).absent? >>
181
- (radio_isotop | str('> 1000') | str('> 500') | identifier.repeat(1)) >>
226
+ (radio_isotop | str('> 1000') | str('> 500') | identifier.repeat(1) >> str('.').maybe) >>
182
227
  space?
183
228
  ).repeat(1)
184
229
  }
@@ -192,116 +237,123 @@ class CompositionParser < Parslet::Parser
192
237
  (forbidden_in_substance_name.absent? >> (identifier.repeat(1) | part_with_parenthesis | rparen) >> space?).repeat(0)
193
238
  }
194
239
  rule(:substance_name) { (
195
- der |
196
240
  name_with_parenthesis |
197
241
  name_without_parenthesis
198
242
  ) >>
199
243
  str('pro dosi').maybe >> space?
200
244
  }
201
- rule(:simple_substance) { substance_name.as(:substance_name) >> space? >> dose.as(:dose).maybe}
245
+ rule(:simple_substance) { substance_name.as(:substance_name) >> space? >> dose.maybe.as(:dose)}
202
246
  rule(:simple_subtance_with_digits_in_name_and_dose) {
203
- substance_lead.maybe >> space? >>
247
+ substance_lead.maybe.as(:more_info) >> space? >>
204
248
  (name_without_parenthesis >> space? >> ((digits.repeat(1) >> (str(' %') | str('%')) | digits.repeat(1)))).as(:substance_name) >>
205
249
  space >> dose_with_unit.as(:dose)
206
250
  }
207
251
 
208
252
 
209
- rule(:pro_dose) { str('pro') >> space >> dose.as(:dose_corresp) }
210
-
211
- # TODO: what does ut alia: impl?
212
- rule(:substance_ut) {
213
- (substance_lead.maybe >> simple_substance).as(:substance_ut) >>
214
- (space? >> (str('pro dosi ut ') | str('ut ') ) >>
215
- space? >> str('alia:').absent? >>
216
- (excipiens |
217
- substance_name >> space? >> str('corresp.') >> space? >> substance_lead.maybe >> space? >> simple_substance |
218
- simple_substance
219
- ).as(:for_ut)
220
- ).repeat(1) >>
221
- space? # >> str('alia:').maybe >> space?
222
- }
223
-
224
253
  rule(:substance_more_info) { # e.g. "acari allergeni extractum 5000 U.:
225
- (str('ratio:').absent? >> (identifier|digits) >> space?).repeat(1).as(:more_info) >> space? >> (str('U.:') | str(':')| str('.:')) >> space?
254
+ (str('ratio:').absent? >> (identifier|digits) >> space?).repeat(1) >> space? >> (str('U.:') | str(':')| str('.:')) >> space?
226
255
  }
227
256
 
228
- rule(:dose_pro) { (
229
- str('excipiens ad solutionem pro ') |
230
- str('aqua q.s. ad gelatume pro ') |
231
- str('aqua q.s. ad solutionem pro ') |
232
- str('aqua q.s. ad suspensionem pro ') |
233
- str('q.s. ad pulverem pro ') |
257
+ rule(:pro_identifiers) {
258
+ str('ut aqua ad iniectabilia q.s. ad emulsionem pro ') |
259
+ str('aqua ').maybe >> str('ad iniectabilia q.s. ad solutionem pro ') |
260
+ str('aqua ').maybe >> str('ad solutionem pro ') |
261
+ str('aqua ').maybe >> str('q.s. ad emulsionem pro ') |
262
+ str('aqua ').maybe >> str('q.s. ad gelatume pro ') |
263
+ str('aqua ').maybe >> str('q.s. ad solutionem pro ') |
264
+ str('aqua ').maybe >> str('q.s. ad suspensionem pro ') |
234
265
  str('doses pro vase ') |
235
- str('pro vase ') |
236
266
  str('excipiens ad emulsionem pro ') |
237
267
  str('excipiens ad pulverem pro ') |
238
- str('aqua ad iniectabilia q.s. ad solutionem pro ')
239
- ) >> dose.as(:dose_pro) >> space? >> ratio.as(:ratio).maybe
268
+ str('excipiens ad solutionem pro ') |
269
+ str('pro vase ') |
270
+ str('q.s. ad pulverem pro ')
240
271
  }
272
+ rule(:excipiens_dose) { pro_identifiers.as(:excipiens_description) >> space? >> dose.as(:dose) >> space? >> ratio.maybe.as(:ratio) >>
273
+ space? >> str('corresp.').maybe >> space? >> dose.maybe.as(:dose_corresp)
274
+ }
241
275
 
242
- rule(:excipiens) { (dose_pro |
276
+ rule(:excipiens_identifiers) {
277
+ str('ad globulos') |
278
+ str('ad pulverem') |
279
+ str('ad solutionem') |
280
+ str('aether q.s.') |
281
+ str('ana partes') |
282
+ str('aqua ad iniectabilia q.s. ad solutionem') |
283
+ str('aqua ad iniectabilia') |
284
+ str('aqua q.s. ad') |
243
285
  str('excipiens pro compresso obducto') |
244
286
  str('excipiens pro compresso') |
245
287
  str('excipiens pro praeparatione') |
246
288
  str('excipiens') |
247
- str('ad pulverem') |
248
289
  str('pro charta') |
249
- str('ad globulos') |
250
- str('aqua ad iniectabilia q.s. ad solutionem') |
251
- str('solvens (i.v.): aqua ad iniectabilia') |
252
- str('ad solutionem') |
253
- str('q.s. ad') |
254
- str('aqua q.s. ad') |
255
- str('saccharum ad') |
256
- str('aether q.s.') |
257
- str('pro vitro') |
258
- str('aqua ad iniectabilia') |
259
290
  str('pro praeparatione') |
291
+ str('pro vitro') |
292
+ str('q.s. ad') |
260
293
  str('q.s. pro praeparatione') |
261
- str('ana partes')
262
- ) >> space? >>
263
- ( any.repeat(0) )
294
+ str('saccharum ad') |
295
+ str('solvens (i.v.): aqua ad iniectabilia')
264
296
  }
265
297
 
266
- rule(:substance_lead) { useage.as(:more_info) >> space? |
267
- str('Beutel:').as(:more_info) >> space? |
268
- str('residui:').as(:more_info) >> space? |
269
- str('mineralia').as(:mineralia) >> str(':') >> space? |
270
- str('Solvens:').as(:solvens) >> space? |
298
+ rule(:excipiens) { substance_lead.maybe.as(:more_info) >> space? >>
299
+ ( excipiens_dose | excipiens_identifiers.as(:excipiens_description)) >>
300
+ space? >> excipiens_dose.maybe.as(:dose_2) >>
301
+ any.repeat(0)
302
+ }
303
+
304
+ rule(:substance_lead) { useage >> space? |
305
+ str('Beutel:') >> space? |
306
+ str('residui:') >> space? |
307
+ str('mineralia:') >> str(':') >> space? |
308
+ str('Solvens:') >> space? |
271
309
  substance_more_info
272
310
  }
273
311
  rule(:corresp_substance_label) {
274
312
  str(', corresp. ca.,') |
275
313
  str('corresp. ca.,') |
314
+ str('corresp.,') |
276
315
  str('corresp.') |
277
- str('corresp., ') |
278
316
  str(', corresp.')
279
317
  }
318
+ rule(:ratio) { str('ratio:') >> space >> ratio_value }
319
+
320
+ rule(:solvens) { (str('Solvens:') | str('Solvens (i.m.):'))>> space >> (any.repeat).as(:more_info) >> space? >>
321
+ (substance.as(:substance) >> str('/L').maybe).maybe >>
322
+ any.maybe
323
+ }
324
+ # Perhaps we could have some syntax sugar to make this more easy?
325
+ #
326
+ def tag(opts={})
327
+ close = opts[:close] || false
328
+ end
329
+
330
+ # TODO: what does ut alia: impl?
331
+ rule(:substance_ut) {
332
+ (space? >> (str('pro dosi ut ') | str('ut ') ) >>
333
+ space? >> str('alia:').absent? >>substance
334
+ ) >>
335
+ space?
336
+ }
280
337
 
281
338
  rule(:corresp_substance) {
282
339
  (corresp_substance_label) >> space? >>
283
340
  (
284
- simple_substance.as(:substance_corresp) |
285
- dose.as(:dose_corresp_2)
341
+ substance |
342
+ dose.as(:dose_corresp_2) |
343
+ excipiens.as(:excipiens)
286
344
  )
287
345
  }
288
346
 
289
- rule(:ratio) { str('ratio:') >> space >> ratio_value }
290
-
291
- rule(:solvens) { (str('Solvens:') | str('Solvens (i.m.):'))>> space >> (any.repeat).as(:solvens) >> space? >>
292
- (substance.as(:substance) >> str('/L').maybe).maybe >>
293
- any.maybe
294
- }
295
347
  rule(:substance) {
296
- simple_subtance_with_digits_in_name_and_dose |
297
- useage.as(:more_info) >> space? >> excipiens |
298
- ratio.as(:ratio) |
299
- solvens |
300
- der >> corresp_substance.maybe |
301
- (str('potenziert mit:') >> space).maybe >> excipiens.as(:excipiens) |
302
- substance_ut |
303
- substance_lead.maybe >> space? >> lebensmittel_zusatz |
304
- substance_lead.maybe >> space? >> simple_substance >> corresp_substance.maybe >> space? >> corresp_substance.maybe >> space? >> dose_pro.maybe >> str('pro dosi').maybe
348
+ (
349
+ simple_subtance_with_digits_in_name_and_dose |
350
+ der |
351
+ substance_lead.maybe.as(:more_info) >> space? >> lebensmittel_zusatz |
352
+ substance_lead.maybe.as(:more_info) >> space? >> simple_substance >> str('pro dosi').maybe
353
+ ).as(:substance) >>
354
+ (space? >> str(', ').maybe >> ratio.maybe).as(:ratio) >>
355
+ space? >> corresp_substance.maybe.as(:chemical_substance) >>
356
+ space? >> substance_ut.repeat(0).as(:substance_ut)
305
357
  }
306
358
  rule(:histamin) { str('U = Histamin Equivalent Prick').as(:histamin) }
307
359
  rule(:praeparatio){ ((one_word >> space?).repeat(1).as(:description) >> str(':') >> space?).maybe >>
@@ -310,9 +362,8 @@ class CompositionParser < Parslet::Parser
310
362
  ((identifier >> space?).repeat(1).as(:more_info) >> space?).maybe
311
363
  }
312
364
  rule(:substance_separator) { (str(', et ') | comma | str('et ') | str('ut alia: ')) >> space? }
313
- rule(:one_substance) { (praeparatio | histamin | substance).as(:substance) >> space? >> ratio.as(:ratio).maybe }
314
- # rule(:one_substance) { (substance_ut).as(:substance) } # >> str('.').maybe }
315
- rule(:all_substances) { (one_substance >> substance_separator.maybe).repeat(1) }
365
+ rule(:one_substance) { (praeparatio | histamin | substance) >> space? >> ratio.as(:ratio).maybe >> space? }
366
+ rule(:all_substances) { (one_substance >> substance_separator.maybe).repeat(1) >> space? >> excipiens.as(:excipiens).maybe}
316
367
  rule(:composition) { all_substances }
317
368
  rule(:long_labels) {
318
369
  str('Praeparatio sicca cum solvens: praeparatio sicca:') |
@@ -344,6 +395,8 @@ class CompositionParser < Parslet::Parser
344
395
  label
345
396
  }
346
397
  rule(:corresp_label) {
398
+ str('aqua ') |
399
+ str('excipiens ') |
347
400
  str('doses ') |
348
401
  str('Pulver: ') |
349
402
  str('Diluens: ') |
@@ -357,11 +410,27 @@ class CompositionParser < Parslet::Parser
357
410
  }
358
411
  rule(:corresp_line) { corresp_label >> any.repeat(1).as(:corresp) |
359
412
  ((label_id >> label_separator >> space? >> str('et ').maybe).repeat(1) >> any.repeat(1)).as(:corresp)
413
+ }
414
+ rule(:corresp_line_neu) { corresp_label >> any.repeat(1).as(:corresp) }
415
+
416
+ rule(:multiple_et_line) {
417
+ ((label_id >> label_separator >> space? >> (str('pro usu') |str('et '))).repeat(1) >> any.repeat(1)).as(:corresp)
360
418
  }
361
419
 
420
+ rule(:polvac) { label_id.as(:label) >> label_separator >> space? >> composition.as(:composition) >> space? >> str('.').maybe >> space? }
421
+
422
+ rule(:label_composition) { label >> space? >> composition.as(:excipiens) >> space? >> str('.').maybe >> space? }
423
+ rule(:label_comment_excipiens) { label >> space? >> excipiens.as(:excipiens) >> space? >> str('.').maybe >> space? }
424
+
362
425
  rule(:expression_comp) {
426
+ corresp_line_neu |
363
427
  leading_label.maybe >> space? >> composition.as(:composition) >> space? >> str('.').maybe >> space? |
364
- corresp_line
428
+ multiple_et_line |
429
+ label_composition |
430
+ polvac |
431
+ label_comment_excipiens |
432
+ excipiens.as(:composition) |
433
+ space.repeat(3)
365
434
  }
366
435
  root :expression_comp
367
436
  end