oddb2xml 2.7.1 → 2.7.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.github/workflows/ruby.yml +1 -1
- data/.standard.yml +2 -0
- data/Gemfile +3 -3
- data/History.txt +8 -0
- data/README.md +1 -1
- data/Rakefile +24 -23
- data/bin/check_artikelstamm +11 -11
- data/bin/compare_v5 +23 -23
- data/bin/oddb2xml +14 -13
- data/lib/oddb2xml.rb +1 -1
- data/lib/oddb2xml/builder.rb +1070 -1038
- data/lib/oddb2xml/calc.rb +232 -233
- data/lib/oddb2xml/chapter_70_hack.rb +38 -32
- data/lib/oddb2xml/cli.rb +252 -236
- data/lib/oddb2xml/compare.rb +70 -59
- data/lib/oddb2xml/compositions_syntax.rb +448 -430
- data/lib/oddb2xml/compressor.rb +20 -20
- data/lib/oddb2xml/downloader.rb +153 -127
- data/lib/oddb2xml/extractor.rb +302 -289
- data/lib/oddb2xml/options.rb +34 -35
- data/lib/oddb2xml/parslet_compositions.rb +263 -269
- data/lib/oddb2xml/semantic_check.rb +39 -33
- data/lib/oddb2xml/util.rb +163 -163
- data/lib/oddb2xml/version.rb +1 -1
- data/lib/oddb2xml/xml_definitions.rb +32 -33
- data/oddb2xml.gemspec +31 -32
- data/spec/artikelstamm_spec.rb +111 -110
- data/spec/builder_spec.rb +489 -505
- data/spec/calc_spec.rb +552 -593
- data/spec/check_artikelstamm_spec.rb +26 -26
- data/spec/cli_spec.rb +173 -174
- data/spec/compare_spec.rb +9 -11
- data/spec/composition_syntax_spec.rb +390 -409
- data/spec/compressor_spec.rb +48 -48
- data/spec/data/transfer.dat +1 -0
- data/spec/data_helper.rb +47 -49
- data/spec/downloader_spec.rb +247 -260
- data/spec/extractor_spec.rb +171 -159
- data/spec/galenic_spec.rb +233 -256
- data/spec/options_spec.rb +116 -119
- data/spec/parslet_spec.rb +833 -861
- data/spec/spec_helper.rb +154 -153
- data/test_options.rb +39 -42
- data/tools/win_fetch_cacerts.rb +2 -3
- metadata +19 -3
data/lib/oddb2xml/options.rb
CHANGED
@@ -1,61 +1,60 @@
|
|
1
|
-
|
2
|
-
require
|
3
|
-
require 'oddb2xml/version'
|
1
|
+
require "optimist"
|
2
|
+
require "oddb2xml/version"
|
4
3
|
|
5
4
|
module Oddb2xml
|
6
5
|
module Options
|
7
|
-
def self.parse(args =ARGV)
|
6
|
+
def self.parse(args = ARGV)
|
8
7
|
if args.is_a?(String)
|
9
|
-
args = args.split(
|
8
|
+
args = args.split(" ")
|
10
9
|
end
|
11
10
|
|
12
|
-
@opts = Optimist
|
13
|
-
version "
|
11
|
+
@opts = Optimist.options(args) do
|
12
|
+
version "#{$0} ver.#{Oddb2xml::VERSION}"
|
14
13
|
banner <<-EOS
|
15
14
|
#{File.expand_path($0)} version #{Oddb2xml::VERSION}
|
16
15
|
Usage:
|
17
16
|
oddb2xml [option]
|
18
17
|
produced files are found under data
|
19
|
-
EOS
|
20
|
-
opt :append,
|
18
|
+
EOS
|
19
|
+
opt :append, "Additional target nonpharma", default: false
|
21
20
|
opt :artikelstamm, "Create Artikelstamm Version 3 and 5 for Elexis >= 3.1"
|
22
|
-
opt :compress_ext,
|
23
|
-
opt :extended,
|
21
|
+
opt :compress_ext, "format F. {tar.gz|zip}", type: :string, default: nil, short: "c"
|
22
|
+
opt :extended, "pharma, non-pharma plus prices and non-pharma from zurrose.
|
24
23
|
Products without EAN-Code will also be listed.
|
25
24
|
File oddb_calc.xml will also be generated"
|
26
|
-
opt :format,
|
27
|
-
If F is given, -o option is ignored.", :
|
28
|
-
opt :include,
|
29
|
-
'xml' format includes always ean14 records.", :
|
30
|
-
opt :increment,
|
25
|
+
opt :format, "File format F, default is xml. {xml|dat}
|
26
|
+
If F is given, -o option is ignored.", type: :string, default: "xml"
|
27
|
+
opt :include, "Include target option for ean14 for 'dat' format.
|
28
|
+
'xml' format includes always ean14 records.", short: "i"
|
29
|
+
opt :increment, "Increment price by x percent. Forces -f dat -p zurrose.
|
31
30
|
create additional field price_resellerpub as
|
32
31
|
price_extfactory incremented by x percent (rounded to the next 0.05 francs)
|
33
32
|
in oddb_article.xml. In generated zurrose_transfer.dat PRPU is set to this price
|
34
|
-
Forces -f dat -p zurrose.", :
|
35
|
-
opt :fi,
|
36
|
-
opt :price,
|
37
|
-
opt :tag_suffix,
|
38
|
-
If S is given, it is also used as prefix of filename.", :
|
39
|
-
opt :context,
|
40
|
-
opt :calc,
|
33
|
+
Forces -f dat -p zurrose.", type: :int, default: nil, short: "I"
|
34
|
+
opt :fi, "Optional fachinfo output.", short: "o"
|
35
|
+
opt :price, "Price source (transfer.dat) from ZurRose", default: nil
|
36
|
+
opt :tag_suffix, "XML tag suffix S. Default is none. [A-z0-9]
|
37
|
+
If S is given, it is also used as prefix of filename.", type: :string, short: "t"
|
38
|
+
opt :context, "{product|address}. product is default.", default: "product", type: :string, short: "x"
|
39
|
+
opt :calc, "create only oddb_calc.xml with GTIN, name and galenic information"
|
41
40
|
|
42
41
|
opt :skip_download, "skips downloading files it the file is already under downloads.
|
43
42
|
Downloaded files are saved under downloads"
|
44
|
-
opt :log,
|
45
|
-
opt :use_ra11zip,
|
46
|
-
|
43
|
+
opt :log, "log important actions", short: :none
|
44
|
+
opt :use_ra11zip, "Use the ra11.zip (a zipped transfer.dat from Galexis)",
|
45
|
+
default: File.exist?("ra11.zip") ? "ra11.zip" : nil, type: :string
|
47
46
|
end
|
48
|
-
|
47
|
+
|
49
48
|
@opts[:percent] = @opts[:increment]
|
50
|
-
if @opts[:increment]
|
49
|
+
if @opts[:increment]
|
51
50
|
@opts[:nonpharma] = true
|
52
51
|
@opts[:price] = :zurrose
|
53
52
|
end
|
54
|
-
@opts[:ean14]
|
53
|
+
@opts[:ean14] = @opts[:increment]
|
55
54
|
@opts.delete(:increment)
|
56
55
|
@opts[:nonpharma] = @opts[:append]
|
57
56
|
@opts.delete(:append)
|
58
|
-
if @opts[:extended]
|
57
|
+
if @opts[:extended]
|
59
58
|
@opts[:nonpharma] = true
|
60
59
|
@opts[:price] = :zurrose
|
61
60
|
@opts[:calc] = true
|
@@ -63,22 +62,22 @@ EOS
|
|
63
62
|
if @opts[:artikelstamm]
|
64
63
|
@opts[:extended] = true
|
65
64
|
@opts[:price] = :zurrose
|
66
|
-
end
|
65
|
+
end
|
67
66
|
@opts[:price] = :zurrose if @opts[:price].is_a?(TrueClass)
|
68
|
-
@opts[:price]
|
67
|
+
@opts[:price] = @opts[:price].to_sym if @opts[:price]
|
69
68
|
@opts[:ean14] = @opts[:include]
|
70
69
|
@opts[:format] = @opts[:format].to_sym if @opts[:format]
|
71
70
|
@opts.delete(:include)
|
72
71
|
@opts.delete(:help)
|
73
72
|
@opts.delete(:version)
|
74
73
|
|
75
|
-
@opts[:address]
|
76
|
-
@opts[:address]
|
74
|
+
@opts[:address] = false
|
75
|
+
@opts[:address] = true if /^addr(ess)*$/i.match?(@opts[:context])
|
77
76
|
@opts.delete(:context)
|
78
77
|
|
79
78
|
@opts.delete(:price) unless @opts[:price]
|
80
79
|
|
81
|
-
@opts.each{|k,v| @opts.delete(k) if /_given$/.match(k.to_s)}
|
80
|
+
@opts.each { |k, v| @opts.delete(k) if /_given$/.match?(k.to_s) }
|
82
81
|
end
|
83
82
|
end
|
84
83
|
end
|
@@ -1,79 +1,74 @@
|
|
1
|
-
# encoding: utf-8
|
2
|
-
|
3
1
|
# This file is shared since oddb2xml 2.0.0 (lib/oddb2xml/parse_compositions.rb)
|
4
2
|
# with oddb.org src/plugin/parse_compositions.rb
|
5
3
|
#
|
6
4
|
# It allows an easy parsing of the column P Zusammensetzung of the swissmedic packages.xlsx file
|
7
5
|
#
|
8
6
|
|
9
|
-
require
|
10
|
-
require
|
11
|
-
require
|
12
|
-
include Parslet
|
7
|
+
require "parslet"
|
8
|
+
require "parslet/convenience"
|
9
|
+
require "oddb2xml/compositions_syntax"
|
13
10
|
VERBOSE_MESSAGES ||= false
|
14
11
|
|
15
12
|
module ParseUtil
|
13
|
+
include Parslet
|
16
14
|
# this class is responsible to patch errors in swissmedic entries after
|
17
15
|
# oddb.org detected them, as it takes sometimes a few days (or more) till they get corrected
|
18
16
|
# Reports the number of occurrences of each entry
|
19
17
|
@@saved_parsed ||= {}
|
20
|
-
@@nr_saved_parsed_used
|
18
|
+
@@nr_saved_parsed_used ||= 0
|
21
19
|
|
22
20
|
class HandleSwissmedicErrors
|
23
|
-
|
24
21
|
attr_accessor :nrParsingErrors
|
25
|
-
class ErrorEntry
|
22
|
+
class ErrorEntry < Struct.new("ErrorEntry", :pattern, :replacement, :nr_occurrences)
|
26
23
|
end
|
27
24
|
|
28
25
|
def reset_errors
|
29
26
|
@errors = []
|
30
|
-
@
|
31
|
-
@
|
27
|
+
@nr_lines = 0
|
28
|
+
@nr_parsing_errors = 0
|
32
29
|
end
|
33
30
|
|
34
31
|
# error_entries should be a hash of pattern, replacement
|
35
32
|
def initialize(error_entries)
|
36
33
|
reset_errors
|
37
|
-
error_entries.each{ |pattern, replacement| @errors << ErrorEntry.new(pattern, replacement, 0) }
|
34
|
+
error_entries.each { |pattern, replacement| @errors << ErrorEntry.new(pattern, replacement, 0) }
|
38
35
|
end
|
39
36
|
|
40
37
|
def report
|
41
|
-
s = ["Report of changed compositions in #{@
|
42
|
-
@errors.each {
|
43
|
-
|
44
|
-
s << " replaced #{entry.nr_occurrences} times '#{entry.pattern}' by '#{entry.replacement}'"
|
38
|
+
s = ["Report of changed compositions in #{@nr_lines} lines. Had #{@nr_parsing_errors} parsing errors"]
|
39
|
+
@errors.each { |entry|
|
40
|
+
s << " replaced #{entry.nr_occurrences} times '#{entry.pattern}' by '#{entry.replacement}'"
|
45
41
|
}
|
46
42
|
s
|
47
43
|
end
|
48
44
|
|
49
45
|
def apply_fixes(string)
|
50
46
|
result = string.clone
|
51
|
-
@errors.each{
|
52
|
-
|entry|
|
47
|
+
@errors.each { |entry|
|
53
48
|
intermediate = result.clone
|
54
|
-
result = result.gsub(entry.pattern,
|
49
|
+
result = result.gsub(entry.pattern, entry.replacement)
|
55
50
|
unless result.eql?(intermediate)
|
56
|
-
|
57
|
-
|
51
|
+
entry.nr_occurrences += 1
|
52
|
+
puts "#{File.basename(__FILE__)}:#{__LINE__}: fixed \nbefore: #{intermediate}\nafter: #{result}" if $VERBOSE
|
58
53
|
end
|
59
54
|
}
|
60
|
-
@
|
55
|
+
@nr_lines += 1
|
61
56
|
result
|
62
57
|
end
|
63
58
|
# hepar sulfuris D6 2,2 mg hypericum perforatum D2 0,66 mg where itlacks a comma and should be hepar sulfuris D6 2,2 mg, hypericum perforatum D2 0,66 mg
|
64
59
|
end
|
65
60
|
|
66
|
-
def
|
67
|
-
string.split(/\s+/u).collect { |word| word.capitalize }.join(
|
61
|
+
def self.capitalize(string)
|
62
|
+
string.split(/\s+/u).collect { |word| word.capitalize }.join(" ").strip
|
68
63
|
end
|
69
64
|
|
70
|
-
def
|
65
|
+
def self.nr_saved_parsed_used
|
71
66
|
@@nr_saved_parsed_used
|
72
67
|
end
|
73
68
|
|
74
|
-
def
|
75
|
-
active_agents = active_agents_string ? active_agents_string.
|
76
|
-
key = [
|
69
|
+
def self.parse_compositions(composition_text, active_agents_string = "")
|
70
|
+
active_agents = active_agents_string ? active_agents_string.delete("[").downcase.split(/,\s+/) : []
|
71
|
+
key = [composition_text, active_agents]
|
77
72
|
saved_value = @@saved_parsed[key]
|
78
73
|
if saved_value
|
79
74
|
@@nr_saved_parsed_used += 1
|
@@ -81,20 +76,22 @@ module ParseUtil
|
|
81
76
|
end
|
82
77
|
comps = []
|
83
78
|
lines = composition_text.gsub(/\r\n?/u, "\n").split(/\n/u)
|
84
|
-
lines.select do
|
85
|
-
|
86
|
-
composition = ParseComposition.from_string(line)
|
79
|
+
lines.select do |line|
|
80
|
+
composition = ParseComposition.from_string(line)
|
87
81
|
if composition.is_a?(ParseComposition)
|
88
|
-
composition.substances.each do
|
89
|
-
|
90
|
-
|
91
|
-
|
92
|
-
|
93
|
-
|
94
|
-
|
95
|
-
|
96
|
-
|
97
|
-
|
82
|
+
composition.substances.each do |substance_item|
|
83
|
+
active_substance_name = substance_item.name.downcase.sub(/^cum\s/, "")
|
84
|
+
substance_item.is_active_agent = !active_agents.find { |x|
|
85
|
+
/#{x.downcase
|
86
|
+
.gsub('(', '\(')
|
87
|
+
.gsub(')', '\)')
|
88
|
+
.gsub('[', '\[')
|
89
|
+
.gsub(']', '\]')
|
90
|
+
}($|\s)/
|
91
|
+
.match(active_substance_name)
|
92
|
+
}.nil?
|
93
|
+
substance_item.is_active_agent = true if substance_item.chemical_substance && active_agents.find { |x| x.downcase.eql?(substance_item.chemical_substance.name.downcase) }
|
94
|
+
end
|
98
95
|
comps << composition
|
99
96
|
end
|
100
97
|
end
|
@@ -103,202 +100,194 @@ module ParseUtil
|
|
103
100
|
comps
|
104
101
|
rescue => error
|
105
102
|
puts "error #{error}"
|
106
|
-
|
103
|
+
# binding.pry
|
107
104
|
end
|
108
|
-
|
109
105
|
end
|
110
106
|
|
111
|
-
class IntLit
|
112
|
-
def eval
|
107
|
+
class IntLit < Struct.new(:int)
|
108
|
+
def eval
|
109
|
+
int.to_i
|
110
|
+
end
|
113
111
|
end
|
114
|
-
|
115
|
-
|
112
|
+
|
113
|
+
class QtyLit < Struct.new(:qty)
|
114
|
+
def eval
|
115
|
+
qty.to_i
|
116
|
+
end
|
116
117
|
end
|
117
118
|
|
118
119
|
class CompositionTransformer < Parslet::Transform
|
119
120
|
@@more_info = nil
|
120
|
-
def
|
121
|
+
def self.get_ratio(parse_info)
|
121
122
|
if parse_info[:ratio]
|
122
|
-
if parse_info[:ratio].to_s.length > 0
|
123
|
-
parse_info[:ratio].to_s.sub(/^,\s+/,
|
124
|
-
else
|
125
|
-
nil
|
123
|
+
if (parse_info[:ratio].to_s.length > 0) && (parse_info[:ratio].to_s != ", ")
|
124
|
+
parse_info[:ratio].to_s.sub(/^,\s+/, "").sub(/,\s+$/, "")
|
126
125
|
end
|
127
|
-
else
|
128
|
-
nil
|
129
126
|
end
|
130
127
|
end
|
131
128
|
|
132
|
-
def
|
133
|
-
return unless /^E \d\d\d/.match(substance.name)
|
129
|
+
def self.check_e_substance(substance)
|
130
|
+
return unless /^E \d\d\d/.match?(substance.name)
|
134
131
|
unless substance.more_info
|
135
132
|
case substance.name[2]
|
136
133
|
when "1"
|
137
|
-
substance.more_info =
|
134
|
+
substance.more_info = "color."
|
138
135
|
when "2"
|
139
|
-
substance.more_info =
|
140
|
-
else
|
136
|
+
substance.more_info = "conserv."
|
141
137
|
end
|
142
138
|
substance.more_info ||= @@more_info
|
143
139
|
end
|
144
140
|
@@more_info = substance.more_info
|
145
141
|
end
|
146
142
|
|
147
|
-
def
|
148
|
-
@@more_info
|
149
|
-
@@excipiens
|
150
|
-
@@excipiens.dose
|
143
|
+
def self.add_excipiens(info)
|
144
|
+
@@more_info = nil
|
145
|
+
@@excipiens = ParseSubstance.new(info[:excipiens_description] || "Excipiens")
|
146
|
+
@@excipiens.dose = info[:dose] if info[:dose]
|
151
147
|
@@excipiens.more_info = CompositionTransformer.get_ratio(info)
|
152
|
-
@@excipiens.cdose
|
148
|
+
@@excipiens.cdose = info[:dose_corresp] if info[:dose_corresp]
|
153
149
|
@@excipiens.more_info = info[:more_info] if info[:more_info]
|
154
150
|
end
|
155
151
|
|
156
|
-
rule(:
|
157
|
-
|
158
|
-
|
159
|
-
puts "#{File.basename(__FILE__)}:#{__LINE__}: dictionary #{dictionary}" if VERBOSE_MESSAGES
|
160
|
-
@@corresp = dictionary[:corresp].to_s
|
152
|
+
rule(corresp: simple(:corresp)) { |dictionary|
|
153
|
+
puts "#{File.basename(__FILE__)}:#{__LINE__}: dictionary #{dictionary}" if VERBOSE_MESSAGES
|
154
|
+
@@corresp = dictionary[:corresp].to_s
|
161
155
|
}
|
162
|
-
rule(
|
163
|
-
|
164
|
-
|
165
|
-
|
166
|
-
|
167
|
-
|
168
|
-
|
169
|
-
@@substances << substance
|
170
|
-
substance
|
156
|
+
rule(substance_name: simple(:substance_name),
|
157
|
+
dose: simple(:dose)) { |dictionary|
|
158
|
+
puts "#{File.basename(__FILE__)}:#{__LINE__}: dictionary #{dictionary}" if VERBOSE_MESSAGES
|
159
|
+
dose = dictionary[:dose].is_a?(ParseDose) ? dictionary[:dose] : nil
|
160
|
+
substance = ParseSubstance.new(dictionary[:substance_name], dose)
|
161
|
+
@@substances << substance
|
162
|
+
substance
|
171
163
|
}
|
172
164
|
|
173
|
-
rule(
|
174
|
-
|
175
|
-
|
176
|
-
puts "#{File.basename(__FILE__)}:#{__LINE__}: dictionary #{dictionary}" if VERBOSE_MESSAGES
|
177
|
-
@@corresp = dictionary[:more_info].to_s.strip.sub(/:$/, '')
|
165
|
+
rule(more_info: simple(:more_info)) { |dictionary|
|
166
|
+
puts "#{File.basename(__FILE__)}:#{__LINE__}: dictionary #{dictionary}" if VERBOSE_MESSAGES
|
167
|
+
@@corresp = dictionary[:more_info].to_s.strip.sub(/:$/, "")
|
178
168
|
}
|
179
|
-
rule(
|
180
|
-
|
181
|
-
|
182
|
-
|
183
|
-
|
184
|
-
|
185
|
-
|
186
|
-
|
187
|
-
|
188
|
-
|
189
|
-
@@substances << substance
|
190
|
-
substance
|
169
|
+
rule(more_info: simple(:more_info),
|
170
|
+
substance_name: simple(:substance_name),
|
171
|
+
dose: simple(:dose)) { |dictionary|
|
172
|
+
puts "#{File.basename(__FILE__)}:#{__LINE__}: dictionary #{dictionary}" if VERBOSE_MESSAGES
|
173
|
+
dose = dictionary[:dose].is_a?(ParseDose) ? dictionary[:dose] : nil
|
174
|
+
substance = ParseSubstance.new(dictionary[:substance_name].to_s, dose)
|
175
|
+
substance.more_info = dictionary[:more_info].to_s.strip.sub(/:$/, "") if dictionary[:more_info] && (dictionary[:more_info].to_s.length > 0)
|
176
|
+
CompositionTransformer.check_e_substance(substance)
|
177
|
+
@@substances << substance
|
178
|
+
substance
|
191
179
|
}
|
192
180
|
|
193
|
-
rule(:
|
194
|
-
:
|
195
|
-
:
|
196
|
-
|
197
|
-
|
198
|
-
|
199
|
-
|
200
|
-
|
201
|
-
|
202
|
-
substance
|
181
|
+
rule(lebensmittel_zusatz: simple(:lebensmittel_zusatz),
|
182
|
+
more_info: simple(:more_info),
|
183
|
+
digits: simple(:digits)) { |dictionary|
|
184
|
+
puts "#{File.basename(__FILE__)}:#{__LINE__}: dictionary #{dictionary}" if VERBOSE_MESSAGES
|
185
|
+
substance = ParseSubstance.new("#{dictionary[:lebensmittel_zusatz]} #{dictionary[:digits]}")
|
186
|
+
substance.more_info = dictionary[:more_info].to_s.strip.sub(/:$/, "") if dictionary[:more_info] && (dictionary[:more_info].to_s.length > 0)
|
187
|
+
CompositionTransformer.check_e_substance(substance)
|
188
|
+
@@substances << substance
|
189
|
+
substance
|
203
190
|
}
|
204
|
-
rule(:
|
205
|
-
|
206
|
-
|
207
|
-
|
208
|
-
|
209
|
-
|
210
|
-
|
211
|
-
|
212
|
-
|
213
|
-
|
214
|
-
|
215
|
-
|
216
|
-
end
|
217
|
-
nil
|
191
|
+
rule(excipiens: subtree(:excipiens)) { |dictionary|
|
192
|
+
puts "#{File.basename(__FILE__)}:#{__LINE__}: dictionary #{dictionary}" if VERBOSE_MESSAGES
|
193
|
+
info = dictionary[:excipiens].is_a?(Hash) ? dictionary[:excipiens] : dictionary[:excipiens].first
|
194
|
+
if info[:excipiens_description] ||
|
195
|
+
info[:dose] ||
|
196
|
+
info[:dose_corresp] ||
|
197
|
+
info[:more_info] ||
|
198
|
+
CompositionTransformer.get_ratio(dictionary)
|
199
|
+
CompositionTransformer.add_excipiens(info)
|
200
|
+
info
|
201
|
+
end
|
202
|
+
nil
|
218
203
|
}
|
219
|
-
rule(:
|
220
|
-
|
221
|
-
|
222
|
-
|
223
|
-
|
224
|
-
|
225
|
-
|
226
|
-
|
227
|
-
|
228
|
-
:
|
229
|
-
|
230
|
-
|
231
|
-
|
232
|
-
|
233
|
-
|
234
|
-
|
235
|
-
|
236
|
-
|
237
|
-
|
238
|
-
|
239
|
-
|
240
|
-
|
241
|
-
|
242
|
-
|
243
|
-
|
244
|
-
|
245
|
-
|
246
|
-
|
247
|
-
|
248
|
-
|
249
|
-
dictionary[:substance].salts << dictionary[:substance_ut].last
|
250
|
-
@@substances -= dictionary[:substance_ut]
|
251
|
-
end
|
252
|
-
dictionary[:substance]
|
204
|
+
rule(composition: subtree(:composition)) { |dictionary|
|
205
|
+
puts "#{File.basename(__FILE__)}:#{__LINE__}: dictionary #{dictionary}" if VERBOSE_MESSAGES
|
206
|
+
info = dictionary[:composition].is_a?(Hash) ? dictionary[:composition] : dictionary[:composition].first
|
207
|
+
CompositionTransformer.add_excipiens(info) if info.is_a?(Hash)
|
208
|
+
info
|
209
|
+
}
|
210
|
+
rule(substance: simple(:substance),
|
211
|
+
chemical_substance: simple(:chemical_substance),
|
212
|
+
substance_ut: sequence(:substance_ut),
|
213
|
+
ratio: simple(:ratio)) { |dictionary|
|
214
|
+
puts "#{File.basename(__FILE__)}:#{__LINE__}: dictionary #{dictionary}" if VERBOSE_MESSAGES
|
215
|
+
ratio = CompositionTransformer.get_ratio(dictionary)
|
216
|
+
if ratio && (ratio.length > 0)
|
217
|
+
if dictionary[:substance].more_info
|
218
|
+
dictionary[:substance].more_info += " " + ratio.strip
|
219
|
+
else
|
220
|
+
dictionary[:substance].more_info = ratio.strip
|
221
|
+
end
|
222
|
+
end
|
223
|
+
if dictionary[:chemical_substance]
|
224
|
+
dictionary[:substance].chemical_substance = dictionary[:chemical_substance]
|
225
|
+
@@substances -= [dictionary[:chemical_substance]]
|
226
|
+
end
|
227
|
+
if dictionary[:substance_ut].size > 0
|
228
|
+
dictionary[:substance].salts += dictionary[:substance_ut].last.salts
|
229
|
+
dictionary[:substance_ut].last.salts = []
|
230
|
+
dictionary[:substance].salts << dictionary[:substance_ut].last
|
231
|
+
@@substances -= dictionary[:substance_ut]
|
232
|
+
end
|
233
|
+
dictionary[:substance]
|
253
234
|
}
|
254
235
|
|
255
|
-
rule(:
|
256
|
-
rule(:
|
257
|
-
|
236
|
+
rule(int: simple(:int)) { IntLit.new(int) }
|
237
|
+
rule(number: simple(:nb)) {
|
238
|
+
/[eE.]/.match?(nb) ? Float(nb) : Integer(nb)
|
258
239
|
}
|
259
240
|
rule(
|
260
|
-
:
|
261
|
-
:
|
262
|
-
|
263
|
-
|
241
|
+
qty_range: simple(:qty_range),
|
242
|
+
unit: simple(:unit)
|
243
|
+
) {
|
244
|
+
ParseDose.new(qty_range, unit)
|
245
|
+
}
|
264
246
|
rule(
|
265
|
-
:
|
266
|
-
|
267
|
-
|
247
|
+
qty_range: simple(:qty_range)
|
248
|
+
) {
|
249
|
+
ParseDose.new(qty_range)
|
250
|
+
}
|
268
251
|
rule(
|
269
|
-
:
|
270
|
-
:
|
271
|
-
|
272
|
-
|
252
|
+
qty: simple(:qty),
|
253
|
+
unit: simple(:unit)
|
254
|
+
) {
|
255
|
+
ParseDose.new(qty, unit)
|
256
|
+
}
|
273
257
|
rule(
|
274
|
-
:
|
258
|
+
unit: simple(:unit)
|
259
|
+
) { ParseDose.new(nil, unit) }
|
275
260
|
rule(
|
276
|
-
:
|
261
|
+
qty: simple(:qty)
|
262
|
+
) { ParseDose.new(qty, nil) }
|
277
263
|
rule(
|
278
|
-
:
|
279
|
-
:
|
280
|
-
:
|
281
|
-
)
|
264
|
+
qty: simple(:qty),
|
265
|
+
unit: simple(:unit),
|
266
|
+
dose_right: simple(:dose_right)
|
267
|
+
) {
|
282
268
|
dose = ParseDose.new(qty, unit)
|
283
|
-
dose.unit = dose.unit.to_s +
|
269
|
+
dose.unit = dose.unit.to_s + " et " + ParseDose.new(dose_right).to_s
|
284
270
|
dose
|
285
271
|
}
|
286
272
|
|
287
273
|
@@substances ||= []
|
288
|
-
@@excipiens
|
289
|
-
def
|
290
|
-
@@more_info
|
274
|
+
@@excipiens = nil
|
275
|
+
def self.clear_substances
|
276
|
+
@@more_info = nil
|
291
277
|
@@substances = []
|
292
|
-
@@excipiens
|
293
|
-
@@corresp
|
278
|
+
@@excipiens = nil
|
279
|
+
@@corresp = nil
|
294
280
|
end
|
295
|
-
|
281
|
+
|
282
|
+
def self.substances
|
296
283
|
@@substances.clone
|
297
284
|
end
|
298
|
-
|
285
|
+
|
286
|
+
def self.excipiens
|
299
287
|
@@excipiens ? @@excipiens.clone : nil
|
300
288
|
end
|
301
|
-
|
289
|
+
|
290
|
+
def self.corresp
|
302
291
|
@@corresp ? @@corresp.clone : nil
|
303
292
|
end
|
304
293
|
end
|
@@ -306,29 +295,31 @@ end
|
|
306
295
|
class ParseDose
|
307
296
|
attr_reader :qty, :qty_range
|
308
297
|
attr_accessor :unit
|
309
|
-
def initialize(qty=nil, unit=nil)
|
298
|
+
def initialize(qty = nil, unit = nil)
|
310
299
|
puts "ParseDose.new from #{qty.inspect} #{unit.inspect} #{unit.inspect}" if VERBOSE_MESSAGES
|
311
|
-
if qty
|
312
|
-
string = qty.to_s.
|
313
|
-
if string.index(
|
300
|
+
if qty && (qty.is_a?(String) || qty.is_a?(Parslet::Slice))
|
301
|
+
string = qty.to_s.delete("'")
|
302
|
+
if string.index("-") && (string.index("-") > 0)
|
314
303
|
@qty_range = string
|
315
304
|
elsif string.index(/\^|\*|\//)
|
316
|
-
@qty
|
305
|
+
@qty = string
|
317
306
|
else
|
318
|
-
@qty
|
307
|
+
@qty = string.index(".") ? string.to_f : string.to_i
|
319
308
|
end
|
320
309
|
elsif qty
|
321
|
-
@qty
|
310
|
+
@qty = qty.eval
|
322
311
|
else
|
323
312
|
@qty = 1
|
324
313
|
end
|
325
314
|
@unit = unit ? unit.to_s : nil
|
326
315
|
end
|
316
|
+
|
327
317
|
def eval
|
328
318
|
self
|
329
319
|
end
|
320
|
+
|
330
321
|
def to_s
|
331
|
-
return @unit unless @qty
|
322
|
+
return @unit unless @qty || @qty_range
|
332
323
|
res = "#{@qty}#{@qty_range}"
|
333
324
|
res = "#{res} #{@unit}" if @unit
|
334
325
|
res
|
@@ -336,32 +327,36 @@ class ParseDose
|
|
336
327
|
end
|
337
328
|
|
338
329
|
class ParseSubstance
|
339
|
-
attr_accessor
|
340
|
-
attr_accessor
|
341
|
-
|
330
|
+
attr_accessor :name, :chemical_substance, :chemical_qty, :chemical_unit, :is_active_agent, :dose, :cdose, :is_excipiens
|
331
|
+
attr_accessor :description, :more_info, :salts
|
332
|
+
attr_writer :unit, :qty
|
333
|
+
def initialize(name, dose = nil)
|
342
334
|
puts "ParseSubstance.new from #{name.inspect} #{dose.inspect}" if VERBOSE_MESSAGES
|
343
335
|
@name = ParseUtil.capitalize(name.to_s)
|
344
|
-
@name.sub!(/\baqua\b/i,
|
345
|
-
@name.sub!(/\bDER\b/i,
|
346
|
-
@name.sub!(/\bad pulverem\b/i,
|
347
|
-
@name.sub!(/\bad iniectabilia\b/i,
|
348
|
-
@name.sub!(/\bad suspensionem\b/i,
|
349
|
-
@name.sub!(/\bad solutionem\b/i,
|
350
|
-
@name.sub!(/\bpro compresso\b/i,
|
351
|
-
@name.sub!(/\bpro\b/i,
|
352
|
-
@name.sub!(/ Q\.S\. /i,
|
353
|
-
@name.sub!(/\s+\bpro$/i,
|
336
|
+
@name.sub!(/\baqua\b/i, "aqua")
|
337
|
+
@name.sub!(/\bDER\b/i, "DER")
|
338
|
+
@name.sub!(/\bad pulverem\b/i, "ad pulverem")
|
339
|
+
@name.sub!(/\bad iniectabilia\b/i, "ad iniectabilia")
|
340
|
+
@name.sub!(/\bad suspensionem\b/i, "ad suspensionem")
|
341
|
+
@name.sub!(/\bad solutionem\b/i, "ad solutionem")
|
342
|
+
@name.sub!(/\bpro compresso\b/i, "pro compresso")
|
343
|
+
@name.sub!(/\bpro\b/i, "pro")
|
344
|
+
@name.sub!(/ Q\.S\. /i, " q.s. ")
|
345
|
+
@name.sub!(/\s+\bpro$/i, "")
|
354
346
|
@dose = dose if dose
|
355
347
|
@salts = []
|
356
348
|
end
|
349
|
+
|
357
350
|
def qty
|
358
|
-
return @dose.qty_range if @dose
|
351
|
+
return @dose.qty_range if @dose&.qty_range
|
359
352
|
@dose ? @dose.qty : @qty
|
360
353
|
end
|
354
|
+
|
361
355
|
def unit
|
362
356
|
return @unit if @unit
|
363
357
|
@dose ? @dose.unit : @unit
|
364
358
|
end
|
359
|
+
|
365
360
|
def to_string
|
366
361
|
s = "#{@name}:"
|
367
362
|
s = " #{@qty}" if @qty
|
@@ -372,51 +367,54 @@ class ParseSubstance
|
|
372
367
|
end
|
373
368
|
|
374
369
|
class ParseComposition
|
375
|
-
attr_accessor
|
376
|
-
|
377
|
-
|
378
|
-
|
379
|
-
|
380
|
-
|
381
|
-
|
382
|
-
|
383
|
-
|
384
|
-
|
385
|
-
|
386
|
-
|
387
|
-
|
388
|
-
|
389
|
-
|
390
|
-
|
391
|
-
|
392
|
-
|
393
|
-
|
370
|
+
attr_accessor :source, :label, :label_description, :substances, :galenic_form, :route_of_administration,
|
371
|
+
:corresp, :excipiens
|
372
|
+
|
373
|
+
ERRORS_TO_FIX = {
|
374
|
+
/(\d+)\s+-\s*(\d+)/ => '\1-\2',
|
375
|
+
"o.1" => "0.1",
|
376
|
+
/\s+(mg|g) DER:/ => ' \1, DER:',
|
377
|
+
" mind. " => " min. ",
|
378
|
+
" streptococci pyogen. " => " streptococci pyogen ",
|
379
|
+
" ut excipiens" => ", excipiens",
|
380
|
+
" Corresp. " => " corresp. ",
|
381
|
+
",," => ",",
|
382
|
+
"avena elatior,dactylis glomerata" => "avena elatior, dactylis glomerata",
|
383
|
+
" color.: corresp. " => " corresp.",
|
384
|
+
/ U\.: (excipiens) / => ' U. \1 ',
|
385
|
+
/ U\.: (alnus|betula|betula|betulae) / => ' U., \1 ',
|
386
|
+
/^(acari allergeni extractum (\(acarus siro\)|).+\s+U\.:)/ => 'A): \1',
|
387
|
+
"Solvens: alprostadilum" => "alprostadilum"
|
388
|
+
}
|
389
|
+
@@error_handler = ParseUtil::HandleSwissmedicErrors.new(ERRORS_TO_FIX)
|
394
390
|
|
395
391
|
def initialize(source)
|
396
392
|
@substances ||= []
|
397
393
|
puts "ParseComposition.new from #{source.inspect} @substances #{@substances.inspect}" if VERBOSE_MESSAGES
|
398
394
|
@source = source.to_s
|
399
395
|
end
|
400
|
-
|
401
|
-
|
396
|
+
|
397
|
+
def self.reset
|
398
|
+
@@error_handler = ParseUtil::HandleSwissmedicErrors.new(ERRORS_TO_FIX)
|
402
399
|
end
|
403
|
-
|
404
|
-
|
400
|
+
|
401
|
+
def self.report
|
402
|
+
@@error_handler.report
|
405
403
|
end
|
406
|
-
|
407
|
-
|
408
|
-
|
404
|
+
|
405
|
+
def self.from_string(string)
|
406
|
+
return nil if string.nil? || string.eql?(".") || string.eql?("")
|
407
|
+
stripped = string.gsub(/^"|["\n]+$/, "")
|
409
408
|
return nil unless stripped
|
410
|
-
if /(U\.I\.|U\.)$/.match(stripped)
|
411
|
-
|
409
|
+
cleaned = if /(U\.I\.|U\.)$/.match?(stripped)
|
410
|
+
stripped
|
412
411
|
else
|
413
|
-
|
412
|
+
stripped.sub(/\.+$/, "")
|
414
413
|
end
|
415
|
-
value = nil
|
416
414
|
puts "ParseComposition.from_string #{string}" if VERBOSE_MESSAGES # /ng-tr/.match(Socket.gethostbyname(Socket.gethostname).first)
|
417
415
|
|
418
|
-
cleaned = @@
|
419
|
-
puts "ParseComposition.new cleaned #{cleaned}" if VERBOSE_MESSAGES
|
416
|
+
cleaned = @@error_handler.apply_fixes(cleaned)
|
417
|
+
puts "ParseComposition.new cleaned #{cleaned}" if VERBOSE_MESSAGES && !cleaned.eql?(stripped)
|
420
418
|
CompositionTransformer.clear_substances
|
421
419
|
result = ParseComposition.new(cleaned)
|
422
420
|
parser = CompositionParser.new
|
@@ -430,8 +428,8 @@ class ParseComposition
|
|
430
428
|
ast = transf.apply(parser.parse(cleaned))
|
431
429
|
end
|
432
430
|
rescue Parslet::ParseFailed => error
|
433
|
-
@@
|
434
|
-
puts "#{File.basename(__FILE__)}:#{__LINE__}: failed parsing ==> #{cleaned}"
|
431
|
+
@@error_handler.nrParsingErrors += 1
|
432
|
+
puts "#{File.basename(__FILE__)}:#{__LINE__}: failed parsing ==> #{cleaned} #{error}"
|
435
433
|
return nil
|
436
434
|
end
|
437
435
|
result.source = string
|
@@ -441,50 +439,46 @@ class ParseComposition
|
|
441
439
|
result.substances = CompositionTransformer.substances
|
442
440
|
result.excipiens = CompositionTransformer.excipiens
|
443
441
|
result.corresp = CompositionTransformer.corresp if CompositionTransformer.corresp
|
444
|
-
if result
|
445
|
-
pro_qty = "/#{result.excipiens.qty} #{result.excipiens.unit}".sub(/\/1\s+/,
|
446
|
-
result.substances.each {
|
447
|
-
|
448
|
-
|
449
|
-
|
450
|
-
substance.dose.unit = "#{substance.dose.unit}#{pro_qty}" if substance.unit and not substance.unit.eql?(result.excipiens.unit)
|
442
|
+
if result&.excipiens&.unit
|
443
|
+
pro_qty = "/#{result.excipiens.qty} #{result.excipiens.unit}".sub(/\/1\s+/, "/")
|
444
|
+
result.substances.each { |substance|
|
445
|
+
next unless substance.is_a?(ParseSubstance)
|
446
|
+
substance.chemical_substance.unit = "#{substance.chemical_substance.unit}#{pro_qty}" if substance.chemical_substance
|
447
|
+
substance.dose.unit = "#{substance.dose.unit}#{pro_qty}" if substance.unit && !substance.unit.eql?(result.excipiens.unit)
|
451
448
|
}
|
452
449
|
end
|
453
|
-
if ast.is_a?(Array)
|
450
|
+
if ast.is_a?(Array) && ast.first.is_a?(Hash)
|
454
451
|
label = ast.first[:label].to_s if ast.first[:label]
|
455
452
|
label_description = ast.first[:label_description].to_s if ast.first[:label_description]
|
456
|
-
elsif ast
|
457
|
-
label = ast[:label].to_s if
|
453
|
+
elsif ast&.is_a?(Hash)
|
454
|
+
label = ast[:label].to_s if ast[:label]
|
458
455
|
label_description = ast[:label_description].to_s if ast[:label_description]
|
459
456
|
end
|
460
457
|
if label
|
461
|
-
if label
|
462
|
-
result.label
|
458
|
+
if label && !/((A|B|C|D|E|I|II|III|IV|\)+)\s+et\s+(A|B|C|D|E|I|II|III|IV|\))+)/.match(label)
|
459
|
+
result.label = label
|
463
460
|
end
|
464
|
-
result.label_description = label_description.gsub(/:+$/,
|
461
|
+
result.label_description = label_description.gsub(/:+$/, "").strip if label_description
|
465
462
|
end
|
466
|
-
result.corresp = ast[:corresp].to_s.sub(/:\s+/,
|
467
|
-
|
463
|
+
result.corresp = ast[:corresp].to_s.sub(/:\s+/, "") if !result.corresp && ast.is_a?(Hash) && ast[:corresp]
|
464
|
+
result
|
468
465
|
end
|
469
466
|
end
|
470
467
|
|
471
468
|
class GalenicFormTransformer < CompositionTransformer
|
472
|
-
|
473
|
-
|
474
|
-
|
475
|
-
|
476
|
-
|
477
|
-
|
478
|
-
name = dictionary[:preparation_name] ? dictionary[:preparation_name].to_s : nil
|
479
|
-
form = dictionary[:galenic_form] ? dictionary[:galenic_form].to_s : nil
|
480
|
-
# name, form
|
469
|
+
rule(preparation_name: simple(:preparation_name),
|
470
|
+
galenic_form: simple(:preparation_name)) { |dictionary|
|
471
|
+
puts "#{File.basename(__FILE__)}:#{__LINE__}: dictionary #{dictionary}" if VERBOSE_MESSAGES
|
472
|
+
dictionary[:preparation_name] ? dictionary[:preparation_name].to_s : nil
|
473
|
+
dictionary[:galenic_form] ? dictionary[:galenic_form].to_s : nil
|
474
|
+
# name, form
|
481
475
|
}
|
482
476
|
end
|
483
477
|
|
484
478
|
class ParseGalenicForm
|
485
|
-
def
|
486
|
-
return nil if string
|
487
|
-
stripped = string.gsub(/^"|["\n]+$/,
|
479
|
+
def self.from_string(string)
|
480
|
+
return nil if string.nil?
|
481
|
+
stripped = string.gsub(/^"|["\n]+$/, "")
|
488
482
|
return nil unless stripped
|
489
483
|
puts "ParseGalenicForm.from_string #{string}" if VERBOSE_MESSAGES # /ng-tr/.match(Socket.gethostbyname(Socket.gethostname).first)
|
490
484
|
|
@@ -499,13 +493,13 @@ class ParseGalenicForm
|
|
499
493
|
ast = transf.apply(parser.parse(string))
|
500
494
|
end
|
501
495
|
rescue Parslet::ParseFailed => error
|
502
|
-
@@
|
503
|
-
puts "#{File.basename(__FILE__)}:#{__LINE__}: failed parsing ==> #{string}"
|
496
|
+
@@error_handler.nrParsingErrors += 1
|
497
|
+
puts "#{File.basename(__FILE__)}:#{__LINE__}: failed parsing ==> #{string} #{error}"
|
504
498
|
return nil
|
505
499
|
end
|
506
500
|
return [] unless ast
|
507
|
-
form = ast[:galenic_form] ? ast[:galenic_form].to_s.sub(/^\/\s+/,
|
501
|
+
form = ast[:galenic_form] ? ast[:galenic_form].to_s.sub(/^\/\s+/, "") : nil
|
508
502
|
name = ast[:prepation_name] ? ast[:prepation_name].to_s.strip : nil
|
509
|
-
|
503
|
+
[name, form]
|
510
504
|
end
|
511
505
|
end
|