oddb2xml 2.7.1 → 2.7.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.github/workflows/ruby.yml +1 -1
- data/.standard.yml +2 -0
- data/Gemfile +3 -3
- data/History.txt +8 -0
- data/README.md +1 -1
- data/Rakefile +24 -23
- data/bin/check_artikelstamm +11 -11
- data/bin/compare_v5 +23 -23
- data/bin/oddb2xml +14 -13
- data/lib/oddb2xml.rb +1 -1
- data/lib/oddb2xml/builder.rb +1070 -1038
- data/lib/oddb2xml/calc.rb +232 -233
- data/lib/oddb2xml/chapter_70_hack.rb +38 -32
- data/lib/oddb2xml/cli.rb +252 -236
- data/lib/oddb2xml/compare.rb +70 -59
- data/lib/oddb2xml/compositions_syntax.rb +448 -430
- data/lib/oddb2xml/compressor.rb +20 -20
- data/lib/oddb2xml/downloader.rb +153 -127
- data/lib/oddb2xml/extractor.rb +302 -289
- data/lib/oddb2xml/options.rb +34 -35
- data/lib/oddb2xml/parslet_compositions.rb +263 -269
- data/lib/oddb2xml/semantic_check.rb +39 -33
- data/lib/oddb2xml/util.rb +163 -163
- data/lib/oddb2xml/version.rb +1 -1
- data/lib/oddb2xml/xml_definitions.rb +32 -33
- data/oddb2xml.gemspec +31 -32
- data/spec/artikelstamm_spec.rb +111 -110
- data/spec/builder_spec.rb +489 -505
- data/spec/calc_spec.rb +552 -593
- data/spec/check_artikelstamm_spec.rb +26 -26
- data/spec/cli_spec.rb +173 -174
- data/spec/compare_spec.rb +9 -11
- data/spec/composition_syntax_spec.rb +390 -409
- data/spec/compressor_spec.rb +48 -48
- data/spec/data/transfer.dat +1 -0
- data/spec/data_helper.rb +47 -49
- data/spec/downloader_spec.rb +247 -260
- data/spec/extractor_spec.rb +171 -159
- data/spec/galenic_spec.rb +233 -256
- data/spec/options_spec.rb +116 -119
- data/spec/parslet_spec.rb +833 -861
- data/spec/spec_helper.rb +154 -153
- data/test_options.rb +39 -42
- data/tools/win_fetch_cacerts.rb +2 -3
- metadata +19 -3
data/lib/oddb2xml/extractor.rb
CHANGED
@@ -1,11 +1,10 @@
|
|
1
|
-
|
2
|
-
require
|
3
|
-
require
|
4
|
-
require
|
5
|
-
require
|
6
|
-
require
|
7
|
-
require
|
8
|
-
require 'oddb2xml/xml_definitions'
|
1
|
+
require "nokogiri"
|
2
|
+
require "spreadsheet"
|
3
|
+
require "stringio"
|
4
|
+
require "rubyXL"
|
5
|
+
require "rubyXL/convenience_methods/workbook"
|
6
|
+
require "csv"
|
7
|
+
require "oddb2xml/xml_definitions"
|
9
8
|
|
10
9
|
module Oddb2xml
|
11
10
|
module TxtExtractorMethods
|
@@ -13,23 +12,26 @@ module Oddb2xml
|
|
13
12
|
Oddb2xml.log("TxtExtractorMethods #{str} #{str.to_s.size} bytes")
|
14
13
|
@io = StringIO.new(str)
|
15
14
|
end
|
15
|
+
|
16
16
|
def to_hash
|
17
17
|
data = {}
|
18
|
-
while line = @io.gets
|
19
|
-
next unless
|
20
|
-
ean13 = line.chomp.
|
18
|
+
while (line = @io.gets)
|
19
|
+
next unless /\d{13}/.match?(line)
|
20
|
+
ean13 = line.chomp.delete("\"")
|
21
21
|
data[ean13] = true
|
22
22
|
end
|
23
23
|
data
|
24
24
|
end
|
25
25
|
end
|
26
|
+
|
26
27
|
class Extractor
|
27
28
|
attr_accessor :xml
|
28
29
|
def initialize(xml)
|
29
|
-
Oddb2xml.log("Extractor #{xml
|
30
|
+
Oddb2xml.log("Extractor #{xml} xml #{xml.size} bytes")
|
30
31
|
@xml = xml
|
31
32
|
end
|
32
33
|
end
|
34
|
+
|
33
35
|
class LppvExtractor < Extractor
|
34
36
|
include TxtExtractorMethods
|
35
37
|
end
|
@@ -37,156 +39,152 @@ module Oddb2xml
|
|
37
39
|
class BagXmlExtractor < Extractor
|
38
40
|
def to_hash
|
39
41
|
data = {}
|
40
|
-
result = PreparationsEntry.parse(@xml.sub(
|
42
|
+
result = PreparationsEntry.parse(@xml.sub(STRIP_FOR_SAX_MACHINE, ""), lazy: true)
|
41
43
|
result.Preparations.Preparation.each do |seq|
|
42
|
-
if seq.SwissmedicNo5.eql?(
|
44
|
+
if seq.SwissmedicNo5.eql?("0")
|
43
45
|
puts "BagXmlExtractor Skipping SwissmedicNo5 0 for #{seq.NameDe} #{seq.DescriptionDe} #{seq.CommentDe}"
|
44
46
|
next
|
45
47
|
end
|
46
48
|
item = {}
|
47
|
-
item[:data_origin]
|
48
|
-
item[:refdata]
|
49
|
-
item[:product_key]
|
50
|
-
item[:desc_de]
|
51
|
-
item[:desc_fr]
|
52
|
-
item[:name_de]
|
53
|
-
item[:name_fr]
|
54
|
-
item[:swissmedic_number5] = (num5 = seq.SwissmedicNo5) ?
|
55
|
-
item[:org_gen_code] = (orgc = seq.OrgGenCode)
|
56
|
-
item[:deductible]
|
57
|
-
item[:atc_code]
|
58
|
-
item[:comment_de]
|
59
|
-
item[:comment_fr]
|
60
|
-
item[:it_code]
|
49
|
+
item[:data_origin] = "bag_xml"
|
50
|
+
item[:refdata] = true
|
51
|
+
item[:product_key] = seq.ProductCommercial
|
52
|
+
item[:desc_de] = (desc = seq.DescriptionDe) ? desc : ""
|
53
|
+
item[:desc_fr] = (desc = seq.DescriptionFr) ? desc : ""
|
54
|
+
item[:name_de] = (name = seq.NameDe) ? name : ""
|
55
|
+
item[:name_fr] = (name = seq.NameFr) ? name : ""
|
56
|
+
item[:swissmedic_number5] = (num5 = seq.SwissmedicNo5) ? num5.rjust(5, "0") : ""
|
57
|
+
item[:org_gen_code] = (orgc = seq.OrgGenCode) ? orgc : ""
|
58
|
+
item[:deductible] = (ddbl = seq.FlagSB20) ? ddbl : ""
|
59
|
+
item[:atc_code] = (atcc = seq.AtcCode) ? atcc : ""
|
60
|
+
item[:comment_de] = (info = seq.CommentDe) ? info : ""
|
61
|
+
item[:comment_fr] = (info = seq.CommentFr) ? info : ""
|
62
|
+
item[:it_code] = ""
|
61
63
|
seq.ItCodes.ItCode.each do |itc|
|
62
64
|
if item[:it_code].to_s.empty?
|
63
65
|
it_code = itc.Code.to_s
|
64
|
-
item[:it_code] =
|
66
|
+
item[:it_code] = /(\d+)\.(\d+)\.(\d+)./.match?(it_code) ? it_code : ""
|
65
67
|
end
|
66
68
|
end
|
67
69
|
item[:substances] = []
|
68
70
|
seq.Substances.Substance.each_with_index do |sub, i|
|
69
71
|
item[:substances] << {
|
70
|
-
:
|
71
|
-
:
|
72
|
-
:
|
73
|
-
:
|
72
|
+
index: i.to_s,
|
73
|
+
name: (name = sub.DescriptionLa) ? name : "",
|
74
|
+
quantity: (qtty = sub.Quantity) ? qtty : "",
|
75
|
+
unit: (unit = sub.QuantityUnit) ? unit : ""
|
74
76
|
}
|
75
77
|
end
|
76
78
|
item[:pharmacodes] = []
|
77
|
-
item[:packages]
|
79
|
+
item[:packages] = {} # pharmacode => package
|
78
80
|
seq.Packs.Pack.each do |pac|
|
79
81
|
if pac.SwissmedicNo8 && pac.SwissmedicNo8.length < 8
|
80
82
|
puts "BagXmlExtractor: Adding leading zeros for SwissmedicNo8 #{pac.SwissmedicNo8} BagDossierNo #{pac.BagDossierNo} PackId #{pac.PackId} #{item[:name_de]}" if $VERBOSE
|
81
|
-
pac.SwissmedicNo8
|
83
|
+
pac.SwissmedicNo8 = pac.SwissmedicNo8.rjust(8, "0")
|
82
84
|
end
|
83
85
|
unless pac.GTIN
|
84
|
-
|
86
|
+
if pac.SwissmedicNo8
|
87
|
+
ean12 = "7680" + pac.SwissmedicNo8
|
88
|
+
pac.GTIN = (ean12 + Oddb2xml.calc_checksum(ean12)) unless @artikelstamm
|
89
|
+
# puts "BagXmlExtractor: Missing GTIN in SwissmedicNo8 #{pac.SwissmedicNo8} BagDossierNo #{pac.BagDossierNo} PackId #{pac.PackId} #{item[:name_de]}."
|
90
|
+
else
|
85
91
|
puts "BagXmlExtractor: Missing GTIN and SwissmedicNo8 in SwissmedicNo8 #{pac.SwissmedicNo8} BagDossierNo #{pac.BagDossierNo} PackId #{pac.PackId} #{item[:name_de]}"
|
86
92
|
next
|
87
|
-
else
|
88
|
-
ean12 = '7680' + pac.SwissmedicNo8
|
89
|
-
pac.GTIN = (ean12 + Oddb2xml.calc_checksum(ean12)) unless @artikelstamm
|
90
|
-
# puts "BagXmlExtractor: Missing GTIN in SwissmedicNo8 #{pac.SwissmedicNo8} BagDossierNo #{pac.BagDossierNo} PackId #{pac.PackId} #{item[:name_de]}."
|
91
93
|
end
|
92
94
|
end
|
93
95
|
ean13 = pac.GTIN.to_s
|
94
96
|
Oddb2xml.setEan13forNo8(pac.SwissmedicNo8, ean13) if pac.SwissmedicNo8
|
95
97
|
# packages
|
96
|
-
exf = {:
|
97
|
-
if pac
|
98
|
-
exf[:price]
|
99
|
-
exf[:valid_date] =
|
100
|
-
exf[:price_code] =
|
98
|
+
exf = {price: "", valid_date: "", price_code: ""}
|
99
|
+
if pac&.Prices&.ExFactoryPrice
|
100
|
+
exf[:price] = pac.Prices.ExFactoryPrice.Price if pac.Prices.ExFactoryPrice.Price
|
101
|
+
exf[:valid_date] = pac.Prices.ExFactoryPrice.ValidFromDate if pac.Prices.ExFactoryPrice.ValidFromDate
|
102
|
+
exf[:price_code] = pac.Prices.ExFactoryPrice.PriceTypeCode if pac.Prices.ExFactoryPrice.PriceTypeCode
|
101
103
|
end
|
102
|
-
pub = {:
|
103
|
-
if pac
|
104
|
-
pub[:price]
|
105
|
-
pub[:valid_date] =
|
106
|
-
pub[:price_code] =
|
104
|
+
pub = {price: "", valid_date: "", price_code: ""}
|
105
|
+
if pac&.Prices&.PublicPrice
|
106
|
+
pub[:price] = pac.Prices.PublicPrice.Price if pac.Prices.PublicPrice.Price
|
107
|
+
pub[:valid_date] = pac.Prices.PublicPrice.ValidFromDate if pac.Prices.PublicPrice.ValidFromDate
|
108
|
+
pub[:price_code] = pac.Prices.PublicPrice.PriceTypeCode if pac.Prices.PublicPrice.PriceTypeCode
|
107
109
|
end
|
108
110
|
item[:packages][ean13] = {
|
109
|
-
:
|
110
|
-
:
|
111
|
-
:
|
112
|
-
:
|
113
|
-
:
|
114
|
-
:
|
115
|
-
:
|
116
|
-
:
|
117
|
-
:
|
111
|
+
ean13: ean13,
|
112
|
+
name_de: (desc = seq.NameDe) ? desc : "",
|
113
|
+
name_fr: (desc = seq.NameFr) ? desc : "",
|
114
|
+
desc_de: (desc = pac.DescriptionDe) ? desc : "",
|
115
|
+
desc_fr: (desc = pac.DescriptionFr) ? desc : "",
|
116
|
+
sl_entry: true,
|
117
|
+
swissmedic_category: (cat = pac.SwissmedicCategory) ? cat : "",
|
118
|
+
swissmedic_number8: (num = pac.SwissmedicNo8) ? num : "",
|
119
|
+
prices: {exf_price: exf, pub_price: pub}
|
118
120
|
}
|
119
121
|
# related all limitations
|
120
122
|
item[:packages][ean13][:limitations] = []
|
121
|
-
limitations = Hash.new{|h,k| h[k] = [] }
|
122
|
-
if seq.Limitations
|
123
|
-
|
124
|
-
else
|
125
|
-
limitations[:seq] = nil
|
123
|
+
limitations = Hash.new { |h, k| h[k] = [] }
|
124
|
+
limitations[:seq] = if seq.Limitations
|
125
|
+
seq.Limitations.Limitation.collect { |x| x }
|
126
126
|
end
|
127
127
|
# in it-codes
|
128
|
-
if seq
|
128
|
+
if seq&.ItCodes && seq&.ItCodes&.ItCode
|
129
129
|
limitations[:itc] = []
|
130
|
-
seq.ItCodes.ItCode.each { |x|
|
130
|
+
seq.ItCodes.ItCode.each { |x| limitations[:itc] += x.Limitations.Limitation if x.Limitations.Limitation }
|
131
131
|
else
|
132
|
-
limitations[:itc] =nil
|
132
|
+
limitations[:itc] = nil
|
133
133
|
end
|
134
134
|
# in pac
|
135
|
-
if pac
|
136
|
-
|
137
|
-
else
|
138
|
-
limitations[:pac] = nil
|
135
|
+
limitations[:pac] = if pac && pac.Limitations
|
136
|
+
(lims = pac.Limitations.Limitation) ? lims.to_a : nil
|
139
137
|
end
|
140
138
|
limitations.each_pair do |lim_key, lims|
|
141
|
-
key =
|
142
|
-
id
|
139
|
+
key = ""
|
140
|
+
id = ""
|
143
141
|
case lim_key
|
144
142
|
when :seq, :itc
|
145
143
|
key = :swissmedic_number5
|
146
|
-
id
|
144
|
+
id = item[key].to_s
|
147
145
|
when :pac
|
148
146
|
key = :swissmedic_number8
|
149
|
-
id
|
147
|
+
id = item[:packages][ean13][key].to_s
|
150
148
|
end
|
151
|
-
if id.empty? && item[:packages][ean13][
|
149
|
+
if id.empty? && item[:packages][ean13][:swissmedic_number8]
|
152
150
|
key = :swissmedic_number8
|
153
|
-
id
|
151
|
+
id = item[:packages][ean13][key].to_s
|
154
152
|
end
|
155
|
-
lims
|
153
|
+
lims&.each do |lim|
|
156
154
|
limitation = {
|
157
|
-
:
|
158
|
-
:
|
159
|
-
:
|
160
|
-
:
|
161
|
-
:
|
162
|
-
:
|
163
|
-
:
|
164
|
-
:
|
165
|
-
:
|
166
|
-
:
|
155
|
+
it: item[:it_code],
|
156
|
+
key: key,
|
157
|
+
id: id,
|
158
|
+
code: (lic = lim.LimitationCode) ? lic : "",
|
159
|
+
type: (lit = lim.LimitationType) ? lit : "",
|
160
|
+
value: (liv = lim.LimitationValue) ? liv : "",
|
161
|
+
niv: (niv = lim.LimitationNiveau) ? niv : "",
|
162
|
+
desc_de: (dsc = lim.DescriptionDe) ? dsc : "",
|
163
|
+
desc_fr: (dsc = lim.DescriptionFr) ? dsc : "",
|
164
|
+
vdate: (dat = lim.ValidFromDate) ? dat : ""
|
167
165
|
}
|
168
166
|
deleted = false
|
169
|
-
if upto = ((thr = lim.ValidThruDate) ? thr : nil)
|
170
|
-
upto =~ /\d{2}\.\d{2}\.\d{2}/
|
167
|
+
if (upto = ((thr = lim.ValidThruDate) ? thr : nil)) &&
|
168
|
+
upto =~ (/\d{2}\.\d{2}\.\d{2}/)
|
171
169
|
begin
|
172
|
-
deleted = true if Date.strptime(upto,
|
170
|
+
deleted = true if Date.strptime(upto, "%d.%m.%y") >= Date.today
|
173
171
|
rescue ArgumentError
|
174
172
|
end
|
175
173
|
end
|
176
174
|
limitation[:del] = deleted
|
177
175
|
item[:packages][ean13][:limitations] << limitation
|
178
|
-
end
|
176
|
+
end
|
179
177
|
end
|
180
178
|
# limitation points
|
181
179
|
pts = pac.PointLimitations.PointLimitation.first # only first points
|
182
|
-
item[:packages][ean13][:limitation_points] = pts ? pts.Points :
|
180
|
+
item[:packages][ean13][:limitation_points] = pts ? pts.Points : ""
|
183
181
|
if pac.SwissmedicNo8
|
184
|
-
|
185
|
-
|
186
|
-
|
187
|
-
|
188
|
-
|
189
|
-
|
182
|
+
ean12 = "7680" + pac.SwissmedicNo8
|
183
|
+
correct_ean13 = ean12 + Oddb2xml.calc_checksum(ean12)
|
184
|
+
unless pac.GTIN.eql?(correct_ean13)
|
185
|
+
puts "pac.GTIN #{pac.GTIN} should be #{correct_ean13}"
|
186
|
+
item[:packages][ean13][:CORRECT_EAN13] = correct_ean13
|
187
|
+
end
|
190
188
|
end
|
191
189
|
data[ean13] = item
|
192
190
|
end
|
@@ -197,58 +195,60 @@ module Oddb2xml
|
|
197
195
|
|
198
196
|
class RefdataExtractor < Extractor
|
199
197
|
def initialize(xml, type)
|
200
|
-
@type = (type == :pharma ?
|
198
|
+
@type = (type == :pharma ? "PHARMA" : "NONPHARMA")
|
201
199
|
super(xml)
|
202
200
|
end
|
201
|
+
|
203
202
|
def to_hash
|
204
203
|
data = {}
|
205
|
-
result = SwissRegArticleEntry.parse(@xml.sub(
|
204
|
+
result = SwissRegArticleEntry.parse(@xml.sub(STRIP_FOR_SAX_MACHINE, ""), lazy: true)
|
206
205
|
items = result.ARTICLE.ITEM
|
207
206
|
items.each do |pac|
|
208
|
-
ean13 = (gtin = pac.GTIN.to_s) ? gtin:
|
207
|
+
ean13 = (gtin = pac.GTIN.to_s) ? gtin : "0"
|
209
208
|
if ean13.size < 13
|
210
209
|
puts "Refdata #{@type} use 13 chars not #{ean13.size} for #{ean13}" if $VERBOSE
|
211
|
-
ean13 = ean13.rjust(13,
|
210
|
+
ean13 = ean13.rjust(13, "0")
|
212
211
|
end
|
213
|
-
if ean13.size == 14 && ean13[0] ==
|
212
|
+
if ean13.size == 14 && ean13[0] == "0"
|
214
213
|
puts "Refdata #{@type} remove leading '0' for #{ean13}" if $VERBOSE
|
215
214
|
ean13 = ean13[1..-1]
|
216
215
|
end
|
217
216
|
# but in refdata_nonPharma we have a about 700 GTINs which are 14 characters and longer
|
218
217
|
item = {}
|
219
|
-
item[:ean13]
|
220
|
-
item[:no8]
|
221
|
-
item[:data_origin]
|
222
|
-
item[:refdata]
|
223
|
-
item[:_type]
|
224
|
-
item[:last_change]
|
225
|
-
item[:desc_de]
|
226
|
-
item[:desc_fr]
|
227
|
-
item[:atc_code]
|
228
|
-
item[:company_name] = (nam = pac.AUTH_HOLDER_NAME) ? nam:
|
229
|
-
item[:company_ean]
|
218
|
+
item[:ean13] = ean13
|
219
|
+
item[:no8] = pac.SWMC_AUTHNR
|
220
|
+
item[:data_origin] = "refdata"
|
221
|
+
item[:refdata] = true
|
222
|
+
item[:_type] = (typ = pac.ATYPE.downcase.to_sym) ? typ : ""
|
223
|
+
item[:last_change] = (date = Time.parse(pac.DT).to_s) ? date : "" # Date and time of last data change
|
224
|
+
item[:desc_de] = (dscr = pac.NAME_DE) ? dscr : ""
|
225
|
+
item[:desc_fr] = (dscr = pac.NAME_FR) ? dscr : ""
|
226
|
+
item[:atc_code] = (code = pac.ATC) ? code.to_s : ""
|
227
|
+
item[:company_name] = (nam = pac.AUTH_HOLDER_NAME) ? nam : ""
|
228
|
+
item[:company_ean] = (gln = pac.AUTH_HOLDER_GLN) ? gln : ""
|
230
229
|
data[item[:ean13]] = item
|
231
230
|
end
|
232
231
|
data
|
233
232
|
end
|
234
233
|
end
|
234
|
+
|
235
235
|
class SwissmedicExtractor < Extractor
|
236
236
|
def initialize(filename, type)
|
237
|
-
@filename = File.join(
|
238
|
-
@filename = File.join(SpecData, File.basename(filename)) if defined?(RSpec)
|
239
|
-
@type
|
237
|
+
@filename = File.join(DOWNLOADS, File.basename(filename))
|
238
|
+
@filename = File.join(SpecData, File.basename(filename)) if defined?(RSpec) && !File.exist?(@filename)
|
239
|
+
@type = type
|
240
240
|
Oddb2xml.log("SwissmedicExtractor #{@filename} #{File.size(@filename)} bytes")
|
241
|
-
return unless File.
|
241
|
+
return unless File.exist?(@filename)
|
242
242
|
@sheet = RubyXL::Parser.parse(File.expand_path(@filename)).worksheets[0]
|
243
243
|
end
|
244
|
+
|
244
245
|
def to_arry
|
245
246
|
data = []
|
246
247
|
return data unless @sheet
|
247
248
|
case @type
|
248
249
|
when :orphan
|
249
|
-
i = 1
|
250
250
|
col_zulassung = 6
|
251
|
-
raise "Could not find Zulassungsnummer in column #{col_zulassung} of #{@filename}" unless /Zulassungs.*nummer/.match(@sheet[3][col_zulassung].value)
|
251
|
+
raise "Could not find Zulassungsnummer in column #{col_zulassung} of #{@filename}" unless /Zulassungs.*nummer/.match?(@sheet[3][col_zulassung].value)
|
252
252
|
@sheet.each do |row|
|
253
253
|
next unless row[col_zulassung]
|
254
254
|
number = row[col_zulassung].value.to_i
|
@@ -262,24 +262,25 @@ module Oddb2xml
|
|
262
262
|
data.uniq
|
263
263
|
end
|
264
264
|
|
265
|
-
|
265
|
+
# Packungen.xlsx COLUMNS_FEBRUARY_2019
|
266
|
+
def to_hash
|
266
267
|
data = {}
|
267
268
|
return data unless @sheet
|
268
269
|
case @type
|
269
270
|
when :package
|
270
271
|
Oddb2xml.check_column_indices(@sheet)
|
271
|
-
ith
|
272
|
-
iksnr
|
273
|
-
seq_name
|
274
|
-
i_3
|
275
|
-
seqnr
|
276
|
-
cat
|
277
|
-
siz
|
278
|
-
atc
|
272
|
+
ith = COLUMNS_FEBRUARY_2019.keys.index(:index_therapeuticus)
|
273
|
+
iksnr = COLUMNS_FEBRUARY_2019.keys.index(:iksnr)
|
274
|
+
seq_name = COLUMNS_FEBRUARY_2019.keys.index(:name_base)
|
275
|
+
i_3 = COLUMNS_FEBRUARY_2019.keys.index(:ikscd)
|
276
|
+
seqnr = COLUMNS_FEBRUARY_2019.keys.index(:seqnr)
|
277
|
+
cat = COLUMNS_FEBRUARY_2019.keys.index(:ikscat)
|
278
|
+
siz = COLUMNS_FEBRUARY_2019.keys.index(:size)
|
279
|
+
atc = COLUMNS_FEBRUARY_2019.keys.index(:atc_class)
|
279
280
|
list_code = COLUMNS_FEBRUARY_2019.keys.index(:production_science)
|
280
|
-
eht
|
281
|
-
sub
|
282
|
-
comp
|
281
|
+
eht = COLUMNS_FEBRUARY_2019.keys.index(:unit)
|
282
|
+
sub = COLUMNS_FEBRUARY_2019.keys.index(:substances)
|
283
|
+
comp = COLUMNS_FEBRUARY_2019.keys.index(:composition)
|
283
284
|
|
284
285
|
# production_science Heilmittelcode, possible values are
|
285
286
|
# Allergene
|
@@ -299,42 +300,41 @@ module Oddb2xml
|
|
299
300
|
# Tierarzneimittel
|
300
301
|
# Transplantat: Gewebeprodukt
|
301
302
|
@sheet.each_with_index do |row, i|
|
302
|
-
|
303
|
-
next
|
304
|
-
next unless row
|
305
|
-
|
306
|
-
no8 = sprintf('%05d',row[iksnr].value.to_i) + sprintf('%03d',row[i_3].value.to_i)
|
303
|
+
next if i <= 1
|
304
|
+
next unless row && row[iksnr] && row[i_3]
|
305
|
+
next unless (row[iksnr].value.to_i > 0) && (row[i_3].value.to_i > 0)
|
306
|
+
no8 = sprintf("%05d", row[iksnr].value.to_i) + sprintf("%03d", row[i_3].value.to_i)
|
307
307
|
unless no8.empty?
|
308
308
|
next if no8.to_i == 0
|
309
309
|
ean_base12 = "7680#{no8}"
|
310
|
-
prodno =
|
311
|
-
ean13 = (ean_base12.ljust(12,
|
310
|
+
prodno = Oddb2xml.gen_prodno(row[iksnr].value.to_i, row[seqnr].value.to_i)
|
311
|
+
ean13 = (ean_base12.ljust(12, "0") + Oddb2xml.calc_checksum(ean_base12))
|
312
312
|
Oddb2xml.setEan13forProdno(prodno, ean13)
|
313
313
|
Oddb2xml.setEan13forNo8(no8, ean13)
|
314
314
|
data[no8] = {
|
315
|
-
:
|
316
|
-
:
|
317
|
-
:
|
318
|
-
:
|
319
|
-
:
|
320
|
-
:
|
321
|
-
:
|
322
|
-
:
|
323
|
-
:
|
324
|
-
:
|
325
|
-
:
|
326
|
-
:
|
327
|
-
:
|
328
|
-
:
|
329
|
-
:
|
330
|
-
:
|
331
|
-
:
|
332
|
-
:
|
333
|
-
:
|
334
|
-
:
|
335
|
-
:
|
336
|
-
:
|
337
|
-
:
|
315
|
+
iksnr: row[iksnr].value.to_i,
|
316
|
+
no8: no8,
|
317
|
+
ean13: ean13,
|
318
|
+
prodno: prodno,
|
319
|
+
seqnr: row[seqnr].value,
|
320
|
+
ith_swissmedic: row[ith] ? row[ith].value.to_s : "",
|
321
|
+
swissmedic_category: row[cat].value.to_s,
|
322
|
+
atc_code: row[atc] ? Oddb2xml.add_epha_changes_for_ATC(row[iksnr].value.to_s, row[atc].value.to_s) : "",
|
323
|
+
list_code: row[list_code] ? row[list_code].value.to_s : "",
|
324
|
+
package_size: row[siz] ? row[siz].value.to_s : "",
|
325
|
+
einheit_swissmedic: row[eht] ? row[eht].value.to_s : "",
|
326
|
+
substance_swissmedic: row[sub] ? row[sub].value.to_s : "",
|
327
|
+
composition_swissmedic: row[comp] ? row[comp].value.to_s : "",
|
328
|
+
sequence_name: row[seq_name] ? row[seq_name].value.to_s : "",
|
329
|
+
is_tier: (row[list_code] == "Tierarzneimittel"),
|
330
|
+
gen_production: row[COLUMNS_FEBRUARY_2019.keys.index(:gen_production)].value.to_s,
|
331
|
+
insulin_category: row[COLUMNS_FEBRUARY_2019.keys.index(:insulin_category)].value.to_s,
|
332
|
+
drug_index: row[COLUMNS_FEBRUARY_2019.keys.index(:drug_index)].value.to_s,
|
333
|
+
data_origin: "swissmedic_package",
|
334
|
+
expiry_date: row[COLUMNS_FEBRUARY_2019.keys.index(:expiry_date)].value.to_s,
|
335
|
+
company_name: row[COLUMNS_FEBRUARY_2019.keys.index(:company)].value.to_s,
|
336
|
+
size: row[COLUMNS_FEBRUARY_2019.keys.index(:size)].value.to_s,
|
337
|
+
unit: row[COLUMNS_FEBRUARY_2019.keys.index(:unit)].value.to_s
|
338
338
|
}
|
339
339
|
end
|
340
340
|
end
|
@@ -342,21 +342,26 @@ module Oddb2xml
|
|
342
342
|
cleanup_file
|
343
343
|
data
|
344
344
|
end
|
345
|
+
|
345
346
|
private
|
347
|
+
|
346
348
|
def cleanup_file
|
347
|
-
|
348
|
-
|
349
|
+
unless defined?(RSpec)
|
350
|
+
begin
|
351
|
+
File.unlink(@filename) if File.exist?(@filename)
|
349
352
|
rescue Errno::EACCES # Permission Denied on Windows
|
350
|
-
|
353
|
+
end
|
354
|
+
end
|
351
355
|
end
|
352
|
-
|
353
356
|
end
|
357
|
+
|
354
358
|
class MigelExtractor < Extractor
|
355
359
|
def initialize(bin)
|
356
360
|
Oddb2xml.log("MigelExtractor #{io} #{File.size(io)} bytes")
|
357
|
-
book = Spreadsheet.open(io,
|
361
|
+
book = Spreadsheet.open(io, "rb")
|
358
362
|
@sheet = book.worksheet(0)
|
359
363
|
end
|
364
|
+
|
360
365
|
def to_hash
|
361
366
|
data = {}
|
362
367
|
@sheet.each_with_index do |row, i|
|
@@ -366,15 +371,15 @@ module Oddb2xml
|
|
366
371
|
ean13 = row[0]
|
367
372
|
ean13 = phar unless ean13.to_s.length == 13
|
368
373
|
data[ean] = {
|
369
|
-
:
|
370
|
-
:
|
371
|
-
:
|
372
|
-
:
|
373
|
-
:
|
374
|
-
:
|
375
|
-
:
|
376
|
-
:
|
377
|
-
:
|
374
|
+
refdata: true,
|
375
|
+
ean13: ean13,
|
376
|
+
pharmacode: phar,
|
377
|
+
desc_de: row[3],
|
378
|
+
desc_fr: row[4],
|
379
|
+
quantity: row[5], # quantity
|
380
|
+
company_name: row[6],
|
381
|
+
company_ean: row[7],
|
382
|
+
data_origin: "migel"
|
378
383
|
}
|
379
384
|
end
|
380
385
|
data
|
@@ -383,26 +388,26 @@ module Oddb2xml
|
|
383
388
|
|
384
389
|
class SwissmedicInfoExtractor < Extractor
|
385
390
|
def to_hash
|
386
|
-
data = Hash.new{|h,k| h[k] = [] }
|
391
|
+
data = Hash.new { |h, k| h[k] = [] }
|
387
392
|
return data unless @xml.size > 0
|
388
|
-
result = MedicalInformationsContent.parse(@xml.sub(
|
393
|
+
result = MedicalInformationsContent.parse(@xml.sub(STRIP_FOR_SAX_MACHINE, ""), lazy: true)
|
389
394
|
result.medicalInformation.each do |pac|
|
390
395
|
lang = pac.lang.to_s
|
391
|
-
next unless
|
396
|
+
next unless /de|fr/.match?(lang)
|
392
397
|
item = {}
|
393
398
|
item[:refdata] = true
|
394
|
-
item[:data_origin] =
|
395
|
-
item[:name]
|
396
|
-
item[:owner] = (ownr = pac.authHolder) ? ownr :
|
397
|
-
item[:style] =
|
398
|
-
html = Nokogiri::HTML.fragment(pac.content.force_encoding(
|
399
|
+
item[:data_origin] = "swissmedic_info"
|
400
|
+
item[:name] = (name = pac.title) ? name : ""
|
401
|
+
item[:owner] = (ownr = pac.authHolder) ? ownr : ""
|
402
|
+
item[:style] = Nokogiri::HTML.fragment(pac.style).to_html(encoding: "UTF-8")
|
403
|
+
html = Nokogiri::HTML.fragment(pac.content.force_encoding("UTF-8"))
|
399
404
|
item[:paragraph] = html
|
400
|
-
numbers =
|
405
|
+
numbers = /(\d{5})[,\s]*(\d{5})?|(\d{5})[,\s]*(\d{5})?[,\s]*(\d{5})?/.match(html)
|
401
406
|
if numbers
|
402
|
-
|
403
|
-
|
404
|
-
|
405
|
-
|
407
|
+
[$1, $2, $3].compact.each do |n| # plural
|
408
|
+
item[:monid] = n
|
409
|
+
data[lang] << item
|
410
|
+
end
|
406
411
|
end
|
407
412
|
end
|
408
413
|
data
|
@@ -414,27 +419,28 @@ module Oddb2xml
|
|
414
419
|
Oddb2xml.log("EphaExtractor #{str.size} bytes")
|
415
420
|
@io = StringIO.new(str)
|
416
421
|
end
|
422
|
+
|
417
423
|
def to_arry
|
418
424
|
data = []
|
419
425
|
ixno = 0
|
420
426
|
inhalt = @io.read
|
421
427
|
inhalt.split("\n").each do |line|
|
422
428
|
ixno += 1
|
423
|
-
next if /ATC1.*Name1.*ATC2.*Name2/.match(line)
|
424
|
-
#line = '"'+line unless /^"/.match(line)
|
429
|
+
next if /ATC1.*Name1.*ATC2.*Name2/.match?(line)
|
430
|
+
# line = '"'+line unless /^"/.match(line)
|
425
431
|
begin
|
426
|
-
row = CSV.parse_line(line.gsub('""','"'))
|
432
|
+
row = CSV.parse_line(line.gsub('""', '"'))
|
427
433
|
action = {}
|
428
434
|
next unless row.size > 8
|
429
|
-
action[:data_origin] =
|
430
|
-
action[:ixno]
|
431
|
-
action[:title]
|
432
|
-
action[:atc1]
|
433
|
-
action[:atc2]
|
435
|
+
action[:data_origin] = "epha"
|
436
|
+
action[:ixno] = ixno
|
437
|
+
action[:title] = row[4]
|
438
|
+
action[:atc1] = row[0]
|
439
|
+
action[:atc2] = row[2]
|
434
440
|
action[:mechanism] = row[5]
|
435
|
-
action[:effect]
|
436
|
-
action[:measures]
|
437
|
-
action[:grad]
|
441
|
+
action[:effect] = row[6]
|
442
|
+
action[:measures] = row[7]
|
443
|
+
action[:grad] = row[8]
|
438
444
|
data << action
|
439
445
|
rescue CSV::MalformedCSVError
|
440
446
|
puts "CSV::MalformedCSVError in line #{ixno}: #{line}"
|
@@ -443,139 +449,146 @@ module Oddb2xml
|
|
443
449
|
data
|
444
450
|
end
|
445
451
|
end
|
452
|
+
|
446
453
|
class MedregbmExtractor < Extractor
|
447
454
|
def initialize(str, type)
|
448
|
-
@io
|
455
|
+
@io = StringIO.new(str)
|
449
456
|
@type = type
|
450
457
|
end
|
458
|
+
|
451
459
|
def to_arry
|
452
460
|
data = []
|
453
461
|
case @type
|
454
462
|
when :company
|
455
|
-
while line = @io.gets
|
463
|
+
while (line = @io.gets)
|
456
464
|
row = line.chomp.split("\t")
|
457
|
-
next if row[0]
|
465
|
+
next if /^GLN/.match?(row[0])
|
458
466
|
data << {
|
459
|
-
:
|
460
|
-
:
|
461
|
-
:
|
462
|
-
:
|
463
|
-
:
|
464
|
-
:
|
465
|
-
:
|
466
|
-
:
|
467
|
-
:
|
468
|
-
:
|
469
|
-
:
|
470
|
-
:
|
467
|
+
data_origin: "medreg",
|
468
|
+
gln: row[0].to_s.gsub(/[^0-9]/, ""), #=> GLN Betrieb
|
469
|
+
name_1: row[1].to_s, #=> Betriebsname 1
|
470
|
+
name_2: row[2].to_s, #=> Betriebsname 2
|
471
|
+
address: row[3].to_s, #=> Strasse
|
472
|
+
number: row[4].to_s, #=> Nummer
|
473
|
+
post: row[5].to_s, #=> PLZ
|
474
|
+
place: row[6].to_s, #=> Ort
|
475
|
+
region: row[7].to_s, #=> Bewilligungskanton
|
476
|
+
country: row[8].to_s, #=> Land
|
477
|
+
type: row[9].to_s, #=> Betriebstyp
|
478
|
+
authorization: row[10].to_s #=> BTM Berechtigung
|
471
479
|
}
|
472
480
|
end
|
473
481
|
when :person
|
474
|
-
while line = @io.gets
|
482
|
+
while (line = @io.gets)
|
475
483
|
row = line.chomp.split("\t")
|
476
|
-
next if row[0]
|
484
|
+
next if /^GLN/.match?(row[0])
|
477
485
|
data << {
|
478
|
-
:
|
479
|
-
:
|
480
|
-
:
|
481
|
-
:
|
482
|
-
:
|
483
|
-
:
|
484
|
-
:
|
485
|
-
:
|
486
|
-
:
|
487
|
-
:
|
488
|
-
:
|
486
|
+
data_origin: "medreg",
|
487
|
+
gln: row[0].to_s.gsub(/[^0-9]/, ""), #=> GLN Person
|
488
|
+
last_name: row[1].to_s, #=> Name
|
489
|
+
first_name: row[2].to_s, #=> Vorname
|
490
|
+
post: row[3].to_s, #=> PLZ
|
491
|
+
place: row[4].to_s, #=> Ort
|
492
|
+
region: row[5].to_s, #=> Bewilligungskanton
|
493
|
+
country: row[6].to_s, #=> Land
|
494
|
+
license: row[7].to_s, #=> Bewilligung Selbstdispensation
|
495
|
+
certificate: row[8].to_s, #=> Diplom
|
496
|
+
authorization: row[9].to_s #=> BTM Berechtigung
|
489
497
|
}
|
490
498
|
end
|
491
499
|
end
|
492
500
|
data
|
493
501
|
end
|
494
502
|
end
|
503
|
+
|
495
504
|
class ZurroseExtractor < Extractor
|
496
505
|
# see http://dev.ywesee.com/Bbmb/TransferDat
|
497
506
|
def initialize(dat, extended = false, artikelstamm = false)
|
498
507
|
@@extended = extended
|
499
508
|
@artikelstamm = artikelstamm
|
500
|
-
FileUtils.makedirs(
|
501
|
-
@@error_file ||= File.open(File.join(
|
509
|
+
FileUtils.makedirs(WORK_DIR)
|
510
|
+
@@error_file ||= File.open(File.join(WORK_DIR, "duplicate_ean13_from_zur_rose.txt"), "wb+:ISO-8859-14")
|
502
511
|
@@items_without_ean13s ||= 0
|
503
512
|
@@duplicated_ean13s ||= 0
|
504
513
|
@@zur_rose_items ||= 0
|
505
514
|
if dat
|
506
|
-
if File.
|
507
|
-
|
515
|
+
@io = if File.exist?(dat)
|
516
|
+
File.open(dat, "rb:ISO-8859-14")
|
508
517
|
else
|
509
|
-
|
518
|
+
StringIO.new(dat)
|
510
519
|
end
|
511
520
|
@io
|
512
|
-
else
|
513
|
-
nil
|
514
521
|
end
|
515
522
|
end
|
523
|
+
|
516
524
|
def to_hash
|
517
525
|
data = {}
|
518
|
-
|
519
|
-
|
520
|
-
|
521
|
-
|
522
|
-
|
523
|
-
|
524
|
-
|
525
|
-
|
526
|
-
|
527
|
-
|
528
|
-
|
529
|
-
|
530
|
-
|
531
|
-
|
532
|
-
|
533
|
-
|
534
|
-
|
535
|
-
|
536
|
-
|
537
|
-
|
538
|
-
|
539
|
-
|
540
|
-
|
541
|
-
|
526
|
+
if @io
|
527
|
+
while (line = @io.gets)
|
528
|
+
ean13 = "-1"
|
529
|
+
line = Oddb2xml.patch_some_utf8(line).chomp
|
530
|
+
# next unless /(7680\d{9})(\d{1})$/.match(line) # Skip non pharma
|
531
|
+
next if /(ad us\.* vet)|(\(vet\))/i.match?(line)
|
532
|
+
if @@extended
|
533
|
+
next unless (match_data = line.match(/(\d{13})(\d{1})$/))
|
534
|
+
else
|
535
|
+
next unless (match_data = line.match(/(7680\d{9})(\d{1})$/))
|
536
|
+
end
|
537
|
+
pharma_code = line[3..9]
|
538
|
+
if match_data[1].to_s == "0000000000000"
|
539
|
+
@@items_without_ean13s += 1
|
540
|
+
next if @artikelstamm && pharma_code.to_i == 0
|
541
|
+
ean13 = Oddb2xml::FAKE_GTIN_START + pharma_code.to_s unless @artikelstamm
|
542
|
+
else
|
543
|
+
ean13 = match_data[1]
|
544
|
+
end
|
545
|
+
if data[ean13]
|
546
|
+
@@error_file.puts "Duplicate ean13 #{ean13} in line \nact: #{line.chomp}\norg: #{data[ean13][:line]}"
|
547
|
+
@@items_without_ean13s -= 1
|
548
|
+
@@duplicated_ean13s += 1
|
549
|
+
next
|
550
|
+
end
|
542
551
|
|
543
|
-
|
544
|
-
|
545
|
-
|
546
|
-
|
547
|
-
|
548
|
-
|
549
|
-
|
550
|
-
|
551
|
-
|
552
|
-
|
553
|
-
|
554
|
-
|
555
|
-
|
556
|
-
|
557
|
-
|
558
|
-
|
559
|
-
|
560
|
-
|
561
|
-
|
562
|
-
|
563
|
-
|
564
|
-
|
565
|
-
end
|
566
|
-
if defined?(@@extended)
|
552
|
+
pexf = sprintf("%.2f", line[60, 6].gsub(/(\d{2})$/, '.\1').to_f)
|
553
|
+
ppub = sprintf("%.2f", line[66, 6].gsub(/(\d{2})$/, '.\1').to_f)
|
554
|
+
next if @artikelstamm && /^113/.match(line) && ppub.eql?("0.0") && pexf.eql?("0.0")
|
555
|
+
next unless ean13
|
556
|
+
key = ean13
|
557
|
+
key = (Oddb2xml::FAKE_GTIN_START + pharma_code.to_s) if ean13.to_i <= 0 # dummy ean13
|
558
|
+
data[key] = {
|
559
|
+
data_origin: "zur_rose",
|
560
|
+
line: line.chomp,
|
561
|
+
ean13: ean13,
|
562
|
+
clag: line[73],
|
563
|
+
vat: line[96],
|
564
|
+
description: line[10..59].sub(/\s+$/, ""),
|
565
|
+
quantity: "",
|
566
|
+
pharmacode: pharma_code,
|
567
|
+
price: pexf,
|
568
|
+
pub_price: ppub,
|
569
|
+
type: :nonpharma,
|
570
|
+
cmut: line[2]
|
571
|
+
}
|
572
|
+
@@zur_rose_items += 1
|
573
|
+
end
|
574
|
+
end
|
575
|
+
if defined?(@@extended) && @@extended
|
567
576
|
@@error_file.puts get_error_msg
|
568
577
|
end
|
569
578
|
@@error_file.close
|
570
579
|
@@error_file = nil
|
571
580
|
data
|
572
581
|
end
|
573
|
-
|
574
|
-
|
575
|
-
|
576
|
-
|
582
|
+
if defined?(@@extended) && @@extended
|
583
|
+
at_exit do
|
584
|
+
puts get_error_msg
|
585
|
+
end
|
586
|
+
end
|
587
|
+
|
588
|
+
private
|
589
|
+
|
577
590
|
def get_error_msg
|
578
|
-
if defined?(@@extended)
|
591
|
+
if defined?(@@extended) && @@extended
|
579
592
|
msg = "Added #{@@items_without_ean13s} via pharmacodes of #{@@zur_rose_items} items when extracting the transfer.dat from \"Zur Rose\""
|
580
593
|
msg += "\n found #{@@duplicated_ean13s} lines with duplicated ean13" if @@duplicated_ean13s > 0
|
581
594
|
return msg
|