oddb2xml 2.7.1 → 2.7.2
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.github/workflows/ruby.yml +1 -1
- data/.standard.yml +2 -0
- data/Gemfile +3 -3
- data/History.txt +8 -0
- data/README.md +1 -1
- data/Rakefile +24 -23
- data/bin/check_artikelstamm +11 -11
- data/bin/compare_v5 +23 -23
- data/bin/oddb2xml +14 -13
- data/lib/oddb2xml.rb +1 -1
- data/lib/oddb2xml/builder.rb +1070 -1038
- data/lib/oddb2xml/calc.rb +232 -233
- data/lib/oddb2xml/chapter_70_hack.rb +38 -32
- data/lib/oddb2xml/cli.rb +252 -236
- data/lib/oddb2xml/compare.rb +70 -59
- data/lib/oddb2xml/compositions_syntax.rb +448 -430
- data/lib/oddb2xml/compressor.rb +20 -20
- data/lib/oddb2xml/downloader.rb +153 -127
- data/lib/oddb2xml/extractor.rb +302 -289
- data/lib/oddb2xml/options.rb +34 -35
- data/lib/oddb2xml/parslet_compositions.rb +263 -269
- data/lib/oddb2xml/semantic_check.rb +39 -33
- data/lib/oddb2xml/util.rb +163 -163
- data/lib/oddb2xml/version.rb +1 -1
- data/lib/oddb2xml/xml_definitions.rb +32 -33
- data/oddb2xml.gemspec +31 -32
- data/spec/artikelstamm_spec.rb +111 -110
- data/spec/builder_spec.rb +489 -505
- data/spec/calc_spec.rb +552 -593
- data/spec/check_artikelstamm_spec.rb +26 -26
- data/spec/cli_spec.rb +173 -174
- data/spec/compare_spec.rb +9 -11
- data/spec/composition_syntax_spec.rb +390 -409
- data/spec/compressor_spec.rb +48 -48
- data/spec/data/transfer.dat +1 -0
- data/spec/data_helper.rb +47 -49
- data/spec/downloader_spec.rb +247 -260
- data/spec/extractor_spec.rb +171 -159
- data/spec/galenic_spec.rb +233 -256
- data/spec/options_spec.rb +116 -119
- data/spec/parslet_spec.rb +833 -861
- data/spec/spec_helper.rb +154 -153
- data/test_options.rb +39 -42
- data/tools/win_fetch_cacerts.rb +2 -3
- metadata +19 -3
data/lib/oddb2xml/extractor.rb
CHANGED
@@ -1,11 +1,10 @@
|
|
1
|
-
|
2
|
-
require
|
3
|
-
require
|
4
|
-
require
|
5
|
-
require
|
6
|
-
require
|
7
|
-
require
|
8
|
-
require 'oddb2xml/xml_definitions'
|
1
|
+
require "nokogiri"
|
2
|
+
require "spreadsheet"
|
3
|
+
require "stringio"
|
4
|
+
require "rubyXL"
|
5
|
+
require "rubyXL/convenience_methods/workbook"
|
6
|
+
require "csv"
|
7
|
+
require "oddb2xml/xml_definitions"
|
9
8
|
|
10
9
|
module Oddb2xml
|
11
10
|
module TxtExtractorMethods
|
@@ -13,23 +12,26 @@ module Oddb2xml
|
|
13
12
|
Oddb2xml.log("TxtExtractorMethods #{str} #{str.to_s.size} bytes")
|
14
13
|
@io = StringIO.new(str)
|
15
14
|
end
|
15
|
+
|
16
16
|
def to_hash
|
17
17
|
data = {}
|
18
|
-
while line = @io.gets
|
19
|
-
next unless
|
20
|
-
ean13 = line.chomp.
|
18
|
+
while (line = @io.gets)
|
19
|
+
next unless /\d{13}/.match?(line)
|
20
|
+
ean13 = line.chomp.delete("\"")
|
21
21
|
data[ean13] = true
|
22
22
|
end
|
23
23
|
data
|
24
24
|
end
|
25
25
|
end
|
26
|
+
|
26
27
|
class Extractor
|
27
28
|
attr_accessor :xml
|
28
29
|
def initialize(xml)
|
29
|
-
Oddb2xml.log("Extractor #{xml
|
30
|
+
Oddb2xml.log("Extractor #{xml} xml #{xml.size} bytes")
|
30
31
|
@xml = xml
|
31
32
|
end
|
32
33
|
end
|
34
|
+
|
33
35
|
class LppvExtractor < Extractor
|
34
36
|
include TxtExtractorMethods
|
35
37
|
end
|
@@ -37,156 +39,152 @@ module Oddb2xml
|
|
37
39
|
class BagXmlExtractor < Extractor
|
38
40
|
def to_hash
|
39
41
|
data = {}
|
40
|
-
result = PreparationsEntry.parse(@xml.sub(
|
42
|
+
result = PreparationsEntry.parse(@xml.sub(STRIP_FOR_SAX_MACHINE, ""), lazy: true)
|
41
43
|
result.Preparations.Preparation.each do |seq|
|
42
|
-
if seq.SwissmedicNo5.eql?(
|
44
|
+
if seq.SwissmedicNo5.eql?("0")
|
43
45
|
puts "BagXmlExtractor Skipping SwissmedicNo5 0 for #{seq.NameDe} #{seq.DescriptionDe} #{seq.CommentDe}"
|
44
46
|
next
|
45
47
|
end
|
46
48
|
item = {}
|
47
|
-
item[:data_origin]
|
48
|
-
item[:refdata]
|
49
|
-
item[:product_key]
|
50
|
-
item[:desc_de]
|
51
|
-
item[:desc_fr]
|
52
|
-
item[:name_de]
|
53
|
-
item[:name_fr]
|
54
|
-
item[:swissmedic_number5] = (num5 = seq.SwissmedicNo5) ?
|
55
|
-
item[:org_gen_code] = (orgc = seq.OrgGenCode)
|
56
|
-
item[:deductible]
|
57
|
-
item[:atc_code]
|
58
|
-
item[:comment_de]
|
59
|
-
item[:comment_fr]
|
60
|
-
item[:it_code]
|
49
|
+
item[:data_origin] = "bag_xml"
|
50
|
+
item[:refdata] = true
|
51
|
+
item[:product_key] = seq.ProductCommercial
|
52
|
+
item[:desc_de] = (desc = seq.DescriptionDe) ? desc : ""
|
53
|
+
item[:desc_fr] = (desc = seq.DescriptionFr) ? desc : ""
|
54
|
+
item[:name_de] = (name = seq.NameDe) ? name : ""
|
55
|
+
item[:name_fr] = (name = seq.NameFr) ? name : ""
|
56
|
+
item[:swissmedic_number5] = (num5 = seq.SwissmedicNo5) ? num5.rjust(5, "0") : ""
|
57
|
+
item[:org_gen_code] = (orgc = seq.OrgGenCode) ? orgc : ""
|
58
|
+
item[:deductible] = (ddbl = seq.FlagSB20) ? ddbl : ""
|
59
|
+
item[:atc_code] = (atcc = seq.AtcCode) ? atcc : ""
|
60
|
+
item[:comment_de] = (info = seq.CommentDe) ? info : ""
|
61
|
+
item[:comment_fr] = (info = seq.CommentFr) ? info : ""
|
62
|
+
item[:it_code] = ""
|
61
63
|
seq.ItCodes.ItCode.each do |itc|
|
62
64
|
if item[:it_code].to_s.empty?
|
63
65
|
it_code = itc.Code.to_s
|
64
|
-
item[:it_code] =
|
66
|
+
item[:it_code] = /(\d+)\.(\d+)\.(\d+)./.match?(it_code) ? it_code : ""
|
65
67
|
end
|
66
68
|
end
|
67
69
|
item[:substances] = []
|
68
70
|
seq.Substances.Substance.each_with_index do |sub, i|
|
69
71
|
item[:substances] << {
|
70
|
-
:
|
71
|
-
:
|
72
|
-
:
|
73
|
-
:
|
72
|
+
index: i.to_s,
|
73
|
+
name: (name = sub.DescriptionLa) ? name : "",
|
74
|
+
quantity: (qtty = sub.Quantity) ? qtty : "",
|
75
|
+
unit: (unit = sub.QuantityUnit) ? unit : ""
|
74
76
|
}
|
75
77
|
end
|
76
78
|
item[:pharmacodes] = []
|
77
|
-
item[:packages]
|
79
|
+
item[:packages] = {} # pharmacode => package
|
78
80
|
seq.Packs.Pack.each do |pac|
|
79
81
|
if pac.SwissmedicNo8 && pac.SwissmedicNo8.length < 8
|
80
82
|
puts "BagXmlExtractor: Adding leading zeros for SwissmedicNo8 #{pac.SwissmedicNo8} BagDossierNo #{pac.BagDossierNo} PackId #{pac.PackId} #{item[:name_de]}" if $VERBOSE
|
81
|
-
pac.SwissmedicNo8
|
83
|
+
pac.SwissmedicNo8 = pac.SwissmedicNo8.rjust(8, "0")
|
82
84
|
end
|
83
85
|
unless pac.GTIN
|
84
|
-
|
86
|
+
if pac.SwissmedicNo8
|
87
|
+
ean12 = "7680" + pac.SwissmedicNo8
|
88
|
+
pac.GTIN = (ean12 + Oddb2xml.calc_checksum(ean12)) unless @artikelstamm
|
89
|
+
# puts "BagXmlExtractor: Missing GTIN in SwissmedicNo8 #{pac.SwissmedicNo8} BagDossierNo #{pac.BagDossierNo} PackId #{pac.PackId} #{item[:name_de]}."
|
90
|
+
else
|
85
91
|
puts "BagXmlExtractor: Missing GTIN and SwissmedicNo8 in SwissmedicNo8 #{pac.SwissmedicNo8} BagDossierNo #{pac.BagDossierNo} PackId #{pac.PackId} #{item[:name_de]}"
|
86
92
|
next
|
87
|
-
else
|
88
|
-
ean12 = '7680' + pac.SwissmedicNo8
|
89
|
-
pac.GTIN = (ean12 + Oddb2xml.calc_checksum(ean12)) unless @artikelstamm
|
90
|
-
# puts "BagXmlExtractor: Missing GTIN in SwissmedicNo8 #{pac.SwissmedicNo8} BagDossierNo #{pac.BagDossierNo} PackId #{pac.PackId} #{item[:name_de]}."
|
91
93
|
end
|
92
94
|
end
|
93
95
|
ean13 = pac.GTIN.to_s
|
94
96
|
Oddb2xml.setEan13forNo8(pac.SwissmedicNo8, ean13) if pac.SwissmedicNo8
|
95
97
|
# packages
|
96
|
-
exf = {:
|
97
|
-
if pac
|
98
|
-
exf[:price]
|
99
|
-
exf[:valid_date] =
|
100
|
-
exf[:price_code] =
|
98
|
+
exf = {price: "", valid_date: "", price_code: ""}
|
99
|
+
if pac&.Prices&.ExFactoryPrice
|
100
|
+
exf[:price] = pac.Prices.ExFactoryPrice.Price if pac.Prices.ExFactoryPrice.Price
|
101
|
+
exf[:valid_date] = pac.Prices.ExFactoryPrice.ValidFromDate if pac.Prices.ExFactoryPrice.ValidFromDate
|
102
|
+
exf[:price_code] = pac.Prices.ExFactoryPrice.PriceTypeCode if pac.Prices.ExFactoryPrice.PriceTypeCode
|
101
103
|
end
|
102
|
-
pub = {:
|
103
|
-
if pac
|
104
|
-
pub[:price]
|
105
|
-
pub[:valid_date] =
|
106
|
-
pub[:price_code] =
|
104
|
+
pub = {price: "", valid_date: "", price_code: ""}
|
105
|
+
if pac&.Prices&.PublicPrice
|
106
|
+
pub[:price] = pac.Prices.PublicPrice.Price if pac.Prices.PublicPrice.Price
|
107
|
+
pub[:valid_date] = pac.Prices.PublicPrice.ValidFromDate if pac.Prices.PublicPrice.ValidFromDate
|
108
|
+
pub[:price_code] = pac.Prices.PublicPrice.PriceTypeCode if pac.Prices.PublicPrice.PriceTypeCode
|
107
109
|
end
|
108
110
|
item[:packages][ean13] = {
|
109
|
-
:
|
110
|
-
:
|
111
|
-
:
|
112
|
-
:
|
113
|
-
:
|
114
|
-
:
|
115
|
-
:
|
116
|
-
:
|
117
|
-
:
|
111
|
+
ean13: ean13,
|
112
|
+
name_de: (desc = seq.NameDe) ? desc : "",
|
113
|
+
name_fr: (desc = seq.NameFr) ? desc : "",
|
114
|
+
desc_de: (desc = pac.DescriptionDe) ? desc : "",
|
115
|
+
desc_fr: (desc = pac.DescriptionFr) ? desc : "",
|
116
|
+
sl_entry: true,
|
117
|
+
swissmedic_category: (cat = pac.SwissmedicCategory) ? cat : "",
|
118
|
+
swissmedic_number8: (num = pac.SwissmedicNo8) ? num : "",
|
119
|
+
prices: {exf_price: exf, pub_price: pub}
|
118
120
|
}
|
119
121
|
# related all limitations
|
120
122
|
item[:packages][ean13][:limitations] = []
|
121
|
-
limitations = Hash.new{|h,k| h[k] = [] }
|
122
|
-
if seq.Limitations
|
123
|
-
|
124
|
-
else
|
125
|
-
limitations[:seq] = nil
|
123
|
+
limitations = Hash.new { |h, k| h[k] = [] }
|
124
|
+
limitations[:seq] = if seq.Limitations
|
125
|
+
seq.Limitations.Limitation.collect { |x| x }
|
126
126
|
end
|
127
127
|
# in it-codes
|
128
|
-
if seq
|
128
|
+
if seq&.ItCodes && seq&.ItCodes&.ItCode
|
129
129
|
limitations[:itc] = []
|
130
|
-
seq.ItCodes.ItCode.each { |x|
|
130
|
+
seq.ItCodes.ItCode.each { |x| limitations[:itc] += x.Limitations.Limitation if x.Limitations.Limitation }
|
131
131
|
else
|
132
|
-
limitations[:itc] =nil
|
132
|
+
limitations[:itc] = nil
|
133
133
|
end
|
134
134
|
# in pac
|
135
|
-
if pac
|
136
|
-
|
137
|
-
else
|
138
|
-
limitations[:pac] = nil
|
135
|
+
limitations[:pac] = if pac && pac.Limitations
|
136
|
+
(lims = pac.Limitations.Limitation) ? lims.to_a : nil
|
139
137
|
end
|
140
138
|
limitations.each_pair do |lim_key, lims|
|
141
|
-
key =
|
142
|
-
id
|
139
|
+
key = ""
|
140
|
+
id = ""
|
143
141
|
case lim_key
|
144
142
|
when :seq, :itc
|
145
143
|
key = :swissmedic_number5
|
146
|
-
id
|
144
|
+
id = item[key].to_s
|
147
145
|
when :pac
|
148
146
|
key = :swissmedic_number8
|
149
|
-
id
|
147
|
+
id = item[:packages][ean13][key].to_s
|
150
148
|
end
|
151
|
-
if id.empty? && item[:packages][ean13][
|
149
|
+
if id.empty? && item[:packages][ean13][:swissmedic_number8]
|
152
150
|
key = :swissmedic_number8
|
153
|
-
id
|
151
|
+
id = item[:packages][ean13][key].to_s
|
154
152
|
end
|
155
|
-
lims
|
153
|
+
lims&.each do |lim|
|
156
154
|
limitation = {
|
157
|
-
:
|
158
|
-
:
|
159
|
-
:
|
160
|
-
:
|
161
|
-
:
|
162
|
-
:
|
163
|
-
:
|
164
|
-
:
|
165
|
-
:
|
166
|
-
:
|
155
|
+
it: item[:it_code],
|
156
|
+
key: key,
|
157
|
+
id: id,
|
158
|
+
code: (lic = lim.LimitationCode) ? lic : "",
|
159
|
+
type: (lit = lim.LimitationType) ? lit : "",
|
160
|
+
value: (liv = lim.LimitationValue) ? liv : "",
|
161
|
+
niv: (niv = lim.LimitationNiveau) ? niv : "",
|
162
|
+
desc_de: (dsc = lim.DescriptionDe) ? dsc : "",
|
163
|
+
desc_fr: (dsc = lim.DescriptionFr) ? dsc : "",
|
164
|
+
vdate: (dat = lim.ValidFromDate) ? dat : ""
|
167
165
|
}
|
168
166
|
deleted = false
|
169
|
-
if upto = ((thr = lim.ValidThruDate) ? thr : nil)
|
170
|
-
upto =~ /\d{2}\.\d{2}\.\d{2}/
|
167
|
+
if (upto = ((thr = lim.ValidThruDate) ? thr : nil)) &&
|
168
|
+
upto =~ (/\d{2}\.\d{2}\.\d{2}/)
|
171
169
|
begin
|
172
|
-
deleted = true if Date.strptime(upto,
|
170
|
+
deleted = true if Date.strptime(upto, "%d.%m.%y") >= Date.today
|
173
171
|
rescue ArgumentError
|
174
172
|
end
|
175
173
|
end
|
176
174
|
limitation[:del] = deleted
|
177
175
|
item[:packages][ean13][:limitations] << limitation
|
178
|
-
end
|
176
|
+
end
|
179
177
|
end
|
180
178
|
# limitation points
|
181
179
|
pts = pac.PointLimitations.PointLimitation.first # only first points
|
182
|
-
item[:packages][ean13][:limitation_points] = pts ? pts.Points :
|
180
|
+
item[:packages][ean13][:limitation_points] = pts ? pts.Points : ""
|
183
181
|
if pac.SwissmedicNo8
|
184
|
-
|
185
|
-
|
186
|
-
|
187
|
-
|
188
|
-
|
189
|
-
|
182
|
+
ean12 = "7680" + pac.SwissmedicNo8
|
183
|
+
correct_ean13 = ean12 + Oddb2xml.calc_checksum(ean12)
|
184
|
+
unless pac.GTIN.eql?(correct_ean13)
|
185
|
+
puts "pac.GTIN #{pac.GTIN} should be #{correct_ean13}"
|
186
|
+
item[:packages][ean13][:CORRECT_EAN13] = correct_ean13
|
187
|
+
end
|
190
188
|
end
|
191
189
|
data[ean13] = item
|
192
190
|
end
|
@@ -197,58 +195,60 @@ module Oddb2xml
|
|
197
195
|
|
198
196
|
class RefdataExtractor < Extractor
|
199
197
|
def initialize(xml, type)
|
200
|
-
@type = (type == :pharma ?
|
198
|
+
@type = (type == :pharma ? "PHARMA" : "NONPHARMA")
|
201
199
|
super(xml)
|
202
200
|
end
|
201
|
+
|
203
202
|
def to_hash
|
204
203
|
data = {}
|
205
|
-
result = SwissRegArticleEntry.parse(@xml.sub(
|
204
|
+
result = SwissRegArticleEntry.parse(@xml.sub(STRIP_FOR_SAX_MACHINE, ""), lazy: true)
|
206
205
|
items = result.ARTICLE.ITEM
|
207
206
|
items.each do |pac|
|
208
|
-
ean13 = (gtin = pac.GTIN.to_s) ? gtin:
|
207
|
+
ean13 = (gtin = pac.GTIN.to_s) ? gtin : "0"
|
209
208
|
if ean13.size < 13
|
210
209
|
puts "Refdata #{@type} use 13 chars not #{ean13.size} for #{ean13}" if $VERBOSE
|
211
|
-
ean13 = ean13.rjust(13,
|
210
|
+
ean13 = ean13.rjust(13, "0")
|
212
211
|
end
|
213
|
-
if ean13.size == 14 && ean13[0] ==
|
212
|
+
if ean13.size == 14 && ean13[0] == "0"
|
214
213
|
puts "Refdata #{@type} remove leading '0' for #{ean13}" if $VERBOSE
|
215
214
|
ean13 = ean13[1..-1]
|
216
215
|
end
|
217
216
|
# but in refdata_nonPharma we have a about 700 GTINs which are 14 characters and longer
|
218
217
|
item = {}
|
219
|
-
item[:ean13]
|
220
|
-
item[:no8]
|
221
|
-
item[:data_origin]
|
222
|
-
item[:refdata]
|
223
|
-
item[:_type]
|
224
|
-
item[:last_change]
|
225
|
-
item[:desc_de]
|
226
|
-
item[:desc_fr]
|
227
|
-
item[:atc_code]
|
228
|
-
item[:company_name] = (nam = pac.AUTH_HOLDER_NAME) ? nam:
|
229
|
-
item[:company_ean]
|
218
|
+
item[:ean13] = ean13
|
219
|
+
item[:no8] = pac.SWMC_AUTHNR
|
220
|
+
item[:data_origin] = "refdata"
|
221
|
+
item[:refdata] = true
|
222
|
+
item[:_type] = (typ = pac.ATYPE.downcase.to_sym) ? typ : ""
|
223
|
+
item[:last_change] = (date = Time.parse(pac.DT).to_s) ? date : "" # Date and time of last data change
|
224
|
+
item[:desc_de] = (dscr = pac.NAME_DE) ? dscr : ""
|
225
|
+
item[:desc_fr] = (dscr = pac.NAME_FR) ? dscr : ""
|
226
|
+
item[:atc_code] = (code = pac.ATC) ? code.to_s : ""
|
227
|
+
item[:company_name] = (nam = pac.AUTH_HOLDER_NAME) ? nam : ""
|
228
|
+
item[:company_ean] = (gln = pac.AUTH_HOLDER_GLN) ? gln : ""
|
230
229
|
data[item[:ean13]] = item
|
231
230
|
end
|
232
231
|
data
|
233
232
|
end
|
234
233
|
end
|
234
|
+
|
235
235
|
class SwissmedicExtractor < Extractor
|
236
236
|
def initialize(filename, type)
|
237
|
-
@filename = File.join(
|
238
|
-
@filename = File.join(SpecData, File.basename(filename)) if defined?(RSpec)
|
239
|
-
@type
|
237
|
+
@filename = File.join(DOWNLOADS, File.basename(filename))
|
238
|
+
@filename = File.join(SpecData, File.basename(filename)) if defined?(RSpec) && !File.exist?(@filename)
|
239
|
+
@type = type
|
240
240
|
Oddb2xml.log("SwissmedicExtractor #{@filename} #{File.size(@filename)} bytes")
|
241
|
-
return unless File.
|
241
|
+
return unless File.exist?(@filename)
|
242
242
|
@sheet = RubyXL::Parser.parse(File.expand_path(@filename)).worksheets[0]
|
243
243
|
end
|
244
|
+
|
244
245
|
def to_arry
|
245
246
|
data = []
|
246
247
|
return data unless @sheet
|
247
248
|
case @type
|
248
249
|
when :orphan
|
249
|
-
i = 1
|
250
250
|
col_zulassung = 6
|
251
|
-
raise "Could not find Zulassungsnummer in column #{col_zulassung} of #{@filename}" unless /Zulassungs.*nummer/.match(@sheet[3][col_zulassung].value)
|
251
|
+
raise "Could not find Zulassungsnummer in column #{col_zulassung} of #{@filename}" unless /Zulassungs.*nummer/.match?(@sheet[3][col_zulassung].value)
|
252
252
|
@sheet.each do |row|
|
253
253
|
next unless row[col_zulassung]
|
254
254
|
number = row[col_zulassung].value.to_i
|
@@ -262,24 +262,25 @@ module Oddb2xml
|
|
262
262
|
data.uniq
|
263
263
|
end
|
264
264
|
|
265
|
-
|
265
|
+
# Packungen.xlsx COLUMNS_FEBRUARY_2019
|
266
|
+
def to_hash
|
266
267
|
data = {}
|
267
268
|
return data unless @sheet
|
268
269
|
case @type
|
269
270
|
when :package
|
270
271
|
Oddb2xml.check_column_indices(@sheet)
|
271
|
-
ith
|
272
|
-
iksnr
|
273
|
-
seq_name
|
274
|
-
i_3
|
275
|
-
seqnr
|
276
|
-
cat
|
277
|
-
siz
|
278
|
-
atc
|
272
|
+
ith = COLUMNS_FEBRUARY_2019.keys.index(:index_therapeuticus)
|
273
|
+
iksnr = COLUMNS_FEBRUARY_2019.keys.index(:iksnr)
|
274
|
+
seq_name = COLUMNS_FEBRUARY_2019.keys.index(:name_base)
|
275
|
+
i_3 = COLUMNS_FEBRUARY_2019.keys.index(:ikscd)
|
276
|
+
seqnr = COLUMNS_FEBRUARY_2019.keys.index(:seqnr)
|
277
|
+
cat = COLUMNS_FEBRUARY_2019.keys.index(:ikscat)
|
278
|
+
siz = COLUMNS_FEBRUARY_2019.keys.index(:size)
|
279
|
+
atc = COLUMNS_FEBRUARY_2019.keys.index(:atc_class)
|
279
280
|
list_code = COLUMNS_FEBRUARY_2019.keys.index(:production_science)
|
280
|
-
eht
|
281
|
-
sub
|
282
|
-
comp
|
281
|
+
eht = COLUMNS_FEBRUARY_2019.keys.index(:unit)
|
282
|
+
sub = COLUMNS_FEBRUARY_2019.keys.index(:substances)
|
283
|
+
comp = COLUMNS_FEBRUARY_2019.keys.index(:composition)
|
283
284
|
|
284
285
|
# production_science Heilmittelcode, possible values are
|
285
286
|
# Allergene
|
@@ -299,42 +300,41 @@ module Oddb2xml
|
|
299
300
|
# Tierarzneimittel
|
300
301
|
# Transplantat: Gewebeprodukt
|
301
302
|
@sheet.each_with_index do |row, i|
|
302
|
-
|
303
|
-
next
|
304
|
-
next unless row
|
305
|
-
|
306
|
-
no8 = sprintf('%05d',row[iksnr].value.to_i) + sprintf('%03d',row[i_3].value.to_i)
|
303
|
+
next if i <= 1
|
304
|
+
next unless row && row[iksnr] && row[i_3]
|
305
|
+
next unless (row[iksnr].value.to_i > 0) && (row[i_3].value.to_i > 0)
|
306
|
+
no8 = sprintf("%05d", row[iksnr].value.to_i) + sprintf("%03d", row[i_3].value.to_i)
|
307
307
|
unless no8.empty?
|
308
308
|
next if no8.to_i == 0
|
309
309
|
ean_base12 = "7680#{no8}"
|
310
|
-
prodno =
|
311
|
-
ean13 = (ean_base12.ljust(12,
|
310
|
+
prodno = Oddb2xml.gen_prodno(row[iksnr].value.to_i, row[seqnr].value.to_i)
|
311
|
+
ean13 = (ean_base12.ljust(12, "0") + Oddb2xml.calc_checksum(ean_base12))
|
312
312
|
Oddb2xml.setEan13forProdno(prodno, ean13)
|
313
313
|
Oddb2xml.setEan13forNo8(no8, ean13)
|
314
314
|
data[no8] = {
|
315
|
-
:
|
316
|
-
:
|
317
|
-
:
|
318
|
-
:
|
319
|
-
:
|
320
|
-
:
|
321
|
-
:
|
322
|
-
:
|
323
|
-
:
|
324
|
-
:
|
325
|
-
:
|
326
|
-
:
|
327
|
-
:
|
328
|
-
:
|
329
|
-
:
|
330
|
-
:
|
331
|
-
:
|
332
|
-
:
|
333
|
-
:
|
334
|
-
:
|
335
|
-
:
|
336
|
-
:
|
337
|
-
:
|
315
|
+
iksnr: row[iksnr].value.to_i,
|
316
|
+
no8: no8,
|
317
|
+
ean13: ean13,
|
318
|
+
prodno: prodno,
|
319
|
+
seqnr: row[seqnr].value,
|
320
|
+
ith_swissmedic: row[ith] ? row[ith].value.to_s : "",
|
321
|
+
swissmedic_category: row[cat].value.to_s,
|
322
|
+
atc_code: row[atc] ? Oddb2xml.add_epha_changes_for_ATC(row[iksnr].value.to_s, row[atc].value.to_s) : "",
|
323
|
+
list_code: row[list_code] ? row[list_code].value.to_s : "",
|
324
|
+
package_size: row[siz] ? row[siz].value.to_s : "",
|
325
|
+
einheit_swissmedic: row[eht] ? row[eht].value.to_s : "",
|
326
|
+
substance_swissmedic: row[sub] ? row[sub].value.to_s : "",
|
327
|
+
composition_swissmedic: row[comp] ? row[comp].value.to_s : "",
|
328
|
+
sequence_name: row[seq_name] ? row[seq_name].value.to_s : "",
|
329
|
+
is_tier: (row[list_code] == "Tierarzneimittel"),
|
330
|
+
gen_production: row[COLUMNS_FEBRUARY_2019.keys.index(:gen_production)].value.to_s,
|
331
|
+
insulin_category: row[COLUMNS_FEBRUARY_2019.keys.index(:insulin_category)].value.to_s,
|
332
|
+
drug_index: row[COLUMNS_FEBRUARY_2019.keys.index(:drug_index)].value.to_s,
|
333
|
+
data_origin: "swissmedic_package",
|
334
|
+
expiry_date: row[COLUMNS_FEBRUARY_2019.keys.index(:expiry_date)].value.to_s,
|
335
|
+
company_name: row[COLUMNS_FEBRUARY_2019.keys.index(:company)].value.to_s,
|
336
|
+
size: row[COLUMNS_FEBRUARY_2019.keys.index(:size)].value.to_s,
|
337
|
+
unit: row[COLUMNS_FEBRUARY_2019.keys.index(:unit)].value.to_s
|
338
338
|
}
|
339
339
|
end
|
340
340
|
end
|
@@ -342,21 +342,26 @@ module Oddb2xml
|
|
342
342
|
cleanup_file
|
343
343
|
data
|
344
344
|
end
|
345
|
+
|
345
346
|
private
|
347
|
+
|
346
348
|
def cleanup_file
|
347
|
-
|
348
|
-
|
349
|
+
unless defined?(RSpec)
|
350
|
+
begin
|
351
|
+
File.unlink(@filename) if File.exist?(@filename)
|
349
352
|
rescue Errno::EACCES # Permission Denied on Windows
|
350
|
-
|
353
|
+
end
|
354
|
+
end
|
351
355
|
end
|
352
|
-
|
353
356
|
end
|
357
|
+
|
354
358
|
class MigelExtractor < Extractor
|
355
359
|
def initialize(bin)
|
356
360
|
Oddb2xml.log("MigelExtractor #{io} #{File.size(io)} bytes")
|
357
|
-
book = Spreadsheet.open(io,
|
361
|
+
book = Spreadsheet.open(io, "rb")
|
358
362
|
@sheet = book.worksheet(0)
|
359
363
|
end
|
364
|
+
|
360
365
|
def to_hash
|
361
366
|
data = {}
|
362
367
|
@sheet.each_with_index do |row, i|
|
@@ -366,15 +371,15 @@ module Oddb2xml
|
|
366
371
|
ean13 = row[0]
|
367
372
|
ean13 = phar unless ean13.to_s.length == 13
|
368
373
|
data[ean] = {
|
369
|
-
:
|
370
|
-
:
|
371
|
-
:
|
372
|
-
:
|
373
|
-
:
|
374
|
-
:
|
375
|
-
:
|
376
|
-
:
|
377
|
-
:
|
374
|
+
refdata: true,
|
375
|
+
ean13: ean13,
|
376
|
+
pharmacode: phar,
|
377
|
+
desc_de: row[3],
|
378
|
+
desc_fr: row[4],
|
379
|
+
quantity: row[5], # quantity
|
380
|
+
company_name: row[6],
|
381
|
+
company_ean: row[7],
|
382
|
+
data_origin: "migel"
|
378
383
|
}
|
379
384
|
end
|
380
385
|
data
|
@@ -383,26 +388,26 @@ module Oddb2xml
|
|
383
388
|
|
384
389
|
class SwissmedicInfoExtractor < Extractor
|
385
390
|
def to_hash
|
386
|
-
data = Hash.new{|h,k| h[k] = [] }
|
391
|
+
data = Hash.new { |h, k| h[k] = [] }
|
387
392
|
return data unless @xml.size > 0
|
388
|
-
result = MedicalInformationsContent.parse(@xml.sub(
|
393
|
+
result = MedicalInformationsContent.parse(@xml.sub(STRIP_FOR_SAX_MACHINE, ""), lazy: true)
|
389
394
|
result.medicalInformation.each do |pac|
|
390
395
|
lang = pac.lang.to_s
|
391
|
-
next unless
|
396
|
+
next unless /de|fr/.match?(lang)
|
392
397
|
item = {}
|
393
398
|
item[:refdata] = true
|
394
|
-
item[:data_origin] =
|
395
|
-
item[:name]
|
396
|
-
item[:owner] = (ownr = pac.authHolder) ? ownr :
|
397
|
-
item[:style] =
|
398
|
-
html = Nokogiri::HTML.fragment(pac.content.force_encoding(
|
399
|
+
item[:data_origin] = "swissmedic_info"
|
400
|
+
item[:name] = (name = pac.title) ? name : ""
|
401
|
+
item[:owner] = (ownr = pac.authHolder) ? ownr : ""
|
402
|
+
item[:style] = Nokogiri::HTML.fragment(pac.style).to_html(encoding: "UTF-8")
|
403
|
+
html = Nokogiri::HTML.fragment(pac.content.force_encoding("UTF-8"))
|
399
404
|
item[:paragraph] = html
|
400
|
-
numbers =
|
405
|
+
numbers = /(\d{5})[,\s]*(\d{5})?|(\d{5})[,\s]*(\d{5})?[,\s]*(\d{5})?/.match(html)
|
401
406
|
if numbers
|
402
|
-
|
403
|
-
|
404
|
-
|
405
|
-
|
407
|
+
[$1, $2, $3].compact.each do |n| # plural
|
408
|
+
item[:monid] = n
|
409
|
+
data[lang] << item
|
410
|
+
end
|
406
411
|
end
|
407
412
|
end
|
408
413
|
data
|
@@ -414,27 +419,28 @@ module Oddb2xml
|
|
414
419
|
Oddb2xml.log("EphaExtractor #{str.size} bytes")
|
415
420
|
@io = StringIO.new(str)
|
416
421
|
end
|
422
|
+
|
417
423
|
def to_arry
|
418
424
|
data = []
|
419
425
|
ixno = 0
|
420
426
|
inhalt = @io.read
|
421
427
|
inhalt.split("\n").each do |line|
|
422
428
|
ixno += 1
|
423
|
-
next if /ATC1.*Name1.*ATC2.*Name2/.match(line)
|
424
|
-
#line = '"'+line unless /^"/.match(line)
|
429
|
+
next if /ATC1.*Name1.*ATC2.*Name2/.match?(line)
|
430
|
+
# line = '"'+line unless /^"/.match(line)
|
425
431
|
begin
|
426
|
-
row = CSV.parse_line(line.gsub('""','"'))
|
432
|
+
row = CSV.parse_line(line.gsub('""', '"'))
|
427
433
|
action = {}
|
428
434
|
next unless row.size > 8
|
429
|
-
action[:data_origin] =
|
430
|
-
action[:ixno]
|
431
|
-
action[:title]
|
432
|
-
action[:atc1]
|
433
|
-
action[:atc2]
|
435
|
+
action[:data_origin] = "epha"
|
436
|
+
action[:ixno] = ixno
|
437
|
+
action[:title] = row[4]
|
438
|
+
action[:atc1] = row[0]
|
439
|
+
action[:atc2] = row[2]
|
434
440
|
action[:mechanism] = row[5]
|
435
|
-
action[:effect]
|
436
|
-
action[:measures]
|
437
|
-
action[:grad]
|
441
|
+
action[:effect] = row[6]
|
442
|
+
action[:measures] = row[7]
|
443
|
+
action[:grad] = row[8]
|
438
444
|
data << action
|
439
445
|
rescue CSV::MalformedCSVError
|
440
446
|
puts "CSV::MalformedCSVError in line #{ixno}: #{line}"
|
@@ -443,139 +449,146 @@ module Oddb2xml
|
|
443
449
|
data
|
444
450
|
end
|
445
451
|
end
|
452
|
+
|
446
453
|
class MedregbmExtractor < Extractor
|
447
454
|
def initialize(str, type)
|
448
|
-
@io
|
455
|
+
@io = StringIO.new(str)
|
449
456
|
@type = type
|
450
457
|
end
|
458
|
+
|
451
459
|
def to_arry
|
452
460
|
data = []
|
453
461
|
case @type
|
454
462
|
when :company
|
455
|
-
while line = @io.gets
|
463
|
+
while (line = @io.gets)
|
456
464
|
row = line.chomp.split("\t")
|
457
|
-
next if row[0]
|
465
|
+
next if /^GLN/.match?(row[0])
|
458
466
|
data << {
|
459
|
-
:
|
460
|
-
:
|
461
|
-
:
|
462
|
-
:
|
463
|
-
:
|
464
|
-
:
|
465
|
-
:
|
466
|
-
:
|
467
|
-
:
|
468
|
-
:
|
469
|
-
:
|
470
|
-
:
|
467
|
+
data_origin: "medreg",
|
468
|
+
gln: row[0].to_s.gsub(/[^0-9]/, ""), #=> GLN Betrieb
|
469
|
+
name_1: row[1].to_s, #=> Betriebsname 1
|
470
|
+
name_2: row[2].to_s, #=> Betriebsname 2
|
471
|
+
address: row[3].to_s, #=> Strasse
|
472
|
+
number: row[4].to_s, #=> Nummer
|
473
|
+
post: row[5].to_s, #=> PLZ
|
474
|
+
place: row[6].to_s, #=> Ort
|
475
|
+
region: row[7].to_s, #=> Bewilligungskanton
|
476
|
+
country: row[8].to_s, #=> Land
|
477
|
+
type: row[9].to_s, #=> Betriebstyp
|
478
|
+
authorization: row[10].to_s #=> BTM Berechtigung
|
471
479
|
}
|
472
480
|
end
|
473
481
|
when :person
|
474
|
-
while line = @io.gets
|
482
|
+
while (line = @io.gets)
|
475
483
|
row = line.chomp.split("\t")
|
476
|
-
next if row[0]
|
484
|
+
next if /^GLN/.match?(row[0])
|
477
485
|
data << {
|
478
|
-
:
|
479
|
-
:
|
480
|
-
:
|
481
|
-
:
|
482
|
-
:
|
483
|
-
:
|
484
|
-
:
|
485
|
-
:
|
486
|
-
:
|
487
|
-
:
|
488
|
-
:
|
486
|
+
data_origin: "medreg",
|
487
|
+
gln: row[0].to_s.gsub(/[^0-9]/, ""), #=> GLN Person
|
488
|
+
last_name: row[1].to_s, #=> Name
|
489
|
+
first_name: row[2].to_s, #=> Vorname
|
490
|
+
post: row[3].to_s, #=> PLZ
|
491
|
+
place: row[4].to_s, #=> Ort
|
492
|
+
region: row[5].to_s, #=> Bewilligungskanton
|
493
|
+
country: row[6].to_s, #=> Land
|
494
|
+
license: row[7].to_s, #=> Bewilligung Selbstdispensation
|
495
|
+
certificate: row[8].to_s, #=> Diplom
|
496
|
+
authorization: row[9].to_s #=> BTM Berechtigung
|
489
497
|
}
|
490
498
|
end
|
491
499
|
end
|
492
500
|
data
|
493
501
|
end
|
494
502
|
end
|
503
|
+
|
495
504
|
class ZurroseExtractor < Extractor
|
496
505
|
# see http://dev.ywesee.com/Bbmb/TransferDat
|
497
506
|
def initialize(dat, extended = false, artikelstamm = false)
|
498
507
|
@@extended = extended
|
499
508
|
@artikelstamm = artikelstamm
|
500
|
-
FileUtils.makedirs(
|
501
|
-
@@error_file ||= File.open(File.join(
|
509
|
+
FileUtils.makedirs(WORK_DIR)
|
510
|
+
@@error_file ||= File.open(File.join(WORK_DIR, "duplicate_ean13_from_zur_rose.txt"), "wb+:ISO-8859-14")
|
502
511
|
@@items_without_ean13s ||= 0
|
503
512
|
@@duplicated_ean13s ||= 0
|
504
513
|
@@zur_rose_items ||= 0
|
505
514
|
if dat
|
506
|
-
if File.
|
507
|
-
|
515
|
+
@io = if File.exist?(dat)
|
516
|
+
File.open(dat, "rb:ISO-8859-14")
|
508
517
|
else
|
509
|
-
|
518
|
+
StringIO.new(dat)
|
510
519
|
end
|
511
520
|
@io
|
512
|
-
else
|
513
|
-
nil
|
514
521
|
end
|
515
522
|
end
|
523
|
+
|
516
524
|
def to_hash
|
517
525
|
data = {}
|
518
|
-
|
519
|
-
|
520
|
-
|
521
|
-
|
522
|
-
|
523
|
-
|
524
|
-
|
525
|
-
|
526
|
-
|
527
|
-
|
528
|
-
|
529
|
-
|
530
|
-
|
531
|
-
|
532
|
-
|
533
|
-
|
534
|
-
|
535
|
-
|
536
|
-
|
537
|
-
|
538
|
-
|
539
|
-
|
540
|
-
|
541
|
-
|
526
|
+
if @io
|
527
|
+
while (line = @io.gets)
|
528
|
+
ean13 = "-1"
|
529
|
+
line = Oddb2xml.patch_some_utf8(line).chomp
|
530
|
+
# next unless /(7680\d{9})(\d{1})$/.match(line) # Skip non pharma
|
531
|
+
next if /(ad us\.* vet)|(\(vet\))/i.match?(line)
|
532
|
+
if @@extended
|
533
|
+
next unless (match_data = line.match(/(\d{13})(\d{1})$/))
|
534
|
+
else
|
535
|
+
next unless (match_data = line.match(/(7680\d{9})(\d{1})$/))
|
536
|
+
end
|
537
|
+
pharma_code = line[3..9]
|
538
|
+
if match_data[1].to_s == "0000000000000"
|
539
|
+
@@items_without_ean13s += 1
|
540
|
+
next if @artikelstamm && pharma_code.to_i == 0
|
541
|
+
ean13 = Oddb2xml::FAKE_GTIN_START + pharma_code.to_s unless @artikelstamm
|
542
|
+
else
|
543
|
+
ean13 = match_data[1]
|
544
|
+
end
|
545
|
+
if data[ean13]
|
546
|
+
@@error_file.puts "Duplicate ean13 #{ean13} in line \nact: #{line.chomp}\norg: #{data[ean13][:line]}"
|
547
|
+
@@items_without_ean13s -= 1
|
548
|
+
@@duplicated_ean13s += 1
|
549
|
+
next
|
550
|
+
end
|
542
551
|
|
543
|
-
|
544
|
-
|
545
|
-
|
546
|
-
|
547
|
-
|
548
|
-
|
549
|
-
|
550
|
-
|
551
|
-
|
552
|
-
|
553
|
-
|
554
|
-
|
555
|
-
|
556
|
-
|
557
|
-
|
558
|
-
|
559
|
-
|
560
|
-
|
561
|
-
|
562
|
-
|
563
|
-
|
564
|
-
|
565
|
-
end
|
566
|
-
if defined?(@@extended)
|
552
|
+
pexf = sprintf("%.2f", line[60, 6].gsub(/(\d{2})$/, '.\1').to_f)
|
553
|
+
ppub = sprintf("%.2f", line[66, 6].gsub(/(\d{2})$/, '.\1').to_f)
|
554
|
+
next if @artikelstamm && /^113/.match(line) && ppub.eql?("0.0") && pexf.eql?("0.0")
|
555
|
+
next unless ean13
|
556
|
+
key = ean13
|
557
|
+
key = (Oddb2xml::FAKE_GTIN_START + pharma_code.to_s) if ean13.to_i <= 0 # dummy ean13
|
558
|
+
data[key] = {
|
559
|
+
data_origin: "zur_rose",
|
560
|
+
line: line.chomp,
|
561
|
+
ean13: ean13,
|
562
|
+
clag: line[73],
|
563
|
+
vat: line[96],
|
564
|
+
description: line[10..59].sub(/\s+$/, ""),
|
565
|
+
quantity: "",
|
566
|
+
pharmacode: pharma_code,
|
567
|
+
price: pexf,
|
568
|
+
pub_price: ppub,
|
569
|
+
type: :nonpharma,
|
570
|
+
cmut: line[2]
|
571
|
+
}
|
572
|
+
@@zur_rose_items += 1
|
573
|
+
end
|
574
|
+
end
|
575
|
+
if defined?(@@extended) && @@extended
|
567
576
|
@@error_file.puts get_error_msg
|
568
577
|
end
|
569
578
|
@@error_file.close
|
570
579
|
@@error_file = nil
|
571
580
|
data
|
572
581
|
end
|
573
|
-
|
574
|
-
|
575
|
-
|
576
|
-
|
582
|
+
if defined?(@@extended) && @@extended
|
583
|
+
at_exit do
|
584
|
+
puts get_error_msg
|
585
|
+
end
|
586
|
+
end
|
587
|
+
|
588
|
+
private
|
589
|
+
|
577
590
|
def get_error_msg
|
578
|
-
if defined?(@@extended)
|
591
|
+
if defined?(@@extended) && @@extended
|
579
592
|
msg = "Added #{@@items_without_ean13s} via pharmacodes of #{@@zur_rose_items} items when extracting the transfer.dat from \"Zur Rose\""
|
580
593
|
msg += "\n found #{@@duplicated_ean13s} lines with duplicated ean13" if @@duplicated_ean13s > 0
|
581
594
|
return msg
|