oddb2xml 2.7.1 → 2.7.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.github/workflows/ruby.yml +1 -2
- data/.standard.yml +2 -0
- data/Gemfile +3 -3
- data/History.txt +24 -0
- data/README.md +3 -3
- data/Rakefile +24 -23
- data/bin/check_artikelstamm +11 -11
- data/bin/compare_v5 +23 -23
- data/bin/oddb2xml +14 -13
- data/lib/oddb2xml/builder.rb +1070 -1038
- data/lib/oddb2xml/calc.rb +232 -233
- data/lib/oddb2xml/chapter_70_hack.rb +38 -32
- data/lib/oddb2xml/cli.rb +252 -236
- data/lib/oddb2xml/compare.rb +70 -59
- data/lib/oddb2xml/compositions_syntax.rb +451 -430
- data/lib/oddb2xml/compressor.rb +20 -20
- data/lib/oddb2xml/downloader.rb +157 -129
- data/lib/oddb2xml/extractor.rb +295 -295
- data/lib/oddb2xml/options.rb +34 -35
- data/lib/oddb2xml/parslet_compositions.rb +265 -269
- data/lib/oddb2xml/semantic_check.rb +39 -33
- data/lib/oddb2xml/util.rb +163 -163
- data/lib/oddb2xml/version.rb +1 -1
- data/lib/oddb2xml/xml_definitions.rb +32 -33
- data/lib/oddb2xml.rb +1 -1
- data/oddb2xml.gemspec +34 -34
- data/shell.nix +17 -0
- data/spec/artikelstamm_spec.rb +111 -110
- data/spec/builder_spec.rb +490 -505
- data/spec/calc_spec.rb +552 -593
- data/spec/check_artikelstamm_spec.rb +26 -26
- data/spec/cli_spec.rb +173 -174
- data/spec/compare_spec.rb +9 -11
- data/spec/composition_syntax_spec.rb +390 -409
- data/spec/compressor_spec.rb +48 -48
- data/spec/data/transfer.dat +1 -0
- data/spec/data_helper.rb +47 -49
- data/spec/downloader_spec.rb +251 -260
- data/spec/extractor_spec.rb +171 -159
- data/spec/fixtures/vcr_cassettes/oddb2xml.json +1 -1
- data/spec/galenic_spec.rb +233 -256
- data/spec/options_spec.rb +116 -119
- data/spec/parslet_spec.rb +896 -863
- data/spec/spec_helper.rb +153 -153
- data/test_options.rb +39 -42
- data/tools/win_fetch_cacerts.rb +2 -3
- metadata +42 -12
data/lib/oddb2xml/extractor.rb
CHANGED
@@ -1,11 +1,10 @@
|
|
1
|
-
|
2
|
-
require
|
3
|
-
require
|
4
|
-
require
|
5
|
-
require
|
6
|
-
require
|
7
|
-
require
|
8
|
-
require 'oddb2xml/xml_definitions'
|
1
|
+
require "nokogiri"
|
2
|
+
require "spreadsheet"
|
3
|
+
require "stringio"
|
4
|
+
require "rubyXL"
|
5
|
+
require "rubyXL/convenience_methods/workbook"
|
6
|
+
require "csv"
|
7
|
+
require "oddb2xml/xml_definitions"
|
9
8
|
|
10
9
|
module Oddb2xml
|
11
10
|
module TxtExtractorMethods
|
@@ -13,23 +12,26 @@ module Oddb2xml
|
|
13
12
|
Oddb2xml.log("TxtExtractorMethods #{str} #{str.to_s.size} bytes")
|
14
13
|
@io = StringIO.new(str)
|
15
14
|
end
|
15
|
+
|
16
16
|
def to_hash
|
17
17
|
data = {}
|
18
|
-
while line = @io.gets
|
19
|
-
next unless
|
20
|
-
ean13 = line.chomp.
|
18
|
+
while (line = @io.gets)
|
19
|
+
next unless /\d{13}/.match?(line)
|
20
|
+
ean13 = line.chomp.delete("\"")
|
21
21
|
data[ean13] = true
|
22
22
|
end
|
23
23
|
data
|
24
24
|
end
|
25
25
|
end
|
26
|
+
|
26
27
|
class Extractor
|
27
28
|
attr_accessor :xml
|
28
29
|
def initialize(xml)
|
29
|
-
Oddb2xml.log("Extractor #{xml
|
30
|
+
Oddb2xml.log("Extractor #{xml} xml #{xml.size} bytes")
|
30
31
|
@xml = xml
|
31
32
|
end
|
32
33
|
end
|
34
|
+
|
33
35
|
class LppvExtractor < Extractor
|
34
36
|
include TxtExtractorMethods
|
35
37
|
end
|
@@ -37,156 +39,152 @@ module Oddb2xml
|
|
37
39
|
class BagXmlExtractor < Extractor
|
38
40
|
def to_hash
|
39
41
|
data = {}
|
40
|
-
result = PreparationsEntry.parse(@xml.sub(
|
42
|
+
result = PreparationsEntry.parse(@xml.sub(STRIP_FOR_SAX_MACHINE, ""), lazy: true)
|
41
43
|
result.Preparations.Preparation.each do |seq|
|
42
|
-
if seq.SwissmedicNo5.eql?(
|
44
|
+
if seq.SwissmedicNo5.eql?("0")
|
43
45
|
puts "BagXmlExtractor Skipping SwissmedicNo5 0 for #{seq.NameDe} #{seq.DescriptionDe} #{seq.CommentDe}"
|
44
46
|
next
|
45
47
|
end
|
46
48
|
item = {}
|
47
|
-
item[:data_origin]
|
48
|
-
item[:refdata]
|
49
|
-
item[:product_key]
|
50
|
-
item[:desc_de]
|
51
|
-
item[:desc_fr]
|
52
|
-
item[:name_de]
|
53
|
-
item[:name_fr]
|
54
|
-
item[:swissmedic_number5] = (num5 = seq.SwissmedicNo5) ?
|
55
|
-
item[:org_gen_code] = (orgc = seq.OrgGenCode)
|
56
|
-
item[:deductible]
|
57
|
-
item[:atc_code]
|
58
|
-
item[:comment_de]
|
59
|
-
item[:comment_fr]
|
60
|
-
item[:it_code]
|
49
|
+
item[:data_origin] = "bag_xml"
|
50
|
+
item[:refdata] = true
|
51
|
+
item[:product_key] = seq.ProductCommercial
|
52
|
+
item[:desc_de] = (desc = seq.DescriptionDe) ? desc : ""
|
53
|
+
item[:desc_fr] = (desc = seq.DescriptionFr) ? desc : ""
|
54
|
+
item[:name_de] = (name = seq.NameDe) ? name : ""
|
55
|
+
item[:name_fr] = (name = seq.NameFr) ? name : ""
|
56
|
+
item[:swissmedic_number5] = (num5 = seq.SwissmedicNo5) ? num5.rjust(5, "0") : ""
|
57
|
+
item[:org_gen_code] = (orgc = seq.OrgGenCode) ? orgc : ""
|
58
|
+
item[:deductible] = (ddbl = seq.FlagSB20) ? ddbl : ""
|
59
|
+
item[:atc_code] = (atcc = seq.AtcCode) ? atcc : ""
|
60
|
+
item[:comment_de] = (info = seq.CommentDe) ? info : ""
|
61
|
+
item[:comment_fr] = (info = seq.CommentFr) ? info : ""
|
62
|
+
item[:it_code] = ""
|
61
63
|
seq.ItCodes.ItCode.each do |itc|
|
62
64
|
if item[:it_code].to_s.empty?
|
63
65
|
it_code = itc.Code.to_s
|
64
|
-
item[:it_code] =
|
66
|
+
item[:it_code] = /(\d+)\.(\d+)\.(\d+)./.match?(it_code) ? it_code : ""
|
65
67
|
end
|
66
68
|
end
|
67
69
|
item[:substances] = []
|
68
70
|
seq.Substances.Substance.each_with_index do |sub, i|
|
69
71
|
item[:substances] << {
|
70
|
-
:
|
71
|
-
:
|
72
|
-
:
|
73
|
-
:
|
72
|
+
index: i.to_s,
|
73
|
+
name: (name = sub.DescriptionLa) ? name : "",
|
74
|
+
quantity: (qtty = sub.Quantity) ? qtty : "",
|
75
|
+
unit: (unit = sub.QuantityUnit) ? unit : ""
|
74
76
|
}
|
75
77
|
end
|
76
78
|
item[:pharmacodes] = []
|
77
|
-
item[:packages]
|
79
|
+
item[:packages] = {} # pharmacode => package
|
78
80
|
seq.Packs.Pack.each do |pac|
|
79
81
|
if pac.SwissmedicNo8 && pac.SwissmedicNo8.length < 8
|
80
82
|
puts "BagXmlExtractor: Adding leading zeros for SwissmedicNo8 #{pac.SwissmedicNo8} BagDossierNo #{pac.BagDossierNo} PackId #{pac.PackId} #{item[:name_de]}" if $VERBOSE
|
81
|
-
pac.SwissmedicNo8
|
83
|
+
pac.SwissmedicNo8 = pac.SwissmedicNo8.rjust(8, "0")
|
82
84
|
end
|
83
85
|
unless pac.GTIN
|
84
|
-
|
86
|
+
if pac.SwissmedicNo8
|
87
|
+
ean12 = "7680" + pac.SwissmedicNo8
|
88
|
+
pac.GTIN = (ean12 + Oddb2xml.calc_checksum(ean12)) unless @artikelstamm
|
89
|
+
# puts "BagXmlExtractor: Missing GTIN in SwissmedicNo8 #{pac.SwissmedicNo8} BagDossierNo #{pac.BagDossierNo} PackId #{pac.PackId} #{item[:name_de]}."
|
90
|
+
else
|
85
91
|
puts "BagXmlExtractor: Missing GTIN and SwissmedicNo8 in SwissmedicNo8 #{pac.SwissmedicNo8} BagDossierNo #{pac.BagDossierNo} PackId #{pac.PackId} #{item[:name_de]}"
|
86
92
|
next
|
87
|
-
else
|
88
|
-
ean12 = '7680' + pac.SwissmedicNo8
|
89
|
-
pac.GTIN = (ean12 + Oddb2xml.calc_checksum(ean12)) unless @artikelstamm
|
90
|
-
# puts "BagXmlExtractor: Missing GTIN in SwissmedicNo8 #{pac.SwissmedicNo8} BagDossierNo #{pac.BagDossierNo} PackId #{pac.PackId} #{item[:name_de]}."
|
91
93
|
end
|
92
94
|
end
|
93
95
|
ean13 = pac.GTIN.to_s
|
94
96
|
Oddb2xml.setEan13forNo8(pac.SwissmedicNo8, ean13) if pac.SwissmedicNo8
|
95
97
|
# packages
|
96
|
-
exf = {:
|
97
|
-
if pac
|
98
|
-
exf[:price]
|
99
|
-
exf[:valid_date] =
|
100
|
-
exf[:price_code] =
|
98
|
+
exf = {price: "", valid_date: "", price_code: ""}
|
99
|
+
if pac&.Prices&.ExFactoryPrice
|
100
|
+
exf[:price] = pac.Prices.ExFactoryPrice.Price if pac.Prices.ExFactoryPrice.Price
|
101
|
+
exf[:valid_date] = pac.Prices.ExFactoryPrice.ValidFromDate if pac.Prices.ExFactoryPrice.ValidFromDate
|
102
|
+
exf[:price_code] = pac.Prices.ExFactoryPrice.PriceTypeCode if pac.Prices.ExFactoryPrice.PriceTypeCode
|
101
103
|
end
|
102
|
-
pub = {:
|
103
|
-
if pac
|
104
|
-
pub[:price]
|
105
|
-
pub[:valid_date] =
|
106
|
-
pub[:price_code] =
|
104
|
+
pub = {price: "", valid_date: "", price_code: ""}
|
105
|
+
if pac&.Prices&.PublicPrice
|
106
|
+
pub[:price] = pac.Prices.PublicPrice.Price if pac.Prices.PublicPrice.Price
|
107
|
+
pub[:valid_date] = pac.Prices.PublicPrice.ValidFromDate if pac.Prices.PublicPrice.ValidFromDate
|
108
|
+
pub[:price_code] = pac.Prices.PublicPrice.PriceTypeCode if pac.Prices.PublicPrice.PriceTypeCode
|
107
109
|
end
|
108
110
|
item[:packages][ean13] = {
|
109
|
-
:
|
110
|
-
:
|
111
|
-
:
|
112
|
-
:
|
113
|
-
:
|
114
|
-
:
|
115
|
-
:
|
116
|
-
:
|
117
|
-
:
|
111
|
+
ean13: ean13,
|
112
|
+
name_de: (desc = seq.NameDe) ? desc : "",
|
113
|
+
name_fr: (desc = seq.NameFr) ? desc : "",
|
114
|
+
desc_de: (desc = pac.DescriptionDe) ? desc : "",
|
115
|
+
desc_fr: (desc = pac.DescriptionFr) ? desc : "",
|
116
|
+
sl_entry: true,
|
117
|
+
swissmedic_category: (cat = pac.SwissmedicCategory) ? cat : "",
|
118
|
+
swissmedic_number8: (num = pac.SwissmedicNo8) ? num : "",
|
119
|
+
prices: {exf_price: exf, pub_price: pub}
|
118
120
|
}
|
119
121
|
# related all limitations
|
120
122
|
item[:packages][ean13][:limitations] = []
|
121
|
-
limitations = Hash.new{|h,k| h[k] = [] }
|
122
|
-
if seq.Limitations
|
123
|
-
|
124
|
-
else
|
125
|
-
limitations[:seq] = nil
|
123
|
+
limitations = Hash.new { |h, k| h[k] = [] }
|
124
|
+
limitations[:seq] = if seq.Limitations
|
125
|
+
seq.Limitations.Limitation.collect { |x| x }
|
126
126
|
end
|
127
127
|
# in it-codes
|
128
|
-
if seq
|
128
|
+
if seq&.ItCodes && seq&.ItCodes&.ItCode
|
129
129
|
limitations[:itc] = []
|
130
|
-
seq.ItCodes.ItCode.each { |x|
|
130
|
+
seq.ItCodes.ItCode.each { |x| limitations[:itc] += x.Limitations.Limitation if x.Limitations.Limitation }
|
131
131
|
else
|
132
|
-
limitations[:itc] =nil
|
132
|
+
limitations[:itc] = nil
|
133
133
|
end
|
134
134
|
# in pac
|
135
|
-
if pac
|
136
|
-
|
137
|
-
else
|
138
|
-
limitations[:pac] = nil
|
135
|
+
limitations[:pac] = if pac && pac.Limitations
|
136
|
+
(lims = pac.Limitations.Limitation) ? lims.to_a : nil
|
139
137
|
end
|
140
138
|
limitations.each_pair do |lim_key, lims|
|
141
|
-
key =
|
142
|
-
id
|
139
|
+
key = ""
|
140
|
+
id = ""
|
143
141
|
case lim_key
|
144
142
|
when :seq, :itc
|
145
143
|
key = :swissmedic_number5
|
146
|
-
id
|
144
|
+
id = item[key].to_s
|
147
145
|
when :pac
|
148
146
|
key = :swissmedic_number8
|
149
|
-
id
|
147
|
+
id = item[:packages][ean13][key].to_s
|
150
148
|
end
|
151
|
-
if id.empty? && item[:packages][ean13][
|
149
|
+
if id.empty? && item[:packages][ean13][:swissmedic_number8]
|
152
150
|
key = :swissmedic_number8
|
153
|
-
id
|
151
|
+
id = item[:packages][ean13][key].to_s
|
154
152
|
end
|
155
|
-
lims
|
153
|
+
lims&.each do |lim|
|
156
154
|
limitation = {
|
157
|
-
:
|
158
|
-
:
|
159
|
-
:
|
160
|
-
:
|
161
|
-
:
|
162
|
-
:
|
163
|
-
:
|
164
|
-
:
|
165
|
-
:
|
166
|
-
:
|
155
|
+
it: item[:it_code],
|
156
|
+
key: key,
|
157
|
+
id: id,
|
158
|
+
code: (lic = lim.LimitationCode) ? lic : "",
|
159
|
+
type: (lit = lim.LimitationType) ? lit : "",
|
160
|
+
value: (liv = lim.LimitationValue) ? liv : "",
|
161
|
+
niv: (niv = lim.LimitationNiveau) ? niv : "",
|
162
|
+
desc_de: (dsc = lim.DescriptionDe) ? dsc : "",
|
163
|
+
desc_fr: (dsc = lim.DescriptionFr) ? dsc : "",
|
164
|
+
vdate: (dat = lim.ValidFromDate) ? dat : ""
|
167
165
|
}
|
168
166
|
deleted = false
|
169
|
-
if upto = ((thr = lim.ValidThruDate) ? thr : nil)
|
170
|
-
upto =~ /\d{2}\.\d{2}\.\d{2}/
|
167
|
+
if (upto = ((thr = lim.ValidThruDate) ? thr : nil)) &&
|
168
|
+
upto =~ (/\d{2}\.\d{2}\.\d{2}/)
|
171
169
|
begin
|
172
|
-
deleted = true if Date.strptime(upto,
|
170
|
+
deleted = true if Date.strptime(upto, "%d.%m.%y") >= Date.today
|
173
171
|
rescue ArgumentError
|
174
172
|
end
|
175
173
|
end
|
176
174
|
limitation[:del] = deleted
|
177
175
|
item[:packages][ean13][:limitations] << limitation
|
178
|
-
end
|
176
|
+
end
|
179
177
|
end
|
180
178
|
# limitation points
|
181
179
|
pts = pac.PointLimitations.PointLimitation.first # only first points
|
182
|
-
item[:packages][ean13][:limitation_points] = pts ? pts.Points :
|
180
|
+
item[:packages][ean13][:limitation_points] = pts ? pts.Points : ""
|
183
181
|
if pac.SwissmedicNo8
|
184
|
-
|
185
|
-
|
186
|
-
|
187
|
-
|
188
|
-
|
189
|
-
|
182
|
+
ean12 = "7680" + pac.SwissmedicNo8
|
183
|
+
correct_ean13 = ean12 + Oddb2xml.calc_checksum(ean12)
|
184
|
+
unless pac.GTIN.eql?(correct_ean13)
|
185
|
+
puts "pac.GTIN #{pac.GTIN} should be #{correct_ean13}"
|
186
|
+
item[:packages][ean13][:CORRECT_EAN13] = correct_ean13
|
187
|
+
end
|
190
188
|
end
|
191
189
|
data[ean13] = item
|
192
190
|
end
|
@@ -197,58 +195,60 @@ module Oddb2xml
|
|
197
195
|
|
198
196
|
class RefdataExtractor < Extractor
|
199
197
|
def initialize(xml, type)
|
200
|
-
@type = (type == :pharma ?
|
198
|
+
@type = (type == :pharma ? "PHARMA" : "NONPHARMA")
|
201
199
|
super(xml)
|
202
200
|
end
|
201
|
+
|
203
202
|
def to_hash
|
204
203
|
data = {}
|
205
|
-
result = SwissRegArticleEntry.parse(@xml.sub(
|
204
|
+
result = SwissRegArticleEntry.parse(@xml.sub(STRIP_FOR_SAX_MACHINE, ""), lazy: true)
|
206
205
|
items = result.ARTICLE.ITEM
|
207
206
|
items.each do |pac|
|
208
|
-
ean13 = (gtin = pac.GTIN.to_s) ? gtin:
|
207
|
+
ean13 = (gtin = pac.GTIN.to_s) ? gtin : "0"
|
209
208
|
if ean13.size < 13
|
210
209
|
puts "Refdata #{@type} use 13 chars not #{ean13.size} for #{ean13}" if $VERBOSE
|
211
|
-
ean13 = ean13.rjust(13,
|
210
|
+
ean13 = ean13.rjust(13, "0")
|
212
211
|
end
|
213
|
-
if ean13.size == 14 && ean13[0] ==
|
212
|
+
if ean13.size == 14 && ean13[0] == "0"
|
214
213
|
puts "Refdata #{@type} remove leading '0' for #{ean13}" if $VERBOSE
|
215
214
|
ean13 = ean13[1..-1]
|
216
215
|
end
|
217
216
|
# but in refdata_nonPharma we have a about 700 GTINs which are 14 characters and longer
|
218
217
|
item = {}
|
219
|
-
item[:ean13]
|
220
|
-
item[:no8]
|
221
|
-
item[:data_origin]
|
222
|
-
item[:refdata]
|
223
|
-
item[:_type]
|
224
|
-
item[:last_change]
|
225
|
-
item[:desc_de]
|
226
|
-
item[:desc_fr]
|
227
|
-
item[:atc_code]
|
228
|
-
item[:company_name] = (nam = pac.AUTH_HOLDER_NAME) ? nam:
|
229
|
-
item[:company_ean]
|
218
|
+
item[:ean13] = ean13
|
219
|
+
item[:no8] = pac.SWMC_AUTHNR
|
220
|
+
item[:data_origin] = "refdata"
|
221
|
+
item[:refdata] = true
|
222
|
+
item[:_type] = (typ = pac.ATYPE.downcase.to_sym) ? typ : ""
|
223
|
+
item[:last_change] = (date = Time.parse(pac.DT).to_s) ? date : "" # Date and time of last data change
|
224
|
+
item[:desc_de] = (dscr = pac.NAME_DE) ? dscr : ""
|
225
|
+
item[:desc_fr] = (dscr = pac.NAME_FR) ? dscr : ""
|
226
|
+
item[:atc_code] = (code = pac.ATC) ? code.to_s : ""
|
227
|
+
item[:company_name] = (nam = pac.AUTH_HOLDER_NAME) ? nam : ""
|
228
|
+
item[:company_ean] = (gln = pac.AUTH_HOLDER_GLN) ? gln : ""
|
230
229
|
data[item[:ean13]] = item
|
231
230
|
end
|
232
231
|
data
|
233
232
|
end
|
234
233
|
end
|
234
|
+
|
235
235
|
class SwissmedicExtractor < Extractor
|
236
236
|
def initialize(filename, type)
|
237
|
-
@filename = File.join(
|
238
|
-
@filename = File.join(SpecData, File.basename(filename)) if defined?(RSpec)
|
239
|
-
@type
|
237
|
+
@filename = File.join(DOWNLOADS, File.basename(filename))
|
238
|
+
@filename = File.join(SpecData, File.basename(filename)) if defined?(RSpec) && !File.exist?(@filename)
|
239
|
+
@type = type
|
240
240
|
Oddb2xml.log("SwissmedicExtractor #{@filename} #{File.size(@filename)} bytes")
|
241
|
-
return unless File.
|
241
|
+
return unless File.exist?(@filename)
|
242
242
|
@sheet = RubyXL::Parser.parse(File.expand_path(@filename)).worksheets[0]
|
243
243
|
end
|
244
|
+
|
244
245
|
def to_arry
|
245
246
|
data = []
|
246
247
|
return data unless @sheet
|
247
248
|
case @type
|
248
249
|
when :orphan
|
249
|
-
i = 1
|
250
250
|
col_zulassung = 6
|
251
|
-
raise "Could not find Zulassungsnummer in column #{col_zulassung} of #{@filename}" unless /Zulassungs.*nummer/.match(@sheet[3][col_zulassung].value)
|
251
|
+
raise "Could not find Zulassungsnummer in column #{col_zulassung} of #{@filename}" unless /Zulassungs.*nummer/.match?(@sheet[3][col_zulassung].value)
|
252
252
|
@sheet.each do |row|
|
253
253
|
next unless row[col_zulassung]
|
254
254
|
number = row[col_zulassung].value.to_i
|
@@ -257,29 +257,29 @@ module Oddb2xml
|
|
257
257
|
end
|
258
258
|
end
|
259
259
|
end
|
260
|
-
cleanup_file
|
261
260
|
# puts "found #{data.uniq.size} entities for type #{@type}"
|
262
261
|
data.uniq
|
263
262
|
end
|
264
263
|
|
265
|
-
|
264
|
+
# Packungen.xlsx COLUMNS_FEBRUARY_2019
|
265
|
+
def to_hash
|
266
266
|
data = {}
|
267
267
|
return data unless @sheet
|
268
268
|
case @type
|
269
269
|
when :package
|
270
270
|
Oddb2xml.check_column_indices(@sheet)
|
271
|
-
ith
|
272
|
-
iksnr
|
273
|
-
seq_name
|
274
|
-
i_3
|
275
|
-
seqnr
|
276
|
-
cat
|
277
|
-
siz
|
278
|
-
atc
|
271
|
+
ith = COLUMNS_FEBRUARY_2019.keys.index(:index_therapeuticus)
|
272
|
+
iksnr = COLUMNS_FEBRUARY_2019.keys.index(:iksnr)
|
273
|
+
seq_name = COLUMNS_FEBRUARY_2019.keys.index(:name_base)
|
274
|
+
i_3 = COLUMNS_FEBRUARY_2019.keys.index(:ikscd)
|
275
|
+
seqnr = COLUMNS_FEBRUARY_2019.keys.index(:seqnr)
|
276
|
+
cat = COLUMNS_FEBRUARY_2019.keys.index(:ikscat)
|
277
|
+
siz = COLUMNS_FEBRUARY_2019.keys.index(:size)
|
278
|
+
atc = COLUMNS_FEBRUARY_2019.keys.index(:atc_class)
|
279
279
|
list_code = COLUMNS_FEBRUARY_2019.keys.index(:production_science)
|
280
|
-
eht
|
281
|
-
sub
|
282
|
-
comp
|
280
|
+
eht = COLUMNS_FEBRUARY_2019.keys.index(:unit)
|
281
|
+
sub = COLUMNS_FEBRUARY_2019.keys.index(:substances)
|
282
|
+
comp = COLUMNS_FEBRUARY_2019.keys.index(:composition)
|
283
283
|
|
284
284
|
# production_science Heilmittelcode, possible values are
|
285
285
|
# Allergene
|
@@ -299,64 +299,56 @@ module Oddb2xml
|
|
299
299
|
# Tierarzneimittel
|
300
300
|
# Transplantat: Gewebeprodukt
|
301
301
|
@sheet.each_with_index do |row, i|
|
302
|
-
|
303
|
-
next
|
304
|
-
next unless row
|
305
|
-
|
306
|
-
no8 = sprintf('%05d',row[iksnr].value.to_i) + sprintf('%03d',row[i_3].value.to_i)
|
302
|
+
next if i <= 1
|
303
|
+
next unless row && row[iksnr] && row[i_3]
|
304
|
+
next unless (row[iksnr].value.to_i > 0) && (row[i_3].value.to_i > 0)
|
305
|
+
no8 = sprintf("%05d", row[iksnr].value.to_i) + sprintf("%03d", row[i_3].value.to_i)
|
307
306
|
unless no8.empty?
|
308
307
|
next if no8.to_i == 0
|
309
308
|
ean_base12 = "7680#{no8}"
|
310
|
-
prodno =
|
311
|
-
ean13 = (ean_base12.ljust(12,
|
309
|
+
prodno = Oddb2xml.gen_prodno(row[iksnr].value.to_i, row[seqnr].value.to_i)
|
310
|
+
ean13 = (ean_base12.ljust(12, "0") + Oddb2xml.calc_checksum(ean_base12))
|
312
311
|
Oddb2xml.setEan13forProdno(prodno, ean13)
|
313
312
|
Oddb2xml.setEan13forNo8(no8, ean13)
|
314
313
|
data[no8] = {
|
315
|
-
:
|
316
|
-
:
|
317
|
-
:
|
318
|
-
:
|
319
|
-
:
|
320
|
-
:
|
321
|
-
:
|
322
|
-
:
|
323
|
-
:
|
324
|
-
:
|
325
|
-
:
|
326
|
-
:
|
327
|
-
:
|
328
|
-
:
|
329
|
-
:
|
330
|
-
:
|
331
|
-
:
|
332
|
-
:
|
333
|
-
:
|
334
|
-
:
|
335
|
-
:
|
336
|
-
:
|
337
|
-
:
|
314
|
+
iksnr: row[iksnr].value.to_i,
|
315
|
+
no8: no8,
|
316
|
+
ean13: ean13,
|
317
|
+
prodno: prodno,
|
318
|
+
seqnr: row[seqnr].value,
|
319
|
+
ith_swissmedic: row[ith] ? row[ith].value.to_s : "",
|
320
|
+
swissmedic_category: row[cat].value.to_s,
|
321
|
+
atc_code: row[atc] ? Oddb2xml.add_epha_changes_for_ATC(row[iksnr].value.to_s, row[atc].value.to_s) : "",
|
322
|
+
list_code: row[list_code] ? row[list_code].value.to_s : "",
|
323
|
+
package_size: row[siz] ? row[siz].value.to_s : "",
|
324
|
+
einheit_swissmedic: row[eht] ? row[eht].value.to_s : "",
|
325
|
+
substance_swissmedic: row[sub] ? row[sub].value.to_s : "",
|
326
|
+
composition_swissmedic: row[comp] ? row[comp].value.to_s : "",
|
327
|
+
sequence_name: row[seq_name] ? row[seq_name].value.to_s : "",
|
328
|
+
is_tier: (row[list_code] == "Tierarzneimittel"),
|
329
|
+
gen_production: row[COLUMNS_FEBRUARY_2019.keys.index(:gen_production)].value.to_s,
|
330
|
+
insulin_category: row[COLUMNS_FEBRUARY_2019.keys.index(:insulin_category)].value.to_s,
|
331
|
+
drug_index: row[COLUMNS_FEBRUARY_2019.keys.index(:drug_index)].value.to_s,
|
332
|
+
data_origin: "swissmedic_package",
|
333
|
+
expiry_date: row[COLUMNS_FEBRUARY_2019.keys.index(:expiry_date)].value.to_s,
|
334
|
+
company_name: row[COLUMNS_FEBRUARY_2019.keys.index(:company)].value.to_s,
|
335
|
+
size: row[COLUMNS_FEBRUARY_2019.keys.index(:size)].value.to_s,
|
336
|
+
unit: row[COLUMNS_FEBRUARY_2019.keys.index(:unit)].value.to_s
|
338
337
|
}
|
339
338
|
end
|
340
339
|
end
|
341
340
|
end
|
342
|
-
cleanup_file
|
343
341
|
data
|
344
342
|
end
|
345
|
-
private
|
346
|
-
def cleanup_file
|
347
|
-
begin
|
348
|
-
File.unlink(@filename) if File.exists?(@filename)
|
349
|
-
rescue Errno::EACCES # Permission Denied on Windows
|
350
|
-
end unless defined?(RSpec)
|
351
|
-
end
|
352
|
-
|
353
343
|
end
|
344
|
+
|
354
345
|
class MigelExtractor < Extractor
|
355
346
|
def initialize(bin)
|
356
347
|
Oddb2xml.log("MigelExtractor #{io} #{File.size(io)} bytes")
|
357
|
-
book = Spreadsheet.open(io,
|
348
|
+
book = Spreadsheet.open(io, "rb")
|
358
349
|
@sheet = book.worksheet(0)
|
359
350
|
end
|
351
|
+
|
360
352
|
def to_hash
|
361
353
|
data = {}
|
362
354
|
@sheet.each_with_index do |row, i|
|
@@ -366,15 +358,15 @@ module Oddb2xml
|
|
366
358
|
ean13 = row[0]
|
367
359
|
ean13 = phar unless ean13.to_s.length == 13
|
368
360
|
data[ean] = {
|
369
|
-
:
|
370
|
-
:
|
371
|
-
:
|
372
|
-
:
|
373
|
-
:
|
374
|
-
:
|
375
|
-
:
|
376
|
-
:
|
377
|
-
:
|
361
|
+
refdata: true,
|
362
|
+
ean13: ean13,
|
363
|
+
pharmacode: phar,
|
364
|
+
desc_de: row[3],
|
365
|
+
desc_fr: row[4],
|
366
|
+
quantity: row[5], # quantity
|
367
|
+
company_name: row[6],
|
368
|
+
company_ean: row[7],
|
369
|
+
data_origin: "migel"
|
378
370
|
}
|
379
371
|
end
|
380
372
|
data
|
@@ -383,26 +375,26 @@ module Oddb2xml
|
|
383
375
|
|
384
376
|
class SwissmedicInfoExtractor < Extractor
|
385
377
|
def to_hash
|
386
|
-
data = Hash.new{|h,k| h[k] = [] }
|
378
|
+
data = Hash.new { |h, k| h[k] = [] }
|
387
379
|
return data unless @xml.size > 0
|
388
|
-
result = MedicalInformationsContent.parse(@xml.sub(
|
380
|
+
result = MedicalInformationsContent.parse(@xml.sub(STRIP_FOR_SAX_MACHINE, ""), lazy: true)
|
389
381
|
result.medicalInformation.each do |pac|
|
390
382
|
lang = pac.lang.to_s
|
391
|
-
next unless
|
383
|
+
next unless /de|fr/.match?(lang)
|
392
384
|
item = {}
|
393
385
|
item[:refdata] = true
|
394
|
-
item[:data_origin] =
|
395
|
-
item[:name]
|
396
|
-
item[:owner] = (ownr = pac.authHolder) ? ownr :
|
397
|
-
item[:style] =
|
398
|
-
html = Nokogiri::HTML.fragment(pac.content.force_encoding(
|
386
|
+
item[:data_origin] = "swissmedic_info"
|
387
|
+
item[:name] = (name = pac.title) ? name : ""
|
388
|
+
item[:owner] = (ownr = pac.authHolder) ? ownr : ""
|
389
|
+
item[:style] = Nokogiri::HTML.fragment(pac.style).to_html(encoding: "UTF-8")
|
390
|
+
html = Nokogiri::HTML.fragment(pac.content.force_encoding("UTF-8"))
|
399
391
|
item[:paragraph] = html
|
400
|
-
numbers =
|
392
|
+
numbers = /(\d{5})[,\s]*(\d{5})?|(\d{5})[,\s]*(\d{5})?[,\s]*(\d{5})?/.match(html)
|
401
393
|
if numbers
|
402
|
-
|
403
|
-
|
404
|
-
|
405
|
-
|
394
|
+
[$1, $2, $3].compact.each do |n| # plural
|
395
|
+
item[:monid] = n
|
396
|
+
data[lang] << item
|
397
|
+
end
|
406
398
|
end
|
407
399
|
end
|
408
400
|
data
|
@@ -414,27 +406,28 @@ module Oddb2xml
|
|
414
406
|
Oddb2xml.log("EphaExtractor #{str.size} bytes")
|
415
407
|
@io = StringIO.new(str)
|
416
408
|
end
|
409
|
+
|
417
410
|
def to_arry
|
418
411
|
data = []
|
419
412
|
ixno = 0
|
420
413
|
inhalt = @io.read
|
421
414
|
inhalt.split("\n").each do |line|
|
422
415
|
ixno += 1
|
423
|
-
next if /ATC1.*Name1.*ATC2.*Name2/.match(line)
|
424
|
-
#line = '"'+line unless /^"/.match(line)
|
416
|
+
next if /ATC1.*Name1.*ATC2.*Name2/.match?(line)
|
417
|
+
# line = '"'+line unless /^"/.match(line)
|
425
418
|
begin
|
426
|
-
row = CSV.parse_line(line.gsub('""','"'))
|
419
|
+
row = CSV.parse_line(line.gsub('""', '"'))
|
427
420
|
action = {}
|
428
421
|
next unless row.size > 8
|
429
|
-
action[:data_origin] =
|
430
|
-
action[:ixno]
|
431
|
-
action[:title]
|
432
|
-
action[:atc1]
|
433
|
-
action[:atc2]
|
422
|
+
action[:data_origin] = "epha"
|
423
|
+
action[:ixno] = ixno
|
424
|
+
action[:title] = row[4]
|
425
|
+
action[:atc1] = row[0]
|
426
|
+
action[:atc2] = row[2]
|
434
427
|
action[:mechanism] = row[5]
|
435
|
-
action[:effect]
|
436
|
-
action[:measures]
|
437
|
-
action[:grad]
|
428
|
+
action[:effect] = row[6]
|
429
|
+
action[:measures] = row[7]
|
430
|
+
action[:grad] = row[8]
|
438
431
|
data << action
|
439
432
|
rescue CSV::MalformedCSVError
|
440
433
|
puts "CSV::MalformedCSVError in line #{ixno}: #{line}"
|
@@ -443,139 +436,146 @@ module Oddb2xml
|
|
443
436
|
data
|
444
437
|
end
|
445
438
|
end
|
439
|
+
|
446
440
|
class MedregbmExtractor < Extractor
|
447
441
|
def initialize(str, type)
|
448
|
-
@io
|
442
|
+
@io = StringIO.new(str)
|
449
443
|
@type = type
|
450
444
|
end
|
445
|
+
|
451
446
|
def to_arry
|
452
447
|
data = []
|
453
448
|
case @type
|
454
449
|
when :company
|
455
|
-
while line = @io.gets
|
450
|
+
while (line = @io.gets)
|
456
451
|
row = line.chomp.split("\t")
|
457
|
-
next if row[0]
|
452
|
+
next if /^GLN/.match?(row[0])
|
458
453
|
data << {
|
459
|
-
:
|
460
|
-
:
|
461
|
-
:
|
462
|
-
:
|
463
|
-
:
|
464
|
-
:
|
465
|
-
:
|
466
|
-
:
|
467
|
-
:
|
468
|
-
:
|
469
|
-
:
|
470
|
-
:
|
454
|
+
data_origin: "medreg",
|
455
|
+
gln: row[0].to_s.gsub(/[^0-9]/, ""), #=> GLN Betrieb
|
456
|
+
name_1: row[1].to_s, #=> Betriebsname 1
|
457
|
+
name_2: row[2].to_s, #=> Betriebsname 2
|
458
|
+
address: row[3].to_s, #=> Strasse
|
459
|
+
number: row[4].to_s, #=> Nummer
|
460
|
+
post: row[5].to_s, #=> PLZ
|
461
|
+
place: row[6].to_s, #=> Ort
|
462
|
+
region: row[7].to_s, #=> Bewilligungskanton
|
463
|
+
country: row[8].to_s, #=> Land
|
464
|
+
type: row[9].to_s, #=> Betriebstyp
|
465
|
+
authorization: row[10].to_s #=> BTM Berechtigung
|
471
466
|
}
|
472
467
|
end
|
473
468
|
when :person
|
474
|
-
while line = @io.gets
|
469
|
+
while (line = @io.gets)
|
475
470
|
row = line.chomp.split("\t")
|
476
|
-
next if row[0]
|
471
|
+
next if /^GLN/.match?(row[0])
|
477
472
|
data << {
|
478
|
-
:
|
479
|
-
:
|
480
|
-
:
|
481
|
-
:
|
482
|
-
:
|
483
|
-
:
|
484
|
-
:
|
485
|
-
:
|
486
|
-
:
|
487
|
-
:
|
488
|
-
:
|
473
|
+
data_origin: "medreg",
|
474
|
+
gln: row[0].to_s.gsub(/[^0-9]/, ""), #=> GLN Person
|
475
|
+
last_name: row[1].to_s, #=> Name
|
476
|
+
first_name: row[2].to_s, #=> Vorname
|
477
|
+
post: row[3].to_s, #=> PLZ
|
478
|
+
place: row[4].to_s, #=> Ort
|
479
|
+
region: row[5].to_s, #=> Bewilligungskanton
|
480
|
+
country: row[6].to_s, #=> Land
|
481
|
+
license: row[7].to_s, #=> Bewilligung Selbstdispensation
|
482
|
+
certificate: row[8].to_s, #=> Diplom
|
483
|
+
authorization: row[9].to_s #=> BTM Berechtigung
|
489
484
|
}
|
490
485
|
end
|
491
486
|
end
|
492
487
|
data
|
493
488
|
end
|
494
489
|
end
|
490
|
+
|
495
491
|
class ZurroseExtractor < Extractor
|
496
492
|
# see http://dev.ywesee.com/Bbmb/TransferDat
|
497
493
|
def initialize(dat, extended = false, artikelstamm = false)
|
498
494
|
@@extended = extended
|
499
495
|
@artikelstamm = artikelstamm
|
500
|
-
FileUtils.makedirs(
|
501
|
-
@@error_file ||= File.open(File.join(
|
496
|
+
FileUtils.makedirs(WORK_DIR)
|
497
|
+
@@error_file ||= File.open(File.join(WORK_DIR, "duplicate_ean13_from_zur_rose.txt"), "wb+:ISO-8859-14")
|
502
498
|
@@items_without_ean13s ||= 0
|
503
499
|
@@duplicated_ean13s ||= 0
|
504
500
|
@@zur_rose_items ||= 0
|
505
501
|
if dat
|
506
|
-
if File.
|
507
|
-
|
502
|
+
@io = if File.exist?(dat)
|
503
|
+
File.open(dat, "rb:ISO-8859-14")
|
508
504
|
else
|
509
|
-
|
505
|
+
StringIO.new(dat)
|
510
506
|
end
|
511
507
|
@io
|
512
|
-
else
|
513
|
-
nil
|
514
508
|
end
|
515
509
|
end
|
510
|
+
|
516
511
|
def to_hash
|
517
512
|
data = {}
|
518
|
-
|
519
|
-
|
520
|
-
|
521
|
-
|
522
|
-
|
523
|
-
|
524
|
-
|
525
|
-
|
526
|
-
|
527
|
-
|
528
|
-
|
529
|
-
|
530
|
-
|
531
|
-
|
532
|
-
|
533
|
-
|
534
|
-
|
535
|
-
|
536
|
-
|
537
|
-
|
538
|
-
|
539
|
-
|
540
|
-
|
541
|
-
|
513
|
+
if @io
|
514
|
+
while (line = @io.gets)
|
515
|
+
ean13 = "-1"
|
516
|
+
line = Oddb2xml.patch_some_utf8(line).chomp
|
517
|
+
# next unless /(7680\d{9})(\d{1})$/.match(line) # Skip non pharma
|
518
|
+
next if /(ad us\.* vet)|(\(vet\))/i.match?(line)
|
519
|
+
if @@extended
|
520
|
+
next unless (match_data = line.match(/(\d{13})(\d{1})$/))
|
521
|
+
else
|
522
|
+
next unless (match_data = line.match(/(7680\d{9})(\d{1})$/))
|
523
|
+
end
|
524
|
+
pharma_code = line[3..9]
|
525
|
+
if match_data[1].to_s == "0000000000000"
|
526
|
+
@@items_without_ean13s += 1
|
527
|
+
next if @artikelstamm && pharma_code.to_i == 0
|
528
|
+
ean13 = Oddb2xml::FAKE_GTIN_START + pharma_code.to_s unless @artikelstamm
|
529
|
+
else
|
530
|
+
ean13 = match_data[1]
|
531
|
+
end
|
532
|
+
if data[ean13]
|
533
|
+
@@error_file.puts "Duplicate ean13 #{ean13} in line \nact: #{line.chomp}\norg: #{data[ean13][:line]}"
|
534
|
+
@@items_without_ean13s -= 1
|
535
|
+
@@duplicated_ean13s += 1
|
536
|
+
next
|
537
|
+
end
|
542
538
|
|
543
|
-
|
544
|
-
|
545
|
-
|
546
|
-
|
547
|
-
|
548
|
-
|
549
|
-
|
550
|
-
|
551
|
-
|
552
|
-
|
553
|
-
|
554
|
-
|
555
|
-
|
556
|
-
|
557
|
-
|
558
|
-
|
559
|
-
|
560
|
-
|
561
|
-
|
562
|
-
|
563
|
-
|
564
|
-
|
565
|
-
end
|
566
|
-
if defined?(@@extended)
|
539
|
+
pexf = sprintf("%.2f", line[60, 6].gsub(/(\d{2})$/, '.\1').to_f)
|
540
|
+
ppub = sprintf("%.2f", line[66, 6].gsub(/(\d{2})$/, '.\1').to_f)
|
541
|
+
next if @artikelstamm && /^113/.match(line) && ppub.eql?("0.0") && pexf.eql?("0.0")
|
542
|
+
next unless ean13
|
543
|
+
key = ean13
|
544
|
+
key = (Oddb2xml::FAKE_GTIN_START + pharma_code.to_s) if ean13.to_i <= 0 # dummy ean13
|
545
|
+
data[key] = {
|
546
|
+
data_origin: "zur_rose",
|
547
|
+
line: line.chomp,
|
548
|
+
ean13: ean13,
|
549
|
+
clag: line[73],
|
550
|
+
vat: line[96],
|
551
|
+
description: line[10..59].sub(/\s+$/, ""),
|
552
|
+
quantity: "",
|
553
|
+
pharmacode: pharma_code,
|
554
|
+
price: pexf,
|
555
|
+
pub_price: ppub,
|
556
|
+
type: :nonpharma,
|
557
|
+
cmut: line[2]
|
558
|
+
}
|
559
|
+
@@zur_rose_items += 1
|
560
|
+
end
|
561
|
+
end
|
562
|
+
if defined?(@@extended) && @@extended
|
567
563
|
@@error_file.puts get_error_msg
|
568
564
|
end
|
569
565
|
@@error_file.close
|
570
566
|
@@error_file = nil
|
571
567
|
data
|
572
568
|
end
|
573
|
-
|
574
|
-
|
575
|
-
|
576
|
-
|
569
|
+
if defined?(@@extended) && @@extended
|
570
|
+
at_exit do
|
571
|
+
puts get_error_msg
|
572
|
+
end
|
573
|
+
end
|
574
|
+
|
575
|
+
private
|
576
|
+
|
577
577
|
def get_error_msg
|
578
|
-
if defined?(@@extended)
|
578
|
+
if defined?(@@extended) && @@extended
|
579
579
|
msg = "Added #{@@items_without_ean13s} via pharmacodes of #{@@zur_rose_items} items when extracting the transfer.dat from \"Zur Rose\""
|
580
580
|
msg += "\n found #{@@duplicated_ean13s} lines with duplicated ean13" if @@duplicated_ean13s > 0
|
581
581
|
return msg
|