oddb2xml 2.6.9 → 2.7.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +5 -5
- data/.github/workflows/ruby.yml +40 -0
- data/.standard.yml +2 -0
- data/Elexis_Artikelstamm_v5.xsd +0 -3
- data/Gemfile +3 -3
- data/History.txt +28 -0
- data/README.md +3 -3
- data/Rakefile +24 -23
- data/bin/check_artikelstamm +11 -11
- data/bin/compare_v5 +23 -23
- data/bin/oddb2xml +14 -13
- data/lib/oddb2xml.rb +1 -1
- data/lib/oddb2xml/builder.rb +1075 -1048
- data/lib/oddb2xml/calc.rb +232 -233
- data/lib/oddb2xml/chapter_70_hack.rb +38 -32
- data/lib/oddb2xml/cli.rb +252 -235
- data/lib/oddb2xml/compare.rb +70 -59
- data/lib/oddb2xml/compositions_syntax.rb +448 -430
- data/lib/oddb2xml/compressor.rb +20 -20
- data/lib/oddb2xml/downloader.rb +156 -128
- data/lib/oddb2xml/extractor.rb +295 -302
- data/lib/oddb2xml/options.rb +34 -35
- data/lib/oddb2xml/parslet_compositions.rb +263 -269
- data/lib/oddb2xml/semantic_check.rb +39 -33
- data/lib/oddb2xml/util.rb +166 -164
- data/lib/oddb2xml/version.rb +1 -1
- data/lib/oddb2xml/xml_definitions.rb +32 -33
- data/oddb2xml.gemspec +32 -31
- data/spec/artikelstamm_spec.rb +116 -135
- data/spec/builder_spec.rb +495 -524
- data/spec/calc_spec.rb +552 -593
- data/spec/check_artikelstamm_spec.rb +26 -26
- data/spec/cli_spec.rb +173 -174
- data/spec/compare_spec.rb +9 -11
- data/spec/composition_syntax_spec.rb +390 -409
- data/spec/compressor_spec.rb +48 -48
- data/spec/data/refdata_NonPharma.xml +0 -3
- data/spec/data/refdata_Pharma.xml +0 -26
- data/spec/data/transfer.dat +1 -0
- data/spec/data/varia_De.htm +2 -2
- data/spec/data_helper.rb +47 -49
- data/spec/downloader_spec.rb +251 -260
- data/spec/extractor_spec.rb +172 -164
- data/spec/galenic_spec.rb +233 -256
- data/spec/options_spec.rb +116 -119
- data/spec/parslet_spec.rb +833 -861
- data/spec/spec_helper.rb +153 -153
- data/test_options.rb +39 -42
- data/tools/win_fetch_cacerts.rb +2 -3
- metadata +48 -5
- data/.travis.yml +0 -29
data/lib/oddb2xml/extractor.rb
CHANGED
@@ -1,11 +1,10 @@
|
|
1
|
-
|
2
|
-
require
|
3
|
-
require
|
4
|
-
require
|
5
|
-
require
|
6
|
-
require
|
7
|
-
require
|
8
|
-
require 'oddb2xml/xml_definitions'
|
1
|
+
require "nokogiri"
|
2
|
+
require "spreadsheet"
|
3
|
+
require "stringio"
|
4
|
+
require "rubyXL"
|
5
|
+
require "rubyXL/convenience_methods/workbook"
|
6
|
+
require "csv"
|
7
|
+
require "oddb2xml/xml_definitions"
|
9
8
|
|
10
9
|
module Oddb2xml
|
11
10
|
module TxtExtractorMethods
|
@@ -13,23 +12,26 @@ module Oddb2xml
|
|
13
12
|
Oddb2xml.log("TxtExtractorMethods #{str} #{str.to_s.size} bytes")
|
14
13
|
@io = StringIO.new(str)
|
15
14
|
end
|
15
|
+
|
16
16
|
def to_hash
|
17
17
|
data = {}
|
18
|
-
while line = @io.gets
|
19
|
-
next unless
|
20
|
-
ean13 = line.chomp.
|
18
|
+
while (line = @io.gets)
|
19
|
+
next unless /\d{13}/.match?(line)
|
20
|
+
ean13 = line.chomp.delete("\"")
|
21
21
|
data[ean13] = true
|
22
22
|
end
|
23
23
|
data
|
24
24
|
end
|
25
25
|
end
|
26
|
+
|
26
27
|
class Extractor
|
27
28
|
attr_accessor :xml
|
28
29
|
def initialize(xml)
|
29
|
-
Oddb2xml.log("Extractor #{xml
|
30
|
+
Oddb2xml.log("Extractor #{xml} xml #{xml.size} bytes")
|
30
31
|
@xml = xml
|
31
32
|
end
|
32
33
|
end
|
34
|
+
|
33
35
|
class LppvExtractor < Extractor
|
34
36
|
include TxtExtractorMethods
|
35
37
|
end
|
@@ -37,156 +39,152 @@ module Oddb2xml
|
|
37
39
|
class BagXmlExtractor < Extractor
|
38
40
|
def to_hash
|
39
41
|
data = {}
|
40
|
-
result = PreparationsEntry.parse(@xml.sub(
|
42
|
+
result = PreparationsEntry.parse(@xml.sub(STRIP_FOR_SAX_MACHINE, ""), lazy: true)
|
41
43
|
result.Preparations.Preparation.each do |seq|
|
42
|
-
if seq.SwissmedicNo5.eql?(
|
44
|
+
if seq.SwissmedicNo5.eql?("0")
|
43
45
|
puts "BagXmlExtractor Skipping SwissmedicNo5 0 for #{seq.NameDe} #{seq.DescriptionDe} #{seq.CommentDe}"
|
44
46
|
next
|
45
47
|
end
|
46
48
|
item = {}
|
47
|
-
item[:data_origin]
|
48
|
-
item[:refdata]
|
49
|
-
item[:product_key]
|
50
|
-
item[:desc_de]
|
51
|
-
item[:desc_fr]
|
52
|
-
item[:name_de]
|
53
|
-
item[:name_fr]
|
54
|
-
item[:swissmedic_number5] = (num5 = seq.SwissmedicNo5) ?
|
55
|
-
item[:org_gen_code] = (orgc = seq.OrgGenCode)
|
56
|
-
item[:deductible]
|
57
|
-
item[:atc_code]
|
58
|
-
item[:comment_de]
|
59
|
-
item[:comment_fr]
|
60
|
-
item[:it_code]
|
49
|
+
item[:data_origin] = "bag_xml"
|
50
|
+
item[:refdata] = true
|
51
|
+
item[:product_key] = seq.ProductCommercial
|
52
|
+
item[:desc_de] = (desc = seq.DescriptionDe) ? desc : ""
|
53
|
+
item[:desc_fr] = (desc = seq.DescriptionFr) ? desc : ""
|
54
|
+
item[:name_de] = (name = seq.NameDe) ? name : ""
|
55
|
+
item[:name_fr] = (name = seq.NameFr) ? name : ""
|
56
|
+
item[:swissmedic_number5] = (num5 = seq.SwissmedicNo5) ? num5.rjust(5, "0") : ""
|
57
|
+
item[:org_gen_code] = (orgc = seq.OrgGenCode) ? orgc : ""
|
58
|
+
item[:deductible] = (ddbl = seq.FlagSB20) ? ddbl : ""
|
59
|
+
item[:atc_code] = (atcc = seq.AtcCode) ? atcc : ""
|
60
|
+
item[:comment_de] = (info = seq.CommentDe) ? info : ""
|
61
|
+
item[:comment_fr] = (info = seq.CommentFr) ? info : ""
|
62
|
+
item[:it_code] = ""
|
61
63
|
seq.ItCodes.ItCode.each do |itc|
|
62
64
|
if item[:it_code].to_s.empty?
|
63
65
|
it_code = itc.Code.to_s
|
64
|
-
item[:it_code] =
|
66
|
+
item[:it_code] = /(\d+)\.(\d+)\.(\d+)./.match?(it_code) ? it_code : ""
|
65
67
|
end
|
66
68
|
end
|
67
69
|
item[:substances] = []
|
68
70
|
seq.Substances.Substance.each_with_index do |sub, i|
|
69
71
|
item[:substances] << {
|
70
|
-
:
|
71
|
-
:
|
72
|
-
:
|
73
|
-
:
|
72
|
+
index: i.to_s,
|
73
|
+
name: (name = sub.DescriptionLa) ? name : "",
|
74
|
+
quantity: (qtty = sub.Quantity) ? qtty : "",
|
75
|
+
unit: (unit = sub.QuantityUnit) ? unit : ""
|
74
76
|
}
|
75
77
|
end
|
76
78
|
item[:pharmacodes] = []
|
77
|
-
item[:packages]
|
79
|
+
item[:packages] = {} # pharmacode => package
|
78
80
|
seq.Packs.Pack.each do |pac|
|
79
81
|
if pac.SwissmedicNo8 && pac.SwissmedicNo8.length < 8
|
80
82
|
puts "BagXmlExtractor: Adding leading zeros for SwissmedicNo8 #{pac.SwissmedicNo8} BagDossierNo #{pac.BagDossierNo} PackId #{pac.PackId} #{item[:name_de]}" if $VERBOSE
|
81
|
-
pac.SwissmedicNo8
|
83
|
+
pac.SwissmedicNo8 = pac.SwissmedicNo8.rjust(8, "0")
|
82
84
|
end
|
83
85
|
unless pac.GTIN
|
84
|
-
|
86
|
+
if pac.SwissmedicNo8
|
87
|
+
ean12 = "7680" + pac.SwissmedicNo8
|
88
|
+
pac.GTIN = (ean12 + Oddb2xml.calc_checksum(ean12)) unless @artikelstamm
|
89
|
+
# puts "BagXmlExtractor: Missing GTIN in SwissmedicNo8 #{pac.SwissmedicNo8} BagDossierNo #{pac.BagDossierNo} PackId #{pac.PackId} #{item[:name_de]}."
|
90
|
+
else
|
85
91
|
puts "BagXmlExtractor: Missing GTIN and SwissmedicNo8 in SwissmedicNo8 #{pac.SwissmedicNo8} BagDossierNo #{pac.BagDossierNo} PackId #{pac.PackId} #{item[:name_de]}"
|
86
92
|
next
|
87
|
-
else
|
88
|
-
ean12 = '7680' + pac.SwissmedicNo8
|
89
|
-
pac.GTIN = (ean12 + Oddb2xml.calc_checksum(ean12)) unless @artikelstamm
|
90
|
-
# puts "BagXmlExtractor: Missing GTIN in SwissmedicNo8 #{pac.SwissmedicNo8} BagDossierNo #{pac.BagDossierNo} PackId #{pac.PackId} #{item[:name_de]}."
|
91
93
|
end
|
92
94
|
end
|
93
95
|
ean13 = pac.GTIN.to_s
|
94
96
|
Oddb2xml.setEan13forNo8(pac.SwissmedicNo8, ean13) if pac.SwissmedicNo8
|
95
97
|
# packages
|
96
|
-
exf = {:
|
97
|
-
if pac
|
98
|
-
exf[:price]
|
99
|
-
exf[:valid_date] =
|
100
|
-
exf[:price_code] =
|
98
|
+
exf = {price: "", valid_date: "", price_code: ""}
|
99
|
+
if pac&.Prices&.ExFactoryPrice
|
100
|
+
exf[:price] = pac.Prices.ExFactoryPrice.Price if pac.Prices.ExFactoryPrice.Price
|
101
|
+
exf[:valid_date] = pac.Prices.ExFactoryPrice.ValidFromDate if pac.Prices.ExFactoryPrice.ValidFromDate
|
102
|
+
exf[:price_code] = pac.Prices.ExFactoryPrice.PriceTypeCode if pac.Prices.ExFactoryPrice.PriceTypeCode
|
101
103
|
end
|
102
|
-
pub = {:
|
103
|
-
if pac
|
104
|
-
pub[:price]
|
105
|
-
pub[:valid_date] =
|
106
|
-
pub[:price_code] =
|
104
|
+
pub = {price: "", valid_date: "", price_code: ""}
|
105
|
+
if pac&.Prices&.PublicPrice
|
106
|
+
pub[:price] = pac.Prices.PublicPrice.Price if pac.Prices.PublicPrice.Price
|
107
|
+
pub[:valid_date] = pac.Prices.PublicPrice.ValidFromDate if pac.Prices.PublicPrice.ValidFromDate
|
108
|
+
pub[:price_code] = pac.Prices.PublicPrice.PriceTypeCode if pac.Prices.PublicPrice.PriceTypeCode
|
107
109
|
end
|
108
110
|
item[:packages][ean13] = {
|
109
|
-
:
|
110
|
-
:
|
111
|
-
:
|
112
|
-
:
|
113
|
-
:
|
114
|
-
:
|
115
|
-
:
|
116
|
-
:
|
117
|
-
:
|
111
|
+
ean13: ean13,
|
112
|
+
name_de: (desc = seq.NameDe) ? desc : "",
|
113
|
+
name_fr: (desc = seq.NameFr) ? desc : "",
|
114
|
+
desc_de: (desc = pac.DescriptionDe) ? desc : "",
|
115
|
+
desc_fr: (desc = pac.DescriptionFr) ? desc : "",
|
116
|
+
sl_entry: true,
|
117
|
+
swissmedic_category: (cat = pac.SwissmedicCategory) ? cat : "",
|
118
|
+
swissmedic_number8: (num = pac.SwissmedicNo8) ? num : "",
|
119
|
+
prices: {exf_price: exf, pub_price: pub}
|
118
120
|
}
|
119
121
|
# related all limitations
|
120
122
|
item[:packages][ean13][:limitations] = []
|
121
|
-
limitations = Hash.new{|h,k| h[k] = [] }
|
122
|
-
if seq.Limitations
|
123
|
-
|
124
|
-
else
|
125
|
-
limitations[:seq] = nil
|
123
|
+
limitations = Hash.new { |h, k| h[k] = [] }
|
124
|
+
limitations[:seq] = if seq.Limitations
|
125
|
+
seq.Limitations.Limitation.collect { |x| x }
|
126
126
|
end
|
127
127
|
# in it-codes
|
128
|
-
if seq
|
128
|
+
if seq&.ItCodes && seq&.ItCodes&.ItCode
|
129
129
|
limitations[:itc] = []
|
130
|
-
seq.ItCodes.ItCode.each { |x|
|
130
|
+
seq.ItCodes.ItCode.each { |x| limitations[:itc] += x.Limitations.Limitation if x.Limitations.Limitation }
|
131
131
|
else
|
132
|
-
limitations[:itc] =nil
|
132
|
+
limitations[:itc] = nil
|
133
133
|
end
|
134
134
|
# in pac
|
135
|
-
if pac
|
136
|
-
|
137
|
-
else
|
138
|
-
limitations[:pac] = nil
|
135
|
+
limitations[:pac] = if pac && pac.Limitations
|
136
|
+
(lims = pac.Limitations.Limitation) ? lims.to_a : nil
|
139
137
|
end
|
140
138
|
limitations.each_pair do |lim_key, lims|
|
141
|
-
key =
|
142
|
-
id
|
139
|
+
key = ""
|
140
|
+
id = ""
|
143
141
|
case lim_key
|
144
142
|
when :seq, :itc
|
145
143
|
key = :swissmedic_number5
|
146
|
-
id
|
144
|
+
id = item[key].to_s
|
147
145
|
when :pac
|
148
146
|
key = :swissmedic_number8
|
149
|
-
id
|
147
|
+
id = item[:packages][ean13][key].to_s
|
150
148
|
end
|
151
|
-
if id.empty? && item[:packages][ean13][
|
149
|
+
if id.empty? && item[:packages][ean13][:swissmedic_number8]
|
152
150
|
key = :swissmedic_number8
|
153
|
-
id
|
151
|
+
id = item[:packages][ean13][key].to_s
|
154
152
|
end
|
155
|
-
lims
|
153
|
+
lims&.each do |lim|
|
156
154
|
limitation = {
|
157
|
-
:
|
158
|
-
:
|
159
|
-
:
|
160
|
-
:
|
161
|
-
:
|
162
|
-
:
|
163
|
-
:
|
164
|
-
:
|
165
|
-
:
|
166
|
-
:
|
155
|
+
it: item[:it_code],
|
156
|
+
key: key,
|
157
|
+
id: id,
|
158
|
+
code: (lic = lim.LimitationCode) ? lic : "",
|
159
|
+
type: (lit = lim.LimitationType) ? lit : "",
|
160
|
+
value: (liv = lim.LimitationValue) ? liv : "",
|
161
|
+
niv: (niv = lim.LimitationNiveau) ? niv : "",
|
162
|
+
desc_de: (dsc = lim.DescriptionDe) ? dsc : "",
|
163
|
+
desc_fr: (dsc = lim.DescriptionFr) ? dsc : "",
|
164
|
+
vdate: (dat = lim.ValidFromDate) ? dat : ""
|
167
165
|
}
|
168
166
|
deleted = false
|
169
|
-
if upto = ((thr = lim.ValidThruDate) ? thr : nil)
|
170
|
-
upto =~ /\d{2}\.\d{2}\.\d{2}/
|
167
|
+
if (upto = ((thr = lim.ValidThruDate) ? thr : nil)) &&
|
168
|
+
upto =~ (/\d{2}\.\d{2}\.\d{2}/)
|
171
169
|
begin
|
172
|
-
deleted = true if Date.strptime(upto,
|
170
|
+
deleted = true if Date.strptime(upto, "%d.%m.%y") >= Date.today
|
173
171
|
rescue ArgumentError
|
174
172
|
end
|
175
173
|
end
|
176
174
|
limitation[:del] = deleted
|
177
175
|
item[:packages][ean13][:limitations] << limitation
|
178
|
-
end
|
176
|
+
end
|
179
177
|
end
|
180
178
|
# limitation points
|
181
179
|
pts = pac.PointLimitations.PointLimitation.first # only first points
|
182
|
-
item[:packages][ean13][:limitation_points] = pts ? pts.Points :
|
180
|
+
item[:packages][ean13][:limitation_points] = pts ? pts.Points : ""
|
183
181
|
if pac.SwissmedicNo8
|
184
|
-
|
185
|
-
|
186
|
-
|
187
|
-
|
188
|
-
|
189
|
-
|
182
|
+
ean12 = "7680" + pac.SwissmedicNo8
|
183
|
+
correct_ean13 = ean12 + Oddb2xml.calc_checksum(ean12)
|
184
|
+
unless pac.GTIN.eql?(correct_ean13)
|
185
|
+
puts "pac.GTIN #{pac.GTIN} should be #{correct_ean13}"
|
186
|
+
item[:packages][ean13][:CORRECT_EAN13] = correct_ean13
|
187
|
+
end
|
190
188
|
end
|
191
189
|
data[ean13] = item
|
192
190
|
end
|
@@ -197,65 +195,60 @@ module Oddb2xml
|
|
197
195
|
|
198
196
|
class RefdataExtractor < Extractor
|
199
197
|
def initialize(xml, type)
|
200
|
-
@type = (type == :pharma ?
|
198
|
+
@type = (type == :pharma ? "PHARMA" : "NONPHARMA")
|
201
199
|
super(xml)
|
202
200
|
end
|
201
|
+
|
203
202
|
def to_hash
|
204
203
|
data = {}
|
205
|
-
result = SwissRegArticleEntry.parse(@xml.sub(
|
204
|
+
result = SwissRegArticleEntry.parse(@xml.sub(STRIP_FOR_SAX_MACHINE, ""), lazy: true)
|
206
205
|
items = result.ARTICLE.ITEM
|
207
206
|
items.each do |pac|
|
208
|
-
ean13 = (gtin = pac.GTIN.to_s) ? gtin:
|
207
|
+
ean13 = (gtin = pac.GTIN.to_s) ? gtin : "0"
|
209
208
|
if ean13.size < 13
|
210
209
|
puts "Refdata #{@type} use 13 chars not #{ean13.size} for #{ean13}" if $VERBOSE
|
211
|
-
ean13 = ean13.rjust(13,
|
210
|
+
ean13 = ean13.rjust(13, "0")
|
212
211
|
end
|
213
|
-
if ean13.size == 14 && ean13[0] ==
|
212
|
+
if ean13.size == 14 && ean13[0] == "0"
|
214
213
|
puts "Refdata #{@type} remove leading '0' for #{ean13}" if $VERBOSE
|
215
214
|
ean13 = ean13[1..-1]
|
216
215
|
end
|
217
216
|
# but in refdata_nonPharma we have a about 700 GTINs which are 14 characters and longer
|
218
217
|
item = {}
|
219
|
-
item[:ean13]
|
220
|
-
item[:no8]
|
221
|
-
item[:
|
222
|
-
item[:
|
223
|
-
item[:
|
224
|
-
item[:
|
225
|
-
item[:
|
226
|
-
item[:
|
227
|
-
item[:
|
228
|
-
item[:
|
229
|
-
item[:
|
230
|
-
item[:company_ean] = (gln = pac.AUTH_HOLDER_GLN) ? gln: ''
|
231
|
-
unless item[:pharmacode]
|
232
|
-
item[:pharmacode] = phar
|
233
|
-
unless data[item[:pharmacode]] # pharmacode => GTINs
|
234
|
-
data[item[:ean13]] = []
|
235
|
-
end
|
236
|
-
end
|
218
|
+
item[:ean13] = ean13
|
219
|
+
item[:no8] = pac.SWMC_AUTHNR
|
220
|
+
item[:data_origin] = "refdata"
|
221
|
+
item[:refdata] = true
|
222
|
+
item[:_type] = (typ = pac.ATYPE.downcase.to_sym) ? typ : ""
|
223
|
+
item[:last_change] = (date = Time.parse(pac.DT).to_s) ? date : "" # Date and time of last data change
|
224
|
+
item[:desc_de] = (dscr = pac.NAME_DE) ? dscr : ""
|
225
|
+
item[:desc_fr] = (dscr = pac.NAME_FR) ? dscr : ""
|
226
|
+
item[:atc_code] = (code = pac.ATC) ? code.to_s : ""
|
227
|
+
item[:company_name] = (nam = pac.AUTH_HOLDER_NAME) ? nam : ""
|
228
|
+
item[:company_ean] = (gln = pac.AUTH_HOLDER_GLN) ? gln : ""
|
237
229
|
data[item[:ean13]] = item
|
238
230
|
end
|
239
231
|
data
|
240
232
|
end
|
241
233
|
end
|
234
|
+
|
242
235
|
class SwissmedicExtractor < Extractor
|
243
236
|
def initialize(filename, type)
|
244
|
-
@filename = File.join(
|
245
|
-
@filename = File.join(SpecData, File.basename(filename)) if defined?(RSpec)
|
246
|
-
@type
|
237
|
+
@filename = File.join(DOWNLOADS, File.basename(filename))
|
238
|
+
@filename = File.join(SpecData, File.basename(filename)) if defined?(RSpec) && !File.exist?(@filename)
|
239
|
+
@type = type
|
247
240
|
Oddb2xml.log("SwissmedicExtractor #{@filename} #{File.size(@filename)} bytes")
|
248
|
-
return unless File.
|
241
|
+
return unless File.exist?(@filename)
|
249
242
|
@sheet = RubyXL::Parser.parse(File.expand_path(@filename)).worksheets[0]
|
250
243
|
end
|
244
|
+
|
251
245
|
def to_arry
|
252
246
|
data = []
|
253
247
|
return data unless @sheet
|
254
248
|
case @type
|
255
249
|
when :orphan
|
256
|
-
i = 1
|
257
250
|
col_zulassung = 6
|
258
|
-
raise "Could not find Zulassungsnummer in column #{col_zulassung} of #{@filename}" unless /Zulassungs.*nummer/.match(@sheet[3][col_zulassung].value)
|
251
|
+
raise "Could not find Zulassungsnummer in column #{col_zulassung} of #{@filename}" unless /Zulassungs.*nummer/.match?(@sheet[3][col_zulassung].value)
|
259
252
|
@sheet.each do |row|
|
260
253
|
next unless row[col_zulassung]
|
261
254
|
number = row[col_zulassung].value.to_i
|
@@ -264,29 +257,29 @@ module Oddb2xml
|
|
264
257
|
end
|
265
258
|
end
|
266
259
|
end
|
267
|
-
cleanup_file
|
268
260
|
# puts "found #{data.uniq.size} entities for type #{@type}"
|
269
261
|
data.uniq
|
270
262
|
end
|
271
263
|
|
272
|
-
|
264
|
+
# Packungen.xlsx COLUMNS_FEBRUARY_2019
|
265
|
+
def to_hash
|
273
266
|
data = {}
|
274
267
|
return data unless @sheet
|
275
268
|
case @type
|
276
269
|
when :package
|
277
270
|
Oddb2xml.check_column_indices(@sheet)
|
278
|
-
ith
|
279
|
-
iksnr
|
280
|
-
seq_name
|
281
|
-
i_3
|
282
|
-
seqnr
|
283
|
-
cat
|
284
|
-
siz
|
285
|
-
atc
|
271
|
+
ith = COLUMNS_FEBRUARY_2019.keys.index(:index_therapeuticus)
|
272
|
+
iksnr = COLUMNS_FEBRUARY_2019.keys.index(:iksnr)
|
273
|
+
seq_name = COLUMNS_FEBRUARY_2019.keys.index(:name_base)
|
274
|
+
i_3 = COLUMNS_FEBRUARY_2019.keys.index(:ikscd)
|
275
|
+
seqnr = COLUMNS_FEBRUARY_2019.keys.index(:seqnr)
|
276
|
+
cat = COLUMNS_FEBRUARY_2019.keys.index(:ikscat)
|
277
|
+
siz = COLUMNS_FEBRUARY_2019.keys.index(:size)
|
278
|
+
atc = COLUMNS_FEBRUARY_2019.keys.index(:atc_class)
|
286
279
|
list_code = COLUMNS_FEBRUARY_2019.keys.index(:production_science)
|
287
|
-
eht
|
288
|
-
sub
|
289
|
-
comp
|
280
|
+
eht = COLUMNS_FEBRUARY_2019.keys.index(:unit)
|
281
|
+
sub = COLUMNS_FEBRUARY_2019.keys.index(:substances)
|
282
|
+
comp = COLUMNS_FEBRUARY_2019.keys.index(:composition)
|
290
283
|
|
291
284
|
# production_science Heilmittelcode, possible values are
|
292
285
|
# Allergene
|
@@ -306,64 +299,56 @@ module Oddb2xml
|
|
306
299
|
# Tierarzneimittel
|
307
300
|
# Transplantat: Gewebeprodukt
|
308
301
|
@sheet.each_with_index do |row, i|
|
309
|
-
|
310
|
-
next
|
311
|
-
next unless row
|
312
|
-
|
313
|
-
no8 = sprintf('%05d',row[iksnr].value.to_i) + sprintf('%03d',row[i_3].value.to_i)
|
302
|
+
next if i <= 1
|
303
|
+
next unless row && row[iksnr] && row[i_3]
|
304
|
+
next unless (row[iksnr].value.to_i > 0) && (row[i_3].value.to_i > 0)
|
305
|
+
no8 = sprintf("%05d", row[iksnr].value.to_i) + sprintf("%03d", row[i_3].value.to_i)
|
314
306
|
unless no8.empty?
|
315
307
|
next if no8.to_i == 0
|
316
308
|
ean_base12 = "7680#{no8}"
|
317
|
-
prodno =
|
318
|
-
ean13 = (ean_base12.ljust(12,
|
309
|
+
prodno = Oddb2xml.gen_prodno(row[iksnr].value.to_i, row[seqnr].value.to_i)
|
310
|
+
ean13 = (ean_base12.ljust(12, "0") + Oddb2xml.calc_checksum(ean_base12))
|
319
311
|
Oddb2xml.setEan13forProdno(prodno, ean13)
|
320
312
|
Oddb2xml.setEan13forNo8(no8, ean13)
|
321
313
|
data[no8] = {
|
322
|
-
:
|
323
|
-
:
|
324
|
-
:
|
325
|
-
:
|
326
|
-
:
|
327
|
-
:
|
328
|
-
:
|
329
|
-
:
|
330
|
-
:
|
331
|
-
:
|
332
|
-
:
|
333
|
-
:
|
334
|
-
:
|
335
|
-
:
|
336
|
-
:
|
337
|
-
:
|
338
|
-
:
|
339
|
-
:
|
340
|
-
:
|
341
|
-
:
|
342
|
-
:
|
343
|
-
:
|
344
|
-
:
|
314
|
+
iksnr: row[iksnr].value.to_i,
|
315
|
+
no8: no8,
|
316
|
+
ean13: ean13,
|
317
|
+
prodno: prodno,
|
318
|
+
seqnr: row[seqnr].value,
|
319
|
+
ith_swissmedic: row[ith] ? row[ith].value.to_s : "",
|
320
|
+
swissmedic_category: row[cat].value.to_s,
|
321
|
+
atc_code: row[atc] ? Oddb2xml.add_epha_changes_for_ATC(row[iksnr].value.to_s, row[atc].value.to_s) : "",
|
322
|
+
list_code: row[list_code] ? row[list_code].value.to_s : "",
|
323
|
+
package_size: row[siz] ? row[siz].value.to_s : "",
|
324
|
+
einheit_swissmedic: row[eht] ? row[eht].value.to_s : "",
|
325
|
+
substance_swissmedic: row[sub] ? row[sub].value.to_s : "",
|
326
|
+
composition_swissmedic: row[comp] ? row[comp].value.to_s : "",
|
327
|
+
sequence_name: row[seq_name] ? row[seq_name].value.to_s : "",
|
328
|
+
is_tier: (row[list_code] == "Tierarzneimittel"),
|
329
|
+
gen_production: row[COLUMNS_FEBRUARY_2019.keys.index(:gen_production)].value.to_s,
|
330
|
+
insulin_category: row[COLUMNS_FEBRUARY_2019.keys.index(:insulin_category)].value.to_s,
|
331
|
+
drug_index: row[COLUMNS_FEBRUARY_2019.keys.index(:drug_index)].value.to_s,
|
332
|
+
data_origin: "swissmedic_package",
|
333
|
+
expiry_date: row[COLUMNS_FEBRUARY_2019.keys.index(:expiry_date)].value.to_s,
|
334
|
+
company_name: row[COLUMNS_FEBRUARY_2019.keys.index(:company)].value.to_s,
|
335
|
+
size: row[COLUMNS_FEBRUARY_2019.keys.index(:size)].value.to_s,
|
336
|
+
unit: row[COLUMNS_FEBRUARY_2019.keys.index(:unit)].value.to_s
|
345
337
|
}
|
346
338
|
end
|
347
339
|
end
|
348
340
|
end
|
349
|
-
cleanup_file
|
350
341
|
data
|
351
342
|
end
|
352
|
-
private
|
353
|
-
def cleanup_file
|
354
|
-
begin
|
355
|
-
File.unlink(@filename) if File.exists?(@filename)
|
356
|
-
rescue Errno::EACCES # Permission Denied on Windows
|
357
|
-
end unless defined?(RSpec)
|
358
|
-
end
|
359
|
-
|
360
343
|
end
|
344
|
+
|
361
345
|
class MigelExtractor < Extractor
|
362
346
|
def initialize(bin)
|
363
347
|
Oddb2xml.log("MigelExtractor #{io} #{File.size(io)} bytes")
|
364
|
-
book = Spreadsheet.open(io,
|
348
|
+
book = Spreadsheet.open(io, "rb")
|
365
349
|
@sheet = book.worksheet(0)
|
366
350
|
end
|
351
|
+
|
367
352
|
def to_hash
|
368
353
|
data = {}
|
369
354
|
@sheet.each_with_index do |row, i|
|
@@ -373,15 +358,15 @@ module Oddb2xml
|
|
373
358
|
ean13 = row[0]
|
374
359
|
ean13 = phar unless ean13.to_s.length == 13
|
375
360
|
data[ean] = {
|
376
|
-
:
|
377
|
-
:
|
378
|
-
:
|
379
|
-
:
|
380
|
-
:
|
381
|
-
:
|
382
|
-
:
|
383
|
-
:
|
384
|
-
:
|
361
|
+
refdata: true,
|
362
|
+
ean13: ean13,
|
363
|
+
pharmacode: phar,
|
364
|
+
desc_de: row[3],
|
365
|
+
desc_fr: row[4],
|
366
|
+
quantity: row[5], # quantity
|
367
|
+
company_name: row[6],
|
368
|
+
company_ean: row[7],
|
369
|
+
data_origin: "migel"
|
385
370
|
}
|
386
371
|
end
|
387
372
|
data
|
@@ -390,26 +375,26 @@ module Oddb2xml
|
|
390
375
|
|
391
376
|
class SwissmedicInfoExtractor < Extractor
|
392
377
|
def to_hash
|
393
|
-
data = Hash.new{|h,k| h[k] = [] }
|
378
|
+
data = Hash.new { |h, k| h[k] = [] }
|
394
379
|
return data unless @xml.size > 0
|
395
|
-
result = MedicalInformationsContent.parse(@xml.sub(
|
380
|
+
result = MedicalInformationsContent.parse(@xml.sub(STRIP_FOR_SAX_MACHINE, ""), lazy: true)
|
396
381
|
result.medicalInformation.each do |pac|
|
397
382
|
lang = pac.lang.to_s
|
398
|
-
next unless
|
383
|
+
next unless /de|fr/.match?(lang)
|
399
384
|
item = {}
|
400
385
|
item[:refdata] = true
|
401
|
-
item[:data_origin] =
|
402
|
-
item[:name]
|
403
|
-
item[:owner] = (ownr = pac.authHolder) ? ownr :
|
404
|
-
item[:style] =
|
405
|
-
html = Nokogiri::HTML.fragment(pac.content.force_encoding(
|
386
|
+
item[:data_origin] = "swissmedic_info"
|
387
|
+
item[:name] = (name = pac.title) ? name : ""
|
388
|
+
item[:owner] = (ownr = pac.authHolder) ? ownr : ""
|
389
|
+
item[:style] = Nokogiri::HTML.fragment(pac.style).to_html(encoding: "UTF-8")
|
390
|
+
html = Nokogiri::HTML.fragment(pac.content.force_encoding("UTF-8"))
|
406
391
|
item[:paragraph] = html
|
407
|
-
numbers =
|
392
|
+
numbers = /(\d{5})[,\s]*(\d{5})?|(\d{5})[,\s]*(\d{5})?[,\s]*(\d{5})?/.match(html)
|
408
393
|
if numbers
|
409
|
-
|
410
|
-
|
411
|
-
|
412
|
-
|
394
|
+
[$1, $2, $3].compact.each do |n| # plural
|
395
|
+
item[:monid] = n
|
396
|
+
data[lang] << item
|
397
|
+
end
|
413
398
|
end
|
414
399
|
end
|
415
400
|
data
|
@@ -421,27 +406,28 @@ module Oddb2xml
|
|
421
406
|
Oddb2xml.log("EphaExtractor #{str.size} bytes")
|
422
407
|
@io = StringIO.new(str)
|
423
408
|
end
|
409
|
+
|
424
410
|
def to_arry
|
425
411
|
data = []
|
426
412
|
ixno = 0
|
427
413
|
inhalt = @io.read
|
428
414
|
inhalt.split("\n").each do |line|
|
429
415
|
ixno += 1
|
430
|
-
next if /ATC1.*Name1.*ATC2.*Name2/.match(line)
|
431
|
-
#line = '"'+line unless /^"/.match(line)
|
416
|
+
next if /ATC1.*Name1.*ATC2.*Name2/.match?(line)
|
417
|
+
# line = '"'+line unless /^"/.match(line)
|
432
418
|
begin
|
433
|
-
row = CSV.parse_line(line.gsub('""','"'))
|
419
|
+
row = CSV.parse_line(line.gsub('""', '"'))
|
434
420
|
action = {}
|
435
421
|
next unless row.size > 8
|
436
|
-
action[:data_origin] =
|
437
|
-
action[:ixno]
|
438
|
-
action[:title]
|
439
|
-
action[:atc1]
|
440
|
-
action[:atc2]
|
422
|
+
action[:data_origin] = "epha"
|
423
|
+
action[:ixno] = ixno
|
424
|
+
action[:title] = row[4]
|
425
|
+
action[:atc1] = row[0]
|
426
|
+
action[:atc2] = row[2]
|
441
427
|
action[:mechanism] = row[5]
|
442
|
-
action[:effect]
|
443
|
-
action[:measures]
|
444
|
-
action[:grad]
|
428
|
+
action[:effect] = row[6]
|
429
|
+
action[:measures] = row[7]
|
430
|
+
action[:grad] = row[8]
|
445
431
|
data << action
|
446
432
|
rescue CSV::MalformedCSVError
|
447
433
|
puts "CSV::MalformedCSVError in line #{ixno}: #{line}"
|
@@ -450,139 +436,146 @@ module Oddb2xml
|
|
450
436
|
data
|
451
437
|
end
|
452
438
|
end
|
439
|
+
|
453
440
|
class MedregbmExtractor < Extractor
|
454
441
|
def initialize(str, type)
|
455
|
-
@io
|
442
|
+
@io = StringIO.new(str)
|
456
443
|
@type = type
|
457
444
|
end
|
445
|
+
|
458
446
|
def to_arry
|
459
447
|
data = []
|
460
448
|
case @type
|
461
449
|
when :company
|
462
|
-
while line = @io.gets
|
450
|
+
while (line = @io.gets)
|
463
451
|
row = line.chomp.split("\t")
|
464
|
-
next if row[0]
|
452
|
+
next if /^GLN/.match?(row[0])
|
465
453
|
data << {
|
466
|
-
:
|
467
|
-
:
|
468
|
-
:
|
469
|
-
:
|
470
|
-
:
|
471
|
-
:
|
472
|
-
:
|
473
|
-
:
|
474
|
-
:
|
475
|
-
:
|
476
|
-
:
|
477
|
-
:
|
454
|
+
data_origin: "medreg",
|
455
|
+
gln: row[0].to_s.gsub(/[^0-9]/, ""), #=> GLN Betrieb
|
456
|
+
name_1: row[1].to_s, #=> Betriebsname 1
|
457
|
+
name_2: row[2].to_s, #=> Betriebsname 2
|
458
|
+
address: row[3].to_s, #=> Strasse
|
459
|
+
number: row[4].to_s, #=> Nummer
|
460
|
+
post: row[5].to_s, #=> PLZ
|
461
|
+
place: row[6].to_s, #=> Ort
|
462
|
+
region: row[7].to_s, #=> Bewilligungskanton
|
463
|
+
country: row[8].to_s, #=> Land
|
464
|
+
type: row[9].to_s, #=> Betriebstyp
|
465
|
+
authorization: row[10].to_s #=> BTM Berechtigung
|
478
466
|
}
|
479
467
|
end
|
480
468
|
when :person
|
481
|
-
while line = @io.gets
|
469
|
+
while (line = @io.gets)
|
482
470
|
row = line.chomp.split("\t")
|
483
|
-
next if row[0]
|
471
|
+
next if /^GLN/.match?(row[0])
|
484
472
|
data << {
|
485
|
-
:
|
486
|
-
:
|
487
|
-
:
|
488
|
-
:
|
489
|
-
:
|
490
|
-
:
|
491
|
-
:
|
492
|
-
:
|
493
|
-
:
|
494
|
-
:
|
495
|
-
:
|
473
|
+
data_origin: "medreg",
|
474
|
+
gln: row[0].to_s.gsub(/[^0-9]/, ""), #=> GLN Person
|
475
|
+
last_name: row[1].to_s, #=> Name
|
476
|
+
first_name: row[2].to_s, #=> Vorname
|
477
|
+
post: row[3].to_s, #=> PLZ
|
478
|
+
place: row[4].to_s, #=> Ort
|
479
|
+
region: row[5].to_s, #=> Bewilligungskanton
|
480
|
+
country: row[6].to_s, #=> Land
|
481
|
+
license: row[7].to_s, #=> Bewilligung Selbstdispensation
|
482
|
+
certificate: row[8].to_s, #=> Diplom
|
483
|
+
authorization: row[9].to_s #=> BTM Berechtigung
|
496
484
|
}
|
497
485
|
end
|
498
486
|
end
|
499
487
|
data
|
500
488
|
end
|
501
489
|
end
|
490
|
+
|
502
491
|
class ZurroseExtractor < Extractor
|
503
492
|
# see http://dev.ywesee.com/Bbmb/TransferDat
|
504
493
|
def initialize(dat, extended = false, artikelstamm = false)
|
505
494
|
@@extended = extended
|
506
495
|
@artikelstamm = artikelstamm
|
507
|
-
FileUtils.makedirs(
|
508
|
-
@@error_file ||= File.open(File.join(
|
496
|
+
FileUtils.makedirs(WORK_DIR)
|
497
|
+
@@error_file ||= File.open(File.join(WORK_DIR, "duplicate_ean13_from_zur_rose.txt"), "wb+:ISO-8859-14")
|
509
498
|
@@items_without_ean13s ||= 0
|
510
499
|
@@duplicated_ean13s ||= 0
|
511
500
|
@@zur_rose_items ||= 0
|
512
501
|
if dat
|
513
|
-
if File.
|
514
|
-
|
502
|
+
@io = if File.exist?(dat)
|
503
|
+
File.open(dat, "rb:ISO-8859-14")
|
515
504
|
else
|
516
|
-
|
505
|
+
StringIO.new(dat)
|
517
506
|
end
|
518
507
|
@io
|
519
|
-
else
|
520
|
-
nil
|
521
508
|
end
|
522
509
|
end
|
510
|
+
|
523
511
|
def to_hash
|
524
512
|
data = {}
|
525
|
-
|
526
|
-
|
527
|
-
|
528
|
-
|
529
|
-
|
530
|
-
|
531
|
-
|
532
|
-
|
533
|
-
|
534
|
-
|
535
|
-
|
536
|
-
|
537
|
-
|
538
|
-
|
539
|
-
|
540
|
-
|
541
|
-
|
542
|
-
|
543
|
-
|
544
|
-
|
545
|
-
|
546
|
-
|
547
|
-
|
548
|
-
|
513
|
+
if @io
|
514
|
+
while (line = @io.gets)
|
515
|
+
ean13 = "-1"
|
516
|
+
line = Oddb2xml.patch_some_utf8(line).chomp
|
517
|
+
# next unless /(7680\d{9})(\d{1})$/.match(line) # Skip non pharma
|
518
|
+
next if /(ad us\.* vet)|(\(vet\))/i.match?(line)
|
519
|
+
if @@extended
|
520
|
+
next unless (match_data = line.match(/(\d{13})(\d{1})$/))
|
521
|
+
else
|
522
|
+
next unless (match_data = line.match(/(7680\d{9})(\d{1})$/))
|
523
|
+
end
|
524
|
+
pharma_code = line[3..9]
|
525
|
+
if match_data[1].to_s == "0000000000000"
|
526
|
+
@@items_without_ean13s += 1
|
527
|
+
next if @artikelstamm && pharma_code.to_i == 0
|
528
|
+
ean13 = Oddb2xml::FAKE_GTIN_START + pharma_code.to_s unless @artikelstamm
|
529
|
+
else
|
530
|
+
ean13 = match_data[1]
|
531
|
+
end
|
532
|
+
if data[ean13]
|
533
|
+
@@error_file.puts "Duplicate ean13 #{ean13} in line \nact: #{line.chomp}\norg: #{data[ean13][:line]}"
|
534
|
+
@@items_without_ean13s -= 1
|
535
|
+
@@duplicated_ean13s += 1
|
536
|
+
next
|
537
|
+
end
|
549
538
|
|
550
|
-
|
551
|
-
|
552
|
-
|
553
|
-
|
554
|
-
|
555
|
-
|
556
|
-
|
557
|
-
|
558
|
-
|
559
|
-
|
560
|
-
|
561
|
-
|
562
|
-
|
563
|
-
|
564
|
-
|
565
|
-
|
566
|
-
|
567
|
-
|
568
|
-
|
569
|
-
|
570
|
-
|
571
|
-
|
572
|
-
end
|
573
|
-
if defined?(@@extended)
|
539
|
+
pexf = sprintf("%.2f", line[60, 6].gsub(/(\d{2})$/, '.\1').to_f)
|
540
|
+
ppub = sprintf("%.2f", line[66, 6].gsub(/(\d{2})$/, '.\1').to_f)
|
541
|
+
next if @artikelstamm && /^113/.match(line) && ppub.eql?("0.0") && pexf.eql?("0.0")
|
542
|
+
next unless ean13
|
543
|
+
key = ean13
|
544
|
+
key = (Oddb2xml::FAKE_GTIN_START + pharma_code.to_s) if ean13.to_i <= 0 # dummy ean13
|
545
|
+
data[key] = {
|
546
|
+
data_origin: "zur_rose",
|
547
|
+
line: line.chomp,
|
548
|
+
ean13: ean13,
|
549
|
+
clag: line[73],
|
550
|
+
vat: line[96],
|
551
|
+
description: line[10..59].sub(/\s+$/, ""),
|
552
|
+
quantity: "",
|
553
|
+
pharmacode: pharma_code,
|
554
|
+
price: pexf,
|
555
|
+
pub_price: ppub,
|
556
|
+
type: :nonpharma,
|
557
|
+
cmut: line[2]
|
558
|
+
}
|
559
|
+
@@zur_rose_items += 1
|
560
|
+
end
|
561
|
+
end
|
562
|
+
if defined?(@@extended) && @@extended
|
574
563
|
@@error_file.puts get_error_msg
|
575
564
|
end
|
576
565
|
@@error_file.close
|
577
566
|
@@error_file = nil
|
578
567
|
data
|
579
568
|
end
|
580
|
-
|
581
|
-
|
582
|
-
|
583
|
-
|
569
|
+
if defined?(@@extended) && @@extended
|
570
|
+
at_exit do
|
571
|
+
puts get_error_msg
|
572
|
+
end
|
573
|
+
end
|
574
|
+
|
575
|
+
private
|
576
|
+
|
584
577
|
def get_error_msg
|
585
|
-
if defined?(@@extended)
|
578
|
+
if defined?(@@extended) && @@extended
|
586
579
|
msg = "Added #{@@items_without_ean13s} via pharmacodes of #{@@zur_rose_items} items when extracting the transfer.dat from \"Zur Rose\""
|
587
580
|
msg += "\n found #{@@duplicated_ean13s} lines with duplicated ean13" if @@duplicated_ean13s > 0
|
588
581
|
return msg
|