oddb2xml 2.6.7 → 2.7.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.github/workflows/ruby.yml +40 -0
- data/.standard.yml +2 -0
- data/Elexis_Artikelstamm_v5.xsd +0 -3
- data/Gemfile +3 -3
- data/History.txt +30 -0
- data/README.md +1 -1
- data/Rakefile +24 -23
- data/bin/check_artikelstamm +11 -11
- data/bin/compare_v5 +23 -23
- data/bin/oddb2xml +14 -13
- data/lib/oddb2xml.rb +1 -1
- data/lib/oddb2xml/builder.rb +1077 -1039
- data/lib/oddb2xml/calc.rb +232 -233
- data/lib/oddb2xml/chapter_70_hack.rb +38 -32
- data/lib/oddb2xml/cli.rb +252 -233
- data/lib/oddb2xml/compare.rb +70 -59
- data/lib/oddb2xml/compositions_syntax.rb +448 -430
- data/lib/oddb2xml/compressor.rb +20 -20
- data/lib/oddb2xml/downloader.rb +155 -129
- data/lib/oddb2xml/extractor.rb +302 -296
- data/lib/oddb2xml/options.rb +34 -35
- data/lib/oddb2xml/parslet_compositions.rb +263 -265
- data/lib/oddb2xml/semantic_check.rb +39 -33
- data/lib/oddb2xml/util.rb +169 -159
- data/lib/oddb2xml/version.rb +1 -1
- data/lib/oddb2xml/xml_definitions.rb +32 -33
- data/oddb2xml.gemspec +32 -30
- data/spec/artikelstamm_spec.rb +139 -132
- data/spec/builder_spec.rb +495 -524
- data/spec/calc_spec.rb +552 -593
- data/spec/check_artikelstamm_spec.rb +26 -26
- data/spec/cli_spec.rb +182 -157
- data/spec/compare_spec.rb +9 -11
- data/spec/composition_syntax_spec.rb +390 -409
- data/spec/compressor_spec.rb +48 -48
- data/spec/data/Preparations.xml +139 -3
- data/spec/data/refdata_NonPharma.xml +0 -3
- data/spec/data/refdata_Pharma.xml +10 -25
- data/spec/data/swissmedic_package.xlsx +0 -0
- data/spec/data/transfer.dat +3 -1
- data/spec/data/varia_De.htm +2 -2
- data/spec/data_helper.rb +47 -49
- data/spec/downloader_spec.rb +247 -260
- data/spec/extractor_spec.rb +173 -165
- data/spec/galenic_spec.rb +233 -256
- data/spec/options_spec.rb +116 -119
- data/spec/parslet_spec.rb +833 -861
- data/spec/spec_helper.rb +154 -153
- data/test_options.rb +39 -42
- data/tools/win_fetch_cacerts.rb +2 -3
- metadata +49 -5
- data/.travis.yml +0 -30
data/lib/oddb2xml/extractor.rb
CHANGED
@@ -1,11 +1,10 @@
|
|
1
|
-
|
2
|
-
require
|
3
|
-
require
|
4
|
-
require
|
5
|
-
require
|
6
|
-
require
|
7
|
-
require
|
8
|
-
require 'oddb2xml/xml_definitions'
|
1
|
+
require "nokogiri"
|
2
|
+
require "spreadsheet"
|
3
|
+
require "stringio"
|
4
|
+
require "rubyXL"
|
5
|
+
require "rubyXL/convenience_methods/workbook"
|
6
|
+
require "csv"
|
7
|
+
require "oddb2xml/xml_definitions"
|
9
8
|
|
10
9
|
module Oddb2xml
|
11
10
|
module TxtExtractorMethods
|
@@ -13,23 +12,26 @@ module Oddb2xml
|
|
13
12
|
Oddb2xml.log("TxtExtractorMethods #{str} #{str.to_s.size} bytes")
|
14
13
|
@io = StringIO.new(str)
|
15
14
|
end
|
15
|
+
|
16
16
|
def to_hash
|
17
17
|
data = {}
|
18
|
-
while line = @io.gets
|
19
|
-
next unless
|
20
|
-
ean13 = line.chomp.
|
18
|
+
while (line = @io.gets)
|
19
|
+
next unless /\d{13}/.match?(line)
|
20
|
+
ean13 = line.chomp.delete("\"")
|
21
21
|
data[ean13] = true
|
22
22
|
end
|
23
23
|
data
|
24
24
|
end
|
25
25
|
end
|
26
|
+
|
26
27
|
class Extractor
|
27
28
|
attr_accessor :xml
|
28
29
|
def initialize(xml)
|
29
|
-
Oddb2xml.log("Extractor #{xml
|
30
|
+
Oddb2xml.log("Extractor #{xml} xml #{xml.size} bytes")
|
30
31
|
@xml = xml
|
31
32
|
end
|
32
33
|
end
|
34
|
+
|
33
35
|
class LppvExtractor < Extractor
|
34
36
|
include TxtExtractorMethods
|
35
37
|
end
|
@@ -37,156 +39,152 @@ module Oddb2xml
|
|
37
39
|
class BagXmlExtractor < Extractor
|
38
40
|
def to_hash
|
39
41
|
data = {}
|
40
|
-
result = PreparationsEntry.parse(@xml.sub(
|
42
|
+
result = PreparationsEntry.parse(@xml.sub(STRIP_FOR_SAX_MACHINE, ""), lazy: true)
|
41
43
|
result.Preparations.Preparation.each do |seq|
|
42
|
-
if seq.SwissmedicNo5.eql?(
|
44
|
+
if seq.SwissmedicNo5.eql?("0")
|
43
45
|
puts "BagXmlExtractor Skipping SwissmedicNo5 0 for #{seq.NameDe} #{seq.DescriptionDe} #{seq.CommentDe}"
|
44
46
|
next
|
45
47
|
end
|
46
48
|
item = {}
|
47
|
-
item[:data_origin]
|
48
|
-
item[:refdata]
|
49
|
-
item[:product_key]
|
50
|
-
item[:desc_de]
|
51
|
-
item[:desc_fr]
|
52
|
-
item[:name_de]
|
53
|
-
item[:name_fr]
|
54
|
-
item[:swissmedic_number5] = (num5 = seq.SwissmedicNo5) ?
|
55
|
-
item[:org_gen_code] = (orgc = seq.OrgGenCode)
|
56
|
-
item[:deductible]
|
57
|
-
item[:atc_code]
|
58
|
-
item[:comment_de]
|
59
|
-
item[:comment_fr]
|
60
|
-
item[:it_code]
|
49
|
+
item[:data_origin] = "bag_xml"
|
50
|
+
item[:refdata] = true
|
51
|
+
item[:product_key] = seq.ProductCommercial
|
52
|
+
item[:desc_de] = (desc = seq.DescriptionDe) ? desc : ""
|
53
|
+
item[:desc_fr] = (desc = seq.DescriptionFr) ? desc : ""
|
54
|
+
item[:name_de] = (name = seq.NameDe) ? name : ""
|
55
|
+
item[:name_fr] = (name = seq.NameFr) ? name : ""
|
56
|
+
item[:swissmedic_number5] = (num5 = seq.SwissmedicNo5) ? num5.rjust(5, "0") : ""
|
57
|
+
item[:org_gen_code] = (orgc = seq.OrgGenCode) ? orgc : ""
|
58
|
+
item[:deductible] = (ddbl = seq.FlagSB20) ? ddbl : ""
|
59
|
+
item[:atc_code] = (atcc = seq.AtcCode) ? atcc : ""
|
60
|
+
item[:comment_de] = (info = seq.CommentDe) ? info : ""
|
61
|
+
item[:comment_fr] = (info = seq.CommentFr) ? info : ""
|
62
|
+
item[:it_code] = ""
|
61
63
|
seq.ItCodes.ItCode.each do |itc|
|
62
64
|
if item[:it_code].to_s.empty?
|
63
65
|
it_code = itc.Code.to_s
|
64
|
-
item[:it_code] =
|
66
|
+
item[:it_code] = /(\d+)\.(\d+)\.(\d+)./.match?(it_code) ? it_code : ""
|
65
67
|
end
|
66
68
|
end
|
67
69
|
item[:substances] = []
|
68
70
|
seq.Substances.Substance.each_with_index do |sub, i|
|
69
71
|
item[:substances] << {
|
70
|
-
:
|
71
|
-
:
|
72
|
-
:
|
73
|
-
:
|
72
|
+
index: i.to_s,
|
73
|
+
name: (name = sub.DescriptionLa) ? name : "",
|
74
|
+
quantity: (qtty = sub.Quantity) ? qtty : "",
|
75
|
+
unit: (unit = sub.QuantityUnit) ? unit : ""
|
74
76
|
}
|
75
77
|
end
|
76
78
|
item[:pharmacodes] = []
|
77
|
-
item[:packages]
|
79
|
+
item[:packages] = {} # pharmacode => package
|
78
80
|
seq.Packs.Pack.each do |pac|
|
79
81
|
if pac.SwissmedicNo8 && pac.SwissmedicNo8.length < 8
|
80
82
|
puts "BagXmlExtractor: Adding leading zeros for SwissmedicNo8 #{pac.SwissmedicNo8} BagDossierNo #{pac.BagDossierNo} PackId #{pac.PackId} #{item[:name_de]}" if $VERBOSE
|
81
|
-
pac.SwissmedicNo8
|
83
|
+
pac.SwissmedicNo8 = pac.SwissmedicNo8.rjust(8, "0")
|
82
84
|
end
|
83
85
|
unless pac.GTIN
|
84
|
-
|
86
|
+
if pac.SwissmedicNo8
|
87
|
+
ean12 = "7680" + pac.SwissmedicNo8
|
88
|
+
pac.GTIN = (ean12 + Oddb2xml.calc_checksum(ean12)) unless @artikelstamm
|
89
|
+
# puts "BagXmlExtractor: Missing GTIN in SwissmedicNo8 #{pac.SwissmedicNo8} BagDossierNo #{pac.BagDossierNo} PackId #{pac.PackId} #{item[:name_de]}."
|
90
|
+
else
|
85
91
|
puts "BagXmlExtractor: Missing GTIN and SwissmedicNo8 in SwissmedicNo8 #{pac.SwissmedicNo8} BagDossierNo #{pac.BagDossierNo} PackId #{pac.PackId} #{item[:name_de]}"
|
86
92
|
next
|
87
|
-
else
|
88
|
-
ean12 = '7680' + pac.SwissmedicNo8
|
89
|
-
pac.GTIN = (ean12 + Oddb2xml.calc_checksum(ean12)) unless @artikelstamm
|
90
|
-
# puts "BagXmlExtractor: Missing GTIN in SwissmedicNo8 #{pac.SwissmedicNo8} BagDossierNo #{pac.BagDossierNo} PackId #{pac.PackId} #{item[:name_de]}."
|
91
93
|
end
|
92
94
|
end
|
93
95
|
ean13 = pac.GTIN.to_s
|
94
96
|
Oddb2xml.setEan13forNo8(pac.SwissmedicNo8, ean13) if pac.SwissmedicNo8
|
95
97
|
# packages
|
96
|
-
exf = {:
|
97
|
-
if pac
|
98
|
-
exf[:price]
|
99
|
-
exf[:valid_date] =
|
100
|
-
exf[:price_code] =
|
98
|
+
exf = {price: "", valid_date: "", price_code: ""}
|
99
|
+
if pac&.Prices&.ExFactoryPrice
|
100
|
+
exf[:price] = pac.Prices.ExFactoryPrice.Price if pac.Prices.ExFactoryPrice.Price
|
101
|
+
exf[:valid_date] = pac.Prices.ExFactoryPrice.ValidFromDate if pac.Prices.ExFactoryPrice.ValidFromDate
|
102
|
+
exf[:price_code] = pac.Prices.ExFactoryPrice.PriceTypeCode if pac.Prices.ExFactoryPrice.PriceTypeCode
|
101
103
|
end
|
102
|
-
pub = {:
|
103
|
-
if pac
|
104
|
-
pub[:price]
|
105
|
-
pub[:valid_date] =
|
106
|
-
pub[:price_code] =
|
104
|
+
pub = {price: "", valid_date: "", price_code: ""}
|
105
|
+
if pac&.Prices&.PublicPrice
|
106
|
+
pub[:price] = pac.Prices.PublicPrice.Price if pac.Prices.PublicPrice.Price
|
107
|
+
pub[:valid_date] = pac.Prices.PublicPrice.ValidFromDate if pac.Prices.PublicPrice.ValidFromDate
|
108
|
+
pub[:price_code] = pac.Prices.PublicPrice.PriceTypeCode if pac.Prices.PublicPrice.PriceTypeCode
|
107
109
|
end
|
108
110
|
item[:packages][ean13] = {
|
109
|
-
:
|
110
|
-
:
|
111
|
-
:
|
112
|
-
:
|
113
|
-
:
|
114
|
-
:
|
115
|
-
:
|
116
|
-
:
|
117
|
-
:
|
111
|
+
ean13: ean13,
|
112
|
+
name_de: (desc = seq.NameDe) ? desc : "",
|
113
|
+
name_fr: (desc = seq.NameFr) ? desc : "",
|
114
|
+
desc_de: (desc = pac.DescriptionDe) ? desc : "",
|
115
|
+
desc_fr: (desc = pac.DescriptionFr) ? desc : "",
|
116
|
+
sl_entry: true,
|
117
|
+
swissmedic_category: (cat = pac.SwissmedicCategory) ? cat : "",
|
118
|
+
swissmedic_number8: (num = pac.SwissmedicNo8) ? num : "",
|
119
|
+
prices: {exf_price: exf, pub_price: pub}
|
118
120
|
}
|
119
121
|
# related all limitations
|
120
122
|
item[:packages][ean13][:limitations] = []
|
121
|
-
limitations = Hash.new{|h,k| h[k] = [] }
|
122
|
-
if seq.Limitations
|
123
|
-
|
124
|
-
else
|
125
|
-
limitations[:seq] = nil
|
123
|
+
limitations = Hash.new { |h, k| h[k] = [] }
|
124
|
+
limitations[:seq] = if seq.Limitations
|
125
|
+
seq.Limitations.Limitation.collect { |x| x }
|
126
126
|
end
|
127
127
|
# in it-codes
|
128
|
-
if seq
|
128
|
+
if seq&.ItCodes && seq&.ItCodes&.ItCode
|
129
129
|
limitations[:itc] = []
|
130
|
-
seq.ItCodes.ItCode.each { |x|
|
130
|
+
seq.ItCodes.ItCode.each { |x| limitations[:itc] += x.Limitations.Limitation if x.Limitations.Limitation }
|
131
131
|
else
|
132
|
-
limitations[:itc] =nil
|
132
|
+
limitations[:itc] = nil
|
133
133
|
end
|
134
134
|
# in pac
|
135
|
-
if pac
|
136
|
-
|
137
|
-
else
|
138
|
-
limitations[:pac] = nil
|
135
|
+
limitations[:pac] = if pac && pac.Limitations
|
136
|
+
(lims = pac.Limitations.Limitation) ? lims.to_a : nil
|
139
137
|
end
|
140
138
|
limitations.each_pair do |lim_key, lims|
|
141
|
-
key =
|
142
|
-
id
|
139
|
+
key = ""
|
140
|
+
id = ""
|
143
141
|
case lim_key
|
144
142
|
when :seq, :itc
|
145
143
|
key = :swissmedic_number5
|
146
|
-
id
|
144
|
+
id = item[key].to_s
|
147
145
|
when :pac
|
148
146
|
key = :swissmedic_number8
|
149
|
-
id
|
147
|
+
id = item[:packages][ean13][key].to_s
|
150
148
|
end
|
151
|
-
if id.empty? && item[:packages][ean13][
|
149
|
+
if id.empty? && item[:packages][ean13][:swissmedic_number8]
|
152
150
|
key = :swissmedic_number8
|
153
|
-
id
|
151
|
+
id = item[:packages][ean13][key].to_s
|
154
152
|
end
|
155
|
-
lims
|
153
|
+
lims&.each do |lim|
|
156
154
|
limitation = {
|
157
|
-
:
|
158
|
-
:
|
159
|
-
:
|
160
|
-
:
|
161
|
-
:
|
162
|
-
:
|
163
|
-
:
|
164
|
-
:
|
165
|
-
:
|
166
|
-
:
|
155
|
+
it: item[:it_code],
|
156
|
+
key: key,
|
157
|
+
id: id,
|
158
|
+
code: (lic = lim.LimitationCode) ? lic : "",
|
159
|
+
type: (lit = lim.LimitationType) ? lit : "",
|
160
|
+
value: (liv = lim.LimitationValue) ? liv : "",
|
161
|
+
niv: (niv = lim.LimitationNiveau) ? niv : "",
|
162
|
+
desc_de: (dsc = lim.DescriptionDe) ? dsc : "",
|
163
|
+
desc_fr: (dsc = lim.DescriptionFr) ? dsc : "",
|
164
|
+
vdate: (dat = lim.ValidFromDate) ? dat : ""
|
167
165
|
}
|
168
166
|
deleted = false
|
169
|
-
if upto = ((thr = lim.ValidThruDate) ? thr : nil)
|
170
|
-
upto =~ /\d{2}\.\d{2}\.\d{2}/
|
167
|
+
if (upto = ((thr = lim.ValidThruDate) ? thr : nil)) &&
|
168
|
+
upto =~ (/\d{2}\.\d{2}\.\d{2}/)
|
171
169
|
begin
|
172
|
-
deleted = true if Date.strptime(upto,
|
170
|
+
deleted = true if Date.strptime(upto, "%d.%m.%y") >= Date.today
|
173
171
|
rescue ArgumentError
|
174
172
|
end
|
175
173
|
end
|
176
174
|
limitation[:del] = deleted
|
177
175
|
item[:packages][ean13][:limitations] << limitation
|
178
|
-
end
|
176
|
+
end
|
179
177
|
end
|
180
178
|
# limitation points
|
181
179
|
pts = pac.PointLimitations.PointLimitation.first # only first points
|
182
|
-
item[:packages][ean13][:limitation_points] = pts ? pts.Points :
|
180
|
+
item[:packages][ean13][:limitation_points] = pts ? pts.Points : ""
|
183
181
|
if pac.SwissmedicNo8
|
184
|
-
|
185
|
-
|
186
|
-
|
187
|
-
|
188
|
-
|
189
|
-
|
182
|
+
ean12 = "7680" + pac.SwissmedicNo8
|
183
|
+
correct_ean13 = ean12 + Oddb2xml.calc_checksum(ean12)
|
184
|
+
unless pac.GTIN.eql?(correct_ean13)
|
185
|
+
puts "pac.GTIN #{pac.GTIN} should be #{correct_ean13}"
|
186
|
+
item[:packages][ean13][:CORRECT_EAN13] = correct_ean13
|
187
|
+
end
|
190
188
|
end
|
191
189
|
data[ean13] = item
|
192
190
|
end
|
@@ -197,65 +195,60 @@ module Oddb2xml
|
|
197
195
|
|
198
196
|
class RefdataExtractor < Extractor
|
199
197
|
def initialize(xml, type)
|
200
|
-
@type = (type == :pharma ?
|
198
|
+
@type = (type == :pharma ? "PHARMA" : "NONPHARMA")
|
201
199
|
super(xml)
|
202
200
|
end
|
201
|
+
|
203
202
|
def to_hash
|
204
203
|
data = {}
|
205
|
-
result = SwissRegArticleEntry.parse(@xml.sub(
|
204
|
+
result = SwissRegArticleEntry.parse(@xml.sub(STRIP_FOR_SAX_MACHINE, ""), lazy: true)
|
206
205
|
items = result.ARTICLE.ITEM
|
207
206
|
items.each do |pac|
|
208
|
-
ean13 = (gtin = pac.GTIN.to_s) ? gtin:
|
207
|
+
ean13 = (gtin = pac.GTIN.to_s) ? gtin : "0"
|
209
208
|
if ean13.size < 13
|
210
209
|
puts "Refdata #{@type} use 13 chars not #{ean13.size} for #{ean13}" if $VERBOSE
|
211
|
-
ean13 = ean13.rjust(13,
|
210
|
+
ean13 = ean13.rjust(13, "0")
|
212
211
|
end
|
213
|
-
if ean13.size == 14 && ean13[0] ==
|
212
|
+
if ean13.size == 14 && ean13[0] == "0"
|
214
213
|
puts "Refdata #{@type} remove leading '0' for #{ean13}" if $VERBOSE
|
215
214
|
ean13 = ean13[1..-1]
|
216
215
|
end
|
217
216
|
# but in refdata_nonPharma we have a about 700 GTINs which are 14 characters and longer
|
218
217
|
item = {}
|
219
|
-
item[:ean13]
|
220
|
-
item[:no8]
|
221
|
-
item[:
|
222
|
-
item[:
|
223
|
-
item[:
|
224
|
-
item[:
|
225
|
-
item[:
|
226
|
-
item[:
|
227
|
-
item[:
|
228
|
-
item[:
|
229
|
-
item[:
|
230
|
-
item[:company_ean] = (gln = pac.AUTH_HOLDER_GLN) ? gln: ''
|
231
|
-
unless item[:pharmacode]
|
232
|
-
item[:pharmacode] = phar
|
233
|
-
unless data[item[:pharmacode]] # pharmacode => GTINs
|
234
|
-
data[item[:ean13]] = []
|
235
|
-
end
|
236
|
-
end
|
218
|
+
item[:ean13] = ean13
|
219
|
+
item[:no8] = pac.SWMC_AUTHNR
|
220
|
+
item[:data_origin] = "refdata"
|
221
|
+
item[:refdata] = true
|
222
|
+
item[:_type] = (typ = pac.ATYPE.downcase.to_sym) ? typ : ""
|
223
|
+
item[:last_change] = (date = Time.parse(pac.DT).to_s) ? date : "" # Date and time of last data change
|
224
|
+
item[:desc_de] = (dscr = pac.NAME_DE) ? dscr : ""
|
225
|
+
item[:desc_fr] = (dscr = pac.NAME_FR) ? dscr : ""
|
226
|
+
item[:atc_code] = (code = pac.ATC) ? code.to_s : ""
|
227
|
+
item[:company_name] = (nam = pac.AUTH_HOLDER_NAME) ? nam : ""
|
228
|
+
item[:company_ean] = (gln = pac.AUTH_HOLDER_GLN) ? gln : ""
|
237
229
|
data[item[:ean13]] = item
|
238
230
|
end
|
239
231
|
data
|
240
232
|
end
|
241
233
|
end
|
234
|
+
|
242
235
|
class SwissmedicExtractor < Extractor
|
243
236
|
def initialize(filename, type)
|
244
|
-
@filename = File.join(
|
245
|
-
@filename = File.join(SpecData, File.basename(filename)) if defined?(RSpec)
|
246
|
-
@type
|
237
|
+
@filename = File.join(DOWNLOADS, File.basename(filename))
|
238
|
+
@filename = File.join(SpecData, File.basename(filename)) if defined?(RSpec) && !File.exist?(@filename)
|
239
|
+
@type = type
|
247
240
|
Oddb2xml.log("SwissmedicExtractor #{@filename} #{File.size(@filename)} bytes")
|
248
|
-
return unless File.
|
241
|
+
return unless File.exist?(@filename)
|
249
242
|
@sheet = RubyXL::Parser.parse(File.expand_path(@filename)).worksheets[0]
|
250
243
|
end
|
244
|
+
|
251
245
|
def to_arry
|
252
246
|
data = []
|
253
247
|
return data unless @sheet
|
254
248
|
case @type
|
255
249
|
when :orphan
|
256
|
-
i = 1
|
257
250
|
col_zulassung = 6
|
258
|
-
raise "Could not find Zulassungsnummer in column #{col_zulassung} of #{@filename}" unless /Zulassungs.*nummer/.match(@sheet[3][col_zulassung].value)
|
251
|
+
raise "Could not find Zulassungsnummer in column #{col_zulassung} of #{@filename}" unless /Zulassungs.*nummer/.match?(@sheet[3][col_zulassung].value)
|
259
252
|
@sheet.each do |row|
|
260
253
|
next unless row[col_zulassung]
|
261
254
|
number = row[col_zulassung].value.to_i
|
@@ -269,24 +262,25 @@ module Oddb2xml
|
|
269
262
|
data.uniq
|
270
263
|
end
|
271
264
|
|
272
|
-
|
265
|
+
# Packungen.xlsx COLUMNS_FEBRUARY_2019
|
266
|
+
def to_hash
|
273
267
|
data = {}
|
274
268
|
return data unless @sheet
|
275
269
|
case @type
|
276
270
|
when :package
|
277
271
|
Oddb2xml.check_column_indices(@sheet)
|
278
|
-
ith
|
279
|
-
iksnr
|
280
|
-
seq_name
|
281
|
-
i_3
|
282
|
-
seqnr
|
283
|
-
cat
|
284
|
-
siz
|
285
|
-
atc
|
272
|
+
ith = COLUMNS_FEBRUARY_2019.keys.index(:index_therapeuticus)
|
273
|
+
iksnr = COLUMNS_FEBRUARY_2019.keys.index(:iksnr)
|
274
|
+
seq_name = COLUMNS_FEBRUARY_2019.keys.index(:name_base)
|
275
|
+
i_3 = COLUMNS_FEBRUARY_2019.keys.index(:ikscd)
|
276
|
+
seqnr = COLUMNS_FEBRUARY_2019.keys.index(:seqnr)
|
277
|
+
cat = COLUMNS_FEBRUARY_2019.keys.index(:ikscat)
|
278
|
+
siz = COLUMNS_FEBRUARY_2019.keys.index(:size)
|
279
|
+
atc = COLUMNS_FEBRUARY_2019.keys.index(:atc_class)
|
286
280
|
list_code = COLUMNS_FEBRUARY_2019.keys.index(:production_science)
|
287
|
-
eht
|
288
|
-
sub
|
289
|
-
comp
|
281
|
+
eht = COLUMNS_FEBRUARY_2019.keys.index(:unit)
|
282
|
+
sub = COLUMNS_FEBRUARY_2019.keys.index(:substances)
|
283
|
+
comp = COLUMNS_FEBRUARY_2019.keys.index(:composition)
|
290
284
|
|
291
285
|
# production_science Heilmittelcode, possible values are
|
292
286
|
# Allergene
|
@@ -306,42 +300,41 @@ module Oddb2xml
|
|
306
300
|
# Tierarzneimittel
|
307
301
|
# Transplantat: Gewebeprodukt
|
308
302
|
@sheet.each_with_index do |row, i|
|
309
|
-
|
310
|
-
next
|
311
|
-
next unless row
|
312
|
-
|
313
|
-
no8 = sprintf('%05d',row[iksnr].value.to_i) + sprintf('%03d',row[i_3].value.to_i)
|
303
|
+
next if i <= 1
|
304
|
+
next unless row && row[iksnr] && row[i_3]
|
305
|
+
next unless (row[iksnr].value.to_i > 0) && (row[i_3].value.to_i > 0)
|
306
|
+
no8 = sprintf("%05d", row[iksnr].value.to_i) + sprintf("%03d", row[i_3].value.to_i)
|
314
307
|
unless no8.empty?
|
315
308
|
next if no8.to_i == 0
|
316
309
|
ean_base12 = "7680#{no8}"
|
317
|
-
prodno =
|
318
|
-
ean13 = (ean_base12.ljust(12,
|
310
|
+
prodno = Oddb2xml.gen_prodno(row[iksnr].value.to_i, row[seqnr].value.to_i)
|
311
|
+
ean13 = (ean_base12.ljust(12, "0") + Oddb2xml.calc_checksum(ean_base12))
|
319
312
|
Oddb2xml.setEan13forProdno(prodno, ean13)
|
320
313
|
Oddb2xml.setEan13forNo8(no8, ean13)
|
321
314
|
data[no8] = {
|
322
|
-
:
|
323
|
-
:
|
324
|
-
:
|
325
|
-
:
|
326
|
-
:
|
327
|
-
:
|
328
|
-
:
|
329
|
-
:
|
330
|
-
:
|
331
|
-
:
|
332
|
-
:
|
333
|
-
:
|
334
|
-
:
|
335
|
-
:
|
336
|
-
:
|
337
|
-
:
|
338
|
-
:
|
339
|
-
:
|
340
|
-
:
|
341
|
-
:
|
342
|
-
:
|
343
|
-
:
|
344
|
-
:
|
315
|
+
iksnr: row[iksnr].value.to_i,
|
316
|
+
no8: no8,
|
317
|
+
ean13: ean13,
|
318
|
+
prodno: prodno,
|
319
|
+
seqnr: row[seqnr].value,
|
320
|
+
ith_swissmedic: row[ith] ? row[ith].value.to_s : "",
|
321
|
+
swissmedic_category: row[cat].value.to_s,
|
322
|
+
atc_code: row[atc] ? Oddb2xml.add_epha_changes_for_ATC(row[iksnr].value.to_s, row[atc].value.to_s) : "",
|
323
|
+
list_code: row[list_code] ? row[list_code].value.to_s : "",
|
324
|
+
package_size: row[siz] ? row[siz].value.to_s : "",
|
325
|
+
einheit_swissmedic: row[eht] ? row[eht].value.to_s : "",
|
326
|
+
substance_swissmedic: row[sub] ? row[sub].value.to_s : "",
|
327
|
+
composition_swissmedic: row[comp] ? row[comp].value.to_s : "",
|
328
|
+
sequence_name: row[seq_name] ? row[seq_name].value.to_s : "",
|
329
|
+
is_tier: (row[list_code] == "Tierarzneimittel"),
|
330
|
+
gen_production: row[COLUMNS_FEBRUARY_2019.keys.index(:gen_production)].value.to_s,
|
331
|
+
insulin_category: row[COLUMNS_FEBRUARY_2019.keys.index(:insulin_category)].value.to_s,
|
332
|
+
drug_index: row[COLUMNS_FEBRUARY_2019.keys.index(:drug_index)].value.to_s,
|
333
|
+
data_origin: "swissmedic_package",
|
334
|
+
expiry_date: row[COLUMNS_FEBRUARY_2019.keys.index(:expiry_date)].value.to_s,
|
335
|
+
company_name: row[COLUMNS_FEBRUARY_2019.keys.index(:company)].value.to_s,
|
336
|
+
size: row[COLUMNS_FEBRUARY_2019.keys.index(:size)].value.to_s,
|
337
|
+
unit: row[COLUMNS_FEBRUARY_2019.keys.index(:unit)].value.to_s
|
345
338
|
}
|
346
339
|
end
|
347
340
|
end
|
@@ -349,21 +342,26 @@ module Oddb2xml
|
|
349
342
|
cleanup_file
|
350
343
|
data
|
351
344
|
end
|
345
|
+
|
352
346
|
private
|
347
|
+
|
353
348
|
def cleanup_file
|
354
|
-
|
355
|
-
|
349
|
+
unless defined?(RSpec)
|
350
|
+
begin
|
351
|
+
File.unlink(@filename) if File.exist?(@filename)
|
356
352
|
rescue Errno::EACCES # Permission Denied on Windows
|
357
|
-
|
353
|
+
end
|
354
|
+
end
|
358
355
|
end
|
359
|
-
|
360
356
|
end
|
357
|
+
|
361
358
|
class MigelExtractor < Extractor
|
362
359
|
def initialize(bin)
|
363
360
|
Oddb2xml.log("MigelExtractor #{io} #{File.size(io)} bytes")
|
364
|
-
book = Spreadsheet.open(io,
|
361
|
+
book = Spreadsheet.open(io, "rb")
|
365
362
|
@sheet = book.worksheet(0)
|
366
363
|
end
|
364
|
+
|
367
365
|
def to_hash
|
368
366
|
data = {}
|
369
367
|
@sheet.each_with_index do |row, i|
|
@@ -373,15 +371,15 @@ module Oddb2xml
|
|
373
371
|
ean13 = row[0]
|
374
372
|
ean13 = phar unless ean13.to_s.length == 13
|
375
373
|
data[ean] = {
|
376
|
-
:
|
377
|
-
:
|
378
|
-
:
|
379
|
-
:
|
380
|
-
:
|
381
|
-
:
|
382
|
-
:
|
383
|
-
:
|
384
|
-
:
|
374
|
+
refdata: true,
|
375
|
+
ean13: ean13,
|
376
|
+
pharmacode: phar,
|
377
|
+
desc_de: row[3],
|
378
|
+
desc_fr: row[4],
|
379
|
+
quantity: row[5], # quantity
|
380
|
+
company_name: row[6],
|
381
|
+
company_ean: row[7],
|
382
|
+
data_origin: "migel"
|
385
383
|
}
|
386
384
|
end
|
387
385
|
data
|
@@ -390,26 +388,26 @@ module Oddb2xml
|
|
390
388
|
|
391
389
|
class SwissmedicInfoExtractor < Extractor
|
392
390
|
def to_hash
|
393
|
-
data = Hash.new{|h,k| h[k] = [] }
|
391
|
+
data = Hash.new { |h, k| h[k] = [] }
|
394
392
|
return data unless @xml.size > 0
|
395
|
-
result = MedicalInformationsContent.parse(@xml.sub(
|
393
|
+
result = MedicalInformationsContent.parse(@xml.sub(STRIP_FOR_SAX_MACHINE, ""), lazy: true)
|
396
394
|
result.medicalInformation.each do |pac|
|
397
395
|
lang = pac.lang.to_s
|
398
|
-
next unless
|
396
|
+
next unless /de|fr/.match?(lang)
|
399
397
|
item = {}
|
400
398
|
item[:refdata] = true
|
401
|
-
item[:data_origin] =
|
402
|
-
item[:name]
|
403
|
-
item[:owner] = (ownr = pac.authHolder) ? ownr :
|
404
|
-
item[:style] =
|
405
|
-
html = Nokogiri::HTML.fragment(pac.content.force_encoding(
|
399
|
+
item[:data_origin] = "swissmedic_info"
|
400
|
+
item[:name] = (name = pac.title) ? name : ""
|
401
|
+
item[:owner] = (ownr = pac.authHolder) ? ownr : ""
|
402
|
+
item[:style] = Nokogiri::HTML.fragment(pac.style).to_html(encoding: "UTF-8")
|
403
|
+
html = Nokogiri::HTML.fragment(pac.content.force_encoding("UTF-8"))
|
406
404
|
item[:paragraph] = html
|
407
|
-
numbers =
|
405
|
+
numbers = /(\d{5})[,\s]*(\d{5})?|(\d{5})[,\s]*(\d{5})?[,\s]*(\d{5})?/.match(html)
|
408
406
|
if numbers
|
409
|
-
|
410
|
-
|
411
|
-
|
412
|
-
|
407
|
+
[$1, $2, $3].compact.each do |n| # plural
|
408
|
+
item[:monid] = n
|
409
|
+
data[lang] << item
|
410
|
+
end
|
413
411
|
end
|
414
412
|
end
|
415
413
|
data
|
@@ -421,27 +419,28 @@ module Oddb2xml
|
|
421
419
|
Oddb2xml.log("EphaExtractor #{str.size} bytes")
|
422
420
|
@io = StringIO.new(str)
|
423
421
|
end
|
422
|
+
|
424
423
|
def to_arry
|
425
424
|
data = []
|
426
425
|
ixno = 0
|
427
426
|
inhalt = @io.read
|
428
427
|
inhalt.split("\n").each do |line|
|
429
428
|
ixno += 1
|
430
|
-
next if /ATC1.*Name1.*ATC2.*Name2/.match(line)
|
431
|
-
#line = '"'+line unless /^"/.match(line)
|
429
|
+
next if /ATC1.*Name1.*ATC2.*Name2/.match?(line)
|
430
|
+
# line = '"'+line unless /^"/.match(line)
|
432
431
|
begin
|
433
|
-
row = CSV.parse_line(line.gsub('""','"'))
|
432
|
+
row = CSV.parse_line(line.gsub('""', '"'))
|
434
433
|
action = {}
|
435
434
|
next unless row.size > 8
|
436
|
-
action[:data_origin] =
|
437
|
-
action[:ixno]
|
438
|
-
action[:title]
|
439
|
-
action[:atc1]
|
440
|
-
action[:atc2]
|
435
|
+
action[:data_origin] = "epha"
|
436
|
+
action[:ixno] = ixno
|
437
|
+
action[:title] = row[4]
|
438
|
+
action[:atc1] = row[0]
|
439
|
+
action[:atc2] = row[2]
|
441
440
|
action[:mechanism] = row[5]
|
442
|
-
action[:effect]
|
443
|
-
action[:measures]
|
444
|
-
action[:grad]
|
441
|
+
action[:effect] = row[6]
|
442
|
+
action[:measures] = row[7]
|
443
|
+
action[:grad] = row[8]
|
445
444
|
data << action
|
446
445
|
rescue CSV::MalformedCSVError
|
447
446
|
puts "CSV::MalformedCSVError in line #{ixno}: #{line}"
|
@@ -450,139 +449,146 @@ module Oddb2xml
|
|
450
449
|
data
|
451
450
|
end
|
452
451
|
end
|
452
|
+
|
453
453
|
class MedregbmExtractor < Extractor
|
454
454
|
def initialize(str, type)
|
455
|
-
@io
|
455
|
+
@io = StringIO.new(str)
|
456
456
|
@type = type
|
457
457
|
end
|
458
|
+
|
458
459
|
def to_arry
|
459
460
|
data = []
|
460
461
|
case @type
|
461
462
|
when :company
|
462
|
-
while line = @io.gets
|
463
|
+
while (line = @io.gets)
|
463
464
|
row = line.chomp.split("\t")
|
464
|
-
next if row[0]
|
465
|
+
next if /^GLN/.match?(row[0])
|
465
466
|
data << {
|
466
|
-
:
|
467
|
-
:
|
468
|
-
:
|
469
|
-
:
|
470
|
-
:
|
471
|
-
:
|
472
|
-
:
|
473
|
-
:
|
474
|
-
:
|
475
|
-
:
|
476
|
-
:
|
477
|
-
:
|
467
|
+
data_origin: "medreg",
|
468
|
+
gln: row[0].to_s.gsub(/[^0-9]/, ""), #=> GLN Betrieb
|
469
|
+
name_1: row[1].to_s, #=> Betriebsname 1
|
470
|
+
name_2: row[2].to_s, #=> Betriebsname 2
|
471
|
+
address: row[3].to_s, #=> Strasse
|
472
|
+
number: row[4].to_s, #=> Nummer
|
473
|
+
post: row[5].to_s, #=> PLZ
|
474
|
+
place: row[6].to_s, #=> Ort
|
475
|
+
region: row[7].to_s, #=> Bewilligungskanton
|
476
|
+
country: row[8].to_s, #=> Land
|
477
|
+
type: row[9].to_s, #=> Betriebstyp
|
478
|
+
authorization: row[10].to_s #=> BTM Berechtigung
|
478
479
|
}
|
479
480
|
end
|
480
481
|
when :person
|
481
|
-
while line = @io.gets
|
482
|
+
while (line = @io.gets)
|
482
483
|
row = line.chomp.split("\t")
|
483
|
-
next if row[0]
|
484
|
+
next if /^GLN/.match?(row[0])
|
484
485
|
data << {
|
485
|
-
:
|
486
|
-
:
|
487
|
-
:
|
488
|
-
:
|
489
|
-
:
|
490
|
-
:
|
491
|
-
:
|
492
|
-
:
|
493
|
-
:
|
494
|
-
:
|
495
|
-
:
|
486
|
+
data_origin: "medreg",
|
487
|
+
gln: row[0].to_s.gsub(/[^0-9]/, ""), #=> GLN Person
|
488
|
+
last_name: row[1].to_s, #=> Name
|
489
|
+
first_name: row[2].to_s, #=> Vorname
|
490
|
+
post: row[3].to_s, #=> PLZ
|
491
|
+
place: row[4].to_s, #=> Ort
|
492
|
+
region: row[5].to_s, #=> Bewilligungskanton
|
493
|
+
country: row[6].to_s, #=> Land
|
494
|
+
license: row[7].to_s, #=> Bewilligung Selbstdispensation
|
495
|
+
certificate: row[8].to_s, #=> Diplom
|
496
|
+
authorization: row[9].to_s #=> BTM Berechtigung
|
496
497
|
}
|
497
498
|
end
|
498
499
|
end
|
499
500
|
data
|
500
501
|
end
|
501
502
|
end
|
503
|
+
|
502
504
|
class ZurroseExtractor < Extractor
|
503
505
|
# see http://dev.ywesee.com/Bbmb/TransferDat
|
504
506
|
def initialize(dat, extended = false, artikelstamm = false)
|
505
507
|
@@extended = extended
|
506
508
|
@artikelstamm = artikelstamm
|
507
|
-
FileUtils.makedirs(
|
508
|
-
@@error_file ||= File.open(File.join(
|
509
|
+
FileUtils.makedirs(WORK_DIR)
|
510
|
+
@@error_file ||= File.open(File.join(WORK_DIR, "duplicate_ean13_from_zur_rose.txt"), "wb+:ISO-8859-14")
|
509
511
|
@@items_without_ean13s ||= 0
|
510
512
|
@@duplicated_ean13s ||= 0
|
511
513
|
@@zur_rose_items ||= 0
|
512
514
|
if dat
|
513
|
-
if File.
|
514
|
-
|
515
|
+
@io = if File.exist?(dat)
|
516
|
+
File.open(dat, "rb:ISO-8859-14")
|
515
517
|
else
|
516
|
-
|
518
|
+
StringIO.new(dat)
|
517
519
|
end
|
518
520
|
@io
|
519
|
-
else
|
520
|
-
nil
|
521
521
|
end
|
522
522
|
end
|
523
|
+
|
523
524
|
def to_hash
|
524
525
|
data = {}
|
525
|
-
|
526
|
-
|
527
|
-
|
528
|
-
|
529
|
-
|
530
|
-
|
531
|
-
|
532
|
-
|
533
|
-
|
534
|
-
|
535
|
-
|
536
|
-
|
537
|
-
|
538
|
-
|
539
|
-
|
540
|
-
|
541
|
-
|
542
|
-
|
543
|
-
|
544
|
-
|
545
|
-
|
546
|
-
|
547
|
-
|
548
|
-
|
526
|
+
if @io
|
527
|
+
while (line = @io.gets)
|
528
|
+
ean13 = "-1"
|
529
|
+
line = Oddb2xml.patch_some_utf8(line).chomp
|
530
|
+
# next unless /(7680\d{9})(\d{1})$/.match(line) # Skip non pharma
|
531
|
+
next if /(ad us\.* vet)|(\(vet\))/i.match?(line)
|
532
|
+
if @@extended
|
533
|
+
next unless (match_data = line.match(/(\d{13})(\d{1})$/))
|
534
|
+
else
|
535
|
+
next unless (match_data = line.match(/(7680\d{9})(\d{1})$/))
|
536
|
+
end
|
537
|
+
pharma_code = line[3..9]
|
538
|
+
if match_data[1].to_s == "0000000000000"
|
539
|
+
@@items_without_ean13s += 1
|
540
|
+
next if @artikelstamm && pharma_code.to_i == 0
|
541
|
+
ean13 = Oddb2xml::FAKE_GTIN_START + pharma_code.to_s unless @artikelstamm
|
542
|
+
else
|
543
|
+
ean13 = match_data[1]
|
544
|
+
end
|
545
|
+
if data[ean13]
|
546
|
+
@@error_file.puts "Duplicate ean13 #{ean13} in line \nact: #{line.chomp}\norg: #{data[ean13][:line]}"
|
547
|
+
@@items_without_ean13s -= 1
|
548
|
+
@@duplicated_ean13s += 1
|
549
|
+
next
|
550
|
+
end
|
549
551
|
|
550
|
-
|
551
|
-
|
552
|
-
|
553
|
-
|
554
|
-
|
555
|
-
|
556
|
-
|
557
|
-
|
558
|
-
|
559
|
-
|
560
|
-
|
561
|
-
|
562
|
-
|
563
|
-
|
564
|
-
|
565
|
-
|
566
|
-
|
567
|
-
|
568
|
-
|
569
|
-
|
570
|
-
|
571
|
-
|
572
|
-
end
|
573
|
-
if defined?(@@extended)
|
552
|
+
pexf = sprintf("%.2f", line[60, 6].gsub(/(\d{2})$/, '.\1').to_f)
|
553
|
+
ppub = sprintf("%.2f", line[66, 6].gsub(/(\d{2})$/, '.\1').to_f)
|
554
|
+
next if @artikelstamm && /^113/.match(line) && ppub.eql?("0.0") && pexf.eql?("0.0")
|
555
|
+
next unless ean13
|
556
|
+
key = ean13
|
557
|
+
key = (Oddb2xml::FAKE_GTIN_START + pharma_code.to_s) if ean13.to_i <= 0 # dummy ean13
|
558
|
+
data[key] = {
|
559
|
+
data_origin: "zur_rose",
|
560
|
+
line: line.chomp,
|
561
|
+
ean13: ean13,
|
562
|
+
clag: line[73],
|
563
|
+
vat: line[96],
|
564
|
+
description: line[10..59].sub(/\s+$/, ""),
|
565
|
+
quantity: "",
|
566
|
+
pharmacode: pharma_code,
|
567
|
+
price: pexf,
|
568
|
+
pub_price: ppub,
|
569
|
+
type: :nonpharma,
|
570
|
+
cmut: line[2]
|
571
|
+
}
|
572
|
+
@@zur_rose_items += 1
|
573
|
+
end
|
574
|
+
end
|
575
|
+
if defined?(@@extended) && @@extended
|
574
576
|
@@error_file.puts get_error_msg
|
575
577
|
end
|
576
578
|
@@error_file.close
|
577
579
|
@@error_file = nil
|
578
580
|
data
|
579
581
|
end
|
580
|
-
|
581
|
-
|
582
|
-
|
583
|
-
|
582
|
+
if defined?(@@extended) && @@extended
|
583
|
+
at_exit do
|
584
|
+
puts get_error_msg
|
585
|
+
end
|
586
|
+
end
|
587
|
+
|
588
|
+
private
|
589
|
+
|
584
590
|
def get_error_msg
|
585
|
-
if defined?(@@extended)
|
591
|
+
if defined?(@@extended) && @@extended
|
586
592
|
msg = "Added #{@@items_without_ean13s} via pharmacodes of #{@@zur_rose_items} items when extracting the transfer.dat from \"Zur Rose\""
|
587
593
|
msg += "\n found #{@@duplicated_ean13s} lines with duplicated ean13" if @@duplicated_ean13s > 0
|
588
594
|
return msg
|