swissmedic-diff 0.1.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/.gemtest +0 -0
- data/.gitignore +2 -0
- data/.travis.yml +18 -0
- data/Gemfile +8 -0
- data/Gemfile.lock +37 -0
- data/History.txt +18 -0
- data/LICENSE +339 -0
- data/Manifest.txt +20 -0
- data/README.txt +68 -0
- data/Rakefile +24 -0
- data/bin/swissmedic-diff +45 -0
- data/lib/swissmedic-diff.rb +317 -0
- data/setup.rb +1345 -0
- data/swissmedic-diff.gemspec +22 -0
- data/test/data/Packungen-2013.10.14.xls +0 -0
- data/test/data/Packungen.older.xls +0 -0
- data/test/data/Packungen.xls +0 -0
- data/test/data/Packungen_error_column.xls +0 -0
- data/test/data/Packungen_error_missing1.xls +0 -0
- data/test/data/Packungen_error_missing2.xls +0 -0
- data/test/test_swissmedic-diff.rb +195 -0
- metadata +117 -0
data/README.txt
ADDED
@@ -0,0 +1,68 @@
|
|
1
|
+
= swissmedic-diff
|
2
|
+
|
3
|
+
* https://github.com/zdavatz/swissmedic-diff
|
4
|
+
|
5
|
+
== DESCRIPTION:
|
6
|
+
|
7
|
+
* Compares two Excel Documents provided by Swissmedic and displays the
|
8
|
+
salient differences. Also: Find out what Products have changed on the
|
9
|
+
swiss healthcare market.
|
10
|
+
|
11
|
+
Up-To-Date file:
|
12
|
+
|
13
|
+
* http://www.swissmedic.ch//daten/00080/00251/index.html
|
14
|
+
|
15
|
+
|
16
|
+
== FEATURES/PROBLEMS:
|
17
|
+
|
18
|
+
Swissmedic does not store old files. You must do this on your own.
|
19
|
+
|
20
|
+
Version 0.1.3 and later can process the Packungen.xls without column 'Gruppe',
|
21
|
+
column E in the previous format. If you want to use Packungen.xls
|
22
|
+
including the column 'Gruppe', you should use version 0.1.2. After
|
23
|
+
you get the source code via Git command, type in the swissmedic-diff
|
24
|
+
directory as follows:
|
25
|
+
|
26
|
+
* git checkout 4c8c9323297453c3cb3380a9d41457d534ed8861
|
27
|
+
|
28
|
+
Then you can get the version 0.1.2.
|
29
|
+
|
30
|
+
== REQUIREMENTS:
|
31
|
+
|
32
|
+
* ruby 1.8 (with oniguruma patch) or ruby 1.9
|
33
|
+
* spreadsheet
|
34
|
+
|
35
|
+
== INSTALL:
|
36
|
+
|
37
|
+
The easiest way to install is via RubyGems. On the command line enter:
|
38
|
+
|
39
|
+
* gem build swissmedic-diff.gemspec
|
40
|
+
* sudo gem install swissmedic-diff-0.1.4.gem
|
41
|
+
|
42
|
+
To manually install, use the included setup.rb script:
|
43
|
+
|
44
|
+
* sudo ruby setup.rb
|
45
|
+
|
46
|
+
See test directory for tests. Run
|
47
|
+
|
48
|
+
* ruby test/test_swissmedic-diff.rb
|
49
|
+
|
50
|
+
for testing.
|
51
|
+
|
52
|
+
== USAGE:
|
53
|
+
|
54
|
+
Usage: /usr/bin/swissmedic-diff [-gnr] <file1> <file2> [<output>]
|
55
|
+
|
56
|
+
-g --group sort by news, deletions and updates
|
57
|
+
-n --name sort by name
|
58
|
+
-r --registration sort by registration
|
59
|
+
|
60
|
+
== DEVELOPERS:
|
61
|
+
|
62
|
+
* Hannes Wyss <hwyss@ywesee.com>
|
63
|
+
* Masaomi Hatakeyama <mhatakeyama@ywesee.com>
|
64
|
+
* Zeno R.R. Davatz <zdavatz@ywesee.com>
|
65
|
+
|
66
|
+
== LICENSE:
|
67
|
+
|
68
|
+
* GPLv2
|
data/Rakefile
ADDED
@@ -0,0 +1,24 @@
|
|
1
|
+
# -*- ruby -*-
|
2
|
+
|
3
|
+
require 'rubygems'
|
4
|
+
require 'hoe'
|
5
|
+
|
6
|
+
# Hoe.plugin :compiler
|
7
|
+
# Hoe.plugin :gem_prelude_sucks
|
8
|
+
# Hoe.plugin :inline
|
9
|
+
# Hoe.plugin :inline
|
10
|
+
# Hoe.plugin :racc
|
11
|
+
# Hoe.plugin :rubyforge
|
12
|
+
# Hoe.plugin :rubyforge
|
13
|
+
|
14
|
+
# Gem specification for swissmedic-diff, built through Hoe.
Hoe.spec 'swissmedic-diff' do
  # One +developer+ call per person: each call appends exactly one author
  # name and one e-mail address to the generated gem specification.
  developer('Masaomi Hatakeyama', 'mhatakeyama@ywesee.com')
  developer('Zeno R.R. Davatz', 'zdavatz@ywesee.com')

  # self.rubyforge_name = 'swissmswissmedic-diffx' # if different than 'swissmedic-diff'
end
|
23
|
+
|
24
|
+
# vim: syntax=ruby
|
data/bin/swissmedic-diff
ADDED
@@ -0,0 +1,45 @@
|
|
1
|
+
#! /usr/bin/ruby18
|
2
|
+
|
3
|
+
require 'swissmedic-diff'
|
4
|
+
|
5
|
+
# Prints the command-line synopsis and the list of supported switches
# to standard output.
def usage
  help = <<-EOS
Usage: #$0 [-gnr] [-i ignorelist] <file1> <file2> [<output>]

-g --group sort by news, deletions and updates
-n --name sort by name
-r --registration sort by registration
-i --ignore ignore differences in the following comma-separated keys
  EOS
  puts help
end
|
15
|
+
|
16
|
+
# Consumes every leading switch (an argument starting with '-') from +argv+
# and returns the pair [sort, ignore]:
#
# sort   :: :group (default), :name or :registration
# ignore :: Array of column keys (Symbols) collected from all -i switches
#
# +argv+ is modified in place; afterwards it only holds the file arguments.
# Unlike a single +case+ on the first argument, this loop allows combining
# -i with a sort switch and never stores the ignore list in +sort+.
def parse_options(argv)
  sort = :group
  ignore = []
  while /^-/.match(argv.first.to_s)
    case argv.shift
    when /^-{1,2}i/
      # the key list is the argument following the switch
      ignore.concat argv.shift.to_s.split(',').collect { |key| key.to_sym }
    when /^-{1,2}n/
      sort = :name
    when /^-{1,2}r/
      sort = :registration
    else
      sort = :group
    end
  end
  [sort, ignore]
end

out = nil
sort, ignore = parse_options(ARGV)
|
32
|
+
|
33
|
+
# Exactly two (old file, new file) or three (plus output path) arguments
# must remain after option parsing; anything else prints the usage text.
unless [2, 3].include?(ARGV.size)
  usage
  exit 1
end

plug = SwissmedicDiff.new
# SwissmedicDiff#diff takes the newer file first, then the older one.
plug.diff(ARGV[1], ARGV[0], ignore)
report = plug.to_s(sort)

if ARGV[2]
  # Open the output file only after the diff succeeded, and let the block
  # form close the handle again.
  File.open(ARGV[2], 'w') { |file| file.puts report }
else
  $stdout.puts report
end
|
@@ -0,0 +1,317 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
# encoding: utf-8
|
3
|
+
# SwissmedicDiff -- swissmedic-diff -- 27.03.2008 -- hwyss@ywesee.com
|
4
|
+
|
5
|
+
require 'ostruct'
|
6
|
+
require 'spreadsheet'
|
7
|
+
|
8
|
+
#= diff command (compare two xls files) for swissmedic xls file.
|
9
|
+
#
|
10
|
+
#Compares two Excel Documents provided by Swissmedic and displays the
|
11
|
+
#salient differences. Also: Find out what Products have changed on the
|
12
|
+
#swiss healthcare market.
|
13
|
+
#
|
14
|
+
#Authors:: Hannes Wyss (hwyss@ywesee.com), Masaomi Hatakeyama (mhatakeyama@ywesee.com)
|
15
|
+
#Version:: 0.1.4 2013-10-16 commit c30af5c15f6b8101f8f84cb482dfd09ab20729d6
|
16
|
+
#Copyright:: Copyright (C) ywesee GmbH, 2010. All rights reserved.
|
17
|
+
#License:: GPLv2.0 Compliance
|
18
|
+
#Source:: http://scm.ywesee.com/?p=swissmedic-diff/.git;a=summary
|
19
|
+
class SwissmedicDiff
|
20
|
+
VERSION = '0.1.4'
|
21
|
+
|
22
|
+
module Diff
|
23
|
+
# Keys of the spreadsheet columns, in sheet order (position == column index).
COLUMNS = [
  :iksnr,
  :seqnr,
  :name_base,
  :company,
  :index_therapeuticus,
  :atc_class,
  :production_science,
  :registration_date,
  :sequence_date,
  :expiry_date,
  :ikscd,
  :size,
  :unit,
  :ikscat,
  :substances,
  :composition,
  :indication_registration,
  :indication_sequence,
]
# Human-readable (German) label for every change flag.
FLAGS = {
  :new                     => 'Neues Produkt',
  :name_base               => 'Namensänderung',
  :ikscat                  => 'Abgabekategorie',
  :index_therapeuticus     => 'Index Therapeuticus',
  :indication_registration => 'Anwendungsgebiet Präparate',
  :indication_sequence     => 'Anwendungsgebiet Sequenz',
  :company                 => 'Zulassungsinhaber',
  :composition             => 'Zusammensetzung',
  :sequence                => 'Packungen',
  :size                    => 'Packungsgrösse',
  :expiry_date             => 'Ablaufdatum der Zulassung',
  :registration_date       => 'Erstzulassungsdatum',
  :sequence_date           => 'Zulassungsdatum Sequenz',
  :delete                  => 'Das Produkt wurde gelöscht',
  :replaced_package        => 'Packungs-Nummer',
  :substances              => 'Wirkstoffe',
  :production_science      => 'Heilmittelcode',
  :atc_class               => 'ATC-Code',
}
# Captures, as +galform+, the text following "excipiens ad|pro" up to the
# next full stop or stand-alone word "pro".
GALFORM_P = %r{excipiens\s+(ad|pro)\s+(?<galform>((?!\bpro\b)[^.])+)}
|
49
|
+
|
50
|
+
# Returns +string+ with every whitespace-separated word capitalized and the
# words re-joined by single spaces.
def capitalize(string)
  words = string.split(/\s+/)
  words.map(&:capitalize).join(' ')
end
|
53
|
+
# Returns the value at index +pos+ of +row+ as a String, or nil when that
# value is nil or false.
def cell(row, pos)
  value = row[pos]
  value.to_s if value
end
|
58
|
+
# Returns the position of +key+ within COLUMNS (nil for an unknown key).
def column(key)
  COLUMNS.find_index(key)
end
|
61
|
+
# Builds the "<iksnr>: <name>" label for a registration; the name is looked
# up through #name.
def describe(diff, iksnr)
  "#{iksnr}: #{name(diff, iksnr)}"
end
|
64
|
+
# Describes a single change flag of registration +iksnr+ as "<label> (<detail>)".
#
# diff  :: result object of #diff (responds to newest_rows and replacements)
# iksnr :: registration number (String)
# flag  :: change flag (Symbol); the label comes from FLAGS, falling back to
#          the flag itself
#
# Returns nil for :sequence (callers compact the result), otherwise a String.
# Date cells that are nil or not date-like no longer raise NoMethodError:
# they are rendered through #to_s instead of #strftime.
def describe_flag(diff, iksnr, flag)
  txt = FLAGS.fetch(flag, flag)
  case flag
  when :sequence
    nil
  when :replaced_package
    # newest_rows maps package number => row; replacements maps row => old
    # package number
    pairs = diff.newest_rows[iksnr].collect { |rep, row|
      old = diff.replacements[row]
      [old, rep].join(' -> ') if old
    }.compact
    sprintf "%s (%s)", txt, pairs.join(',')
  when :registration_date, :expiry_date
    row = diff.newest_rows[iksnr].sort.first.last
    date = row[column(flag)]
    shown = date.respond_to?(:strftime) ? date.strftime('%d.%m.%Y') : date.to_s
    sprintf "%s (%s)", txt, shown
  else
    row = diff.newest_rows[iksnr].sort.first.last
    sprintf "%s (%s)", txt, cell(row, column(flag))
  end
end
|
83
|
+
|
84
|
+
#=== Comparison of two Excel files
#
#_target_:: new file path (String)
#_latest_:: old file path (String)
#_ignore_:: column keys not to be compared (Array of Symbols)
#
#return :: differences (OpenStruct) with the members news, updates, changes,
#          newest_rows, replacements, package_deletions, sequence_deletions
#          and registration_deletions
#
# Raises a RuntimeError when row 2 of the target sheet holds a value in the
# column right after the last known one (COLUMNS.size).
def diff(target, latest, ignore = [])
  replacements = {}
  known_regs, known_seqs, known_pacs, newest_rows = known_data(latest)
  @diff = OpenStruct.new
  @diff.news = news = []
  @diff.updates = updates = []
  @diff.changes = changes = {}
  @diff.newest_rows = newest_rows
  Spreadsheet.client_encoding = 'UTF-8'
  tbook = Spreadsheet.open(target)
  sheet = tbook.worksheet(0)
  if new_column = cell(sheet.row(2), COLUMNS.size)
    raise "New column #{COLUMNS.size} (#{new_column})"
  end
  # idx numbers repeated (iksnr, pacnr) rows; prr/prp remember the previous
  # row's registration and package number.
  idx, prr, prp = nil
  multiples = {}
  each_valid_row(tbook) { |row|
    iksnr = cell(row, column(:iksnr))
    seqnr = cell(row, column(:seqnr))
    pacnr = cell(row, column(:ikscd))
    (multiples[iksnr] ||= {})
    if prr == iksnr && prp == pacnr
      idx += 1
    elsif previous = multiples[iksnr][pacnr]
      prr = iksnr
      prp = pacnr
      idx = previous[COLUMNS.size].to_i + 1
    else
      prr = iksnr
      prp = pacnr
      idx = 0
    end
    # the running index is stored in the extra cell behind the last column
    row[COLUMNS.size] = idx
    (newest_rows[iksnr] ||= {})[pacnr] = row
    multiples[iksnr][pacnr] = row
    # a registration still present in known_regs existed in the old file
    if known_regs.delete([iksnr])
      changes[iksnr] ||= []
    else
      changes[iksnr] ||= [:new]
    end
    known_seqs.delete([iksnr, seqnr])
    if(other = known_pacs.delete([iksnr, pacnr, idx]))
      flags = rows_diff(row, other, ignore)
      (changes[iksnr].concat flags).uniq!
      updates.push row unless flags.empty?
    else
      replacements.store [ iksnr, seqnr, cell(row, column(:size)),
                           cell(row, column(:unit)) ], row
      flags = changes[iksnr]
      flags.push(:sequence).uniq! unless(flags.include? :new)
      news.push row
    end
  }
  @diff.replacements = reps = {}
  # whatever is left in known_pacs is missing from the new file; a new package
  # with the same sequence, size and unit counts as its replacement
  known_pacs.each { |(iksnr, pacnr), row|
    key = [iksnr, '%02i' % cell(row, column(:seqnr)).to_i,
           cell(row, column(:size)), cell(row, column(:unit))]
    if(rep = replacements[key])
      flags = changes[iksnr]
      # record the flag once, however many packages were replaced
      flags.push :replaced_package unless flags.include?(:replaced_package)
      reps.store rep, pacnr
    end
  }
  known_regs.each_key { |(iksnr,_)| changes[iksnr] = [:delete] }
  changes.delete_if { |iksnr, flags| flags.empty? }
  @diff.package_deletions = known_pacs.collect { |key, row|
    ## the keys in known_pacs don't include the sequence number (which
    #  would prevent us from properly recognizing multi-sequence-Packages),
    #  so we need to complete the path to the package now - on a copy, so
    #  that the hash keys themselves stay untouched
    key.dup.insert(1, '%02i' % cell(row, column(:seqnr)).to_i)
  }
  @diff.sequence_deletions = known_seqs.keys
  @diff.registration_deletions = known_regs.keys
  @diff
end
|
166
|
+
# Renders +flags+ as a bullet list, one "- <label>\n" line per flag.
#
# flags :: Array of change flags (Symbols); :revision entries are skipped.
#          The array itself is left unmodified.
#
# Returns a String; flags without an entry in FLAGS are rendered as
# "Unbekannt (<flag>)".
def format_flags(flags)
  visible = flags.reject { |flag| flag == :revision }
  visible.collect { |flag|
    "- %s\n" % FLAGS.fetch(flag, "Unbekannt (#{flag})")
  }.join
end
|
172
|
+
# Collects the lookup tables for the file at path +latest+.
#
# Returns [known_regs, known_seqs, known_pacs, newest_rows], four Hashes
# that are filled by #_known_data.
def known_data(latest)
  registrations, sequences, packages, rows = {}, {}, {}, {}
  _known_data(latest, registrations, sequences, packages, rows)
  [registrations, sequences, packages, rows]
end
|
180
|
+
# Reads the file at path +latest+ and fills the given Hashes row by row:
#
# known_regs  :: [iksnr]               => row
# known_seqs  :: [iksnr, seqnr]        => row
# known_pacs  :: [iksnr, pacnr, index] => row
# newest_rows :: iksnr => { pacnr => row }
#
# +index+ counts repeated occurrences of the same (iksnr, pacnr) pair,
# starting at 0, and is also stored in the row at position COLUMNS.size.
def _known_data(latest, known_regs, known_seqs, known_pacs, newest_rows)
  book = Spreadsheet.open(latest)
  counter = last_iksnr = last_pacnr = nil
  seen = {}
  each_valid_row(book) do |row|
    iksnr = cell(row, column(:iksnr))
    seqnr = cell(row, column(:seqnr))
    pacnr = cell(row, column(:ikscd))
    per_registration = (seen[iksnr] ||= {})
    if last_iksnr == iksnr && last_pacnr == pacnr
      # direct repetition of the previous row's package
      counter += 1
    else
      earlier = per_registration[pacnr]
      counter = earlier ? earlier[COLUMNS.size].to_i + 1 : 0
      last_iksnr = iksnr
      last_pacnr = pacnr
    end
    per_registration[pacnr] = row
    row[COLUMNS.size] = counter
    known_regs.store [iksnr], row
    known_seqs.store [iksnr, seqnr], row
    known_pacs.store [iksnr, pacnr, counter], row
    (newest_rows[iksnr] ||= {})[pacnr] = row
  end
end
|
208
|
+
# Returns the :name_base cell of registration +iksnr+, taken from the row
# that sorts first among that registration's newest rows.
def name(diff, iksnr)
  _pacnr, first_row = diff.newest_rows[iksnr].sort.first
  cell(first_row, column(:name_base))
end
|
213
|
+
# Compares two rows column by column.
#
# row, other :: the rows to compare
# ignore     :: column keys (Symbols) that are skipped
#
# Returns the keys of all columns whose comparable values differ, in
# COLUMNS order.
def rows_diff(row, other, ignore = [])
  differing = COLUMNS.each_with_index.select { |key, idx|
    !ignore.include?(key) &&
      _comparable(key, row, idx) != _comparable(key, other, idx)
  }
  differing.collect { |key, _idx| key }
end
|
223
|
+
|
224
|
+
#=== Output the differencies with String
|
225
|
+
#
|
226
|
+
# This should be called after diff method.
|
227
|
+
#
|
228
|
+
#_sort_ :: sort key (:group | :name | :registration)
|
229
|
+
#
|
230
|
+
#return :: difference (String)
|
231
|
+
def to_s(sort=:group)
|
232
|
+
@diff ||= nil
|
233
|
+
return '' unless @diff
|
234
|
+
@diff.changes.sort_by { |iksnr, flags|
|
235
|
+
_sort_by(sort, iksnr, flags)
|
236
|
+
}.collect { |iksnr, flags|
|
237
|
+
if(flags.include? :new)
|
238
|
+
"+ " << describe(@diff, iksnr)
|
239
|
+
elsif(flags.include? :delete)
|
240
|
+
"- " << describe(@diff, iksnr)
|
241
|
+
else
|
242
|
+
"> " << describe(@diff, iksnr) << "; " \
|
243
|
+
<< flags.collect { |flag| describe_flag(@diff, iksnr, flag)
|
244
|
+
}.compact.join(", ")
|
245
|
+
end
|
246
|
+
}.join("\n")
|
247
|
+
end
|
248
|
+
# Returns the sort key of one changes entry for #to_s.
#
# sort :: :name         => [name, iksnr]
#         :registration => iksnr
#         anything else => [rank, iksnr] with rank 0 for new, 1 for deleted
#                          and 2 for all other registrations
def _sort_by(sort, iksnr, flags)
  return [name(@diff, iksnr), iksnr] if sort == :name
  return iksnr if sort == :registration
  rank = if flags.include?(:new) then 0
         elsif flags.include?(:delete) then 1
         else 2
         end
  [rank, iksnr]
end
|
265
|
+
# Normalizes the cell at +idx+ of +row+ for comparison.
#
# Returns nil for an empty cell, the raw value for :registration_date and
# :expiry_date, a two-digit zero-padded String for :seqnr, and otherwise the
# cell text downcased with all whitespace removed.
def _comparable(key, row, idx)
  raw = row[idx]
  return nil unless raw
  case key
  when :registration_date, :expiry_date
    row[idx]
  when :seqnr
    sprintf("%02i", raw.to_i)
  else
    cell(row, idx).downcase.gsub(/\s+/, "")
  end
end
|
277
|
+
|
278
|
+
#=== Iterate over all valid rows of a swissmedic Packungen.xls
#
# Yields every data row of the first worksheet, skipping the leading rows
# reported by rows_to_skip and all rows whose :production_science cell is
# 'Tierarzneimittel'.
# Before yielding, the fields :iksnr, :seqnr and :ikscd are patched to the
# old swissmedic convention of fixed-size, zero-padded strings.
#
# example:
#   SwissmedicDiff.new.each_valid_row(Spreadsheet.open('path/to/file')) { |x| puts "iksnr #{x[0]}" }
#
#_spreadsheet_:: spreadsheet to operate on
#
# Raises a RuntimeError for a row that has fewer than half of the expected
# columns or more than half as many empty cells as there are columns.
def each_valid_row(spreadsheet)
  first_data_row = rows_to_skip(spreadsheet)
  half = COLUMNS.size/2
  padding = [[:iksnr, "%05i"], [:seqnr, "%02i"], [:ikscd, "%03i"]]
  spreadsheet.worksheet(0).each(first_data_row) do |row|
    too_short = row.size < half
    if too_short || row.count { |val| val.nil? } > half
      raise "Data missing in \n(line " + (row.idx+1).to_s + "): " + row.join(", ").to_s + "\n"
    end
    next if cell(row, column(:production_science)) == 'Tierarzneimittel'
    padding.each do |key, format|
      row[column(key)] = format % cell(row, column(key)).to_i
    end
    yield row
  end
end
|
306
|
+
|
307
|
+
# Returns the number of leading rows of the first worksheet whose first cell
# does not convert to a non-zero Integer, i.e. the index of the first data
# row.
#
# The scan stops at the end of the used area of the sheet, so a sheet
# without any data row returns that end index instead of looping forever
# (rows past the end read as empty and would otherwise always match).
def rows_to_skip(spreadsheet)
  # Packungen.xls of swissmedic before October 2013 had 3 leading rows
  # Packungen.xls of swissmedic after October 2013 have 4 leading rows
  worksheet = spreadsheet.worksheet(0)
  limit = worksheet.dimensions[1].to_i # index of the first unused row
  j = 0
  j += 1 while j < limit && worksheet.row(j)[0].to_i == 0
  j
end
|
314
|
+
|
315
|
+
end
|
316
|
+
include Diff
|
317
|
+
end
|