swissmedic-diff 0.1.4
Sign up to get free protection for your applications and to get access to all the features.
- data/.gemtest +0 -0
- data/.gitignore +2 -0
- data/.travis.yml +18 -0
- data/Gemfile +8 -0
- data/Gemfile.lock +37 -0
- data/History.txt +18 -0
- data/LICENSE +339 -0
- data/Manifest.txt +20 -0
- data/README.txt +68 -0
- data/Rakefile +24 -0
- data/bin/swissmedic-diff +45 -0
- data/lib/swissmedic-diff.rb +317 -0
- data/setup.rb +1345 -0
- data/swissmedic-diff.gemspec +22 -0
- data/test/data/Packungen-2013.10.14.xls +0 -0
- data/test/data/Packungen.older.xls +0 -0
- data/test/data/Packungen.xls +0 -0
- data/test/data/Packungen_error_column.xls +0 -0
- data/test/data/Packungen_error_missing1.xls +0 -0
- data/test/data/Packungen_error_missing2.xls +0 -0
- data/test/test_swissmedic-diff.rb +195 -0
- metadata +117 -0
data/README.txt
ADDED
@@ -0,0 +1,68 @@
|
|
1
|
+
= swissmedic-diff
|
2
|
+
|
3
|
+
* https://github.com/zdavatz/swissmedic-diff
|
4
|
+
|
5
|
+
== DESCRIPTION:
|
6
|
+
|
7
|
+
* Compares two Excel Documents provided by Swissmedic and displays the
|
8
|
+
salient differences. Also: Find out what Products have changed on the
|
9
|
+
swiss healthcare market.
|
10
|
+
|
11
|
+
Up-To-Date file:
|
12
|
+
|
13
|
+
* http://www.swissmedic.ch//daten/00080/00251/index.html
|
14
|
+
|
15
|
+
|
16
|
+
== FEATURES/PROBLEMS:
|
17
|
+
|
18
|
+
Swissmedic does not store old files. You must do this on your own.
|
19
|
+
|
20
|
+
Version 0.1.3 can read the Packungen.xls without the column 'Gruppe',
|
21
|
+
column E in the previous format. If you want to use a Packungen.xls
|
22
|
+
including the column 'Gruppe', you should use version 0.1.2. After
|
23
|
+
you get the source code via Git command, type in the swissmedic-diff
|
24
|
+
directory as follows:
|
25
|
+
|
26
|
+
* git checkout 4c8c9323297453c3cb3380a9d41457d534ed8861
|
27
|
+
|
28
|
+
Then you can get the version 0.1.2.
|
29
|
+
|
30
|
+
== REQUIREMENTS:
|
31
|
+
|
32
|
+
* ruby 1.8 (with oniguruma patch) or ruby 1.9
|
33
|
+
* spreadsheet
|
34
|
+
|
35
|
+
== INSTALL:
|
36
|
+
|
37
|
+
The easiest way to install is via RubyGems. On the command line enter:
|
38
|
+
|
39
|
+
* gem build swissmedic-diff.gemspec
|
40
|
+
* sudo gem install swissmedic-diff-0.1.4.gem
|
41
|
+
|
42
|
+
To manually install, use the included setup.rb script:
|
43
|
+
|
44
|
+
* sudo ruby setup.rb
|
45
|
+
|
46
|
+
See test directory for tests. Run
|
47
|
+
|
48
|
+
* ruby test/test_swissmedic-diff.rb
|
49
|
+
|
50
|
+
for testing.
|
51
|
+
|
52
|
+
== USAGE:
|
53
|
+
|
54
|
+
Usage: /usr/bin/swissmedic-diff [-gnr] <file1> <file2> [<output>]
|
55
|
+
|
56
|
+
-g --group sort by news, deletions and updates
|
57
|
+
-n --name sort by name
|
58
|
+
-r --registration sort by registration
|
59
|
+
|
60
|
+
== DEVELOPERS:
|
61
|
+
|
62
|
+
* Hannes Wyss <hwyss@ywesee.com>
|
63
|
+
* Masaomi Hatakeyama <mhatakeyama@ywesee.com>
|
64
|
+
* Zeno R.R. Davatz <zdavatz@ywesee.com>
|
65
|
+
|
66
|
+
== LICENSE:
|
67
|
+
|
68
|
+
* GPLv2
|
data/Rakefile
ADDED
@@ -0,0 +1,24 @@
|
|
1
|
+
# -*- ruby -*-
|
2
|
+
|
3
|
+
require 'rubygems'
|
4
|
+
require 'hoe'
|
5
|
+
|
6
|
+
# Hoe.plugin :compiler
|
7
|
+
# Hoe.plugin :gem_prelude_sucks
|
8
|
+
# Hoe.plugin :inline
|
9
|
+
# Hoe.plugin :inline
|
10
|
+
# Hoe.plugin :racc
|
11
|
+
# Hoe.plugin :rubyforge
|
12
|
+
# Hoe.plugin :rubyforge
|
13
|
+
|
14
|
+
Hoe.spec 'swissmedic-diff' do
  # Each call to +developer+ adds one name/email pair, so every maintainer
  # is registered separately instead of as one comma-joined string.
  developer('Masaomi Hatakeyama', 'mhatakeyama@ywesee.com')
  developer('Zeno R.R. Davatz', 'zdavatz@ywesee.com')
end
|
23
|
+
|
24
|
+
# vim: syntax=ruby
|
data/bin/swissmedic-diff
ADDED
@@ -0,0 +1,45 @@
|
|
1
|
+
#! /usr/bin/ruby18
|
2
|
+
|
3
|
+
require 'swissmedic-diff'
|
4
|
+
|
5
|
+
# Print the command-line help text to standard output.
def usage
  help = <<-EOS
Usage: #{$0} [-gnr] [-i ignorelist] <file1> <file2> [<output>]

-g --group sort by news, deletions and updates
-n --name sort by name
-r --registration sort by registration
-i --ignore ignore differences in the following comma-separated keys
  EOS
  $stdout.puts help
end
|
15
|
+
|
16
|
+
# Command-line driver: parse the leading option flags, choose the output
# stream, run the comparison and print the report.
out = nil
sort = :group
ignore = []

# Consume every leading option, not just the first one, so that a sort
# flag and -i can be combined. Unknown flags fall back to :group.
while /^-/.match(ARGV.first.to_s)
  case ARGV.shift
  when /^-{1,2}i/
    keys = ARGV.shift
    unless keys
      # -i given without its comma-separated key list
      usage
      exit 1
    end
    ignore.concat keys.split(',').collect { |key| key.to_sym }
  when /^-{1,2}n/
    sort = :name
  when /^-{1,2}r/
    sort = :registration
  else
    sort = :group
  end
end

case ARGV.size
when 2
  out = $stdout
when 3
  out = File.open(ARGV[2], 'w')
else
  usage
  exit 1
end

plug = SwissmedicDiff.new
begin
  # diff(target, latest): the second file argument is the new file.
  plug.diff(ARGV[1], ARGV[0], ignore)
  out.puts plug.to_s(sort)
ensure
  # Flush and release the report file; never close the process' stdout.
  out.close unless out.equal?($stdout)
end
|
@@ -0,0 +1,317 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
# encoding: utf-8
|
3
|
+
# SwissmedicDiff -- swissmedic-diff -- 27.03.2008 -- hwyss@ywesee.com
|
4
|
+
|
5
|
+
require 'ostruct'
|
6
|
+
require 'spreadsheet'
|
7
|
+
|
8
|
+
#= diff command (compares two xls files) for Swissmedic xls files.
|
9
|
+
#
|
10
|
+
#Compares two Excel Documents provided by Swissmedic and displays the
|
11
|
+
#salient differences. Also: Find out what Products have changed on the
|
12
|
+
#swiss healthcare market.
|
13
|
+
#
|
14
|
+
#Authors:: Hannes Wyss (hwyss@ywesee.com), Masaomi Hatakeyama (mhatakeyama@ywesee.com)
|
15
|
+
#Version:: 0.1.4 2013-10-16 commit c30af5c15f6b8101f8f84cb482dfd09ab20729d6
|
16
|
+
#Copyright:: Copyright (C) ywesee GmbH, 2010. All rights reserved.
|
17
|
+
#License:: GPLv2.0 Compliance
|
18
|
+
#Source:: http://scm.ywesee.com/?p=swissmedic-diff/.git;a=summary
|
19
|
+
class SwissmedicDiff
|
20
|
+
VERSION = '0.1.4'
|
21
|
+
|
22
|
+
module Diff
|
23
|
+
COLUMNS = [ :iksnr, :seqnr, :name_base, :company,
|
24
|
+
:index_therapeuticus, :atc_class, :production_science,
|
25
|
+
:registration_date, :sequence_date, :expiry_date, :ikscd,
|
26
|
+
:size, :unit, :ikscat, :substances, :composition,
|
27
|
+
:indication_registration, :indication_sequence ]
|
28
|
+
FLAGS = {
|
29
|
+
:new => 'Neues Produkt',
|
30
|
+
:name_base => 'Namensänderung',
|
31
|
+
:ikscat => 'Abgabekategorie',
|
32
|
+
:index_therapeuticus => 'Index Therapeuticus',
|
33
|
+
:indication_registration => 'Anwendungsgebiet Präparate',
|
34
|
+
:indication_sequence => 'Anwendungsgebiet Sequenz',
|
35
|
+
:company => 'Zulassungsinhaber',
|
36
|
+
:composition => 'Zusammensetzung',
|
37
|
+
:sequence => 'Packungen',
|
38
|
+
:size => 'Packungsgrösse',
|
39
|
+
:expiry_date => 'Ablaufdatum der Zulassung',
|
40
|
+
:registration_date => 'Erstzulassungsdatum',
|
41
|
+
:sequence_date => 'Zulassungsdatum Sequenz',
|
42
|
+
:delete => 'Das Produkt wurde gelöscht',
|
43
|
+
:replaced_package => 'Packungs-Nummer',
|
44
|
+
:substances => 'Wirkstoffe',
|
45
|
+
:production_science => 'Heilmittelcode',
|
46
|
+
:atc_class => 'ATC-Code',
|
47
|
+
}
|
48
|
+
GALFORM_P = %r{excipiens\s+(ad|pro)\s+(?<galform>((?!\bpro\b)[^.])+)}
|
49
|
+
|
50
|
+
# Capitalize every whitespace-separated word of +string+ and join the
# words with single spaces.
def capitalize(string)
  words = string.split(/\s+/)
  words.map(&:capitalize).join(' ')
end
|
53
|
+
# String form of the value at +pos+ in +row+, or nil when that value is
# nil or false.
def cell(row, pos)
  value = row[pos]
  value.to_s if value
end
|
58
|
+
# Position of +key+ within COLUMNS, or nil when the key is not listed.
def column(key)
  COLUMNS.find_index(key)
end
|
61
|
+
# One-line label for a registration: "<iksnr>: <product name>".
def describe(diff, iksnr)
  label = name(diff, iksnr)
  "#{iksnr}: #{label}"
end
|
64
|
+
# Human-readable description of one change flag for a registration.
#
#_diff_::  result object exposing newest_rows and replacements
#_iksnr_:: registration number the flag belongs to
#_flag_::  change flag (Symbol); its label is looked up in FLAGS and the
#          flag itself is used when no label exists
#
#return :: "<label> (<detail>)" or nil for :sequence, which has no
#          description of its own
def describe_flag(diff, iksnr, flag)
  txt = FLAGS.fetch(flag, flag)
  case flag
  when :sequence
    nil
  when :replaced_package
    # newest_rows[iksnr] maps package number => row; replacements maps a
    # new row to the package number it replaced.
    pairs = diff.newest_rows[iksnr].collect { |rep, row|
      old = diff.replacements[row]
      [old, rep].join(' -> ') if old
    }.compact
    sprintf "%s (%s)", txt, pairs.join(',')
  else
    # Describe the row of the lowest package number.
    row = diff.newest_rows[iksnr].min_by { |pacnr, _row| pacnr }.last
    value = row[column(flag)]
    if [:registration_date, :expiry_date].include?(flag) \
      && value.respond_to?(:strftime)
      sprintf "%s (%s)", txt, value.strftime('%d.%m.%Y')
    else
      # Also reached for date flags whose cell is blank or not a date
      # object, which previously raised NoMethodError on strftime.
      sprintf "%s (%s)", txt, cell(row, column(flag))
    end
  end
end
|
83
|
+
|
84
|
+
#=== Compare two Excel files
#
#_target_:: new file path (String)
#_latest_:: old file path (String)
#_ignore_:: columns not to be compared (Array of Symbol)
#
#return :: differences (OpenStruct) with the members news, updates,
#          changes, newest_rows, replacements, package_deletions,
#          sequence_deletions and registration_deletions
#
# Raises a RuntimeError when the target sheet has a value to the right of
# the last known column.
def diff(target, latest, ignore = [])
  # Set the client encoding before either workbook is opened so both are
  # read with the same setting.
  Spreadsheet.client_encoding = 'UTF-8'
  replacements = {}
  known_regs, known_seqs, known_pacs, newest_rows = known_data(latest)
  @diff = OpenStruct.new
  @diff.news = news = []
  @diff.updates = updates = []
  @diff.changes = changes = {}
  @diff.newest_rows = newest_rows
  tbook = Spreadsheet.open(target)
  if new_column = cell(tbook.worksheet(0).row(2), COLUMNS.size)
    raise "New column #{COLUMNS.size} (#{new_column})"
  end
  idx = prr = prp = nil
  multiples = {}
  each_valid_row(tbook) { |row|
    iksnr = cell(row, column(:iksnr))
    seqnr = cell(row, column(:seqnr))
    pacnr = cell(row, column(:ikscd))
    multiples[iksnr] ||= {}
    # idx counts repeated occurrences of the same registration/package
    # pair; it is stored in the slot right after the last real column.
    if prr == iksnr && prp == pacnr
      idx += 1
    elsif previous = multiples[iksnr][pacnr]
      prr = iksnr
      prp = pacnr
      idx = previous[COLUMNS.size].to_i + 1
    else
      prr = iksnr
      prp = pacnr
      idx = 0
    end
    row[COLUMNS.size] = idx
    (newest_rows[iksnr] ||= {})[pacnr] = row
    multiples[iksnr][pacnr] = row
    # Whatever is left in the known_* hashes after this loop was not seen
    # in the target file.
    if known_regs.delete([iksnr])
      changes[iksnr] ||= []
    else
      changes[iksnr] ||= [:new]
    end
    known_seqs.delete([iksnr, seqnr])
    if(other = known_pacs.delete([iksnr, pacnr, idx]))
      flags = rows_diff(row, other, ignore)
      (changes[iksnr].concat flags).uniq!
      updates.push row unless flags.empty?
    else
      replacements.store [ iksnr, seqnr, cell(row, column(:size)),
                           cell(row, column(:unit)) ], row
      flags = changes[iksnr]
      flags.push(:sequence).uniq! unless(flags.include? :new)
      news.push row
    end
  }
  @diff.replacements = reps = {}
  known_pacs.each { |(iksnr, pacnr), row|
    key = [iksnr, '%02i' % cell(row, column(:seqnr)).to_i,
           cell(row, column(:size)), cell(row, column(:unit))]
    if(rep = replacements[key])
      # Record the flag once per registration, however many of its
      # packages were replaced.
      flags = changes[iksnr]
      flags.push :replaced_package unless flags.include?(:replaced_package)
      reps.store rep, pacnr
    end
  }
  known_regs.each_key { |(iksnr,_)| changes[iksnr] = [:delete] }
  changes.delete_if { |iksnr, flags| flags.empty? }
  @diff.package_deletions = known_pacs.collect { |key, row|
    ## the keys in known_pacs don't include the sequence number (which
    #  would prevent us from properly recognizing multi-sequence-Packages),
    #  so we complete the path to the package now. Work on a copy: the key
    #  array is still a live hash key and must not be modified in place.
    path = key.dup
    path[1,0] = '%02i' % cell(row, column(:seqnr)).to_i
    path
  }
  @diff.sequence_deletions = known_seqs.keys
  @diff.registration_deletions = known_regs.keys
  @diff
end
|
166
|
+
# Render change flags as a bullet list, one "- <label>\n" line per flag.
#
# :revision flags are left out. Flags without an entry in FLAGS are
# rendered as "Unbekannt (<flag>)". The +flags+ array passed in is not
# modified.
def format_flags(flags)
  visible = flags.reject { |flag| flag == :revision }
  visible.collect { |flag|
    "- %s\n" % FLAGS.fetch(flag, "Unbekannt (#{flag})")
  }.join
end
|
172
|
+
# Collect the lookup tables for the old file +latest+.
#
#return :: [known_regs, known_seqs, known_pacs, newest_rows], four hashes
#          filled by _known_data
def known_data(latest)
  regs, seqs, pacs, rows = {}, {}, {}, {}
  _known_data(latest, regs, seqs, pacs, rows)
  [regs, seqs, pacs, rows]
end
|
180
|
+
# Read the workbook at +latest+ and fill the four given hashes from its
# valid rows:
#
# known_regs::  [iksnr] => row
# known_seqs::  [iksnr, seqnr] => row
# known_pacs::  [iksnr, pacnr, idx] => row
# newest_rows:: iksnr => { pacnr => row }
#
# idx numbers repeated occurrences of the same iksnr/pacnr pair, starting
# at 0, and is also written into the row slot after the last real column.
def _known_data(latest, known_regs, known_seqs, known_pacs, newest_rows)
  workbook = Spreadsheet.open(latest)
  occurrence = last_reg = last_pac = nil
  seen = {}
  each_valid_row(workbook) do |row|
    iksnr = cell(row, column(:iksnr))
    seqnr = cell(row, column(:seqnr))
    pacnr = cell(row, column(:ikscd))
    per_reg = (seen[iksnr] ||= {})
    if last_reg == iksnr && last_pac == pacnr
      # same package as the row directly before
      occurrence += 1
    else
      # continue counting after an earlier, non-adjacent occurrence
      earlier = per_reg[pacnr]
      occurrence = earlier ? earlier[COLUMNS.size].to_i + 1 : 0
      last_reg = iksnr
      last_pac = pacnr
    end
    per_reg[pacnr] = row
    row[COLUMNS.size] = occurrence
    known_regs.store [iksnr], row
    known_seqs.store [iksnr, seqnr], row
    known_pacs.store [iksnr, pacnr, occurrence], row
    (newest_rows[iksnr] ||= {})[pacnr] = row
  end
end
|
208
|
+
# Product name of registration +iksnr+, read from the row of its lowest
# package number in diff.newest_rows.
def name(diff, iksnr)
  _pacnr, first_row = diff.newest_rows[iksnr].min
  cell(first_row, column(:name_base))
end
|
213
|
+
# List the COLUMNS keys whose comparable values differ between +row+ and
# +other+, skipping every key named in +ignore+.
def rows_diff(row, other, ignore = [])
  candidates = COLUMNS.each_with_index.reject { |key, _idx| ignore.include?(key) }
  changed = candidates.select { |key, idx|
    _comparable(key, row, idx) != _comparable(key, other, idx)
  }
  changed.map { |key, _idx| key }
end
|
223
|
+
|
224
|
+
#=== Output the differencies with String
|
225
|
+
#
|
226
|
+
# This should be called after diff method.
|
227
|
+
#
|
228
|
+
#_sort_ :: sort key (:group | :name | :registration)
|
229
|
+
#
|
230
|
+
#return :: difference (String)
|
231
|
+
def to_s(sort=:group)
|
232
|
+
@diff ||= nil
|
233
|
+
return '' unless @diff
|
234
|
+
@diff.changes.sort_by { |iksnr, flags|
|
235
|
+
_sort_by(sort, iksnr, flags)
|
236
|
+
}.collect { |iksnr, flags|
|
237
|
+
if(flags.include? :new)
|
238
|
+
"+ " << describe(@diff, iksnr)
|
239
|
+
elsif(flags.include? :delete)
|
240
|
+
"- " << describe(@diff, iksnr)
|
241
|
+
else
|
242
|
+
"> " << describe(@diff, iksnr) << "; " \
|
243
|
+
<< flags.collect { |flag| describe_flag(@diff, iksnr, flag)
|
244
|
+
}.compact.join(", ")
|
245
|
+
end
|
246
|
+
}.join("\n")
|
247
|
+
end
|
248
|
+
# Sort key for one entry of the change list.
#
# :name::         [product name, iksnr]
# :registration:: iksnr
# anything else:: [weight, iksnr], weight being 0 for new registrations,
#                 1 for deleted ones and 2 for all others
def _sort_by(sort, iksnr, flags)
  return [name(@diff, iksnr), iksnr] if sort == :name
  return iksnr if sort == :registration
  weight = [:new, :delete].index { |flag| flags.include?(flag) } || 2
  [weight, iksnr]
end
|
265
|
+
# Normalized value of column +key+ (at position +idx+) in +row+, used to
# compare two rows.
#
# Returns nil for an empty cell, the raw cell for :registration_date and
# :expiry_date, a two-digit String for :seqnr, and for every other key
# the downcased text with all whitespace removed.
def _comparable(key, row, idx)
  value = row[idx]
  return nil unless value
  case key
  when :registration_date, :expiry_date
    value
  when :seqnr
    "%02i" % value.to_i
  else
    cell(row, idx).downcase.gsub(/\s+/, "")
  end
end
|
277
|
+
|
278
|
+
#=== Iterate over all valid rows of a Swissmedic Packungen.xls
#
# Starts at the row index given by rows_to_skip and yields every row of
# the first worksheet, except rows whose :production_science column reads
# 'Tierarzneimittel'. Before a row is yielded, its :iksnr, :seqnr and
# :ikscd fields are rewritten as zero-padded strings of 5, 2 and 3 digits
# to match the old Swissmedic convention of fixed-size strings.
#
# Raises a RuntimeError for a row that has fewer than half of the known
# columns or more than that many nil cells.
#
# example:
#   SwissmedicDiff.new.each_valid_row(Spreadsheet.open('path/to/file')) { |x| puts "iksnr #{x[0]}" }
#
#_spreadsheet_:: spreadsheet to operate on
#
#return :: whatever the worksheet's each returns
def each_valid_row(spreadsheet)
  first_data_row = rows_to_skip(spreadsheet)
  sheet = spreadsheet.worksheet(0)
  half = COLUMNS.size/2
  paddings = [[:iksnr, "%05i"], [:seqnr, "%02i"], [:ikscd, "%03i"]]
  sheet.each(first_data_row) do |row|
    if row.size < half || row.count(nil) > half
      raise "Data missing in \n(line #{row.idx+1}): #{row.join(", ")}\n"
    end
    next if cell(row, column(:production_science)) == 'Tierarzneimittel'
    paddings.each do |key, pattern|
      row[column(key)] = pattern % cell(row, column(key)).to_i
    end
    yield row
  end
end
|
306
|
+
|
307
|
+
# Index of the first data row of the first worksheet, i.e. the number of
# leading header rows.
#
# Packungen.xls of swissmedic before October 2013 had 3 leading rows
# Packungen.xls of swissmedic after October 2013 have 4 leading rows
#
# A row counts as header while the integer value of its first cell is 0.
# The scan stops at the end of the sheet, so a sheet without any data row
# returns the end index instead of looping forever.
def rows_to_skip(spreadsheet)
  worksheet = spreadsheet.worksheet(0)
  # dimensions[1] is taken to be the first unused row index, as documented
  # by the spreadsheet gem -- TODO confirm against the installed version
  limit = worksheet.dimensions[1]
  j = 0
  j += 1 while j < limit && worksheet.row(j)[0].to_i == 0
  j
end
|
314
|
+
|
315
|
+
end
|
316
|
+
include Diff
|
317
|
+
end
|