swissmedic-diff 0.1.4

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,68 @@
1
+ = swissmedic-diff
2
+
3
+ * https://github.com/zdavatz/swissmedic-diff
4
+
5
+ == DESCRIPTION:
6
+
7
+ * Compares two Excel Documents provided by Swissmedic and displays the
8
+ salient differences. Also: Find out what Products have changed on the
9
+ swiss healthcare market.
10
+
11
+ Up-To-Date file:
12
+
13
+ * http://www.swissmedic.ch//daten/00080/00251/index.html
14
+
15
+
16
+ == FEATURES/PROBLEMS:
17
+
18
+ Swissmedic does not store old files. You must do this on your own.
19
+
20
+ Version 0.1.3 can read the Packungen.xls format without the column 'Gruppe',
21
+ column E in the previous format. If you want to use Packungen.xls
22
+ including the column 'Gruppe', you should use version 0.1.2. After
23
+ you get the source code via Git command, type in the swissmedic-diff
24
+ directory as follows:
25
+
26
+ * git checkout 4c8c9323297453c3cb3380a9d41457d534ed8861
27
+
28
+ Then you can get the version 0.1.2.
29
+
30
+ == REQUIREMENTS:
31
+
32
+ * ruby 1.8 (with oniguruma patch) or ruby 1.9
33
+ * spreadsheet
34
+
35
+ == INSTALL:
36
+
37
+ The easiest way to install is via RubyGems. On the command line enter:
38
+
39
+ * gem build swissmedic-diff.gemspec
40
+ * sudo gem install swissmedic-diff-0.1.4.gem
41
+
42
+ To manually install, use the included setup.rb script:
43
+
44
+ * sudo ruby setup.rb
45
+
46
+ See test directory for tests. Run
47
+
48
+ * ruby test/test_swissmedic-diff.rb
49
+
50
+ for testing.
51
+
52
+ == USAGE:
53
+
54
+ Usage: /usr/bin/swissmedic-diff [-gnr] <file1> <file2> [<output>]
55
+
56
+ -g --group sort by news, deletions and updates
57
+ -n --name sort by name
58
+ -r --registration sort by registration
59
+
60
+ == DEVELOPERS:
61
+
62
+ * Hannes Wyss <hwyss@ywesee.com>
63
+ * Masaomi Hatakeyama <mhatakeyama@ywesee.com>
64
+ * Zeno R.R. Davatz <zdavatz@ywesee.com>
65
+
66
+ == LICENSE:
67
+
68
+ * GPLv2
@@ -0,0 +1,24 @@
1
+ # -*- ruby -*-
2
+
3
+ require 'rubygems'
4
+ require 'hoe'
5
+
6
+ # Hoe.plugin :compiler
7
+ # Hoe.plugin :gem_prelude_sucks
8
+ # Hoe.plugin :inline
9
+ # Hoe.plugin :inline
10
+ # Hoe.plugin :racc
11
+ # Hoe.plugin :rubyforge
12
+ # Hoe.plugin :rubyforge
13
+
14
# Gem specification for swissmedic-diff, built with Hoe.
Hoe.spec 'swissmedic-diff' do
  # Each call to `developer` registers one author/e-mail pair, so every
  # maintainer is declared separately instead of as one comma-joined entry.
  developer('Masaomi Hatakeyama', 'mhatakeyama@ywesee.com')
  developer('Zeno R.R. Davatz', 'zdavatz@ywesee.com')
end
23
+
24
+ # vim: syntax=ruby
@@ -0,0 +1,45 @@
1
#!/usr/bin/env ruby
# Command line front end for SwissmedicDiff: diffs two Packungen.xls files and
# prints the result to standard output or to the given output file.

require 'swissmedic-diff'

# Prints the command line synopsis to standard output.
def usage
  puts <<-EOS
Usage: #$0 [-gnr] [-i ignorelist] <file1> <file2> [<output>]

    -g --group         sort by news, deletions and updates
    -n --name          sort by name
    -r --registration  sort by registration
    -i --ignore        ignore differences in the following comma-separated keys
  EOS
end

sort = :group
ignore = []

# Consume every leading option, so that -i can be combined with a sort option.
while /^-/.match(ARGV.first)
  case ARGV.shift
  when /^-{1,2}i/
    keys = ARGV.shift
    # -i needs a value; without one there is nothing sensible to do.
    unless keys
      usage
      exit 1
    end
    ignore.concat keys.split(',').collect { |key| key.to_sym }
  when /^-{1,2}n/
    sort = :name
  when /^-{1,2}r/
    sort = :registration
  else
    # -g and any unrecognised option select the default grouping.
    sort = :group
  end
end

unless [2, 3].include?(ARGV.size)
  usage
  exit 1
end

plug = SwissmedicDiff.new
# SwissmedicDiff#diff takes the new file first: <file2> is the new file,
# <file1> the old one.
plug.diff(ARGV[1], ARGV[0], ignore)
report = plug.to_s(sort)

if ARGV.size == 3
  # The block form closes (and flushes) the output file.
  File.open(ARGV[2], 'w') { |out| out.puts report }
else
  $stdout.puts report
end
@@ -0,0 +1,317 @@
1
+ #!/usr/bin/env ruby
2
+ # encoding: utf-8
3
+ # SwissmedicDiff -- swissmedic-diff -- 27.03.2008 -- hwyss@ywesee.com
4
+
5
+ require 'ostruct'
6
+ require 'spreadsheet'
7
+
8
+ #= diff command (compare two xls files) for swissmedic xls file.
9
+ #
10
+ #Compares two Excel Documents provided by Swissmedic and displays the
11
+ #salient differences. Also: Find out what Products have changed on the
12
+ #swiss healthcare market.
13
+ #
14
+ #Authors:: Hannes Wyss (hwyss@ywesee.com), Masaomi Hatakeyama (mhatakeyama@ywesee.com)
15
+ #Version:: 0.1.4 2013-10-16 commit c30af5c15f6b8101f8f84cb482dfd09ab20729d6
16
+ #Copyright:: Copyright (C) ywesee GmbH, 2010. All rights reserved.
17
+ #License:: GPLv2.0 Compliance
18
+ #Source:: http://scm.ywesee.com/?p=swissmedic-diff/.git;a=summary
19
+ class SwissmedicDiff
20
+ VERSION = '0.1.4'
21
+
22
+ module Diff
23
# Keys of the spreadsheet columns; the position of a key in this list is the
# index used to address the matching cell of a row (see #column).
COLUMNS = [
  :iksnr,
  :seqnr,
  :name_base,
  :company,
  :index_therapeuticus,
  :atc_class,
  :production_science,
  :registration_date,
  :sequence_date,
  :expiry_date,
  :ikscd,
  :size,
  :unit,
  :ikscat,
  :substances,
  :composition,
  :indication_registration,
  :indication_sequence
]

# Display labels for the change flags, looked up by #describe_flag and
# #format_flags.
FLAGS = {
  :new                     => 'Neues Produkt',
  :name_base               => 'Namensänderung',
  :ikscat                  => 'Abgabekategorie',
  :index_therapeuticus     => 'Index Therapeuticus',
  :indication_registration => 'Anwendungsgebiet Präparate',
  :indication_sequence     => 'Anwendungsgebiet Sequenz',
  :company                 => 'Zulassungsinhaber',
  :composition             => 'Zusammensetzung',
  :sequence                => 'Packungen',
  :size                    => 'Packungsgrösse',
  :expiry_date             => 'Ablaufdatum der Zulassung',
  :registration_date       => 'Erstzulassungsdatum',
  :sequence_date           => 'Zulassungsdatum Sequenz',
  :delete                  => 'Das Produkt wurde gelöscht',
  :replaced_package        => 'Packungs-Nummer',
  :substances              => 'Wirkstoffe',
  :production_science      => 'Heilmittelcode',
  :atc_class               => 'ATC-Code'
}

# Captures, as +galform+, the text that follows "excipiens ad" or
# "excipiens pro", up to the next full stop or stand-alone "pro".
GALFORM_P = %r{excipiens\s+(ad|pro)\s+(?<galform>((?!\bpro\b)[^.])+)}
49
+
50
# Capitalizes every whitespace-separated word of +string+ and joins the
# words with single spaces.
def capitalize(string)
  words = string.split(/\s+/)
  words.map(&:capitalize).join(' ')
end
53
# Returns the content of +row+ at index +pos+ as a String, or nil when the
# cell is empty (nil or false).
def cell(row, pos)
  value = row[pos]
  value.to_s if value
end
58
# Returns the position of +key+ within COLUMNS, or nil for an unknown key.
def column(key)
  COLUMNS.find_index(key)
end
61
# Returns "<iksnr>: <name>" for the registration +iksnr+; the name is looked
# up in +diff+ through #name.
def describe(diff, iksnr)
  title = name(diff, iksnr)
  "#{iksnr}: #{title}"
end
64
# Renders one change flag of a registration as display text.
#
#_diff_::  result of #diff; its newest_rows and replacements are read
#_iksnr_:: registration number whose rows are described
#_flag_::  change flag (Symbol); unknown flags are used as their own label
#
#return :: "<label> (<detail>)" (String), or nil for :sequence
def describe_flag(diff, iksnr, flag)
  txt = FLAGS.fetch(flag, flag)
  case flag
  when :sequence
    # No detail text for this flag; callers drop the nil.
    nil
  when :replaced_package
    pairs = diff.newest_rows[iksnr].collect { |rep, row|
      old = diff.replacements[row]
      [old, rep].join(' -> ') if old
    }.compact
    sprintf "%s (%s)", txt, pairs.join(',')
  when :registration_date, :expiry_date
    row = diff.newest_rows[iksnr].sort.first.last
    date = row[column(flag)]
    # A date cell that became empty (or holds plain text) is still flagged as
    # changed by #rows_diff, so it must not be assumed to respond to strftime.
    shown = date.respond_to?(:strftime) ? date.strftime('%d.%m.%Y') : date.to_s
    sprintf "%s (%s)", txt, shown
  else
    row = diff.newest_rows[iksnr].sort.first.last
    sprintf "%s (%s)", txt, cell(row, column(flag))
  end
end
83
+
84
#=== Compare two Excel files
#
#_target_:: new file path (String)
#_latest_:: old file path (String)
#_ignore_:: columns not to be compared (Array of Symbol)
#
#return :: differences (OpenStruct) with the fields news, updates, changes,
#          newest_rows, replacements, package_deletions, sequence_deletions
#          and registration_deletions. The same object is kept in @diff for
#          a later call of #to_s.
#
# Raises a RuntimeError when row 2 of the new file has a cell beyond the
# columns listed in COLUMNS.
def diff(target, latest, ignore = [])
  replacements = {}
  known_regs, known_seqs, known_pacs, newest_rows = known_data(latest)
  @diff = OpenStruct.new
  @diff.news = news = []
  @diff.updates = updates = []
  @diff.changes = changes = {}
  @diff.newest_rows = newest_rows
  Spreadsheet.client_encoding = 'UTF-8'
  tbook = Spreadsheet.open(target)
  sheet = tbook.worksheet(0)
  if new_column = cell(sheet.row(2), COLUMNS.size)
    raise "New column #{COLUMNS.size} (#{new_column})"
  end
  # idx counts repeated occurrences of the same registration/package pair;
  # prr/prp remember the pair of the previous row.
  idx, prr, prp = nil
  multiples = {}
  each_valid_row(tbook) { |row|
    iksnr = cell(row, column(:iksnr))
    seqnr = cell(row, column(:seqnr))
    pacnr = cell(row, column(:ikscd))
    (multiples[iksnr] ||= {})
    if prr == iksnr && prp == pacnr
      idx += 1
    elsif previous = multiples[iksnr][pacnr]
      prr = iksnr
      prp = pacnr
      idx = previous[COLUMNS.size].to_i + 1
    else
      prr = iksnr
      prp = pacnr
      idx = 0
    end
    # The occurrence index is stored in the cell right after the last column.
    row[COLUMNS.size] = idx
    (newest_rows[iksnr] ||= {})[pacnr] = row
    multiples[iksnr][pacnr] = row
    # Whatever is left in known_regs/known_seqs/known_pacs after this loop
    # was not seen in the new file.
    if known_regs.delete([iksnr])
      changes[iksnr] ||= []
    else
      changes[iksnr] ||= [:new]
    end
    known_seqs.delete([iksnr, seqnr])
    if(other = known_pacs.delete([iksnr, pacnr, idx]))
      flags = rows_diff(row, other, ignore)
      (changes[iksnr].concat flags).uniq!
      updates.push row unless flags.empty?
    else
      replacements.store [ iksnr, seqnr, cell(row, column(:size)),
                           cell(row, column(:unit)) ], row
      flags = changes[iksnr]
      flags.push(:sequence).uniq! unless(flags.include? :new)
      news.push row
    end
  }
  @diff.replacements = reps = {}
  known_pacs.each { |(iksnr, pacnr), row|
    key = [iksnr, '%02i' % cell(row, column(:seqnr)).to_i,
           cell(row, column(:size)), cell(row, column(:unit))]
    if(rep = replacements[key])
      # Several packages of one registration may be replaced; the flag is
      # recorded only once so that the report does not repeat its line.
      flags = changes[iksnr]
      flags.push :replaced_package unless flags.include? :replaced_package
      reps.store rep, pacnr
    end
  }
  known_regs.each_key { |(iksnr,_)| changes[iksnr] = [:delete] }
  changes.delete_if { |iksnr, flags| flags.empty? }
  @diff.package_deletions = known_pacs.collect { |key, row|
    ## the keys in known_pacs don't include the sequence number (which
    #  would prevent us from properly recognizing multi-sequence-Packages),
    #  so we need complete the path to the package now
    key[1,0] = '%02i' % cell(row, column(:seqnr)).to_i
    key
  }
  @diff.sequence_deletions = known_seqs.keys
  @diff.registration_deletions = known_regs.keys
  @diff
end
166
# Formats +flags+ as a list with one "- <label>" line per flag.
#
# :revision is removed from the given Array in place before formatting.
# Flags without an entry in FLAGS are rendered as "Unbekannt (<flag>)".
def format_flags(flags)
  flags.delete(:revision)
  lines = flags.map do |flag|
    label = FLAGS.fetch(flag, "Unbekannt (#{flag})")
    format("- %s\n", label)
  end
  lines.join
end
172
# Collects the data of the file +latest+ through #_known_data.
#
#return :: [known_regs, known_seqs, known_pacs, newest_rows] (Array of Hash)
def known_data(latest)
  tables = Array.new(4) { {} }
  _known_data(latest, *tables)
  tables
end
180
# Reads every valid row of the file +latest+ and records it in the given
# hashes:
#
#_known_regs_::  [iksnr] => row
#_known_seqs_::  [iksnr, seqnr] => row
#_known_pacs_::  [iksnr, pacnr, idx] => row
#_newest_rows_:: iksnr => { pacnr => row }
#
# idx counts how often the same iksnr/pacnr pair has been seen before; it is
# also written into the row, in the cell right after the last column.
def _known_data(latest, known_regs, known_seqs, known_pacs, newest_rows)
  book = Spreadsheet.open(latest)
  idx = nil
  last_reg = nil
  last_pac = nil
  seen = {}
  each_valid_row(book) do |row|
    iksnr = cell(row, column(:iksnr))
    seqnr = cell(row, column(:seqnr))
    pacnr = cell(row, column(:ikscd))
    seen[iksnr] ||= {}
    if last_reg == iksnr && last_pac == pacnr
      idx += 1
    else
      earlier = seen[iksnr][pacnr]
      idx = earlier ? earlier[COLUMNS.size].to_i + 1 : 0
      last_reg = iksnr
      last_pac = pacnr
    end
    seen[iksnr][pacnr] = row
    row[COLUMNS.size] = idx
    known_regs.store [iksnr], row
    known_seqs.store [iksnr, seqnr], row
    known_pacs.store [iksnr, pacnr, idx], row
    (newest_rows[iksnr] ||= {})[pacnr] = row
  end
end
208
# Returns the :name_base cell of the row that sorts first among the newest
# rows of the registration +iksnr+ in +diff+.
def name(diff, iksnr)
  _pacnr, first_row = diff.newest_rows[iksnr].sort.first
  cell(first_row, column(:name_base))
end
213
# Lists the keys of all columns in which +row+ and +other+ differ.
#
#_row_::    first row
#_other_::  second row
#_ignore_:: column keys that are never reported
#
#return :: keys of the differing columns, in COLUMNS order (Array of Symbol)
def rows_diff(row, other, ignore = [])
  changed = COLUMNS.each_with_index.select do |key, idx|
    !ignore.include?(key) &&
      _comparable(key, row, idx) != _comparable(key, other, idx)
  end
  changed.map { |key, _idx| key }
end
223
+
224
#=== Render the differences as a String
#
# Meant to be called after #diff; before that an empty String is returned.
# New registrations are prefixed with "+ ", deleted ones with "- " and
# changed ones with "> ", followed by the descriptions of their flags.
#
#_sort_ :: sort key (:group | :name | :registration)
#
#return :: difference (String)
def to_s(sort=:group)
  @diff ||= nil
  return '' unless @diff
  ordered = @diff.changes.sort_by { |iksnr, flags| _sort_by(sort, iksnr, flags) }
  lines = ordered.map do |iksnr, flags|
    label = describe(@diff, iksnr)
    if flags.include?(:new)
      "+ #{label}"
    elsif flags.include?(:delete)
      "- #{label}"
    else
      details = flags.map { |flag| describe_flag(@diff, iksnr, flag) }.compact
      "> #{label}; #{details.join(', ')}"
    end
  end
  lines.join("\n")
end
248
# Returns the sort key of one changes entry for #to_s.
#
# :name sorts by [name, iksnr] and :registration by iksnr. Any other value
# sorts by [weight, iksnr], where new registrations weigh 0, deleted ones 1
# and all others 2.
def _sort_by(sort, iksnr, flags)
  return [name(@diff, iksnr), iksnr] if sort == :name
  return iksnr if sort == :registration
  weight = [:new, :delete].index { |flag| flags.include?(flag) } || 2
  [weight, iksnr]
end
265
# Normalizes the cell at +idx+ of +row+ for comparison in #rows_diff.
#
# Empty cells give nil. :registration_date and :expiry_date cells are
# returned unchanged, :seqnr is formatted as a two-digit number, and every
# other cell is converted to a lower-case String without whitespace.
def _comparable(key, row, idx)
  value = row[idx]
  return nil unless value
  case key
  when :registration_date, :expiry_date
    value
  when :seqnr
    format('%02i', value.to_i)
  else
    cell(row, idx).downcase.gsub(/\s+/, '')
  end
end
277
+
278
#=== Iterate over all valid rows of a swissmedic Packungen.xls
#
# Skips the leading rows reported by #rows_to_skip and every row whose
# :production_science cell is 'Tierarzneimittel'.
# Rewrites the fields :iksnr, :seqnr and :ikscd of each yielded row as
# zero-padded strings of 5, 2 and 3 digits (the old swissmedic convention
# of a fixed sized string).
# Raises a RuntimeError for a row that has fewer cells than half of COLUMNS,
# or more empty cells than half of COLUMNS.
#
# example:
#   SwissmedicDiff.new.each_valid_row(Spreadsheet.open('path/to/file')) { |x| puts "iksnr #{x[0]}" }
#
#_spreadsheet_:: spreadsheet to operate on
#
#return :: the result of iterating the first worksheet
def each_valid_row(spreadsheet)
  leading = rows_to_skip(spreadsheet)
  half = COLUMNS.size / 2
  spreadsheet.worksheet(0).each(leading) do |row|
    if row.size < half || row.count { |val| val == nil } > half
      raise "Data missing in \n(line #{row.idx + 1}): #{row.join(', ')}\n"
    end
    next if cell(row, column(:production_science)) == 'Tierarzneimittel'
    [[:iksnr, '%05i'], [:seqnr, '%02i'], [:ikscd, '%03i']].each do |key, pattern|
      pos = column(key)
      row[pos] = pattern % cell(row, pos).to_i
    end
    yield row
  end
end
306
+
307
# Counts the leading rows of the first worksheet that hold no package data,
# i.e. whose first cell does not convert to a non-zero Integer.
#
# Packungen.xls of swissmedic before October 2013 had 3 leading rows
# Packungen.xls of swissmedic after October 2013 have 4 leading rows
#
# The search stops at the first unused row of the worksheet, so a sheet
# without any data row yields that index instead of looping forever.
#
#_spreadsheet_:: spreadsheet to inspect
#
#return :: number of rows to skip (Integer)
def rows_to_skip(spreadsheet)
  sheet = spreadsheet.worksheet(0)
  # dimensions is [first used row, first unused row, first used column,
  # first unused column]
  limit = sheet.dimensions[1]
  j = 0
  j += 1 while j < limit && sheet.row(j)[0].to_i == 0
  j
end
314
+
315
+ end
316
+ include Diff
317
+ end