swissmedic-diff 0.1.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,68 @@
1
+ = swissmedic-diff
2
+
3
+ * https://github.com/zdavatz/swissmedic-diff
4
+
5
+ == DESCRIPTION:
6
+
7
+ * Compares two Excel Documents provided by Swissmedic and displays the
8
+ salient differences. Also: Find out what Products have changed on the
9
+ swiss healthcare market.
10
+
11
+ Up-To-Date file:
12
+
13
+ * http://www.swissmedic.ch//daten/00080/00251/index.html
14
+
15
+
16
+ == FEATURES/PROBLEMS:
17
+
18
+ Swissmedic does not store old files. You must do this on your own.
19
+
20
+ Version 0.1.3 is capable of the Packungen.xls without column 'Gruppe',
21
+ column E in the previous format. If you want to use Packungen.xls
22
+ including the column 'Gruppe', you should use version 0.1.2. After
23
+ you get the source code via Git command, type in the swissmedic-diff
24
+ directory as follows:
25
+
26
+ * git checkout 4c8c9323297453c3cb3380a9d41457d534ed8861
27
+
28
+ Then you can get the version 0.1.2.
29
+
30
+ == REQUIREMENTS:
31
+
32
+ * ruby 1.8 (with oniguruma patch) or ruby 1.9
33
+ * spreadsheet
34
+
35
+ == INSTALL:
36
+
37
+ The easiest way to install is via RubyGems. On the command line enter:
38
+
39
+ * gem build swissmedic-diff.gemspec
40
+ * sudo gem install swissmedic-diff-0.1.4.gem
41
+
42
+ To manually install, use the included setup.rb script:
43
+
44
+ * sudo ruby setup.rb
45
+
46
+ See test directory for tests. Run
47
+
48
+ * ruby test/test_swissmedic-diff.rb
49
+
50
+ for testing.
51
+
52
+ == USAGE:
53
+
54
+ Usage: /usr/bin/swissmedic-diff [-gnr] <file1> <file2> [<output>]
55
+
56
+ -g --group sort by news, deletions and updates
57
+ -n --name sort by name
58
+ -r --registration sort by registration
59
+
60
+ == DEVELOPERS:
61
+
62
+ * Hannes Wyss <hwyss@ywesee.com>
63
+ * Masaomi Hatakeyama <mhatakeyama@ywesee.com>
64
+ * Zeno R.R. Davatz <zdavatz@ywesee.com>
65
+
66
+ == LICENSE:
67
+
68
+ * GPLv2
@@ -0,0 +1,24 @@
1
+ # -*- ruby -*-
2
+
3
+ require 'rubygems'
4
+ require 'hoe'
5
+
6
+ # Hoe.plugin :compiler
7
+ # Hoe.plugin :gem_prelude_sucks
8
+ # Hoe.plugin :inline
9
+ # Hoe.plugin :inline
10
+ # Hoe.plugin :racc
11
+ # Hoe.plugin :rubyforge
12
+ # Hoe.plugin :rubyforge
13
+
14
# Gem specification for swissmedic-diff, driven by Hoe.
Hoe.spec 'swissmedic-diff' do
  # Hoe#developer registers exactly one name/e-mail pair per call. Passing
  # both people in a single call produced one comma-joined author entry and
  # one malformed e-mail entry, so each developer gets a call of their own.
  developer('Masaomi Hatakeyama', 'mhatakeyama@ywesee.com')
  developer('Zeno R.R. Davatz', 'zdavatz@ywesee.com')

  # self.rubyforge_name = 'swissmswissmedic-diffx' # if different than 'swissmedic-diff'
end
23
+
24
+ # vim: syntax=ruby
@@ -0,0 +1,45 @@
1
+ #! /usr/bin/ruby18
2
+
3
+ require 'swissmedic-diff'
4
+
5
# Writes the command-line synopsis and the list of supported options to
# standard output. The program name is taken from $0.
def usage
  $stdout.puts(<<-EOS)
Usage: #$0 [-gnr] [-i ignorelist] <file1> <file2> [<output>]

-g --group sort by news, deletions and updates
-n --name sort by name
-r --registration sort by registration
-i --ignore ignore differences in the following comma-separated keys
  EOS
end
15
+
16
# Command-line driver: parse the leading options, compare the two workbooks
# and print the report to standard output or to the optional output file.
sort = :group
ignore = []

# Consume every leading option, not just the first one, so that switches can
# be combined (e.g. "-i company,size -n old.xls new.xls").
while /^-/.match(ARGV.first.to_s)
  case ARGV.shift
  when /^-{1,2}i/
    keys = ARGV.shift
    unless keys
      # -i without its comma-separated key list is a usage error.
      usage
      exit 1
    end
    ignore.concat keys.split(',').collect { |key| key.to_sym }
  when /^-{1,2}n/
    sort = :name
  when /^-{1,2}r/
    sort = :registration
  else
    sort = :group
  end
end

# Exactly two input files are required; a third argument names the output file.
unless [2, 3].include?(ARGV.size)
  usage
  exit 1
end

plug = SwissmedicDiff.new
# SwissmedicDiff#diff takes the new file first and the old file second,
# hence <file2> is passed before <file1>.
plug.diff(ARGV[1], ARGV[0], ignore)
report = plug.to_s(sort)

if ARGV.size == 3
  # Open the output file only after the comparison succeeded, so a failing
  # diff does not truncate it, and let the block form close the handle.
  File.open(ARGV[2], 'w') { |fh| fh.puts report }
else
  $stdout.puts report
end
@@ -0,0 +1,317 @@
1
+ #!/usr/bin/env ruby
2
+ # encoding: utf-8
3
+ # SwissmedicDiff -- swissmedic-diff -- 27.03.2008 -- hwyss@ywesee.com
4
+
5
+ require 'ostruct'
6
+ require 'spreadsheet'
7
+
8
+ #= diff command (compare two xls files) for swissmedic xls file.
9
+ #
10
+ #Compares two Excel Documents provided by Swissmedic and displays the
11
+ #salient differences. Also: Find out what Products have changed on the
12
+ #swiss healthcare market.
13
+ #
14
+ #Authors:: Hannes Wyss (hwyss@ywesee.com), Masaomi Hatakeyama (mhatakeyama@ywesee.com)
15
+ #Version:: 0.1.4 2013-10-16 commit c30af5c15f6b8101f8f84cb482dfd09ab20729d6
16
+ #Copyright:: Copyright (C) ywesee GmbH, 2010. All rights reserved.
17
+ #License:: GPLv2.0 Compliance
18
+ #Source:: http://scm.ywesee.com/?p=swissmedic-diff/.git;a=summary
19
class SwissmedicDiff
  VERSION = '0.1.4'

  # Comparison and reporting logic. It is mixed into SwissmedicDiff below.
  module Diff
    # Column layout of a row; the position in this list is the cell index.
    COLUMNS = [ :iksnr, :seqnr, :name_base, :company,
                :index_therapeuticus, :atc_class, :production_science,
                :registration_date, :sequence_date, :expiry_date, :ikscd,
                :size, :unit, :ikscat, :substances, :composition,
                :indication_registration, :indication_sequence ]
    # Labels used in the textual report for every kind of change.
    FLAGS = {
      :new                     => 'Neues Produkt',
      :name_base               => 'Namensänderung',
      :ikscat                  => 'Abgabekategorie',
      :index_therapeuticus     => 'Index Therapeuticus',
      :indication_registration => 'Anwendungsgebiet Präparate',
      :indication_sequence     => 'Anwendungsgebiet Sequenz',
      :company                 => 'Zulassungsinhaber',
      :composition             => 'Zusammensetzung',
      :sequence                => 'Packungen',
      :size                    => 'Packungsgrösse',
      :expiry_date             => 'Ablaufdatum der Zulassung',
      :registration_date       => 'Erstzulassungsdatum',
      :sequence_date           => 'Zulassungsdatum Sequenz',
      :delete                  => 'Das Produkt wurde gelöscht',
      :replaced_package        => 'Packungs-Nummer',
      :substances              => 'Wirkstoffe',
      :production_science      => 'Heilmittelcode',
      :atc_class               => 'ATC-Code',
    }
    GALFORM_P = %r{excipiens\s+(ad|pro)\s+(?<galform>((?!\bpro\b)[^.])+)}

    # Capitalizes every whitespace-separated word and joins the words with
    # single spaces.
    def capitalize(string)
      string.split(/\s+/).collect { |word| word.capitalize }.join(' ')
    end

    # String form of the cell at +pos+, or nil when the cell is empty.
    def cell(row, pos)
      value = row[pos]
      value.to_s if value
    end

    # Index of the column named +key+ within a row (nil for unknown keys).
    def column(key)
      COLUMNS.index(key)
    end

    # "<iksnr>: <name>" headline for one registration.
    def describe(diff, iksnr)
      sprintf("%s: %s", iksnr, name(diff, iksnr))
    end

    #=== Describe a single change flag of a registration
    #
    #_diff_ :: result of #diff (OpenStruct)
    #_iksnr_:: registration number (String)
    #_flag_ :: change flag (Symbol)
    #
    #return :: description (String), or nil for :sequence, which has no
    #          textual detail
    def describe_flag(diff, iksnr, flag)
      txt = FLAGS.fetch(flag, flag)
      case flag
      when :sequence
        nil
      when :replaced_package
        pairs = diff.newest_rows[iksnr].collect { |rep, row|
          old = diff.replacements[row]
          [old, rep].join(' -> ') if old
        }.compact
        sprintf "%s (%s)", txt, pairs.join(',')
      when :registration_date, :expiry_date
        value = _first_row(diff, iksnr)[column(flag)]
        # An empty date cell is a legitimate change (e.g. the expiry date was
        # removed); print it as an empty value instead of raising.
        shown = value.respond_to?(:strftime) ? value.strftime('%d.%m.%Y') : value.to_s
        sprintf "%s (%s)", txt, shown
      else
        sprintf "%s (%s)", txt, cell(_first_row(diff, iksnr), column(flag))
      end
    end

    #=== Comparison two Excel files
    #
    #_target_:: new file path (String)
    #_latest_:: old file path (String)
    #_ignore_:: columns not to be compared (Array of Symbol)
    #
    #return :: differences (OpenStruct class) with the members news, updates,
    #          changes, newest_rows, replacements, package_deletions,
    #          sequence_deletions and registration_deletions
    def diff(target, latest, ignore = [])
      # Set the encoding before either workbook is opened so that both files
      # are read with the same client encoding.
      Spreadsheet.client_encoding = 'UTF-8'
      replacements = {}
      known_regs, known_seqs, known_pacs, newest_rows = known_data(latest)
      @diff = OpenStruct.new
      @diff.news = news = []
      @diff.updates = updates = []
      @diff.changes = changes = {}
      @diff.newest_rows = newest_rows
      tbook = Spreadsheet.open(target)
      sheet = tbook.worksheet(0)
      # A value beyond the last known column means the layout has changed.
      if new_column = cell(sheet.row(2), COLUMNS.size)
        raise "New column #{COLUMNS.size} (#{new_column})"
      end
      seen = {}
      each_valid_row(tbook) { |row|
        iksnr = cell(row, column(:iksnr))
        seqnr = cell(row, column(:seqnr))
        pacnr = cell(row, column(:ikscd))
        idx = _tag_occurrence(row, seen)
        (newest_rows[iksnr] ||= {})[pacnr] = row
        if known_regs.delete([iksnr])
          changes[iksnr] ||= []
        else
          changes[iksnr] ||= [:new]
        end
        known_seqs.delete([iksnr, seqnr])
        if(other = known_pacs.delete([iksnr, pacnr, idx]))
          flags = rows_diff(row, other, ignore)
          (changes[iksnr].concat flags).uniq!
          updates.push row unless flags.empty?
        else
          # Remember unknown packages by sequence/size/unit so that a
          # vanished package with the same properties can be reported as
          # replaced rather than as deleted plus new.
          replacements.store [ iksnr, seqnr, cell(row, column(:size)),
                               cell(row, column(:unit)) ], row
          flags = changes[iksnr]
          flags.push(:sequence).uniq! unless(flags.include? :new)
          news.push row
        end
      }
      # Whatever is left in known_* was present in the old file only.
      @diff.replacements = reps = {}
      known_pacs.each { |(iksnr, pacnr), row|
        key = [iksnr, '%02i' % cell(row, column(:seqnr)).to_i,
               cell(row, column(:size)), cell(row, column(:unit))]
        if(rep = replacements[key])
          flags = changes[iksnr]
          # Several replaced packages must not repeat the flag, otherwise the
          # report prints the same fragment more than once.
          flags.push :replaced_package unless flags.include?(:replaced_package)
          reps.store rep, pacnr
        end
      }
      known_regs.each_key { |key| changes[key.first] = [:delete] }
      changes.delete_if { |_iksnr, flags| flags.empty? }
      @diff.package_deletions = known_pacs.collect { |key, row|
        ## the keys in known_pacs don't include the sequence number (which
        #  would prevent us from properly recognizing multi-sequence-Packages),
        #  so we need complete the path to the package now. A new array is
        #  built because the key is still in use by the known_pacs hash.
        [key[0], '%02i' % cell(row, column(:seqnr)).to_i] + key[1..-1]
      }
      @diff.sequence_deletions = known_seqs.keys
      @diff.registration_deletions = known_regs.keys
      @diff
    end

    # One "- <label>\n" line per flag, skipping :revision. Unknown flags are
    # rendered as "Unbekannt (<flag>)". The passed array is left untouched.
    def format_flags(flags)
      flags.reject { |flag| flag == :revision }.collect { |flag|
        "- %s\n" % FLAGS.fetch(flag, "Unbekannt (#{flag})")
      }.join
    end

    #=== Index the old file
    #
    #_latest_:: old file path (String)
    #
    #return :: [known_regs, known_seqs, known_pacs, newest_rows], hashes
    #          keyed by [iksnr], [iksnr, seqnr], [iksnr, pacnr, idx] and
    #          iksnr (=> {pacnr => row}) respectively
    def known_data(latest)
      known_regs, known_seqs, known_pacs, newest_rows = {}, {}, {}, {}
      _known_data latest, known_regs, known_seqs, known_pacs, newest_rows
      [known_regs, known_seqs, known_pacs, newest_rows]
    end

    # Fills the four passed hashes from every valid row of +latest+.
    def _known_data(latest, known_regs, known_seqs, known_pacs, newest_rows)
      lbook = Spreadsheet.open(latest)
      seen = {}
      each_valid_row(lbook) { |row|
        iksnr = cell(row, column(:iksnr))
        seqnr = cell(row, column(:seqnr))
        pacnr = cell(row, column(:ikscd))
        idx = _tag_occurrence(row, seen)
        known_regs.store [iksnr], row
        known_seqs.store [iksnr, seqnr], row
        known_pacs.store [iksnr, pacnr, idx], row
        (newest_rows[iksnr] ||= {})[pacnr] = row
      }
    end

    # Stores in the cell after the last known column how many earlier rows
    # carried the same registration/package number (0 for the first one) and
    # returns that count. +seen+ is the per-file counter hash.
    def _tag_occurrence(row, seen)
      key = [cell(row, column(:iksnr)), cell(row, column(:ikscd))]
      idx = seen.fetch(key, 0)
      seen[key] = idx + 1
      row[COLUMNS.size] = idx
      idx
    end

    # Row of the package with the lowest package number of a registration.
    def _first_row(diff, iksnr)
      diff.newest_rows[iksnr].min_by { |pacnr, _row| pacnr }.last
    end

    # Product name of a registration, taken from its first package row.
    def name(diff, iksnr)
      cell(_first_row(diff, iksnr), column(:name_base))
    end

    # Keys of all columns whose normalized values differ between +row+ and
    # +other+, leaving out the keys listed in +ignore+.
    def rows_diff(row, other, ignore = [])
      flags = []
      COLUMNS.each_with_index { |key, idx|
        next if ignore.include?(key)
        flags.push key if _comparable(key, row, idx) != _comparable(key, other, idx)
      }
      flags
    end

    #=== Output the differences as a String
    #
    # This should be called after diff method.
    #
    #_sort_ :: sort key (:group | :name | :registration)
    #
    #return :: difference (String), empty when #diff has not been run
    def to_s(sort=:group)
      @diff ||= nil
      return '' unless @diff
      ordered = @diff.changes.sort_by { |iksnr, flags| _sort_by(sort, iksnr, flags) }
      ordered.collect { |iksnr, flags|
        if flags.include?(:new)
          "+ #{describe(@diff, iksnr)}"
        elsif flags.include?(:delete)
          "- #{describe(@diff, iksnr)}"
        else
          details = flags.collect { |flag| describe_flag(@diff, iksnr, flag) }
          "> #{describe(@diff, iksnr)}; #{details.compact.join(', ')}"
        end
      }.join("\n")
    end

    # Sort key of one entry of the change list for the requested ordering.
    def _sort_by(sort, iksnr, flags)
      case sort
      when :name
        # to_s keeps entries comparable when a product has no name cell.
        [name(@diff, iksnr).to_s, iksnr]
      when :registration
        iksnr
      else
        weight = if flags.include?(:new)
                   0
                 elsif flags.include?(:delete)
                   1
                 else
                   2
                 end
        [weight, iksnr]
      end
    end

    # Normalized value of a cell for comparison: the raw value for the
    # registration and expiry dates, a two-digit string for the sequence
    # number, otherwise the lower-cased text without whitespace. Empty cells
    # yield nil.
    def _comparable(key, row, idx)
      value = row[idx]
      return nil unless value
      case key
      when :registration_date, :expiry_date
        value
      when :seqnr
        sprintf "%02i", value.to_i
      else
        cell(row, idx).downcase.gsub(/\s+/, "")
      end
    end

    #=== iterate over all valid rows of a swissmedic Packungen.xls
    #
    # Iterates over all rows after the leading header rows, skipping
    # Tierarzneimittel. A row with fewer than half of the expected cells, or
    # with more than half of them empty, raises a RuntimeError.
    # Patches the fields :iksnr, :seqnr, :ikscd to match the old swissmedic convention
    # of a fixed sized string
    #
    # example:
    #   SwissmedicDiff.new.each_valid_row(Spreadsheet.open('path/to/file')) { |x| puts "iksnr #{x[0]}" }
    #
    #_spreadsheet_:: spreadsheet to operate on
    #
    #return :: whatever the worksheet iteration returns
    def each_valid_row(spreadsheet)
      skip = rows_to_skip(spreadsheet)
      worksheet = spreadsheet.worksheet(0)
      threshold = COLUMNS.size / 2
      worksheet.each(skip) { |row|
        if row.size < threshold || row.count { |val| val == nil } > threshold
          raise "Data missing in \n(line " + (row.idx+1).to_s + "): " + row.join(", ").to_s + "\n"
        end
        next if cell(row, column(:production_science)) == 'Tierarzneimittel'
        row[column(:iksnr)] = "%05i" % cell(row, column(:iksnr)).to_i
        row[column(:seqnr)] = "%02i" % cell(row, column(:seqnr)).to_i
        row[column(:ikscd)] = "%03i" % cell(row, column(:ikscd)).to_i
        yield row
      }
    end

    #=== Number of leading rows before the data starts
    #
    # Packungen.xls of swissmedic before October 2013 had 3 leading rows
    # Packungen.xls of swissmedic after October 2013 have 4 leading rows
    #
    # Counts the rows at the top whose first cell is not a non-zero number.
    # Only the used rows of the worksheet are inspected, so a sheet without
    # any data row yields its row count instead of looping forever.
    #
    #_spreadsheet_:: spreadsheet to operate on
    #
    #return :: number of rows to skip (Integer)
    def rows_to_skip(spreadsheet)
      skipped = 0
      spreadsheet.worksheet(0).each(0) { |row|
        break unless row[0].to_i == 0
        skipped += 1
      }
      skipped
    end

  end
  include Diff
end