swissmedic-diff 0.2.9 → 0.3.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,178 +1,128 @@
1
1
  #!/usr/bin/env ruby
2
- # encoding: utf-8
3
2
  # SwissmedicDiff -- swissmedic-diff -- 27.03.2008 -- hwyss@ywesee.com
4
3
 
5
- require 'ostruct'
6
- require 'spreadsheet'
7
- require 'rubyXL'
8
- require 'pp'
9
- require File.join(File.dirname(__FILE__), 'version.rb')
10
-
11
- # add some monkey patches for Spreadsheet and rubyXL
12
- require File.join(File.dirname(__FILE__), 'compatibility.rb')
4
+ require "ostruct"
5
+ require "simple_xlsx_reader"
6
+ require "pp"
7
+ require File.join(File.dirname(__FILE__), "version.rb")
13
8
 
14
9
  #= diff command (compare two xls files) for swissmedic xls file.
15
10
  #
16
- #Compares two Excel Documents provided by Swissmedic and displays the
17
- #salient differences. Also: Find out what Products have changed on the
18
- #swiss healthcare market.
11
+ # Compares two Excel Documents provided by Swissmedic and displays the
12
+ # salient differences. Also: Find out what Products have changed on the
13
+ # swiss healthcare market.
19
14
  #
20
- #Authors:: Hannes Wyss (hwyss@ywesee.com), Masaomi Hatakeyama (mhatakeyama@ywesee.com)
21
- #Version:: 0.1.4 2013-10-16 commit c30af5c15f6b8101f8f84cb482dfd09ab20729d6
22
- #Copyright:: Copyright (C) ywesee GmbH, 2010. All rights reserved.
23
- #License:: GPLv2.0 Compliance
24
- #Source:: http://scm.ywesee.com/?p=swissmedic-diff/.git;a=summary
15
+ # Authors:: Hannes Wyss (hwyss@ywesee.com), Masaomi Hatakeyama (mhatakeyama@ywesee.com)
16
+ # Version:: 0.1.4 2013-10-16 commit c30af5c15f6b8101f8f84cb482dfd09ab20729d6
17
+ # Copyright:: Copyright (C) ywesee GmbH, 2010. All rights reserved.
18
+ # License:: GPLv2.0 Compliance
19
+ # Source:: http://scm.ywesee.com/?p=swissmedic-diff/.git;a=summary
25
20
  class SwissmedicDiff
26
- VALUE_UNLIMITED = nil
27
- REGEXP_UNLIMITED = /unbegrenzt/i
21
+ VALUE_UNLIMITED = nil
22
+ REGEXP_UNLIMITED = /unbegrenzt/i
28
23
 
29
24
  module Diff
30
- COLUMNS_2014 = {
31
- :iksnr => /Zulassungs-Nummer/i, # column-nr: 0
32
- :seqnr => /Dosistärke-nummer|^Sequenz$/i,
33
- :name_base => /Präparatebezeichnung|^Sequenzname$/i,
34
- :company => /Zulassungsinhaberin/i,
35
- :index_therapeuticus => /IT-Nummer/i,
36
- :atc_class => /ATC-Code/i, # column-nr: 5
37
- :production_science => /Heilmittelcode/i,
38
- :registration_date => /Erstzul.datum Präp./i,
39
- :sequence_date => /Zul.datum Dosisstärke *|Zul.datum Sequenz/i,
40
- :expiry_date => /Gültigkeits-datum */i,
41
- :ikscd => /Verpackungs ID/i, # column-nr: 10
42
- :size => /Packungsgrösse/i,
43
- :unit => /Einheit/i,
44
- :ikscat => /Abgabekategorie/i,
45
- :substances => /Wirkstoff/i,
46
- :composition => /Zusammensetzung/i, # column-nr: 15
47
- :indication_registration => /Anwendungsgebiet Präparate/i,
48
- :indication_sequence => /Anwendungsgebiet Dosisstärke|Anwendungsgebiet Sequenz/i,
49
- }
50
-
51
- COLUMNS_OLD = [ :iksnr, :seqnr, :name_base, :company,
52
- :index_therapeuticus, :atc_class, :production_science,
53
- :registration_date, :sequence_date, :expiry_date, :ikscd,
54
- :size, :unit, :ikscat, :substances, :composition,
55
- :indication_registration, :indication_sequence ]
25
+ COLUMNS_OLD = [:iksnr, :seqnr, :name_base, :company,
26
+ :index_therapeuticus, :atc_class, :production_science,
27
+ :registration_date, :sequence_date, :expiry_date, :ikscd,
28
+ :size, :unit, :ikscat, :substances, :composition,
29
+ :indication_registration, :indication_sequence]
56
30
 
57
- COLUMNS_JULY_2015 = {
58
- :iksnr => /Zulassungs-Nummer/i, # column-nr: 0
59
- :seqnr => /Dosis+tärke-nummer/i,
60
- :name_base => /Präparatebezeichnung/i,
61
- :company => /Zulassungsinhaberin/i,
62
- :production_science => /Heilmittelcode/i,
63
- :index_therapeuticus => /IT-Nummer/i, # column-nr: 5
64
- :atc_class => /ATC-Code/i,
65
- :registration_date => /Erstzulassungs-datum./i,
66
- :sequence_date => /Zul.datum Dosisstärke/i,
67
- :expiry_date => /Gültigkeitsdauer der Zulassung/i,
68
- :ikscd => /Packungscode/i, # column-nr: 10
69
- :size => /Packungsgrösse/i,
70
- :unit => /Einheit/i,
71
- :ikscat => /Abgabekategorie Packung/i,
72
- :ikscat_seq => /Abgabekategorie Dosisstärke/i,
73
- :ikscat_preparation => /Abgabekategorie Präparat/i, # column-nr: 15
74
- :substances => /Wirkstoff/i,
75
- :composition => /Zusammensetzung/i,
76
- :indication_registration => /Anwendungsgebiet Präparat/i,
77
- :indication_sequence => /Anwendungsgebiet Dosisstärke/i,
78
- :gen_production => /Gentechnisch hergestellte Wirkstoffe/i, # column-nr 20
79
- :insulin_category => /Kategorie bei Insulinen/i,
31
+ COLUMNS_FEBRUARY_2019 = {
32
+ iksnr: /Zulassungs-Nummer/i, # column-nr: 0
33
+ seqnr: /Dosisstärke-nummer/i,
34
+ name_base: /Bezeichnung des Arzneimittels/i,
35
+ company: /Zulassungsinhaberin/i,
36
+ production_science: /Heilmittelcode/i,
37
+ index_therapeuticus: /IT-Nummer/i, # column-nr: 5
38
+ atc_class: /ATC-Code/i,
39
+ registration_date: /Erstzul.datum Arzneimittel/i,
40
+ sequence_date: /Zul.datum Dosisstärke/i,
41
+ expiry_date: /Gültigkeitsdauer der Zulassung/i,
42
+ ikscd: /Packungscode/i, # column-nr: 10
43
+ size: /Packungsgrösse/i,
44
+ unit: /Einheit/i,
45
+ ikscat: /Abgabekategorie Packung/i,
46
+ ikscat_seq: /Abgabekategorie Dosisstärke/i,
47
+ ikscat_preparation: /Abgabekategorie Arzneimittel/i, # column-nr: 15
48
+ substances: /Wirkstoff/i,
49
+ composition: /Zusammensetzung/i,
50
+ composition_AMZV: /Volldeklaration rev. AMZV umgesetzt/i,
51
+ indication_registration: /Anwendungsgebiet Arzneimittel/i,
52
+ indication_sequence: /Anwendungsgebiet Dosisstärke/i, # column-nr 20
53
+ gen_production: /Gentechnisch hergestellte Wirkstoffe/i,
54
+ insulin_category: /Kategorie bei Insulinen/i,
80
55
  # Swissmedic corrected the typo betäubunsmittel to betäubungsmittel- in February 2018
81
- :drug_index => /Verz. bei betäubun.*smittel-haltigen Präparaten/i,
82
- }
83
-
84
- COLUMNS_FEBRUARY_2019= {
85
- :iksnr => /Zulassungs-Nummer/i, # column-nr: 0
86
- :seqnr => /Dosisstärke-nummer/i,
87
- :name_base => /Bezeichnung des Arzneimittels/i,
88
- :company => /Zulassungsinhaberin/i,
89
- :production_science => /Heilmittelcode/i,
90
- :index_therapeuticus => /IT-Nummer/i, # column-nr: 5
91
- :atc_class => /ATC-Code/i,
92
- :registration_date => /Erstzul.datum Arzneimittel/i,
93
- :sequence_date => /Zul.datum Dosisstärke/i,
94
- :expiry_date => /Gültigkeitsdauer der Zulassung/i,
95
- :ikscd => /Packungscode/i, # column-nr: 10
96
- :size => /Packungsgrösse/i,
97
- :unit => /Einheit/i,
98
- :ikscat => /Abgabekategorie Packung/i,
99
- :ikscat_seq => /Abgabekategorie Dosisstärke/i,
100
- :ikscat_preparation => /Abgabekategorie Arzneimittel/i, # column-nr: 15
101
- :substances => /Wirkstoff/i,
102
- :composition => /Zusammensetzung/i,
103
- :composition_AMZV => /Volldeklaration rev. AMZV umgesetzt/i,
104
- :indication_registration => /Anwendungsgebiet Arzneimittel/i,
105
- :indication_sequence => /Anwendungsgebiet Dosisstärke/i, # column-nr 20
106
- :gen_production => /Gentechnisch hergestellte Wirkstoffe/i,
107
- :insulin_category => /Kategorie bei Insulinen/i,
108
- # swissmedi corrected in february 2018 the typo betäubunsmittel to betäubungsmittel-
109
- :drug_index => /Verz. bei betäubungsmittel-haltigen Arzneimittel/i,
56
+ drug_index: /Verz. bei betäubungsmittel-haltigen Arzneimittel/i
110
57
  }
111
58
  FLAGS = {
112
- :new => 'Neues Produkt',
113
- :name_base => 'Namensänderung',
114
- :ikscat => 'Abgabekategorie',
115
- :index_therapeuticus => 'Index Therapeuticus',
116
- :indication_registration => 'Anwendungsgebiet Präparate',
117
- :indication_sequence => 'Anwendungsgebiet Sequenz',
118
- :company => 'Zulassungsinhaber',
119
- :composition => 'Zusammensetzung',
120
- :sequence => 'Packungen',
121
- :size => 'Packungsgrösse',
122
- :expiry_date => 'Ablaufdatum der Zulassung',
123
- :registration_date => 'Erstzulassungsdatum',
124
- :sequence_date => 'Zulassungsdatum Sequenz',
125
- :delete => 'Das Produkt wurde gelöscht',
126
- :replaced_package => 'Packungs-Nummer',
127
- :substances => 'Wirkstoffe',
128
- :production_science => 'Heilmittelcode',
129
- :atc_class => 'ATC-Code',
59
+ new: "Neues Produkt",
60
+ name_base: "Namensänderung",
61
+ ikscat: "Abgabekategorie",
62
+ index_therapeuticus: "Index Therapeuticus",
63
+ indication_registration: "Anwendungsgebiet Präparate",
64
+ indication_sequence: "Anwendungsgebiet Sequenz",
65
+ company: "Zulassungsinhaber",
66
+ composition: "Zusammensetzung",
67
+ sequence: "Packungen",
68
+ size: "Packungsgrösse",
69
+ expiry_date: "Ablaufdatum der Zulassung",
70
+ registration_date: "Erstzulassungsdatum",
71
+ sequence_date: "Zulassungsdatum Sequenz",
72
+ delete: "Das Produkt wurde gelöscht",
73
+ replaced_package: "Packungs-Nummer",
74
+ substances: "Wirkstoffe",
75
+ production_science: "Heilmittelcode",
76
+ atc_class: "ATC-Code"
130
77
  }
131
- GALFORM_P = %r{excipiens\s+(ad|pro)\s+(?<galform>((?!\bpro\b)[^.])+)}
132
- DATE_FORMAT = '%Y:%m:%d'
78
+ DATE_FORMAT = "%Y:%m:%d"
133
79
 
134
80
  def capitalize(string)
135
- string.split(/\s+/).collect { |word| word.capitalize }.join(' ')
81
+ string.split(/\s+/).collect { |word| word.capitalize }.join(" ")
136
82
  end
83
+
137
84
  def cell(row, pos)
138
- if(cell = row[pos])
85
+ if (cell = row[pos])
139
86
  cell.to_s
140
87
  end
141
88
  end
89
+
142
90
  def describe(diff, iksnr)
143
91
  sprintf("%s: %s", iksnr, name(diff, iksnr))
144
92
  end
93
+
145
94
  def describe_flag(diff, iksnr, flag)
146
95
  txt = FLAGS.fetch(flag, flag)
147
96
  case flag
148
97
  when :sequence
98
+ txt
149
99
  when :replaced_package
150
100
  pairs = diff.newest_rows[iksnr].collect { |rep, row|
151
- if(old = diff.replacements[row])
152
- [old, rep].join(' -> ')
101
+ if (old = diff.replacements[row])
102
+ [old, rep].join(" -> ")
153
103
  end
154
104
  }.compact
155
- sprintf "%s (%s)", txt, pairs.join(',')
105
+ sprintf "%s (%s)", txt, pairs.join(",")
156
106
  when :registration_date, :expiry_date
157
- row = diff.newest_rows[iksnr].sort.first.last
158
- if row[COLUMNS_2014.keys.index(flag)].value.to_s.match(REGEXP_UNLIMITED)
159
- sprintf "%s (%s)", txt, 'unbegrenzt'
107
+ row = diff.newest_rows[iksnr].min.last
108
+ if row[COLUMNS_FEBRUARY_2019.keys.index(flag)].to_s.match(REGEXP_UNLIMITED)
109
+ sprintf "%s (%s)", txt, "unbegrenzt"
160
110
  else
161
- sprintf "%s (%s)", txt, row[COLUMNS_2014.keys.index(flag)].value.strftime('%d.%m.%Y')
111
+ sprintf "%s (%s)", txt, row[COLUMNS_FEBRUARY_2019.keys.index(flag)].strftime("%d.%m.%Y")
162
112
  end
163
113
  else
164
- row = diff.newest_rows[iksnr].sort.first.last
165
- sprintf "%s (%s)", txt, cell(row, COLUMNS_2014.keys.index(flag))
114
+ row = diff.newest_rows[iksnr].min.last
115
+ sprintf "%s (%s)", txt, cell(row, COLUMNS_FEBRUARY_2019.keys.index(flag))
166
116
  end
167
117
  end
168
118
 
169
119
  #=== Comparison of two Excel files
170
120
  #
171
- #_target_:: new file path (String)
172
- #_latest_:: old file path (String)
173
- #_ignore_:: columns not to be compared (Symbol)
121
+ # _target_:: new file path (String)
122
+ # _latest_:: old file path (String)
123
+ # _ignore_:: columns not to be compared (Symbol)
174
124
  #
175
- #return :: differences (OpenStruct class)
125
+ # return :: differences (OpenStruct class)
176
126
  def diff(target, latest, ignore = [])
177
127
  replacements = {}
178
128
  known_regs, known_seqs, known_pacs, newest_rows = known_data(latest)
@@ -181,20 +131,20 @@ class SwissmedicDiff
181
131
  @diff.updates = updates = []
182
132
  @diff.changes = changes = {}
183
133
  @diff.newest_rows = newest_rows
184
- Spreadsheet.client_encoding = 'UTF-8'
185
- tbook = Spreadsheet.open(target)
134
+ t_rows = SimpleXlsxReader.open(target).sheets.first.rows.slurp
186
135
  idx, prr, prp = nil
187
136
  multiples = {}
188
- @latest_keys = get_column_indices(Spreadsheet.open(latest)).keys
189
- @target_keys = get_column_indices(tbook).keys
190
- each_valid_row(tbook) { |row|
137
+ l_rows = SimpleXlsxReader.open(latest).sheets.first.rows.slurp
138
+ @latest_keys = get_column_indices(l_rows).keys
139
+ @target_keys = get_column_indices(t_rows).keys
140
+ each_valid_row(t_rows) { |row|
191
141
  iksnr = cell(row, @target_keys.index(:iksnr))
192
142
  seqnr = cell(row, @target_keys.index(:seqnr))
193
143
  pacnr = cell(row, @target_keys.index(:ikscd))
194
- (multiples[iksnr] ||= {})
144
+ multiples[iksnr] ||= {}
195
145
  if prr == iksnr && prp == pacnr
196
146
  idx += 1
197
- elsif previous = multiples[iksnr][pacnr]
147
+ elsif (previous = multiples[iksnr][pacnr])
198
148
  prr = iksnr
199
149
  prp = pacnr
200
150
  idx = previous[@target_keys.size].to_i + 1
@@ -206,52 +156,62 @@ class SwissmedicDiff
206
156
  row[@target_keys.size] = idx
207
157
  (newest_rows[iksnr] ||= {})[pacnr] = row
208
158
  multiples[iksnr][pacnr] = row
209
- if(other = known_regs.delete([iksnr]))
210
- changes[iksnr] ||= []
159
+ changes[iksnr] ||= if known_regs.delete([iksnr])
160
+ []
211
161
  else
212
- changes[iksnr] ||= [:new]
162
+ [:new]
213
163
  end
214
164
  known_seqs.delete([iksnr, seqnr])
215
- if(other = known_pacs.delete([iksnr, pacnr, idx]))
165
+ if (other = known_pacs.delete([iksnr, pacnr, idx]))
216
166
  flags = rows_diff(row, other, ignore)
217
167
  (changes[iksnr].concat flags).uniq!
218
168
  updates.push row unless flags.empty?
219
169
  else
220
- replacements.store [ iksnr, seqnr, cell(row, @target_keys.index(:size)),
221
- cell(row, @target_keys.index(:unit)) ], row
170
+ replacements.store [iksnr, seqnr, cell(row, @target_keys.index(:size)),
171
+ cell(row, @target_keys.index(:unit))], row
222
172
  flags = changes[iksnr]
223
- flags.push(:sequence).uniq! unless(flags.include? :new)
173
+ flags.push(:sequence).uniq! unless flags.include? :new
224
174
  news.push row
225
175
  end
226
176
  }
227
177
  @diff.replacements = reps = {}
228
178
  known_pacs.each { |(iksnr, pacnr), row|
229
- key = [iksnr, '%02i' % cell(row, @target_keys.index(:seqnr)).to_i,
230
- cell(row, @target_keys.index(:size)), cell(row, @target_keys.index(:unit))]
231
- if(rep = replacements[key])
179
+ key = [iksnr, "%02i" % cell(row, @target_keys.index(:seqnr)).to_i,
180
+ cell(row, @target_keys.index(:size)), cell(row, @target_keys.index(:unit))]
181
+ if (rep = replacements[key])
232
182
  changes[iksnr].push :replaced_package
233
183
  reps.store rep, pacnr
234
184
  end
235
185
  }
236
- known_regs.each_key { |(iksnr,_)| changes[iksnr] = [:delete] }
186
+ known_regs.each_key { |(iksnr, _)| changes[iksnr] = [:delete] }
237
187
  changes.delete_if { |iksnr, flags| flags.empty? }
238
188
  @diff.package_deletions = known_pacs.collect { |key, row|
239
189
  ## the keys in known_pacs don't include the sequence number (which
240
190
  # would prevent us from properly recognizing multi-sequence-Packages),
241
191
  # so we need to complete the path to the package now
242
- key[1,0] = '%02i' % cell(row, @target_keys.index(:seqnr)).to_i
192
+ key[1, 0] = "%02i" % cell(row, @target_keys.index(:seqnr)).to_i
243
193
  key
244
194
  }
245
195
  @diff.sequence_deletions = known_seqs.keys
246
196
  @diff.registration_deletions = known_regs.keys
197
+ @@stat = {}
198
+ @@stat[:target] = "#{File.basename(target)} #{File.size(target)} bytes"
199
+ @@stat[:latest] = "#{File.basename(latest)} #{File.size(latest)} bytes"
200
+ @diff.to_h.keys.each { |name| @diff.instance_eval("@@stat[:#{name}] = #{name}.size", __FILE__, __LINE__) }
247
201
  @diff
248
202
  end
203
+
204
+ def SwissmedicDiff.stat
205
+ @@stat
206
+ end
207
+
249
208
  def format_flags(flags)
250
209
  flags.delete(:revision)
251
210
  flags.collect { |flag|
252
211
  "- %s\n" % FLAGS.fetch(flag, "Unbekannt (#{flag})")
253
212
  }.compact.join
254
213
  end
214
+
255
215
  def known_data(latest)
256
216
  known_regs = {}
257
217
  known_seqs = {}
@@ -260,19 +220,22 @@ class SwissmedicDiff
260
220
  _known_data latest, known_regs, known_seqs, known_pacs, newest_rows
261
221
  [known_regs, known_seqs, known_pacs, newest_rows]
262
222
  end
223
+
263
224
  def _known_data(latest, known_regs, known_seqs, known_pacs, newest_rows)
264
- lbook = Spreadsheet.open(latest)
265
- @latest_keys = get_column_indices(lbook).keys
225
+ sheet = SimpleXlsxReader.open(latest).sheets.first
226
+ rows = sheet.rows.slurp
227
+ @latest_keys = get_column_indices(rows).keys
266
228
  idx, prr, prp = nil
267
229
  multiples = {}
268
- each_valid_row(lbook) { |row|
230
+
231
+ each_valid_row(rows) { |row|
269
232
  iksnr = cell(row, @latest_keys.index(:iksnr))
270
233
  seqnr = cell(row, @latest_keys.index(:seqnr))
271
234
  pacnr = cell(row, @latest_keys.index(:ikscd))
272
235
  multiples[iksnr] ||= {}
273
236
  if prr == iksnr && prp == pacnr
274
237
  idx += 1
275
- elsif previous = multiples[iksnr][pacnr]
238
+ elsif (previous = multiples[iksnr][pacnr])
276
239
  prr = iksnr
277
240
  prp = pacnr
278
241
  idx = previous[@latest_keys.size].to_i + 1
@@ -289,17 +252,18 @@ class SwissmedicDiff
289
252
  (newest_rows[iksnr] ||= {})[pacnr] = row
290
253
  }
291
254
  end
255
+
292
256
  def name(diff, iksnr)
293
257
  rows = diff.newest_rows[iksnr]
294
- row = rows.sort.first.last
295
- cell(row, COLUMNS_2014.keys.index(:name_base))
258
+ row = rows.min.last
259
+ cell(row, COLUMNS_FEBRUARY_2019.keys.index(:name_base))
296
260
  end
261
+
297
262
  def rows_diff(row, other, ignore = [])
298
263
  flags = []
299
- COLUMNS_OLD.each_with_index {
300
- |key, idx|
264
+ COLUMNS_OLD.each_with_index { |key, idx|
301
265
  if !ignore.include?(key)
302
- left = _comparable(key, row, @target_keys.index(key))
266
+ left = _comparable(key, row, @target_keys.index(key))
303
267
  right = _comparable(key, other, @latest_keys.index(key))
304
268
  next if left.is_a?(Date) && right.is_a?(Date) && left.strftime(DATE_FORMAT).eql?(right.strftime(DATE_FORMAT))
305
269
  next if left.is_a?(String) && left.empty? && !right
@@ -316,26 +280,28 @@ class SwissmedicDiff
316
280
  #
317
281
  # This should be called after diff method.
318
282
  #
319
- #_sort_ :: sort key (:group | :name | :registration)
283
+ # _sort_ :: sort key (:group | :name | :registration)
320
284
  #
321
- #return :: difference (String)
322
- def to_s(sort=:group)
285
+ # return :: difference (String)
286
+ def to_s(sort = :group)
323
287
  @diff ||= nil
324
- return '' unless @diff
288
+ return "" unless @diff
325
289
  @diff.changes.sort_by { |iksnr, flags|
326
290
  _sort_by(sort, iksnr, flags)
327
291
  }.collect { |iksnr, flags|
328
- if(flags.include? :new)
292
+ if flags.include? :new
329
293
  "+ " + describe(@diff, iksnr)
330
- elsif(flags.include? :delete)
294
+ elsif flags.include? :delete
331
295
  "- " + describe(@diff, iksnr)
332
296
  else
333
297
  "> " + describe(@diff, iksnr) << "; " \
334
- + flags.collect { |flag| describe_flag(@diff, iksnr, flag)
335
- }.compact.join(", ")
298
+ + flags.collect { |flag|
299
+ describe_flag(@diff, iksnr, flag)
300
+ }.compact.join(", ")
336
301
  end
337
302
  }.join("\n")
338
303
  end
304
+
339
305
  def _sort_by(sort, iksnr, flags)
340
306
  case sort
341
307
  when :name
@@ -343,24 +309,25 @@ class SwissmedicDiff
343
309
  when :registration
344
310
  iksnr
345
311
  else
346
- weight = if(flags.include? :new)
347
- 0
348
- elsif(flags.include? :delete)
349
- 1
350
- else
351
- 2
352
- end
312
+ weight = if flags.include? :new
313
+ 0
314
+ elsif flags.include? :delete
315
+ 1
316
+ else
317
+ 2
318
+ end
353
319
  [weight, iksnr]
354
320
  end
355
321
  end
322
+
356
323
  def _comparable(key, row, idx)
357
- if cell = row[idx]
324
+ if row[idx]
358
325
  case key
359
326
  when :registration_date, :expiry_date
360
- if row[idx] && row[idx].value && REGEXP_UNLIMITED.match(row[idx].value.to_s)
327
+ if row[idx] && REGEXP_UNLIMITED.match(row[idx].to_s)
361
328
  VALUE_UNLIMITED # Date.new(2099,12,31)
362
329
  else
363
- row[idx] && row[idx].value ? row[idx].value.to_date : nil
330
+ row[idx]
364
331
  end
365
332
  when :seqnr
366
333
  sprintf "%02i", cell(row, idx).to_i
@@ -370,68 +337,21 @@ class SwissmedicDiff
370
337
  end
371
338
  end
372
339
 
373
- def get_column_indices(spreadsheet)
374
- error_2014 = nil
375
- filename = spreadsheet.root.respond_to?(:filepath) ? spreadsheet.root.filepath : 'unknown'
376
- headerRowId = rows_to_skip(spreadsheet)-1
377
- row = spreadsheet.worksheet(0)[headerRowId]
378
-
379
- COLUMNS_2014.each{
380
- |key, value|
381
- header_name = row[COLUMNS_2014.keys.index(key)].value
382
- unless value.match(header_name)
383
- puts "#{__LINE__}: #{key} -> #{COLUMNS_2014.keys.index(key)} #{value}\nbut was #{header_name}" if $VERBOSE
384
- error_2014 = "#{filename}_has_unexpected_column_#{COLUMNS_2014.keys.index(key)}_#{key}_#{value.to_s}_but_was_#{header_name}"
385
- break
386
- end
387
- }
388
- return COLUMNS_2014 unless error_2014
389
- error_2015 = nil
390
- COLUMNS_JULY_2015.each{
391
- |key, value|
392
- header_name = row[COLUMNS_JULY_2015.keys.index(key)].value
340
+ def get_column_indices(rows)
341
+ header_row_id = rows_to_skip(rows) - 1
342
+ rows[header_row_id]
343
+ row = rows[5] # Headers are found at row 5 since February 5
344
+ 0.upto(COLUMNS_FEBRUARY_2019.size - 1).each { |idx| puts "#{idx}: #{row[idx]}" } if $VERBOSE
345
+ COLUMNS_FEBRUARY_2019.each { |key, value|
346
+ header_name = row[COLUMNS_FEBRUARY_2019.keys.index(key)].to_s
393
347
  unless value.match(header_name)
394
- puts "#{__LINE__}: #{key} -> #{COLUMNS_JULY_2015.keys.index(key)} #{value}\nbut was #{header_name}" if $VERBOSE
395
- error_2015 = "#{filename}_has_unexpected_column_#{COLUMNS_JULY_2015.keys.index(key)}_#{key}_#{value.to_s}_but_was_#{header_name}"
348
+ puts "#{__LINE__}: #{key} -> #{COLUMNS_FEBRUARY_2019.keys.index(key)} #{value}\nbut was #{header_name}" if $VERBOSE
396
349
  break
397
350
  end
398
351
  }
399
- unless error_2015
400
- idx14 = COLUMNS_2014.keys.index(:name_base)
401
- idx15 = COLUMNS_2014.keys.index(:name_base)
402
- if (idx14 != idx15)
403
- raise ":name_base must be same index in COLUMNS_JULY_2015 and COLUMNS_2014. Is #{idx14} and #{idx15}"
404
- end
405
- return COLUMNS_JULY_2015
406
- end
407
- row = spreadsheet.worksheet(0)[5] # Headers are found at row 5 since February 5
408
- error_2019 = nil
409
- if spreadsheet.worksheet(0)[5].size != COLUMNS_FEBRUARY_2019.size
410
- raise "#{error_2015}\n#{error_2014}"
411
- else
412
- 0.upto((COLUMNS_FEBRUARY_2019.size) -1).each{ |idx| puts "#{idx}: #{row[idx].value}" } if $VERBOSE
413
- COLUMNS_FEBRUARY_2019.each{
414
- |key, value|
415
- header_name = row[COLUMNS_FEBRUARY_2019.keys.index(key)].value.to_s
416
- unless value.match(header_name)
417
- puts "#{__LINE__}: #{key} -> #{COLUMNS_FEBRUARY_2019.keys.index(key)} #{value}\nbut was #{header_name}" if $VERBOSE
418
- error_2019 = "Packungen.xlslx_has_unexpected_column_#{COLUMNS_FEBRUARY_2019.keys.index(key)}_#{key}_#{value.to_s}_but_was_#{header_name}"
419
- require 'pry'; binding.pry
420
- break
421
- end
422
- }
423
- unless error_2015
424
- idx14 = COLUMNS_2014.keys.index(:name_base)
425
- idx15 = COLUMNS_2014.keys.index(:name_base)
426
- if (idx14 != idx15)
427
- raise ":name_base must be same index in COLUMNS_JULY_2015 and COLUMNS_2014. Is #{idx14} and #{idx15}"
428
- end
429
- return COLUMNS_FEBRUARY_2019
430
- end
431
- raise "#{error_2019}" if error_2019
432
- end
433
- COLUMNS_FEBRUARY_2019
352
+ COLUMNS_FEBRUARY_2019
434
353
  end
354
+
435
355
  #=== iterate over all valid rows of a swissmedic Packungen.xls
436
356
  #
437
357
  # Iterates over all rows, ignoring Tierarzneimittel and
@@ -442,24 +362,22 @@ class SwissmedicDiff
442
362
  # example:
443
363
  # SwissmedicDiff.new.each_valid_row(Spreadsheet.open('path/to/file')) { |x| puts "iksnr #{x[0]}" }
444
364
  #
445
- #_spreadsheet_:: spreadsheet to operate on
365
+ # rows:: rows to operate on
446
366
  #
447
- #return ::
448
- def each_valid_row(spreadsheet)
449
- skipRows = rows_to_skip(spreadsheet)
450
- column_keys = get_column_indices(spreadsheet).keys
451
- worksheet = spreadsheet.worksheet(0)
367
+ # return ::
368
+ def each_valid_row(rows)
369
+ skip_rows = rows_to_skip(rows)
370
+ column_keys = get_column_indices(rows).keys
452
371
  row_nr = 0
453
- worksheet.each() {
454
- |row|
372
+ rows.each { |row|
455
373
  row_nr += 1
456
- next if row_nr <= skipRows
374
+ next if row_nr <= skip_rows
457
375
  break unless row
458
- if row.size < column_keys.size/2
459
- $stdout.puts "Data missing in \n(line " + (row_nr).to_s + "): " + row.join(", ").to_s + "\n"
376
+ if row.size < column_keys.size / 2
377
+ $stdout.puts "Data missing in \n(line " + row_nr.to_s + "): " + row.join(", ").to_s + "\n"
460
378
  next
461
379
  end
462
- next if (cell(row, column_keys.index(:production_science)) == 'Tierarzneimittel')
380
+ next if cell(row, column_keys.index(:production_science)) == "Tierarzneimittel"
463
381
  row[column_keys.index(:iksnr)] = "%05i" % cell(row, column_keys.index(:iksnr)).to_i
464
382
  row[column_keys.index(:seqnr)] = "%02i" % cell(row, column_keys.index(:seqnr)).to_i
465
383
  row[column_keys.index(:ikscd)] = "%03i" % cell(row, column_keys.index(:ikscd)).to_i
@@ -467,19 +385,15 @@ class SwissmedicDiff
467
385
  }
468
386
  end
469
387
 
470
- def rows_to_skip(spreadsheet)
471
- # Packungen.xls of swissmedic before October 2013 had 3 leading rows
472
- # Packungen.xls of swissmedic after October 2013 have 4 leading rows
388
+ def rows_to_skip(rows)
473
389
  j = 0
474
- while true
475
- cell = spreadsheet.worksheet(0).row(j)[0]
476
- cell = cell.value if cell.is_a?(RubyXL::Cell)
477
- break if cell.respond_to?(:to_i) and cell.to_i != 0
390
+ loop do
391
+ cell = rows[j][0]
392
+ break if cell.respond_to?(:to_i) && cell.to_i != 0
478
393
  j += 1
479
394
  end
480
395
  j
481
396
  end
482
-
483
397
  end
484
398
  include Diff
485
399
  end
data/lib/version.rb CHANGED
@@ -1,3 +1,3 @@
1
1
  class SwissmedicDiff
2
- VERSION = '0.2.9'
2
+ VERSION = "0.3.1"
3
3
  end
@@ -0,0 +1,28 @@
1
+ #!/usr/bin/env ruby
2
+
3
+ # SwissmedicPluginTest -- oddb.org -- 18.03.2008 -- hwyss@ywesee.com
4
+
5
+ $: << File.expand_path("../lib", File.dirname(__FILE__))
6
+
7
+ require "minitest/autorun"
8
+ require "tempfile"
9
+ require "open3"
10
+
11
+ module ODDB
12
+ class SwissmedicPluginBinTest < Minitest::Test
13
+ def test_running_binary
14
+ cmd = "bundle exec ruby bin/swissmedic-diff test/data/Packungen-2025.07.01.xlsx test/data/Packungen-2019.03.06.xlsx"
15
+ stdout, stderr, status = Open3.capture3(cmd)
16
+ puts "Output: #{stdout}"
17
+ puts "Error: #{stderr}" if stderr
18
+ puts "Exit Status: #{status.exitstatus}"
19
+ assert_equal(0, status.exitstatus)
20
+ lines = stdout.split("\n")
21
+ assert_equal(40, lines.size)
22
+ found = lines.find { |line| line.eql?("+ 16105: Hirudoid, Creme") }
23
+ assert(found, "Must find + 16105: Hirudoid, Creme")
24
+ found = lines.find { |line| line.eql?("> 44447: Lopresor Retard 200, Divitabs; Namensänderung (Lopresor Retard 200, Divitabs), Heilmittelcode (Synthetika human), Ablaufdatum der Zulassung (22.12.2019), Wirkstoffe (metoprololi tartras (2:1)), Zusammensetzung (metoprololi tartras (2:1) 200 mg, excipiens pro compresso obducto.)") }
25
+ assert(found, "must found Ablaufdatum")
26
+ end
27
+ end
28
+ end