swissmedic-diff 0.2.9 → 0.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,164 +1,114 @@
1
1
  #!/usr/bin/env ruby
2
- # encoding: utf-8
3
2
  # SwissmedicDiff -- swissmedic-diff -- 27.03.2008 -- hwyss@ywesee.com
4
3
 
5
- require 'ostruct'
6
- require 'spreadsheet'
7
- require 'rubyXL'
8
- require 'pp'
9
- require File.join(File.dirname(__FILE__), 'version.rb')
10
-
11
- # add some monkey patches for Spreadsheet and rubyXL
12
- require File.join(File.dirname(__FILE__), 'compatibility.rb')
4
+ require "ostruct"
5
+ require "simple_xlsx_reader"
6
+ require "pp"
7
+ require File.join(File.dirname(__FILE__), "version.rb")
13
8
 
14
9
  #= diff command (compares two xls files) for Swissmedic xls files.
15
10
  #
16
- #Compares two Excel Documents provided by Swissmedic and displays the
17
- #salient differences. Also: Find out what Products have changed on the
18
- #swiss healthcare market.
11
+ # Compares two Excel Documents provided by Swissmedic and displays the
12
+ # salient differences. Also: Find out what Products have changed on the
13
+ # swiss healthcare market.
19
14
  #
20
- #Authors:: Hannes Wyss (hwyss@ywesee.com), Masaomi Hatakeyama (mhatakeyama@ywesee.com)
21
- #Version:: 0.1.4 2013-10-16 commit c30af5c15f6b8101f8f84cb482dfd09ab20729d6
22
- #Copyright:: Copyright (C) ywesee GmbH, 2010. All rights reserved.
23
- #License:: GPLv2.0 Compliance
24
- #Source:: http://scm.ywesee.com/?p=swissmedic-diff/.git;a=summary
15
+ # Authors:: Hannes Wyss (hwyss@ywesee.com), Masaomi Hatakeyama (mhatakeyama@ywesee.com)
16
+ # Version:: 0.1.4 2013-10-16 commit c30af5c15f6b8101f8f84cb482dfd09ab20729d6
17
+ # Copyright:: Copyright (C) ywesee GmbH, 2010. All rights reserved.
18
+ # License:: GPLv2.0 Compliance
19
+ # Source:: http://scm.ywesee.com/?p=swissmedic-diff/.git;a=summary
25
20
  class SwissmedicDiff
26
- VALUE_UNLIMITED = nil
27
- REGEXP_UNLIMITED = /unbegrenzt/i
21
+ VALUE_UNLIMITED = nil
22
+ REGEXP_UNLIMITED = /unbegrenzt/i
28
23
 
29
24
  module Diff
30
- COLUMNS_2014 = {
31
- :iksnr => /Zulassungs-Nummer/i, # column-nr: 0
32
- :seqnr => /Dosistärke-nummer|^Sequenz$/i,
33
- :name_base => /Präparatebezeichnung|^Sequenzname$/i,
34
- :company => /Zulassungsinhaberin/i,
35
- :index_therapeuticus => /IT-Nummer/i,
36
- :atc_class => /ATC-Code/i, # column-nr: 5
37
- :production_science => /Heilmittelcode/i,
38
- :registration_date => /Erstzul.datum Präp./i,
39
- :sequence_date => /Zul.datum Dosisstärke *|Zul.datum Sequenz/i,
40
- :expiry_date => /Gültigkeits-datum */i,
41
- :ikscd => /Verpackungs ID/i, # column-nr: 10
42
- :size => /Packungsgrösse/i,
43
- :unit => /Einheit/i,
44
- :ikscat => /Abgabekategorie/i,
45
- :substances => /Wirkstoff/i,
46
- :composition => /Zusammensetzung/i, # column-nr: 15
47
- :indication_registration => /Anwendungsgebiet Präparate/i,
48
- :indication_sequence => /Anwendungsgebiet Dosisstärke|Anwendungsgebiet Sequenz/i,
49
- }
50
-
51
- COLUMNS_OLD = [ :iksnr, :seqnr, :name_base, :company,
52
- :index_therapeuticus, :atc_class, :production_science,
53
- :registration_date, :sequence_date, :expiry_date, :ikscd,
54
- :size, :unit, :ikscat, :substances, :composition,
55
- :indication_registration, :indication_sequence ]
25
+ COLUMNS_OLD = [:iksnr, :seqnr, :name_base, :company,
26
+ :index_therapeuticus, :atc_class, :production_science,
27
+ :registration_date, :sequence_date, :expiry_date, :ikscd,
28
+ :size, :unit, :ikscat, :substances, :composition,
29
+ :indication_registration, :indication_sequence]
56
30
 
57
- COLUMNS_JULY_2015 = {
58
- :iksnr => /Zulassungs-Nummer/i, # column-nr: 0
59
- :seqnr => /Dosis+tärke-nummer/i,
60
- :name_base => /Präparatebezeichnung/i,
61
- :company => /Zulassungsinhaberin/i,
62
- :production_science => /Heilmittelcode/i,
63
- :index_therapeuticus => /IT-Nummer/i, # column-nr: 5
64
- :atc_class => /ATC-Code/i,
65
- :registration_date => /Erstzulassungs-datum./i,
66
- :sequence_date => /Zul.datum Dosisstärke/i,
67
- :expiry_date => /Gültigkeitsdauer der Zulassung/i,
68
- :ikscd => /Packungscode/i, # column-nr: 10
69
- :size => /Packungsgrösse/i,
70
- :unit => /Einheit/i,
71
- :ikscat => /Abgabekategorie Packung/i,
72
- :ikscat_seq => /Abgabekategorie Dosisstärke/i,
73
- :ikscat_preparation => /Abgabekategorie Präparat/i, # column-nr: 15
74
- :substances => /Wirkstoff/i,
75
- :composition => /Zusammensetzung/i,
76
- :indication_registration => /Anwendungsgebiet Präparat/i,
77
- :indication_sequence => /Anwendungsgebiet Dosisstärke/i,
78
- :gen_production => /Gentechnisch hergestellte Wirkstoffe/i, # column-nr 20
79
- :insulin_category => /Kategorie bei Insulinen/i,
31
+ COLUMNS_FEBRUARY_2019 = {
32
+ iksnr: /Zulassungs-Nummer/i, # column-nr: 0
33
+ seqnr: /Dosisstärke-nummer/i,
34
+ name_base: /Bezeichnung des Arzneimittels/i,
35
+ company: /Zulassungsinhaberin/i,
36
+ production_science: /Heilmittelcode/i,
37
+ index_therapeuticus: /IT-Nummer/i, # column-nr: 5
38
+ atc_class: /ATC-Code/i,
39
+ registration_date: /Erstzul.datum Arzneimittel/i,
40
+ sequence_date: /Zul.datum Dosisstärke/i,
41
+ expiry_date: /Gültigkeitsdauer der Zulassung/i,
42
+ ikscd: /Packungscode/i, # column-nr: 10
43
+ size: /Packungsgrösse/i,
44
+ unit: /Einheit/i,
45
+ ikscat: /Abgabekategorie Packung/i,
46
+ ikscat_seq: /Abgabekategorie Dosisstärke/i,
47
+ ikscat_preparation: /Abgabekategorie Arzneimittel/i, # column-nr: 15
48
+ substances: /Wirkstoff/i,
49
+ composition: /Zusammensetzung/i,
50
+ composition_AMZV: /Volldeklaration rev. AMZV umgesetzt/i,
51
+ indication_registration: /Anwendungsgebiet Arzneimittel/i,
52
+ indication_sequence: /Anwendungsgebiet Dosisstärke/i, # column-nr 20
53
+ gen_production: /Gentechnisch hergestellte Wirkstoffe/i,
54
+ insulin_category: /Kategorie bei Insulinen/i,
80
55
  # In February 2018 Swissmedic corrected the typo betäubunsmittel to betäubungsmittel-
81
- :drug_index => /Verz. bei betäubun.*smittel-haltigen Präparaten/i,
82
- }
83
-
84
- COLUMNS_FEBRUARY_2019= {
85
- :iksnr => /Zulassungs-Nummer/i, # column-nr: 0
86
- :seqnr => /Dosisstärke-nummer/i,
87
- :name_base => /Bezeichnung des Arzneimittels/i,
88
- :company => /Zulassungsinhaberin/i,
89
- :production_science => /Heilmittelcode/i,
90
- :index_therapeuticus => /IT-Nummer/i, # column-nr: 5
91
- :atc_class => /ATC-Code/i,
92
- :registration_date => /Erstzul.datum Arzneimittel/i,
93
- :sequence_date => /Zul.datum Dosisstärke/i,
94
- :expiry_date => /Gültigkeitsdauer der Zulassung/i,
95
- :ikscd => /Packungscode/i, # column-nr: 10
96
- :size => /Packungsgrösse/i,
97
- :unit => /Einheit/i,
98
- :ikscat => /Abgabekategorie Packung/i,
99
- :ikscat_seq => /Abgabekategorie Dosisstärke/i,
100
- :ikscat_preparation => /Abgabekategorie Arzneimittel/i, # column-nr: 15
101
- :substances => /Wirkstoff/i,
102
- :composition => /Zusammensetzung/i,
103
- :composition_AMZV => /Volldeklaration rev. AMZV umgesetzt/i,
104
- :indication_registration => /Anwendungsgebiet Arzneimittel/i,
105
- :indication_sequence => /Anwendungsgebiet Dosisstärke/i, # column-nr 20
106
- :gen_production => /Gentechnisch hergestellte Wirkstoffe/i,
107
- :insulin_category => /Kategorie bei Insulinen/i,
108
- # swissmedi corrected in february 2018 the typo betäubunsmittel to betäubungsmittel-
109
- :drug_index => /Verz. bei betäubungsmittel-haltigen Arzneimittel/i,
56
+ drug_index: /Verz. bei betäubungsmittel-haltigen Arzneimittel/i
110
57
  }
111
58
  FLAGS = {
112
- :new => 'Neues Produkt',
113
- :name_base => 'Namensänderung',
114
- :ikscat => 'Abgabekategorie',
115
- :index_therapeuticus => 'Index Therapeuticus',
116
- :indication_registration => 'Anwendungsgebiet Präparate',
117
- :indication_sequence => 'Anwendungsgebiet Sequenz',
118
- :company => 'Zulassungsinhaber',
119
- :composition => 'Zusammensetzung',
120
- :sequence => 'Packungen',
121
- :size => 'Packungsgrösse',
122
- :expiry_date => 'Ablaufdatum der Zulassung',
123
- :registration_date => 'Erstzulassungsdatum',
124
- :sequence_date => 'Zulassungsdatum Sequenz',
125
- :delete => 'Das Produkt wurde gelöscht',
126
- :replaced_package => 'Packungs-Nummer',
127
- :substances => 'Wirkstoffe',
128
- :production_science => 'Heilmittelcode',
129
- :atc_class => 'ATC-Code',
59
+ new: "Neues Produkt",
60
+ name_base: "Namensänderung",
61
+ ikscat: "Abgabekategorie",
62
+ index_therapeuticus: "Index Therapeuticus",
63
+ indication_registration: "Anwendungsgebiet Präparate",
64
+ indication_sequence: "Anwendungsgebiet Sequenz",
65
+ company: "Zulassungsinhaber",
66
+ composition: "Zusammensetzung",
67
+ sequence: "Packungen",
68
+ size: "Packungsgrösse",
69
+ expiry_date: "Ablaufdatum der Zulassung",
70
+ registration_date: "Erstzulassungsdatum",
71
+ sequence_date: "Zulassungsdatum Sequenz",
72
+ delete: "Das Produkt wurde gelöscht",
73
+ replaced_package: "Packungs-Nummer",
74
+ substances: "Wirkstoffe",
75
+ production_science: "Heilmittelcode",
76
+ atc_class: "ATC-Code"
130
77
  }
131
78
  GALFORM_P = %r{excipiens\s+(ad|pro)\s+(?<galform>((?!\bpro\b)[^.])+)}
132
- DATE_FORMAT = '%Y:%m:%d'
79
+ DATE_FORMAT = "%Y:%m:%d"
133
80
 
134
81
  def capitalize(string)
135
- string.split(/\s+/).collect { |word| word.capitalize }.join(' ')
82
+ string.split(/\s+/).collect { |word| word.capitalize }.join(" ")
136
83
  end
84
+
137
85
  def cell(row, pos)
138
- if(cell = row[pos])
86
+ if (cell = row[pos])
139
87
  cell.to_s
140
88
  end
141
89
  end
90
+
142
91
  def describe(diff, iksnr)
143
92
  sprintf("%s: %s", iksnr, name(diff, iksnr))
144
93
  end
94
+
145
95
  def describe_flag(diff, iksnr, flag)
146
96
  txt = FLAGS.fetch(flag, flag)
147
97
  case flag
148
98
  when :sequence
149
99
  when :replaced_package
150
100
  pairs = diff.newest_rows[iksnr].collect { |rep, row|
151
- if(old = diff.replacements[row])
152
- [old, rep].join(' -> ')
101
+ if (old = diff.replacements[row])
102
+ [old, rep].join(" -> ")
153
103
  end
154
104
  }.compact
155
- sprintf "%s (%s)", txt, pairs.join(',')
105
+ sprintf "%s (%s)", txt, pairs.join(",")
156
106
  when :registration_date, :expiry_date
157
107
  row = diff.newest_rows[iksnr].sort.first.last
158
- if row[COLUMNS_2014.keys.index(flag)].value.to_s.match(REGEXP_UNLIMITED)
159
- sprintf "%s (%s)", txt, 'unbegrenzt'
108
+ if row[COLUMNS_2014.keys.index(flag)].to_s.match(REGEXP_UNLIMITED)
109
+ sprintf "%s (%s)", txt, "unbegrenzt"
160
110
  else
161
- sprintf "%s (%s)", txt, row[COLUMNS_2014.keys.index(flag)].value.strftime('%d.%m.%Y')
111
+ sprintf "%s (%s)", txt, row[COLUMNS_2014.keys.index(flag)].strftime("%d.%m.%Y")
162
112
  end
163
113
  else
164
114
  row = diff.newest_rows[iksnr].sort.first.last
@@ -168,11 +118,11 @@ class SwissmedicDiff
168
118
 
169
119
  #=== Comparison of two Excel files
170
120
  #
171
- #_target_:: new file path (String)
172
- #_latest_:: old file path (String)
173
- #_ignore_:: columns not to be compared (Symbol)
121
+ # _target_:: new file path (String)
122
+ # _latest_:: old file path (String)
123
+ # _ignore_:: columns not to be compared (Symbol)
174
124
  #
175
- #return :: differences (OpenStruct class)
125
+ # return :: differences (OpenStruct class)
176
126
  def diff(target, latest, ignore = [])
177
127
  replacements = {}
178
128
  known_regs, known_seqs, known_pacs, newest_rows = known_data(latest)
@@ -181,17 +131,17 @@ class SwissmedicDiff
181
131
  @diff.updates = updates = []
182
132
  @diff.changes = changes = {}
183
133
  @diff.newest_rows = newest_rows
184
- Spreadsheet.client_encoding = 'UTF-8'
185
- tbook = Spreadsheet.open(target)
134
+ t_rows = SimpleXlsxReader.open(target).sheets.first.rows.slurp
186
135
  idx, prr, prp = nil
187
136
  multiples = {}
188
- @latest_keys = get_column_indices(Spreadsheet.open(latest)).keys
189
- @target_keys = get_column_indices(tbook).keys
190
- each_valid_row(tbook) { |row|
137
+ l_rows = SimpleXlsxReader.open(latest).sheets.first.rows.slurp
138
+ @latest_keys = get_column_indices(l_rows).keys
139
+ @target_keys = get_column_indices(t_rows).keys
140
+ each_valid_row(t_rows) { |row|
191
141
  iksnr = cell(row, @target_keys.index(:iksnr))
192
142
  seqnr = cell(row, @target_keys.index(:seqnr))
193
143
  pacnr = cell(row, @target_keys.index(:ikscd))
194
- (multiples[iksnr] ||= {})
144
+ multiples[iksnr] ||= {}
195
145
  if prr == iksnr && prp == pacnr
196
146
  idx += 1
197
147
  elsif previous = multiples[iksnr][pacnr]
@@ -206,52 +156,54 @@ class SwissmedicDiff
206
156
  row[@target_keys.size] = idx
207
157
  (newest_rows[iksnr] ||= {})[pacnr] = row
208
158
  multiples[iksnr][pacnr] = row
209
- if(other = known_regs.delete([iksnr]))
210
- changes[iksnr] ||= []
159
+ changes[iksnr] ||= if known_regs.delete([iksnr])
160
+ []
211
161
  else
212
- changes[iksnr] ||= [:new]
162
+ [:new]
213
163
  end
214
164
  known_seqs.delete([iksnr, seqnr])
215
- if(other = known_pacs.delete([iksnr, pacnr, idx]))
165
+ if (other = known_pacs.delete([iksnr, pacnr, idx]))
216
166
  flags = rows_diff(row, other, ignore)
217
167
  (changes[iksnr].concat flags).uniq!
218
168
  updates.push row unless flags.empty?
219
169
  else
220
- replacements.store [ iksnr, seqnr, cell(row, @target_keys.index(:size)),
221
- cell(row, @target_keys.index(:unit)) ], row
170
+ replacements.store [iksnr, seqnr, cell(row, @target_keys.index(:size)),
171
+ cell(row, @target_keys.index(:unit))], row
222
172
  flags = changes[iksnr]
223
- flags.push(:sequence).uniq! unless(flags.include? :new)
173
+ flags.push(:sequence).uniq! unless flags.include? :new
224
174
  news.push row
225
175
  end
226
176
  }
227
177
  @diff.replacements = reps = {}
228
178
  known_pacs.each { |(iksnr, pacnr), row|
229
- key = [iksnr, '%02i' % cell(row, @target_keys.index(:seqnr)).to_i,
230
- cell(row, @target_keys.index(:size)), cell(row, @target_keys.index(:unit))]
231
- if(rep = replacements[key])
179
+ key = [iksnr, "%02i" % cell(row, @target_keys.index(:seqnr)).to_i,
180
+ cell(row, @target_keys.index(:size)), cell(row, @target_keys.index(:unit))]
181
+ if (rep = replacements[key])
232
182
  changes[iksnr].push :replaced_package
233
183
  reps.store rep, pacnr
234
184
  end
235
185
  }
236
- known_regs.each_key { |(iksnr,_)| changes[iksnr] = [:delete] }
186
+ known_regs.each_key { |(iksnr, _)| changes[iksnr] = [:delete] }
237
187
  changes.delete_if { |iksnr, flags| flags.empty? }
238
188
  @diff.package_deletions = known_pacs.collect { |key, row|
239
189
  ## the keys in known_pacs don't include the sequence number (which
240
190
  # would prevent us from properly recognizing multi-sequence-Packages),
241
191
  # so we need to complete the path to the package now
242
- key[1,0] = '%02i' % cell(row, @target_keys.index(:seqnr)).to_i
192
+ key[1, 0] = "%02i" % cell(row, @target_keys.index(:seqnr)).to_i
243
193
  key
244
194
  }
245
195
  @diff.sequence_deletions = known_seqs.keys
246
196
  @diff.registration_deletions = known_regs.keys
247
197
  @diff
248
198
  end
199
+
249
200
  def format_flags(flags)
250
201
  flags.delete(:revision)
251
202
  flags.collect { |flag|
252
203
  "- %s\n" % FLAGS.fetch(flag, "Unbekannt (#{flag})")
253
204
  }.compact.join
254
205
  end
206
+
255
207
  def known_data(latest)
256
208
  known_regs = {}
257
209
  known_seqs = {}
@@ -260,12 +212,15 @@ class SwissmedicDiff
260
212
  _known_data latest, known_regs, known_seqs, known_pacs, newest_rows
261
213
  [known_regs, known_seqs, known_pacs, newest_rows]
262
214
  end
215
+
263
216
  def _known_data(latest, known_regs, known_seqs, known_pacs, newest_rows)
264
- lbook = Spreadsheet.open(latest)
265
- @latest_keys = get_column_indices(lbook).keys
217
+ sheet = SimpleXlsxReader.open(latest).sheets.first
218
+ rows = sheet.rows.slurp
219
+ @latest_keys = get_column_indices(rows).keys
266
220
  idx, prr, prp = nil
267
221
  multiples = {}
268
- each_valid_row(lbook) { |row|
222
+
223
+ each_valid_row(rows) { |row|
269
224
  iksnr = cell(row, @latest_keys.index(:iksnr))
270
225
  seqnr = cell(row, @latest_keys.index(:seqnr))
271
226
  pacnr = cell(row, @latest_keys.index(:ikscd))
@@ -289,17 +244,18 @@ class SwissmedicDiff
289
244
  (newest_rows[iksnr] ||= {})[pacnr] = row
290
245
  }
291
246
  end
247
+
292
248
  def name(diff, iksnr)
293
249
  rows = diff.newest_rows[iksnr]
294
250
  row = rows.sort.first.last
295
251
  cell(row, COLUMNS_2014.keys.index(:name_base))
296
252
  end
253
+
297
254
  def rows_diff(row, other, ignore = [])
298
255
  flags = []
299
- COLUMNS_OLD.each_with_index {
300
- |key, idx|
256
+ COLUMNS_OLD.each_with_index { |key, idx|
301
257
  if !ignore.include?(key)
302
- left = _comparable(key, row, @target_keys.index(key))
258
+ left = _comparable(key, row, @target_keys.index(key))
303
259
  right = _comparable(key, other, @latest_keys.index(key))
304
260
  next if left.is_a?(Date) && right.is_a?(Date) && left.strftime(DATE_FORMAT).eql?(right.strftime(DATE_FORMAT))
305
261
  next if left.is_a?(String) && left.empty? && !right
@@ -316,26 +272,28 @@ class SwissmedicDiff
316
272
  #
317
273
  # This should be called after diff method.
318
274
  #
319
- #_sort_ :: sort key (:group | :name | :registration)
275
+ # _sort_ :: sort key (:group | :name | :registration)
320
276
  #
321
- #return :: difference (String)
322
- def to_s(sort=:group)
277
+ # return :: difference (String)
278
+ def to_s(sort = :group)
323
279
  @diff ||= nil
324
- return '' unless @diff
280
+ return "" unless @diff
325
281
  @diff.changes.sort_by { |iksnr, flags|
326
282
  _sort_by(sort, iksnr, flags)
327
283
  }.collect { |iksnr, flags|
328
- if(flags.include? :new)
284
+ if flags.include? :new
329
285
  "+ " + describe(@diff, iksnr)
330
- elsif(flags.include? :delete)
286
+ elsif flags.include? :delete
331
287
  "- " + describe(@diff, iksnr)
332
288
  else
333
289
  "> " + describe(@diff, iksnr) << "; " \
334
- + flags.collect { |flag| describe_flag(@diff, iksnr, flag)
335
- }.compact.join(", ")
290
+ + flags.collect { |flag|
291
+ describe_flag(@diff, iksnr, flag)
292
+ }.compact.join(", ")
336
293
  end
337
294
  }.join("\n")
338
295
  end
296
+
339
297
  def _sort_by(sort, iksnr, flags)
340
298
  case sort
341
299
  when :name
@@ -343,24 +301,25 @@ class SwissmedicDiff
343
301
  when :registration
344
302
  iksnr
345
303
  else
346
- weight = if(flags.include? :new)
347
- 0
348
- elsif(flags.include? :delete)
349
- 1
350
- else
351
- 2
352
- end
304
+ weight = if flags.include? :new
305
+ 0
306
+ elsif flags.include? :delete
307
+ 1
308
+ else
309
+ 2
310
+ end
353
311
  [weight, iksnr]
354
312
  end
355
313
  end
314
+
356
315
  def _comparable(key, row, idx)
357
- if cell = row[idx]
316
+ if row[idx]
358
317
  case key
359
318
  when :registration_date, :expiry_date
360
- if row[idx] && row[idx].value && REGEXP_UNLIMITED.match(row[idx].value.to_s)
319
+ if row[idx] && row[idx] && REGEXP_UNLIMITED.match(row[idx].to_s)
361
320
  VALUE_UNLIMITED # Date.new(2099,12,31)
362
321
  else
363
- row[idx] && row[idx].value ? row[idx].value.to_date : nil
322
+ row[idx]
364
323
  end
365
324
  when :seqnr
366
325
  sprintf "%02i", cell(row, idx).to_i
@@ -370,68 +329,21 @@ class SwissmedicDiff
370
329
  end
371
330
  end
372
331
 
373
- def get_column_indices(spreadsheet)
374
- error_2014 = nil
375
- filename = spreadsheet.root.respond_to?(:filepath) ? spreadsheet.root.filepath : 'unknown'
376
- headerRowId = rows_to_skip(spreadsheet)-1
377
- row = spreadsheet.worksheet(0)[headerRowId]
378
-
379
- COLUMNS_2014.each{
380
- |key, value|
381
- header_name = row[COLUMNS_2014.keys.index(key)].value
332
+ def get_column_indices(rows)
333
+ headerRowId = rows_to_skip(rows) - 1
334
+ rows[headerRowId]
335
+ row = rows[5] # Headers are found at row index 5 (the sixth row) since the February 2019 layout
336
+ 0.upto(COLUMNS_FEBRUARY_2019.size - 1).each { |idx| puts "#{idx}: #{row[idx]}" } if $VERBOSE
337
+ COLUMNS_FEBRUARY_2019.each { |key, value|
338
+ header_name = row[COLUMNS_FEBRUARY_2019.keys.index(key)].to_s
382
339
  unless value.match(header_name)
383
- puts "#{__LINE__}: #{key} -> #{COLUMNS_2014.keys.index(key)} #{value}\nbut was #{header_name}" if $VERBOSE
384
- error_2014 = "#{filename}_has_unexpected_column_#{COLUMNS_2014.keys.index(key)}_#{key}_#{value.to_s}_but_was_#{header_name}"
340
+ puts "#{__LINE__}: #{key} -> #{COLUMNS_FEBRUARY_2019.keys.index(key)} #{value}\nbut was #{header_name}" if $VERBOSE
385
341
  break
386
342
  end
387
343
  }
388
- return COLUMNS_2014 unless error_2014
389
- error_2015 = nil
390
- COLUMNS_JULY_2015.each{
391
- |key, value|
392
- header_name = row[COLUMNS_JULY_2015.keys.index(key)].value
393
- unless value.match(header_name)
394
- puts "#{__LINE__}: #{key} -> #{COLUMNS_JULY_2015.keys.index(key)} #{value}\nbut was #{header_name}" if $VERBOSE
395
- error_2015 = "#{filename}_has_unexpected_column_#{COLUMNS_JULY_2015.keys.index(key)}_#{key}_#{value.to_s}_but_was_#{header_name}"
396
- break
397
- end
398
- }
399
- unless error_2015
400
- idx14 = COLUMNS_2014.keys.index(:name_base)
401
- idx15 = COLUMNS_2014.keys.index(:name_base)
402
- if (idx14 != idx15)
403
- raise ":name_base must be same index in COLUMNS_JULY_2015 and COLUMNS_2014. Is #{idx14} and #{idx15}"
404
- end
405
- return COLUMNS_JULY_2015
406
- end
407
- row = spreadsheet.worksheet(0)[5] # Headers are found at row 5 since February 5
408
- error_2019 = nil
409
- if spreadsheet.worksheet(0)[5].size != COLUMNS_FEBRUARY_2019.size
410
- raise "#{error_2015}\n#{error_2014}"
411
- else
412
- 0.upto((COLUMNS_FEBRUARY_2019.size) -1).each{ |idx| puts "#{idx}: #{row[idx].value}" } if $VERBOSE
413
- COLUMNS_FEBRUARY_2019.each{
414
- |key, value|
415
- header_name = row[COLUMNS_FEBRUARY_2019.keys.index(key)].value.to_s
416
- unless value.match(header_name)
417
- puts "#{__LINE__}: #{key} -> #{COLUMNS_FEBRUARY_2019.keys.index(key)} #{value}\nbut was #{header_name}" if $VERBOSE
418
- error_2019 = "Packungen.xlslx_has_unexpected_column_#{COLUMNS_FEBRUARY_2019.keys.index(key)}_#{key}_#{value.to_s}_but_was_#{header_name}"
419
- require 'pry'; binding.pry
420
- break
421
- end
422
- }
423
- unless error_2015
424
- idx14 = COLUMNS_2014.keys.index(:name_base)
425
- idx15 = COLUMNS_2014.keys.index(:name_base)
426
- if (idx14 != idx15)
427
- raise ":name_base must be same index in COLUMNS_JULY_2015 and COLUMNS_2014. Is #{idx14} and #{idx15}"
428
- end
429
- return COLUMNS_FEBRUARY_2019
430
- end
431
- raise "#{error_2019}" if error_2019
432
- end
433
- COLUMNS_FEBRUARY_2019
344
+ COLUMNS_FEBRUARY_2019
434
345
  end
346
+
435
347
  #=== iterate over all valid rows of a swissmedic Packungen.xls
436
348
  #
437
349
  # Iterates over all rows, ignoring Tierarzneimittel and
@@ -442,24 +354,22 @@ class SwissmedicDiff
442
354
  # example:
443
355
  # SwissmedicDiff.new.each_valid_row(SimpleXlsxReader.open('path/to/file').sheets.first.rows.slurp) { |x| puts "iksnr #{x[0]}" }
444
356
  #
445
- #_spreadsheet_:: spreadsheet to operate on
357
+ # rows:: rows to operate on
446
358
  #
447
- #return ::
448
- def each_valid_row(spreadsheet)
449
- skipRows = rows_to_skip(spreadsheet)
450
- column_keys = get_column_indices(spreadsheet).keys
451
- worksheet = spreadsheet.worksheet(0)
359
+ # return ::
360
+ def each_valid_row(rows)
361
+ skipRows = rows_to_skip(rows)
362
+ column_keys = get_column_indices(rows).keys
452
363
  row_nr = 0
453
- worksheet.each() {
454
- |row|
364
+ rows.each { |row|
455
365
  row_nr += 1
456
366
  next if row_nr <= skipRows
457
367
  break unless row
458
- if row.size < column_keys.size/2
459
- $stdout.puts "Data missing in \n(line " + (row_nr).to_s + "): " + row.join(", ").to_s + "\n"
368
+ if row.size < column_keys.size / 2
369
+ $stdout.puts "Data missing in \n(line " + row_nr.to_s + "): " + row.join(", ").to_s + "\n"
460
370
  next
461
371
  end
462
- next if (cell(row, column_keys.index(:production_science)) == 'Tierarzneimittel')
372
+ next if cell(row, column_keys.index(:production_science)) == "Tierarzneimittel"
463
373
  row[column_keys.index(:iksnr)] = "%05i" % cell(row, column_keys.index(:iksnr)).to_i
464
374
  row[column_keys.index(:seqnr)] = "%02i" % cell(row, column_keys.index(:seqnr)).to_i
465
375
  row[column_keys.index(:ikscd)] = "%03i" % cell(row, column_keys.index(:ikscd)).to_i
@@ -467,19 +377,15 @@ class SwissmedicDiff
467
377
  }
468
378
  end
469
379
 
470
- def rows_to_skip(spreadsheet)
471
- # Packungen.xls of swissmedic before October 2013 had 3 leading rows
472
- # Packungen.xls of swissmedic after October 2013 have 4 leading rows
380
+ def rows_to_skip(rows)
473
381
  j = 0
474
382
  while true
475
- cell = spreadsheet.worksheet(0).row(j)[0]
476
- cell = cell.value if cell.is_a?(RubyXL::Cell)
383
+ cell = rows[j][0]
477
384
  break if cell.respond_to?(:to_i) and cell.to_i != 0
478
385
  j += 1
479
386
  end
480
387
  j
481
388
  end
482
-
483
389
  end
484
390
  include Diff
485
391
  end
data/lib/version.rb CHANGED
@@ -1,3 +1,3 @@
1
1
  class SwissmedicDiff
2
- VERSION = '0.2.9'
2
+ VERSION = "0.3.0"
3
3
  end