gtin2atc 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,252 @@
1
+ # encoding: utf-8
2
+ # Copied and only using BAG SwissIndex from oddb2xml/downloader.rb
3
+ require 'mechanize'
4
+ require 'zip'
5
+ require 'savon'
6
+
7
+ module Gtin2atc
8
# Mixin offering a private helper that downloads @url to a local file.
# The including object must provide @agent (responds to #get), @url and
# a #retrievable? method deciding whether a timed-out request is retried.
module DownloadMethod
  private

  # Downloads @url into +file+ (unless Gtin2atc.skip_download(file) is truthy)
  # and returns the content of +file+.
  #
  # file   - path of the local file to write and read back
  # option - mode string handed to File.open when reading the file back
  #
  # Returns the file content as a String.
  # Re-raises Timeout::Error / Errno::ETIMEDOUT once #retrievable? says no.
  def download_as(file, option='r')
    tempFile = File.join(WorkDir, File.basename(file))
    file2save = File.join(Util.get_archive, File.basename(file))
    Gtin2atc.log "download_as file #{file2save} via #{tempFile} from #{@url}"
    data = nil
    FileUtils.rm_f(tempFile, :verbose => false)
    if Gtin2atc.skip_download(file)
      # Block form closes the handle; the original left it open in this branch.
      data = File.open(file, option) { |io| io.read }
    else
      begin
        response = @agent.get(@url)
        response.save_as(file)
        response = nil # win
        data = File.open(file, option) { |io| io.read }
      rescue Timeout::Error, Errno::ETIMEDOUT
        retrievable? ? retry : raise
      ensure
        Gtin2atc.download_finished(tempFile)
      end
    end
    data
  end
end
36
# Base class of all downloaders. Holds the URL, a retry budget and
# (via #init) a configured Mechanize agent.
class Downloader
  attr_reader :type

  # options - stored in @options (not read by this class itself)
  # url     - source URL; subclasses may fill in a default inside #init
  def initialize(options={}, url=nil)
    @options = options
    @url = url
    @retry_times = 3
    HTTPI.log = false # disable httpi warning
    Gtin2atc.log "Downloader from #{@url} for #{self.class}"
    init
  end

  # Creates the Mechanize agent used for HTTP downloads.
  def init
    @agent = Mechanize.new
    @agent.user_agent = 'Mozilla/5.0 (X11; Linux x86_64; rv:16.0) Gecko/20100101 Firefox/16.0'
    @agent.redirect_ok = true
    @agent.redirection_limit = 5
    @agent.follow_meta_refresh = true
    # On Windows without SSL_CERT_FILE, use the CA bundle referenced relative to this file.
    if RUBY_PLATFORM =~ /mswin|mingw|bccwin|cygwin/i && ENV['SSL_CERT_FILE'].nil?
      cert_store = OpenSSL::X509::Store.new
      cert_store.add_file(File.expand_path('../../../tools/cacert.pem', __FILE__))
      @agent.cert_store = cert_store
    end
    Util.debug_msg "Downloader @agent ist #{@agent}"
  end

  protected

  # Consumes one retry: sleeps 5 seconds and returns true while retries are
  # left, returns false (without sleeping) once the budget is used up.
  def retrievable?
    if @retry_times > 0
      sleep 5
      @retry_times -= 1
      true
    else
      false
    end
  end

  # Returns the content of the first archive-dir file whose path matches
  # +target+; otherwise extracts the matching entries of +zipfile+ into the
  # archive dir and returns the extracted content ('' if nothing matched).
  #
  # target  - Regexp matched against file paths / zip entry names
  # zipfile - path of the zip archive to fall back to
  def read_xml_from_zip(target, zipfile)
    # File.exist? instead of File.exists?, which was removed in Ruby 3.2.
    Gtin2atc.log "read_xml_from_zip target is #{target} zip: #{zipfile} #{File.exist?(zipfile)}"
    entry = Dir.glob(File.join(Util.get_archive, '*')).find { |name| target.match(name) }
    if entry
      dest = "#{Util.get_archive}/#{File.basename(entry)}"
      if File.exist?(dest)
        Gtin2atc.log "read_xml_from_zip return content of #{dest} #{File.size(dest)} bytes "
        return IO.read(dest)
      else
        Gtin2atc.log "read_xml_from_zip could not read #{dest}"
      end
    else
      Gtin2atc.log "read_xml_from_zip could not find #{target.to_s}"
    end
    xml = ''
    if RUBY_PLATFORM =~ /mswin|mingw|bccwin|cygwin/i
      # Windows: read the entry in 1 KiB chunks and close the stream explicitly.
      Zip::File.open(zipfile) do |zipFile|
        zipFile.each do |zip_entry|
          if zip_entry.name =~ target
            Gtin2atc.log "read_xml_from_zip reading #{__LINE__}: #{zip_entry.name}"
            io = zip_entry.get_input_stream
            until io.eof?
              bytes = io.read(1024)
              xml << bytes
              bytes = nil
            end
            io.close if io.respond_to?(:close)
            dest = "#{Util.get_archive}/#{File.basename(zip_entry.name)}"
            File.open(dest, 'w+') { |f| f.write xml }
            Gtin2atc.log "read_xml_from_zip saved as #{dest}"
          end
        end
      end
    else
      Zip::File.foreach(zipfile) do |zip_entry|
        if zip_entry.name =~ target
          Gtin2atc.log "read_xml_from_zip #{__LINE__}: reading #{zip_entry.name}"
          dest = "#{Util.get_archive}/#{File.basename(zip_entry.name)}"
          zip_entry.get_input_stream { |io| xml = io.read }
          File.open(dest, 'w+') { |f| f.write xml }
          Gtin2atc.log "read_xml_from_zip saved as #{dest}"
        end
      end
    end
    xml
  end
end
118
# Downloads XMLPublications.zip from the BAG and returns the Preparations.xml found in it.
class BagXmlDownloader < Downloader
  # Sets up the agent via super and falls back to the default BAG URL.
  def init
    super
    @url ||= 'http://bag.e-mediat.net/SL2007.Web.External/File.axd?file=XMLPublications.zip'
  end

  # Returns the URL the data is fetched from.
  def origin
    @url
  end

  # Fetches XMLPublications.zip into WorkDir unless a copy younger than
  # 24 hours exists there, then returns the content of Preparations.xml.
  #
  # NOTE(review): the zip is saved under WorkDir but read from
  # Util.get_archive; presumably both point at the same directory or
  # Gtin2atc.download_finished moves the file - verify.
  def download
    file = File.join(WorkDir, 'XMLPublications.zip')
    Gtin2atc.log "BagXmlDownloader #{__LINE__}: #{file} from #{@url}"
    # File.exist? instead of File.exists?, which was removed in Ruby 3.2.
    diff_hours = File.exist?(file) ? ((Time.now - File.ctime(file)).to_i / 3600) : nil
    if diff_hours && diff_hours < 24
      Util.debug_msg "Skip download of #{file} as only #{diff_hours} hours old"
    else
      FileUtils.rm_f(file, :verbose => true)
      begin
        response = @agent.get(@url)
        response.save_as(file)
        response = nil # win
      rescue Timeout::Error, Errno::ETIMEDOUT
        retrievable? ? retry : raise
      ensure
        Gtin2atc.download_finished(file)
      end
    end
    read_xml_from_zip(/Preparations.xml/, File.join(Util.get_archive, File.basename(file)))
  end
end
147
# Downloads the Swissmedic package list (xlsx) linked from the Swissmedic web page.
class SwissmedicDownloader < Downloader
  # type - accepted for interface compatibility; @type is always :package.
  def initialize(type=:orphan)
    @type = :package
    @xpath = "//div[@id='sprungmarke10_7']//a[@title='Excel-Version Zugelassene Verpackungen*']"
    @url = "http://www.swissmedic.ch/arzneimittel/00156/00221/00222/00230/index.html?lang=de"
    super({}, @url)
  end

  # Returns the URL the data is fetched from.
  def origin
    @url
  end

  # Builds a Savon client for @url. #download does not use @client; it
  # creates its own Mechanize agent.
  # NOTE(review): :SSLv3 is disabled in current OpenSSL builds - confirm
  # whether this client is still needed before changing it.
  def init
    config = {
      :log_level    => :info,
      :log          => false, # $stdout
      :raise_errors => true,
      :ssl_version  => :SSLv3,
      :wsdl         => @url
    }
    @client = Savon::Client.new(config)
  end

  # Downloads the xlsx unless a copy younger than 24 hours exists.
  #
  # Returns the absolute path of the "latest" file. If the page holds no
  # link matching @xpath, nothing is saved and the path is still returned.
  # Re-raises Timeout::Error / Errno::ETIMEDOUT once #retrievable? says no.
  def download
    file2save, _dated = Gtin2atc::Util.get_latest_and_dated_name("swissmedic_package", '.xlsx')
    # File.exist? instead of File.exists?, which was removed in Ruby 3.2.
    diff_hours = File.exist?(file2save) ? ((Time.now - File.ctime(file2save)).to_i / 3600) : nil
    if diff_hours && diff_hours < 24
      Util.debug_msg "Skip download of #{file2save} as only #{diff_hours} hours old"
      return File.expand_path(file2save)
    end
    Util.debug_msg "Must download #{file2save} #{File.expand_path(file2save)}"
    begin
      @agent = Mechanize.new
      page = @agent.get(@url)
      if link_node = page.search(@xpath).first
        link = Mechanize::Page::Link.new(link_node, @agent, page)
        response = link.click
        response.save_as(file2save)
        response = nil # win
      end
      return File.expand_path(file2save)
    rescue Timeout::Error, Errno::ETIMEDOUT
      retrievable? ? retry : raise
    ensure
      Gtin2atc.download_finished(file2save, false)
    end
  end
end
192
+
193
# Downloads the complete SwissIndex (Pharma or NonPharma) list via its SOAP service.
class SwissIndexDownloader < Downloader
  # options - handed to Downloader#initialize
  # type    - :pharma selects the 'Pharma' service, anything else 'NonPharma'
  # lang    - language code sent in the SOAP request body
  def initialize(options={}, type=:pharma, lang='DE')
    @type = (type == :pharma ? 'Pharma' : 'NonPharma')
    @lang = lang
    @url = "https://index.ws.e-mediat.net/Swissindex/#{@type}/ws_#{@type}_V101.asmx?WSDL"
    super(options, @url)
  end

  # Returns the WSDL URL of the service.
  def origin
    @url
  end

  # Builds the Savon SOAP client for the WSDL at @url.
  # NOTE(review): :SSLv3 is disabled in current OpenSSL builds - confirm the
  # protocol the service accepts before changing it.
  def init
    config = {
      :log_level    => :info,
      :log          => false, # $stdout
      :raise_errors => true,
      :ssl_version  => :SSLv3,
      :wsdl         => @url
    }
    @client = Savon::Client.new(config)
  end

  # Returns the XML of the download_all SOAP call, using a cached copy when
  # one younger than 24 hours exists.
  #
  # Raises StandardError when the response carries no XML. An unsuccessful
  # response is treated like a timeout and retried while #retrievable?
  # allows. Exits the process on HTTPI::SSLError.
  def download
    begin
      file2save, _dated = Gtin2atc::Util.get_latest_and_dated_name("swissindex_#{@type}_#{@lang}", '.xml')
      # File.exist? instead of File.exists?, which was removed in Ruby 3.2.
      diff_hours = File.exist?(file2save) ? ((Time.now - File.ctime(file2save)).to_i / 3600) : nil
      if diff_hours && diff_hours < 24
        Util.debug_msg "Skip download of #{file2save} as only #{diff_hours} hours old"
        return IO.read(file2save)
      end
      FileUtils.rm_f(file2save, :verbose => false)
      soap = <<XML
<?xml version="1.0" encoding="utf-8"?>
<soap:Envelope xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xmlns:xsd="http://www.w3.org/2001/XMLSchema" xmlns:soap="http://schemas.xmlsoap.org/soap/envelope/">
<soap:Body>
<lang xmlns="http://swissindex.e-mediat.net/Swissindex#{@type}_out_V101">#{@lang}</lang>
</soap:Body>
</soap:Envelope>
XML
      response = @client.call(:download_all, :xml => soap)
      if response.success?
        if xml = response.to_xml
          response = nil # win
          # Create the directory the file is written to (the original
          # created WorkDir, which is not where file2save lives).
          FileUtils.makedirs(File.dirname(file2save))
          File.open(file2save, 'w+') { |file| file.write xml }
          Util.debug_msg "Swissindex download successful"
        else
          # received broken data or internal error
          raise StandardError
        end
      else
        raise Timeout::Error
      end
    rescue HTTPI::SSLError
      exit # catch me in Cli class
    rescue Timeout::Error, Errno::ETIMEDOUT
      retrievable? ? retry : raise
    end
    Util.debug_msg "Download of #{file2save} finished"
    xml
  end
end
252
+ end
@@ -0,0 +1,39 @@
1
+ # encoding: utf-8
2
+ require 'optparse'
3
+
4
module Gtin2atc

  # Command line options of the gtin2atc executable.
  class Options
    attr_reader :parser, :opts

    # Returns a fresh Hash with the default value of every option.
    def self.default_opts
      {
        :log     => false,
        :compare => false,
      }
    end

    # Returns the usage/help text shown for -h / --help.
    def self.help
      <<EOS
#{$0} ver.#{Gtin2atc::VERSION}
Usage:
  gtin2atc [--compare] [--log] [file_with_gtin or gtin or pharmacode] [gtin..]
    If file_with_gtin is given only the GTIN (or pharmacode) (one per line) is output.
    If no file or gtin is given, all GTINs will be processed.
    --log       log important actions

    --compare   download and compare GTIN/ATC_code from BAG, SwissIndex and RefData
                For each GTIN we will output a message if it can be only found in the
                * BAG http://bag.e-mediat.net/SL2007.Web.External/File.axd?file=XMLPublications.zip
                * SwissIndex e-mediat: http://swissindex.e-mediat.net/SwissindexPharma_out_V101
                * or if the ATC_Code does not match
    -h, --help  Show this help message.
EOS
    end

    # Creates the OptionParser; parsing a switch sets the matching key of
    # #opts to true. -h / --help prints the help text and exits.
    def initialize
      @parser = OptionParser.new
      @opts   = Options.default_opts
      @parser.on('--log')     { |_v| @opts[:log] = true }
      @parser.on('--compare') { |_v| @opts[:compare] = true }
      @parser.on_tail('-h', '--help') { puts Options.help; exit }
    end
  end
end
@@ -0,0 +1,47 @@
1
require 'date'
require 'fileutils'
2
module Gtin2atc
  # Shared helpers: archive directory, logging and file-name conventions.
  # State is kept in class variables, so it is global to the process.
  class Util
    @@archive = File.expand_path(File.join(__FILE__, '../../..'))
    @@today   = Date.today # fixed when this file is loaded
    @@logging = false

    # Returns the Date captured when this file was loaded.
    def self.get_today
      @@today
    end

    # Sets the directory used for archived downloads and the log file.
    def self.set_archive_dir(archiveDir)
      @@archive = archiveDir
    end

    # Returns the directory used for archived downloads and the log file.
    def self.get_archive
      @@archive
    end

    # Enables (truthy) or disables (falsy) logging.
    def self.set_logging(default)
      @@logging = default
    end

    # Prints +msg+ to stdout and, when logging is enabled, appends a
    # timestamped line to log.log in the archive directory.
    def self.info(msg)
      puts msg
      return unless @@logging
      Util.init
      @@checkLog.puts("#{Time.now}: #{msg}")
      @@checkLog.flush # make the line visible on disk immediately
    end

    # Opens log.log (append mode) in the archive directory once;
    # does nothing while logging is disabled.
    def self.init
      return unless @@logging
      if !defined?(@@checkLog) || !@@checkLog
        name = File.join(@@archive, 'log.log')
        FileUtils.makedirs(@@archive)
        @@checkLog = File.open(name, 'a+')
      end
    end

    # Prints a timestamped +msg+ to stdout when logging is enabled.
    # Always returns nil.
    #
    # NOTE(review): the original contained further statements writing to the
    # log file, but they sat behind an unconditional return and never ran.
    # They are dropped here; confirm whether writing to the file was intended.
    def self.debug_msg(msg)
      return unless @@logging
      Util.init # kept: opens/creates the log file as before
      $stdout.puts "#{Time.now}: #{msg}"
      $stdout.flush
      nil
    end

    # Returns two absolute paths inside the archive directory:
    # "<keyword>-latest<extension>" and "<keyword>-YYYY.MM.DD<extension>"
    # (date taken from Util.get_today).
    def self.get_latest_and_dated_name(keyword, extension)
      return File.expand_path(File.join(Util.get_archive, keyword + '-latest' + extension)),
             File.expand_path(File.join(Util.get_archive, Util.get_today.strftime("#{keyword}-%Y.%m.%d" + extension)))
    end
  end

  # Hook called by the downloaders after a download attempt.
  # Currently a no-op; both arguments are ignored.
  def self.download_finished(file, remove_file = true)
  end
end
@@ -0,0 +1,3 @@
1
# Namespace of the gtin2atc gem.
module Gtin2atc
  # Version string of this release.
  VERSION = '0.1.0'
end
@@ -0,0 +1,250 @@
1
+ # Definitions for the sax-machine parser to parse
2
+ # Preparations.xml
3
+
4
+ require 'sax-machine'
5
+
6
# sax-machine mapping of one price entry: value, validity date, division
# and the price-type / price-change descriptions (De/Fr/It).
class PriceElement
  include SAXMachine

  %i[
    Price
    ValidFromDate
    DivisionDescription
    PriceTypeCode
    PriceTypeDescriptionDe
    PriceTypeDescriptionFr
    PriceTypeDescriptionIt
    PriceChangeTypeDescriptionDe
    PriceChangeTypeDescriptionFr
    PriceChangeTypeDescriptionIt
  ].each { |tag| element tag }
end
19
+
20
# sax-machine mapping of a Status element: dates, SL status type and FlagApd.
class StatusElement
  include SAXMachine

  %i[
    IntegrationDate
    ValidFromDate
    ValidThruDate
    StatusTypeCodeSl
    StatusTypeDescriptionSl
    FlagApd
  ].each { |tag| element tag }
end
29
+
30
# sax-machine mapping of a Prices element; both children are parsed as PriceElement.
class PricesElement
  include SAXMachine

  element :ExFactoryPrice, class: PriceElement
  element :PublicPrice,    class: PriceElement
end
35
+
36
# sax-machine mapping of a single Limitation: code, type, value, niveau,
# descriptions (De/Fr/It) and validity dates.
class LimitationElement
  include SAXMachine

  %i[
    LimitationCode
    LimitationType
    LimitationValue
    LimitationNiveau
    DescriptionDe
    DescriptionFr
    DescriptionIt
    ValidFromDate
    ValidThruDate
  ].each { |tag| element tag }
end
48
+
49
# sax-machine mapping of a Limitations container: collects every Limitation child.
class LimitationsElement
  include SAXMachine

  elements :Limitation, class: LimitationElement
end
53
+
54
# sax-machine mapping of a single PointLimitation: points, packs and validity dates.
class PointLimitationElement
  include SAXMachine

  %i[Points Packs ValidFromDate ValidThruDate].each { |tag| element tag }
end
61
+
62
# sax-machine mapping of a PointLimitations container: collects every PointLimitation child.
class PointLimitationsElement
  include SAXMachine

  elements :PointLimitation, class: PointLimitationElement
end
66
+
67
# sax-machine mapping of a Pack: three XML attributes, plain child elements
# and the nested Limitations / PointLimitations / Prices structures.
class PackContent
  include SAXMachine

  %i[ProductKey Pharmacode PackId].each { |name| attribute name }

  %i[
    DescriptionDe
    DescriptionFr
    DescriptionIt
    SwissmedicCategory
    SwissmedicNo8
    FlagNarcosis
    FlagModal
    BagDossierNo
    GTIN
  ].each { |tag| element tag }

  element :Limitations,      class: LimitationsElement
  element :PointLimitations, class: PointLimitationsElement
  element :Prices,           class: PricesElement
end
85
+
86
# sax-machine mapping of a Packs container: collects every Pack child.
class PacksElement
  include SAXMachine

  elements :Pack, class: PackContent
end
90
+
91
# sax-machine mapping of an ItCode: the Code attribute, descriptions
# (De/Fr/It) and nested Limitations.
class ItCodeContent
  include SAXMachine

  attribute :Code
  %i[DescriptionDe DescriptionFr DescriptionIt].each { |tag| element tag }
  element :Limitations, class: LimitationsElement
end
99
+
100
# sax-machine wrapper exposing a single ItCode child as ItCodeContent.
class ItCodeEntry
  include SAXMachine

  element :ItCode, class: ItCodeContent
end
104
+
105
# sax-machine mapping of an ItCodes container: collects every ItCode child.
# Attributes are handled as suggested by https://github.com/pauldix/sax-machine/issues/30
class ItCodesElement
  include SAXMachine

  elements :ItCode, class: ItCodeContent
end
110
+
111
# sax-machine mapping of a Substance: DescriptionLa plus quantity and unit.
class SubstanceElement
  include SAXMachine

  %i[DescriptionLa Quantity QuantityUnit].each { |tag| element tag }
end
117
+
118
# sax-machine mapping of a Substances container: collects every Substance child.
class SubstancesElement
  include SAXMachine

  elements :Substance, class: SubstanceElement
end
122
+
123
# sax-machine mapping of a Preparation: names, status, descriptions,
# ATC code, flags, comments and the nested limitation / substance /
# pack / IT-code structures.
class PreparationContent
  include SAXMachine

  attribute :ProductCommercial

  %i[NameFr NameDe NameIt].each { |tag| element tag }
  element :Status, class: StatusElement

  %i[
    Dummy
    DescriptionDe
    DescriptionFr
    DescriptionIt
    AtcCode
    SwissmedicNo5
    FlagItLimitation
    OrgGenCode
    FlagSB20
    CommentDe
    CommentFr
    CommentIt
    VatInEXF
  ].each { |tag| element tag }

  element :Limitations, class: LimitationsElement
  element :Substances,  class: SubstancesElement
  element :Packs,       class: PacksElement
  element :ItCodes,     class: ItCodesElement
end
148
+
149
# sax-machine wrapper exposing a single Preparation child as PreparationContent.
class PreparationEntry
  include SAXMachine

  element :Preparation, class: PreparationContent
end
153
+
154
# sax-machine mapping of the Preparations element: its ReleaseDate
# attribute and every Preparation child.
class PreparationsContent
  include SAXMachine

  attribute :ReleaseDate
  elements :Preparation, class: PreparationContent
end
159
+
160
# sax-machine entry point for a document whose Preparations element is
# parsed as PreparationsContent.
class PreparationsEntry
  include SAXMachine

  element :Preparations, class: PreparationsContent
end
164
+
165
+
166
# sax-machine mapping of a COMP element: NAME and GLN children.
class CompElement
  include SAXMachine

  %i[NAME GLN].each { |tag| element tag }
end
171
+
172
# NOTE: ItemContent, PharmaContent and PharmaEntry were each defined twice.
# The second definitions only reopened the classes, so the effective mapping
# was the union of both. Each class is now defined once with that union.

# sax-machine mapping of an ITEM: the DT attribute, plain children and the
# nested COMP element.
# NOTE(review): both SDATE and STDATE were declared (one per original
# definition); both are kept - confirm which tag the feed really uses.
class ItemContent
  include SAXMachine

  attribute :DT
  element :GTIN
  element :PHAR
  element :STATUS
  element :SDATE
  element :STDATE
  element :LANG
  element :DSCR
  element :ADDSCR
  element :ATC
  element :COMP, :class => CompElement
end

# sax-machine mapping of a PHARMA / NONPHARMA element: its
# CREATION_DATETIME attribute and every ITEM child.
class PharmaContent
  include SAXMachine

  attribute :CREATION_DATETIME
  elements :ITEM, :class => ItemContent
end

# sax-machine entry point: a NONPHARMA element is exposed under the same
# accessor (PHARMA) as a PHARMA element.
class PharmaEntry
  include SAXMachine

  element :CREATION_DATETIME
  element :NONPHARMA, :as => :PHARMA, :class => PharmaContent
  element :PHARMA, :class => PharmaContent
end
224
+
225
# sax-machine mapping of a medicalInformation element: type / version / lang
# attributes plus title, authHolder, style and content children.
class MedicalInformationContent
  include SAXMachine

  %i[type version lang].each { |name| attribute name }
  %i[title authHolder style content].each { |tag| element tag }
end
235
+
236
# sax-machine wrapper exposing a single medicalInformation child.
class MedicalInformationEntry
  include SAXMachine

  element :medicalInformation, class: MedicalInformationContent
end
240
+
241
# sax-machine mapping of a medicalInformations container: collects every
# medicalInformation child.
class MedicalInformationsContent
  include SAXMachine

  elements :medicalInformation, class: MedicalInformationContent
end
245
+
246
# sax-machine entry point for a document whose medicalInformations element
# is parsed as MedicalInformationsContent.
class MedicalInformationsEntry
  include SAXMachine

  element :medicalInformations, class: MedicalInformationsContent
end
250
+
data/lib/gtin2atc.rb ADDED
@@ -0,0 +1,9 @@
1
+ require "gtin2atc/version"
2
+ require "gtin2atc/util"
3
+
4
# Top-level namespace of the gem.
module Gtin2atc
  # Current working directory at the time this file is loaded.
  WorkDir = Dir.pwd

  class << self
    # Forwards +msg+ to Util.debug_msg and returns its result.
    def log(msg)
      Util.debug_msg(msg)
    end
  end
end
data/readme.textile ADDED
@@ -0,0 +1,19 @@
1
+ h2. gtin2atc
2
+
3
+ h3. usage
4
+
5
+ There are three different usages:
6
+ # gtin2atc --compare
7
+ Reads the GTIN/ATC-Code from BAG, Swissmedic, RefData and compares them for consistency.
8
+ When adding --log it outputs a line for each difference/problem found.
9
+ # gtin2atc gtin1 [gtinx]
10
+ Search for ATC-Code with GTIN(s) passed on command line. Outputs a file gtin2atc.csv
11
+ # gtin2atc file_with_one_gtin_per_line
12
+ Reads file_with_one_gtin_per_line, then outputs a file gtin2atc.csv with info about each GTIN found in the input file
13
+
14
+ h3. background
15
+
16
+ We get the data from three sources
17
+ * e-mediat: http://swissindex.e-mediat.net/SwissindexPharma_out_V101
18
+ * BAG: Preparations.xml inside http://bag.e-mediat.net/SL2007.Web.External/File.axd?file=XMLPublications.zip
19
+ * Swissmedic: http://www.swissmedic.ch/arzneimittel/00156/00221/00222/00230/index.html?lang=de