oddb2xml 1.1.0 → 1.1.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/History.txt +5 -0
- data/bin/oddb2xml +4 -2
- data/lib/oddb2xml/builder.rb +45 -1
- data/lib/oddb2xml/cli.rb +26 -8
- data/lib/oddb2xml/downloader.rb +50 -12
- data/lib/oddb2xml/extractor.rb +28 -0
- data/lib/oddb2xml/version.rb +1 -1
- data/oddb2xml.gemspec +1 -1
- data/spec/spec_helper.rb +29 -0
- metadata +6 -6
data/History.txt
CHANGED
data/bin/oddb2xml
CHANGED
@@ -15,6 +15,7 @@ Usage:
|
|
15
15
|
oddb2xml [option]
|
16
16
|
-c F, --compress=F Compress format F. {tar.gz|zip}
|
17
17
|
-a T, --append=T Additional target. T, only 'nonpharma' is available.
|
18
|
+
-o O, --optional=O Optional output. O, only 'fi' is available.
|
18
19
|
-t S, --tag-suffix=S XML tag suffix S. Default is none. [A-z0-9_]
|
19
20
|
If S is given, it is also used as prefix of filename.
|
20
21
|
-h, --help Show this help message.
|
@@ -28,8 +29,9 @@ opts = {
|
|
28
29
|
:tag_suffix => nil,
|
29
30
|
}
|
30
31
|
|
31
|
-
parser.on('-c v', '--compress v',
|
32
|
-
parser.on('-a v', '--append v',
|
32
|
+
parser.on('-c v', '--compress v', /tar\.gz|zip/) {|v| opts[:compress_ext] = v }
|
33
|
+
parser.on('-a v', '--append v', /nonpharma/) {|v| opts[:nonpharma] = true }
|
34
|
+
parser.on('-o v', '--optional v', /fi/) {|v| opts[:fi] = true }
|
33
35
|
# Tag suffix: letters, digits and underscore only. The value is upcased and
# stored in opts[:tag_suffix].
# The character class spells out A-Z and a-z because the range `A-z` also
# covers `[ \ ] ^ _` and the backtick; \A and \z anchor the whole argument,
# whereas ^ and $ only anchor a single line.
parser.on('-t v', '--tag-suffix v', /\A[A-Za-z0-9_]*\z/) {|v| opts[:tag_suffix] = v.upcase }
|
34
36
|
parser.on_tail('-h', '--help') { puts help; exit }
|
35
37
|
|
data/lib/oddb2xml/builder.rb
CHANGED
@@ -17,12 +17,14 @@ end
|
|
17
17
|
|
18
18
|
module Oddb2xml
|
19
19
|
class Builder
|
20
|
-
attr_accessor :subject, :index, :items, :
|
20
|
+
attr_accessor :subject, :index, :items, :infos,
|
21
|
+
:orphans, :fridges,
|
21
22
|
:tag_suffix
|
22
23
|
def initialize
|
23
24
|
@subject = nil
|
24
25
|
@index = {}
|
25
26
|
@items = {}
|
27
|
+
@infos = {}
|
26
28
|
@orphans = []
|
27
29
|
@fridges = []
|
28
30
|
@tag_suffix = nil
|
@@ -446,5 +448,47 @@ module Oddb2xml
|
|
446
448
|
end
|
447
449
|
_builder.to_xml
|
448
450
|
end
|
451
|
+
# Builds the "fi" XML document from @infos.
#
# For each of the languages 'de' and 'fr', one ART element is emitted per
# entry of @infos[lang]. Every entry is read as a Hash with the keys :name,
# :owner, :monid and :paragraph; empty values are left out, except that the
# paragraph element itself is always written (only its CDATA is optional).
# A trailing RESULT element carries the total number of entries in
# NBR_RECORD.
#
# A language without an entry in @infos counts as an empty list, so the
# method no longer fails with NoMethodError on nil in that case.
#
# Returns whatever the builder's to_xml returns.
def build_fi
  _builder = Nokogiri::XML::Builder.new(:encoding => 'utf-8') do |xml|
    xml.doc.tag_suffix = @tag_suffix
    # One timestamp is shared by all three date attributes of the root.
    datetime = Time.new.strftime('%FT%T.%7N%z')
    xml.KOMPENDIUM(
      'xmlns:xsd'         => 'http://www.w3.org/2001/XMLSchema',
      'xmlns:xsi'         => 'http://www.w3.org/2001/XMLSchema-instance',
      'xmlns'             => 'http://wiki.oddb.org/wiki.php?pagename=Swissmedic.Datendeklaration',
      'CREATION_DATETIME' => datetime,
      'PROD_DATE'         => datetime,
      'VALID_DATE'        => datetime
    ) {
      length = 0
      %w[de fr].each do |lang|
        # Fall back to an empty list when nothing was collected for lang.
        infos = @infos[lang] || []
        length += infos.length
        infos.each do |info|
          xml.ART(
            'MONTYPE' => 'fi', # only
            'LANG'    => lang.upcase,
            'DT'      => ''
          ) {
            unless info[:name].empty?
              xml.name { xml.p info[:name] }
            end
            unless info[:owner].empty?
              xml.owner { xml.p info[:owner] }
            end
            xml.monid info[:monid] unless info[:monid].empty?
            xml.paragraph { xml.cdata info[:paragraph] unless info[:paragraph].empty? }
          }
        end
      end
      xml.RESULT {
        xml.OK_ERROR   'OK'
        xml.NBR_RECORD length
        xml.ERROR_CODE ''
        xml.MESSAGE    ''
      }
    }
  end
  _builder.to_xml
end
|
449
493
|
end
|
450
494
|
end
|
data/lib/oddb2xml/cli.rb
CHANGED
@@ -10,12 +10,14 @@ module Oddb2xml
|
|
10
10
|
class Cli
|
11
11
|
SUBJECTS = %w[product article]
|
12
12
|
ADDITIONS = %w[substance limitation]
|
13
|
+
OPTIONALS = %w[fi]
|
13
14
|
LANGUAGES = %w[DE FR] # EN does not exist
|
14
15
|
def initialize(args)
|
15
16
|
@options = args
|
16
17
|
@mutex = Mutex.new
|
17
18
|
@items = {} # Items from Preparations.xml in BAG
|
18
19
|
@index = {} # Base index from swissINDEX
|
20
|
+
@infos = {} # FI from SwissmedicInfo
|
19
21
|
@orphans = [] # Orphaned drugs from Swissmedic xls
|
20
22
|
@fridges = [] # ReFridge drugs from Swissmedic xls
|
21
23
|
LANGUAGES.each do |lang|
|
@@ -25,6 +27,14 @@ module Oddb2xml
|
|
25
27
|
def run
|
26
28
|
threads = []
|
27
29
|
# swissmedic
|
30
|
+
threads << Thread.new do
|
31
|
+
downloader = SwissmedicInfoDownloader.new
|
32
|
+
xml = downloader.download
|
33
|
+
@mutex.synchronize do
|
34
|
+
hsh = SwissmedicInfoExtractor.new(xml).to_hash
|
35
|
+
@infos = hsh
|
36
|
+
end
|
37
|
+
end
|
28
38
|
[:orphans, :fridges].each do |type|
|
29
39
|
threads << Thread.new do
|
30
40
|
downloader = SwissmedicDownloader.new
|
@@ -68,14 +78,17 @@ module Oddb2xml
|
|
68
78
|
LANGUAGES.each do |lang|
|
69
79
|
index[lang] = {} unless index[lang]
|
70
80
|
types.each do |type|
|
71
|
-
index[lang].merge!(@index[lang][type])
|
81
|
+
index[lang].merge!(@index[lang][type]) if @index[lang][type]
|
72
82
|
end
|
73
83
|
end
|
74
|
-
builder.subject
|
75
|
-
builder.index
|
76
|
-
builder.items
|
77
|
-
|
78
|
-
builder.
|
84
|
+
builder.subject = sbj
|
85
|
+
builder.index = index
|
86
|
+
builder.items = @items
|
87
|
+
# additions
|
88
|
+
builder.orphans = @orphans
|
89
|
+
builder.fridges = @fridges
|
90
|
+
# optionals
|
91
|
+
builder.infos = @infos
|
79
92
|
builder.tag_suffix = @options[:tag_suffix]
|
80
93
|
end
|
81
94
|
xml = builder.to_xml
|
@@ -102,7 +115,9 @@ module Oddb2xml
|
|
102
115
|
def files
|
103
116
|
unless @_files
|
104
117
|
@_files = {}
|
105
|
-
(ADDITIONS + SUBJECTS)
|
118
|
+
_files = (ADDITIONS + SUBJECTS)
|
119
|
+
_files += OPTIONALS if @options[:fi]
|
120
|
+
_files.each do|sbj|
|
106
121
|
@_files[sbj] = "#{prefix}_#{sbj.to_s}.xml"
|
107
122
|
end
|
108
123
|
end
|
@@ -117,7 +132,10 @@ module Oddb2xml
|
|
117
132
|
lines << lang
|
118
133
|
types.each do |type|
|
119
134
|
key = (type == :nonpharma ? 'NonPharma' : 'Pharma')
|
120
|
-
|
135
|
+
if @index[lang][type]
|
136
|
+
lines << sprintf(
|
137
|
+
"\t#{key} products: %i", @index[lang][type].values.length)
|
138
|
+
end
|
121
139
|
end
|
122
140
|
end
|
123
141
|
puts lines.join("\n")
|
data/lib/oddb2xml/downloader.rb
CHANGED
@@ -58,21 +58,19 @@ module Oddb2xml
|
|
58
58
|
super(url)
|
59
59
|
end
|
60
60
|
def init
|
61
|
-
|
62
|
-
|
63
|
-
|
64
|
-
|
65
|
-
|
61
|
+
@config = {
|
62
|
+
:log_level => :info,
|
63
|
+
:log => false, # $stdout
|
64
|
+
:raise_errors => true,
|
65
|
+
:ssl_verify_mode => :none,
|
66
|
+
:wsdl => @url
|
67
|
+
}
|
66
68
|
end
|
67
69
|
def download_by(lang = 'DE')
|
68
|
-
client = Savon::Client.new
|
69
|
-
http.auth.ssl.verify_mode = :none
|
70
|
-
wsdl.document = @url
|
71
|
-
end
|
70
|
+
client = Savon::Client.new(@config)
|
72
71
|
begin
|
73
72
|
type = @type
|
74
|
-
|
75
|
-
soap.xml = <<XML
|
73
|
+
soap = <<XML
|
76
74
|
<?xml version="1.0" encoding="utf-8"?>
|
77
75
|
<soap:Envelope xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xmlns:xsd="http://www.w3.org/2001/XMLSchema" xmlns:soap="http://schemas.xmlsoap.org/soap/envelope/">
|
78
76
|
<soap:Body>
|
@@ -80,7 +78,7 @@ module Oddb2xml
|
|
80
78
|
</soap:Body>
|
81
79
|
</soap:Envelope>
|
82
80
|
XML
|
83
|
-
|
81
|
+
response = client.call(:download_all, :xml => soap)
|
84
82
|
if response.success?
|
85
83
|
if xml = response.to_xml
|
86
84
|
return xml
|
@@ -128,4 +126,44 @@ XML
|
|
128
126
|
end
|
129
127
|
end
|
130
128
|
end
|
129
|
+
# Fetches the SwissmedicInfo zip archive through its two-step web form and
# returns the XML document stored inside it.
class SwissmedicInfoDownloader < Downloader
  # Sets the entry URL unless @url has already been assigned.
  def init
    @url ||= "http://download.swissmedicinfo.ch/Accept.aspx?ReturnUrl=%2f"
  end

  # Submits the form 'Form1' twice (button 'ctl00$MainContent$btnOK', then
  # 'ctl00$MainContent$BtnYes'), saves the final response as
  # "swissmedic_info.zip" in the current directory and reads the archive
  # entry whose name starts with "AipsDownload_".
  #
  # Returns the entry content as a String ('' when no entry matches), or
  # nil when a NoMethodError occurs, e.g. because an expected form or button
  # is missing from the page. On Timeout::Error the download is retried for
  # as long as retrievable? (defined outside this class) allows; otherwise
  # the error is re-raised. The temporary zip file is always removed.
  def download
    file = "swissmedic_info.zip"
    begin
      response = nil
      agent = Mechanize.new
      agent.ignore_bad_chunking = true
      if home = agent.get(@url)
        form = home.form_with(:id => 'Form1')
        bttn = form.button_with(:name => 'ctl00$MainContent$btnOK')
        if page = form.submit(bttn)
          form = page.form_with(:id => 'Form1')
          bttn = form.button_with(:name => 'ctl00$MainContent$BtnYes')
          response = form.submit(bttn)
        end
      end
      response.save_as file if response
      xml = ''
      Zip::ZipFile.foreach(file) do |entry|
        if entry.name =~ /^AipsDownload_/iu
          entry.get_input_stream{ |io| xml = io.read }
        end
      end
      return xml
    rescue Timeout::Error
      retrievable? ? retry : raise
    rescue NoMethodError
      # Deliberate best effort: a page without the expected form/button
      # yields nil instead of aborting the caller.
      nil
    ensure
      # File.exist? replaces File.exists?, which was removed in Ruby 3.2.
      File.unlink file if File.exist? file
    end
  end
end
|
131
169
|
end
|
data/lib/oddb2xml/extractor.rb
CHANGED
@@ -172,4 +172,32 @@ module Oddb2xml
|
|
172
172
|
end
|
173
173
|
end
|
174
174
|
end
|
175
|
+
# Extracts "fi" entries from the SwissmedicInfo XML held in @xml
# (presumably assigned by the Extractor superclass -- confirm there).
class SwissmedicInfoExtractor < Extractor
  # Collects one Hash per registration number found in each
  # medicalInformation[@type='fi'] element whose lang matches /de|fr/.
  #
  # Returns a Hash keyed by the lang attribute; each value is an Array of
  # Hashes with the keys :name, :owner, :paragraph and :monid. The returned
  # Hash creates an empty Array for any key that is looked up.
  #
  # Elements without a <content> node or without a CDATA child are skipped,
  # as are elements whose 'Section7750' paragraph has no 5-digit number.
  def to_hash
    data = Hash.new{|h,k| h[k] = [] }
    doc = Nokogiri::XML(@xml)
    doc.xpath("//medicalInformations/medicalInformation[@type='fi']").each do |fi|
      lang = fi.attr('lang').to_s
      next unless lang =~ /de|fr/
      # Guard against a missing <content> node instead of calling
      # children on nil.
      container = fi.at_xpath('.//content')
      next unless container
      content = container.children.detect{|child| child.cdata? }
      next unless content
      title = fi.at_xpath('.//title')
      owner = fi.at_xpath('.//authHolder')
      item = {
        :name  => title ? title.text : '',
        :owner => owner ? owner.text : ''
      }
      html = Nokogiri::HTML(content.to_s)
      # all HTML contents without MonTitle and ownerCompany
      item[:paragraph] = "<title><p>#{item[:name]}</p></title>" +
        html.xpath("///div[@class='paragraph']").to_s
      text = html.xpath("///div[@id='Section7750']/p").text
      if text =~ /(\d{5})[,\s]*(\d{5})?/
        [$1, $2].compact.each do |n|
          # Append a copy per number; pushing the same Hash twice would
          # leave every entry with the last :monid.
          data[lang] << item.merge(:monid => n)
        end
      end
    end
    data
  end
end
|
175
203
|
end
|
data/lib/oddb2xml/version.rb
CHANGED
data/oddb2xml.gemspec
CHANGED
@@ -21,7 +21,7 @@ Gem::Specification.new do |gem|
|
|
21
21
|
gem.add_dependency 'archive-tar-minitar'
|
22
22
|
gem.add_dependency 'mechanize'
|
23
23
|
gem.add_dependency 'nokogiri'
|
24
|
-
gem.add_dependency 'savon'
|
24
|
+
gem.add_dependency 'savon', '>= 2.0'
|
25
25
|
gem.add_dependency 'spreadsheet'
|
26
26
|
|
27
27
|
gem.add_development_dependency 'rspec'
|
data/spec/spec_helper.rb
CHANGED
@@ -20,6 +20,7 @@ module ServerMockHelper
|
|
20
20
|
setup_bag_xml_server_mock
|
21
21
|
setup_swiss_index_server_mock
|
22
22
|
setup_swissmedic_server_mock
|
23
|
+
setup_swissmedic_info_server_mock
|
23
24
|
end
|
24
25
|
def setup_bag_xml_server_mock
|
25
26
|
# zip
|
@@ -95,6 +96,34 @@ module ServerMockHelper
|
|
95
96
|
:body => stub_response)
|
96
97
|
end
|
97
98
|
end
|
99
|
+
# Registers two GET stubs for the SwissmedicInfo download host:
# the accept page (served from data/swissmedic_info.html) and the zip
# archive (served from data/swissmedic_info.zip). Both fixture paths are
# resolved relative to this file.
def setup_swissmedic_info_server_mock
  # html (dummy)
  stub_html_url = "http://download.swissmedicinfo.ch/Accept.aspx?ReturnUrl=%2f"
  stub_response = File.read(File.expand_path("../data/swissmedic_info.html", __FILE__))
  stub_request(:get, stub_html_url).
    with(:headers => {
      'Accept' => '*/*',
      'Host'   => 'download.swissmedicinfo.ch',
    }).
    to_return(
      :status  => 200,
      :headers => {'Content-Type' => 'text/html; charset=utf-8'},
      :body    => stub_response)
  # zip
  stub_zip_url  = "http://download.swissmedicinfo.ch/"
  stub_response = File.read(File.expand_path('../data/swissmedic_info.zip', __FILE__))
  stub_request(:get, stub_zip_url).
    with(:headers => {
      'Accept'          => '*/*',
      'Accept-Encoding' => 'gzip,deflate,identity',
      # Must equal the host of stub_zip_url (it was misspelled with a
      # triple "s" before).
      'Host'            => 'download.swissmedicinfo.ch',
    }).
    to_return(
      :status  => 200,
      :headers => {'Content-Type' => 'application/zip; charset=utf-8'},
      :body    => stub_response)
end
|
98
127
|
end
|
99
128
|
|
100
129
|
RSpec.configure do |config|
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: oddb2xml
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 1.1.
|
4
|
+
version: 1.1.1
|
5
5
|
prerelease:
|
6
6
|
platform: ruby
|
7
7
|
authors:
|
@@ -9,11 +9,11 @@ authors:
|
|
9
9
|
autorequire:
|
10
10
|
bindir: bin
|
11
11
|
cert_chain: []
|
12
|
-
date:
|
12
|
+
date: 2013-01-07 00:00:00.000000000 Z
|
13
13
|
dependencies:
|
14
14
|
- !ruby/object:Gem::Dependency
|
15
15
|
name: rdoc
|
16
|
-
requirement: &
|
16
|
+
requirement: &18836140 !ruby/object:Gem::Requirement
|
17
17
|
none: false
|
18
18
|
requirements:
|
19
19
|
- - ~>
|
@@ -21,10 +21,10 @@ dependencies:
|
|
21
21
|
version: '3.10'
|
22
22
|
type: :development
|
23
23
|
prerelease: false
|
24
|
-
version_requirements: *
|
24
|
+
version_requirements: *18836140
|
25
25
|
- !ruby/object:Gem::Dependency
|
26
26
|
name: hoe
|
27
|
-
requirement: &
|
27
|
+
requirement: &18834840 !ruby/object:Gem::Requirement
|
28
28
|
none: false
|
29
29
|
requirements:
|
30
30
|
- - ~>
|
@@ -32,7 +32,7 @@ dependencies:
|
|
32
32
|
version: '2.13'
|
33
33
|
type: :development
|
34
34
|
prerelease: false
|
35
|
-
version_requirements: *
|
35
|
+
version_requirements: *18834840
|
36
36
|
description: ''
|
37
37
|
email:
|
38
38
|
- yasaka@ywesee.com, zdavatz@ywesee.com
|