oddb2xml 1.1.0 → 1.1.1
Sign up to get free protection for your applications and to get access to all the features.
- data/History.txt +5 -0
- data/bin/oddb2xml +4 -2
- data/lib/oddb2xml/builder.rb +45 -1
- data/lib/oddb2xml/cli.rb +26 -8
- data/lib/oddb2xml/downloader.rb +50 -12
- data/lib/oddb2xml/extractor.rb +28 -0
- data/lib/oddb2xml/version.rb +1 -1
- data/oddb2xml.gemspec +1 -1
- data/spec/spec_helper.rb +29 -0
- metadata +6 -6
data/History.txt
CHANGED
data/bin/oddb2xml
CHANGED
@@ -15,6 +15,7 @@ Usage:
|
|
15
15
|
oddb2xml [option]
|
16
16
|
-c F, --compress=F Compress format F. {tar.gz|zip}
|
17
17
|
-a T, --append=T Additional target. T, only 'nonpharma' is available.
|
18
|
+
-o O, --optional=O Optional output. O, only 'fi' is available.
|
18
19
|
-t S, --tag-suffix=S XML tag suffix S. Default is none. [A-z0-9_]
|
19
20
|
If S is given, it is also used as prefix of filename.
|
20
21
|
-h, --help Show this help message.
|
@@ -28,8 +29,9 @@ opts = {
|
|
28
29
|
:tag_suffix => nil,
|
29
30
|
}
|
30
31
|
|
31
|
-
parser.on('-c v', '--compress v',
|
32
|
-
parser.on('-a v', '--append v',
|
32
|
+
parser.on('-c v', '--compress v', /tar\.gz|zip/) {|v| opts[:compress_ext] = v }
|
33
|
+
parser.on('-a v', '--append v', /nonpharma/) {|v| opts[:nonpharma] = true }
|
34
|
+
parser.on('-o v', '--optional v', /fi/) {|v| opts[:fi] = true }
|
33
35
|
parser.on('-t v', '--tag-suffix v', /^[A-z0-9_]*$/i) {|v| opts[:tag_suffix] = v.upcase }
|
34
36
|
parser.on_tail('-h', '--help') { puts help; exit }
|
35
37
|
|
data/lib/oddb2xml/builder.rb
CHANGED
@@ -17,12 +17,14 @@ end
|
|
17
17
|
|
18
18
|
module Oddb2xml
|
19
19
|
class Builder
|
20
|
-
attr_accessor :subject, :index, :items, :
|
20
|
+
attr_accessor :subject, :index, :items, :infos,
|
21
|
+
:orphans, :fridges,
|
21
22
|
:tag_suffix
|
22
23
|
def initialize
|
23
24
|
@subject = nil
|
24
25
|
@index = {}
|
25
26
|
@items = {}
|
27
|
+
@infos = {}
|
26
28
|
@orphans = []
|
27
29
|
@fridges = []
|
28
30
|
@tag_suffix = nil
|
@@ -446,5 +448,47 @@ module Oddb2xml
|
|
446
448
|
end
|
447
449
|
_builder.to_xml
|
448
450
|
end
|
451
|
+
def build_fi
|
452
|
+
_builder = Nokogiri::XML::Builder.new(:encoding => 'utf-8') do |xml|
|
453
|
+
xml.doc.tag_suffix = @tag_suffix
|
454
|
+
datetime = Time.new.strftime('%FT%T.%7N%z')
|
455
|
+
xml.KOMPENDIUM(
|
456
|
+
'xmlns:xsd' => 'http://www.w3.org/2001/XMLSchema',
|
457
|
+
'xmlns:xsi' => 'http://www.w3.org/2001/XMLSchema-instance',
|
458
|
+
'xmlns' => 'http://wiki.oddb.org/wiki.php?pagename=Swissmedic.Datendeklaration',
|
459
|
+
'CREATION_DATETIME' => datetime,
|
460
|
+
'PROD_DATE' => datetime,
|
461
|
+
'VALID_DATE' => datetime,
|
462
|
+
) {
|
463
|
+
length = 0
|
464
|
+
%w[de fr].each do |lang|
|
465
|
+
length += @infos[lang].length
|
466
|
+
@infos[lang].each_with_index do |info, i|
|
467
|
+
xml.ART(
|
468
|
+
'MONTYPE' => 'fi', # only
|
469
|
+
'LANG' => lang.upcase,
|
470
|
+
'DT' => '',
|
471
|
+
) {
|
472
|
+
unless info[:name].empty?
|
473
|
+
xml.name { xml.p info[:name] }
|
474
|
+
end
|
475
|
+
unless info[:owner].empty?
|
476
|
+
xml.owner { xml.p info[:owner] }
|
477
|
+
end
|
478
|
+
xml.monid info[:monid] unless info[:monid].empty?
|
479
|
+
xml.paragraph { xml.cdata info[:paragraph] unless info[:paragraph].empty? }
|
480
|
+
}
|
481
|
+
end
|
482
|
+
end
|
483
|
+
xml.RESULT {
|
484
|
+
xml.OK_ERROR 'OK'
|
485
|
+
xml.NBR_RECORD length
|
486
|
+
xml.ERROR_CODE ''
|
487
|
+
xml.MESSAGE ''
|
488
|
+
}
|
489
|
+
}
|
490
|
+
end
|
491
|
+
_builder.to_xml
|
492
|
+
end
|
449
493
|
end
|
450
494
|
end
|
data/lib/oddb2xml/cli.rb
CHANGED
@@ -10,12 +10,14 @@ module Oddb2xml
|
|
10
10
|
class Cli
|
11
11
|
SUBJECTS = %w[product article]
|
12
12
|
ADDITIONS = %w[substance limitation]
|
13
|
+
OPTIONALS = %w[fi]
|
13
14
|
LANGUAGES = %w[DE FR] # EN does not exist
|
14
15
|
def initialize(args)
|
15
16
|
@options = args
|
16
17
|
@mutex = Mutex.new
|
17
18
|
@items = {} # Items from Preparations.xml in BAG
|
18
19
|
@index = {} # Base index from swissINDEX
|
20
|
+
@infos = {} # FI from SwissmedicInfo
|
19
21
|
@orphans = [] # Orphaned drugs from Swissmedic xls
|
20
22
|
@fridges = [] # ReFridge drugs from Swissmedic xls
|
21
23
|
LANGUAGES.each do |lang|
|
@@ -25,6 +27,14 @@ module Oddb2xml
|
|
25
27
|
def run
|
26
28
|
threads = []
|
27
29
|
# swissmedic
|
30
|
+
threads << Thread.new do
|
31
|
+
downloader = SwissmedicInfoDownloader.new
|
32
|
+
xml = downloader.download
|
33
|
+
@mutex.synchronize do
|
34
|
+
hsh = SwissmedicInfoExtractor.new(xml).to_hash
|
35
|
+
@infos = hsh
|
36
|
+
end
|
37
|
+
end
|
28
38
|
[:orphans, :fridges].each do |type|
|
29
39
|
threads << Thread.new do
|
30
40
|
downloader = SwissmedicDownloader.new
|
@@ -68,14 +78,17 @@ module Oddb2xml
|
|
68
78
|
LANGUAGES.each do |lang|
|
69
79
|
index[lang] = {} unless index[lang]
|
70
80
|
types.each do |type|
|
71
|
-
index[lang].merge!(@index[lang][type])
|
81
|
+
index[lang].merge!(@index[lang][type]) if @index[lang][type]
|
72
82
|
end
|
73
83
|
end
|
74
|
-
builder.subject
|
75
|
-
builder.index
|
76
|
-
builder.items
|
77
|
-
|
78
|
-
builder.
|
84
|
+
builder.subject = sbj
|
85
|
+
builder.index = index
|
86
|
+
builder.items = @items
|
87
|
+
# additions
|
88
|
+
builder.orphans = @orphans
|
89
|
+
builder.fridges = @fridges
|
90
|
+
# optionals
|
91
|
+
builder.infos = @infos
|
79
92
|
builder.tag_suffix = @options[:tag_suffix]
|
80
93
|
end
|
81
94
|
xml = builder.to_xml
|
@@ -102,7 +115,9 @@ module Oddb2xml
|
|
102
115
|
def files
|
103
116
|
unless @_files
|
104
117
|
@_files = {}
|
105
|
-
(ADDITIONS + SUBJECTS)
|
118
|
+
_files = (ADDITIONS + SUBJECTS)
|
119
|
+
_files += OPTIONALS if @options[:fi]
|
120
|
+
_files.each do|sbj|
|
106
121
|
@_files[sbj] = "#{prefix}_#{sbj.to_s}.xml"
|
107
122
|
end
|
108
123
|
end
|
@@ -117,7 +132,10 @@ module Oddb2xml
|
|
117
132
|
lines << lang
|
118
133
|
types.each do |type|
|
119
134
|
key = (type == :nonpharma ? 'NonPharma' : 'Pharma')
|
120
|
-
|
135
|
+
if @index[lang][type]
|
136
|
+
lines << sprintf(
|
137
|
+
"\t#{key} products: %i", @index[lang][type].values.length)
|
138
|
+
end
|
121
139
|
end
|
122
140
|
end
|
123
141
|
puts lines.join("\n")
|
data/lib/oddb2xml/downloader.rb
CHANGED
@@ -58,21 +58,19 @@ module Oddb2xml
|
|
58
58
|
super(url)
|
59
59
|
end
|
60
60
|
def init
|
61
|
-
|
62
|
-
|
63
|
-
|
64
|
-
|
65
|
-
|
61
|
+
@config = {
|
62
|
+
:log_level => :info,
|
63
|
+
:log => false, # $stdout
|
64
|
+
:raise_errors => true,
|
65
|
+
:ssl_verify_mode => :none,
|
66
|
+
:wsdl => @url
|
67
|
+
}
|
66
68
|
end
|
67
69
|
def download_by(lang = 'DE')
|
68
|
-
client = Savon::Client.new
|
69
|
-
http.auth.ssl.verify_mode = :none
|
70
|
-
wsdl.document = @url
|
71
|
-
end
|
70
|
+
client = Savon::Client.new(@config)
|
72
71
|
begin
|
73
72
|
type = @type
|
74
|
-
|
75
|
-
soap.xml = <<XML
|
73
|
+
soap = <<XML
|
76
74
|
<?xml version="1.0" encoding="utf-8"?>
|
77
75
|
<soap:Envelope xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xmlns:xsd="http://www.w3.org/2001/XMLSchema" xmlns:soap="http://schemas.xmlsoap.org/soap/envelope/">
|
78
76
|
<soap:Body>
|
@@ -80,7 +78,7 @@ module Oddb2xml
|
|
80
78
|
</soap:Body>
|
81
79
|
</soap:Envelope>
|
82
80
|
XML
|
83
|
-
|
81
|
+
response = client.call(:download_all, :xml => soap)
|
84
82
|
if response.success?
|
85
83
|
if xml = response.to_xml
|
86
84
|
return xml
|
@@ -128,4 +126,44 @@ XML
|
|
128
126
|
end
|
129
127
|
end
|
130
128
|
end
|
129
|
+
class SwissmedicInfoDownloader < Downloader
|
130
|
+
def init
|
131
|
+
@url ||= "http://download.swissmedicinfo.ch/Accept.aspx?ReturnUrl=%2f"
|
132
|
+
end
|
133
|
+
def download
|
134
|
+
file = "swissmedic_info.zip"
|
135
|
+
begin
|
136
|
+
response = nil
|
137
|
+
agent = Mechanize.new
|
138
|
+
agent.ignore_bad_chunking = true
|
139
|
+
if home = agent.get(@url)
|
140
|
+
form = home.form_with(:id => 'Form1')
|
141
|
+
bttn = form.button_with(:name => 'ctl00$MainContent$btnOK')
|
142
|
+
if page = form.submit(bttn)
|
143
|
+
form = page.form_with(:id => 'Form1')
|
144
|
+
bttn = form.button_with(:name => 'ctl00$MainContent$BtnYes')
|
145
|
+
response = form.submit(bttn)
|
146
|
+
end
|
147
|
+
end
|
148
|
+
if response
|
149
|
+
response.save_as file
|
150
|
+
end
|
151
|
+
xml = ''
|
152
|
+
Zip::ZipFile.foreach(file) do |entry|
|
153
|
+
if entry.name =~ /^AipsDownload_/iu
|
154
|
+
entry.get_input_stream{ |io| xml = io.read }
|
155
|
+
end
|
156
|
+
end
|
157
|
+
return xml
|
158
|
+
rescue Timeout::Error
|
159
|
+
retrievable? ? retry : raise
|
160
|
+
rescue NoMethodError => e
|
161
|
+
# pass
|
162
|
+
ensure
|
163
|
+
if File.exists? file
|
164
|
+
File.unlink file
|
165
|
+
end
|
166
|
+
end
|
167
|
+
end
|
168
|
+
end
|
131
169
|
end
|
data/lib/oddb2xml/extractor.rb
CHANGED
@@ -172,4 +172,32 @@ module Oddb2xml
|
|
172
172
|
end
|
173
173
|
end
|
174
174
|
end
|
175
|
+
class SwissmedicInfoExtractor < Extractor
|
176
|
+
def to_hash
|
177
|
+
data = Hash.new{|h,k| h[k] = [] }
|
178
|
+
doc = Nokogiri::XML(@xml)
|
179
|
+
doc.xpath("//medicalInformations/medicalInformation[@type='fi']").each do |fi|
|
180
|
+
lang = fi.attr('lang').to_s
|
181
|
+
next unless lang =~ /de|fr/
|
182
|
+
item = {}
|
183
|
+
item[:name] = (name = fi.at_xpath('.//title')) ? name.text : ''
|
184
|
+
item[:owner] = (ownr = fi.at_xpath('.//authHolder')) ? ownr.text : ''
|
185
|
+
if content = fi.at_xpath('.//content').children.detect{|child| child.cdata? }
|
186
|
+
html = Nokogiri::HTML(content.to_s)
|
187
|
+
# all HTML contents without MonTitle and ownerCompany
|
188
|
+
item[:paragraph] = "<title><p>#{item[:name]}</p></title>" +
|
189
|
+
((paragraph = html.xpath("///div[@class='paragraph']")) ? paragraph.to_s : '')
|
190
|
+
if text = html.xpath("///div[@id='Section7750']/p").text
|
191
|
+
if text =~ /(\d{5})[,\s]*(\d{5})?/
|
192
|
+
[$1, $2].compact.each do |n|
|
193
|
+
item[:monid] = n
|
194
|
+
data[lang] << item
|
195
|
+
end
|
196
|
+
end
|
197
|
+
end
|
198
|
+
end
|
199
|
+
end
|
200
|
+
data
|
201
|
+
end
|
202
|
+
end
|
175
203
|
end
|
data/lib/oddb2xml/version.rb
CHANGED
data/oddb2xml.gemspec
CHANGED
@@ -21,7 +21,7 @@ Gem::Specification.new do |gem|
|
|
21
21
|
gem.add_dependency 'archive-tar-minitar'
|
22
22
|
gem.add_dependency 'mechanize'
|
23
23
|
gem.add_dependency 'nokogiri'
|
24
|
-
gem.add_dependency 'savon'
|
24
|
+
gem.add_dependency 'savon', '>= 2.0'
|
25
25
|
gem.add_dependency 'spreadsheet'
|
26
26
|
|
27
27
|
gem.add_development_dependency 'rspec'
|
data/spec/spec_helper.rb
CHANGED
@@ -20,6 +20,7 @@ module ServerMockHelper
|
|
20
20
|
setup_bag_xml_server_mock
|
21
21
|
setup_swiss_index_server_mock
|
22
22
|
setup_swissmedic_server_mock
|
23
|
+
setup_swissmedic_info_server_mock
|
23
24
|
end
|
24
25
|
def setup_bag_xml_server_mock
|
25
26
|
# zip
|
@@ -95,6 +96,34 @@ module ServerMockHelper
|
|
95
96
|
:body => stub_response)
|
96
97
|
end
|
97
98
|
end
|
99
|
+
def setup_swissmedic_info_server_mock
|
100
|
+
# html (dummy)
|
101
|
+
stub_html_url = "http://download.swissmedicinfo.ch/Accept.aspx?ReturnUrl=%2f"
|
102
|
+
stub_response = File.read(File.expand_path("../data/swissmedic_info.html", __FILE__))
|
103
|
+
stub_request(:get, stub_html_url).
|
104
|
+
with(:headers => {
|
105
|
+
'Accept' => '*/*',
|
106
|
+
'Host' => 'download.swissmedicinfo.ch',
|
107
|
+
}).
|
108
|
+
to_return(
|
109
|
+
:status => 200,
|
110
|
+
:headers => {'Content-Type' => 'text/html; charset=utf-8'},
|
111
|
+
:body => stub_response)
|
112
|
+
# zip
|
113
|
+
stub_zip_url = "http://download.swissmedicinfo.ch/"
|
114
|
+
stub_response = File.read(File.expand_path('../data/swissmedic_info.zip', __FILE__))
|
115
|
+
stub_request(:get, stub_zip_url).
|
116
|
+
with(:headers => {
|
117
|
+
'Accept' => '*/*',
|
118
|
+
'Accept-Encoding' => 'gzip,deflate,identity',
|
119
|
+
'Host' => 'download.swisssmedicinfo.ch',
|
120
|
+
}).
|
121
|
+
to_return(
|
122
|
+
:status => 200,
|
123
|
+
:headers => {'Content-Type' => 'application/zip; charset=utf-8'},
|
124
|
+
:body => stub_response)
|
125
|
+
|
126
|
+
end
|
98
127
|
end
|
99
128
|
|
100
129
|
RSpec.configure do |config|
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: oddb2xml
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 1.1.0
|
4
|
+
version: 1.1.1
|
5
5
|
prerelease:
|
6
6
|
platform: ruby
|
7
7
|
authors:
|
@@ -9,11 +9,11 @@ authors:
|
|
9
9
|
autorequire:
|
10
10
|
bindir: bin
|
11
11
|
cert_chain: []
|
12
|
-
date:
|
12
|
+
date: 2013-01-07 00:00:00.000000000 Z
|
13
13
|
dependencies:
|
14
14
|
- !ruby/object:Gem::Dependency
|
15
15
|
name: rdoc
|
16
|
-
requirement: &
|
16
|
+
requirement: &18836140 !ruby/object:Gem::Requirement
|
17
17
|
none: false
|
18
18
|
requirements:
|
19
19
|
- - ~>
|
@@ -21,10 +21,10 @@ dependencies:
|
|
21
21
|
version: '3.10'
|
22
22
|
type: :development
|
23
23
|
prerelease: false
|
24
|
-
version_requirements: *
|
24
|
+
version_requirements: *18836140
|
25
25
|
- !ruby/object:Gem::Dependency
|
26
26
|
name: hoe
|
27
|
-
requirement: &
|
27
|
+
requirement: &18834840 !ruby/object:Gem::Requirement
|
28
28
|
none: false
|
29
29
|
requirements:
|
30
30
|
- - ~>
|
@@ -32,7 +32,7 @@ dependencies:
|
|
32
32
|
version: '2.13'
|
33
33
|
type: :development
|
34
34
|
prerelease: false
|
35
|
-
version_requirements: *
|
35
|
+
version_requirements: *18834840
|
36
36
|
description: ''
|
37
37
|
email:
|
38
38
|
- yasaka@ywesee.com, zdavatz@ywesee.com
|