patinfo2csv 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/History.txt +3 -0
- data/Manifest.txt +9 -0
- data/README.txt +32 -0
- data/Rakefile +8 -0
- data/bin/patinfo2csv +45 -0
- data/lib/patinfo2csv/cli.rb +65 -0
- data/lib/patinfo2csv/converter.rb +79 -0
- data/lib/patinfo2csv.rb +91 -0
- data/lib/version.rb +3 -0
- metadata +99 -0
data/History.txt
ADDED
data/Manifest.txt
ADDED
data/README.txt
ADDED
@@ -0,0 +1,32 @@
|
|
1
|
+
= Patinfo2csv
|
2
|
+
|
3
|
+
* http://scm.ywesee.com/?p=patinfo2csv/.git;a=summary
|
4
|
+
|
5
|
+
== Description
|
6
|
+
|
7
|
+
Patinfo2csv extracts and converts patinfo.yaml to patinfo.csv;
|
8
|
+
only 5 chapters (de or fr, text) in patinfo.yaml are extracted.
|
9
|
+
|
10
|
+
* effects
|
11
|
+
* contra_indications
|
12
|
+
* pregnancy
|
13
|
+
* usage
|
14
|
+
* composition
|
15
|
+
|
16
|
+
The pharmacodes to extract are read from a text file given on the command line; see `patinfo2csv --help`
|
17
|
+
|
18
|
+
== Usage
|
19
|
+
|
20
|
+
$ patinfo2csv --help
|
21
|
+
Usage: patinfo2csv <input patinfo.yaml> <output patinfo.csv> <input pharmacode.txt> [-l (de|fr)]
|
22
|
+
-v, --version Show version
|
23
|
+
-h, --help Print this help message
|
24
|
+
|
25
|
+
== Requirements
|
26
|
+
|
27
|
+
* slop gem
|
28
|
+
|
29
|
+
== Install
|
30
|
+
|
31
|
+
* sudo gem install slop
|
32
|
+
* sudo gem install patinfo2csv
|
data/Rakefile
ADDED
data/bin/patinfo2csv
ADDED
@@ -0,0 +1,45 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
# encoding: utf-8
|
3
|
+
|
4
|
+
require 'slop'
|
5
|
+
require 'pathname'
|
6
|
+
|
7
|
+
# Languages accepted by the -l/--lang option.
LANGUAGE = [
  'de',
  'fr'
].freeze

# Options are parsed on a copy of ARGV; what is left in +argv+ afterwards is
# treated as the three positional file arguments.
argv = ARGV.dup
opts = Slop.parse! argv, :help => true do
  banner "Usage: patinfo2csv <input patinfo.yaml> <output patinfo.csv> <input pharmacode.txt> [-l (de|fr)]"
  on :v, :version, 'Show version'
  on :l, :lang, 'Language option (de|fr) default de', :argument => true
end

# Make the bundled lib directory loadable when the script is run directly.
root = Pathname.new(__FILE__).realpath.parent.parent
$:.unshift root.join('lib') if $0 == __FILE__

# --version must be handled before the positional arguments are validated;
# otherwise `patinfo2csv --version` (no file arguments) only ever prints the
# usage banner.
# NOTE(review): the gem manifest lists lib/version.rb, not
# lib/patinfo2csv/version.rb -- confirm that this require path resolves.
if opts.version?
  require 'patinfo2csv/version'
  puts "patinfo2csv #{Patinfo2csv::VERSION}"
  exit
end

lang = 'de'
if opts[:lang]
  lang = opts.options[:lang].argument_value.downcase
  # NOTE(review): presumably needed because the option value is still present
  # among the positional arguments after parsing -- confirm against the Slop
  # version in use.
  argv.delete(lang)
end

# Exactly three positional arguments are required; both input files must
# exist and the language must be a supported one.
usable = argv.length == 3 &&
         File.exist?(argv[0]) &&
         File.exist?(argv[2]) &&
         LANGUAGE.include?(lang)

unless usable
  puts opts.banner
  exit 1 # a usage error is a failure, so signal it to the calling shell
end

require 'patinfo2csv'
# CLI.run takes (patinfo.yaml, pharmacode.txt, patinfo.csv, lang), whereas the
# command line order is <yaml> <csv> <pharmacode.txt>.
Patinfo2csv::CLI.run(argv[0], argv[2], argv[1], lang)
|
@@ -0,0 +1,65 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
# encoding: utf-8
|
3
|
+
|
4
|
+
require 'yaml'
|
5
|
+
require 'patinfo2csv'
|
6
|
+
|
7
|
+
module Patinfo2csv
  # Command-line driver: loads the inputs, runs the Converter, writes the CSV
  # file and prints a short report to standard output.
  class CLI
    class << self
      # Converts +patinfo+ to CSV for the codes listed in +pharmacode+.
      #
      # Param:: patinfo    - path of patinfo.yaml (input)
      # Param:: pharmacode - path of pharmacode.txt (input)
      # Param:: output     - path of patinfo.csv (output)
      # Param:: lang       - language key of the descriptions to extract
      def run(patinfo, pharmacode, output, lang)
        cv = Patinfo2csv::Converter.new
        cv.lang = lang
        cv.patinfo = load_yaml(patinfo)
        cv.pharmacode = parse_txt(pharmacode)
        rows = cv.to_csv
        output_rows(rows, output)
        # After to_csv, cv.pharmacode holds only the codes that were not
        # converted.
        report(rows, cv.pharmacode, lang)
      end

      # Reads the YAML file at +patinfo+ and returns its documents as an Array.
      #
      # Before parsing, every escaped byte sequence listed in
      # ESCAPED_STR_CODE_MAP is replaced by the character it stands for.
      #
      # YAML.load_stream replaces the deprecated YAML.load_documents alias,
      # and it accepts a String, so no StringIO wrapper is needed.
      # NOTE(review): load_stream is not a "safe" loader; only feed it
      # trusted patinfo.yaml files.
      def load_yaml(patinfo)
        content = File.open(patinfo, "r:ascii:utf-8") do |f|
          f.each_line.map { |line|
            ESCAPED_STR_CODE_MAP.each do |code, char|
              line.gsub!(code, char)
            end
            line
          }.join
        end
        YAML.load_stream(content) # utf-8
      end

      # Reads +pharmacode+ and returns the codes it contains, one per line,
      # keeping only the digits of each line. Lines without any digit (blank
      # lines, comments) are skipped, so they no longer turn into
      # empty-string codes that would be reported as unmatched.
      def parse_txt(pharmacode)
        File.open(pharmacode, "r:utf-8") do |input|
          input.each_line.map { |line| line.gsub(/[^\d]/, '') }.
            reject { |code| code.empty? }
        end
      end

      # Writes every entry of +rows+ as one line to the file +output+.
      # Does nothing when +rows+ is nil.
      #
      # File.open is used on purpose: Kernel#open would start a subprocess
      # for an output name beginning with "|".
      def output_rows(rows, output)
        return unless rows
        File.open(output, "w:utf-8") do |out|
          rows.each do |row|
            out.print row, "\n"
          end
        end
      end

      # Prints how many data rows were converted and, if any, the codes in
      # +nomatches+.
      #
      # +rows+ includes the header line whenever it is non-empty, hence the
      # "- 1"; the count is clamped at zero so an empty result is not
      # reported as "-1 rows".
      def report(rows, nomatches, lang)
        converted = [rows.length - 1, 0].max
        puts "patinfo[#{lang}] converted: #{converted} rows"
        return if nomatches.empty?
        puts "-----"
        puts "There is no chapters of Patinfo of the following " \
             "#{nomatches.length} codes"
        puts "[#{nomatches.join(',')}]"
        puts
      end
    end
  end
end
|
@@ -0,0 +1,79 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
# encoding: utf-8
|
3
|
+
|
4
|
+
module Patinfo2csv
  # Turns loaded patinfo documents into semicolon-separated CSV lines for a
  # given list of pharmacodes.
  #
  # patinfo::    Array of documents; the code reads row['article_codes'] (a
  #              list whose entries answer to [:article_pcode]) and
  #              row['descriptions'][lang][chapter]['sections'].
  # pharmacode:: Array of wanted codes. to_csv removes every code it writes a
  #              row for, so afterwards it holds the codes that were not
  #              converted.
  # lang::       key into row['descriptions'] (defaults to "de").
  # rows::       the CSV lines accumulated so far.
  class Converter
    attr_accessor :patinfo, :pharmacode, :lang
    attr_reader :rows

    # Chapters written to the CSV, in column order.
    CHAPTERS = [
      :effects,
      :contra_indications,
      :pregnancy,
      :usage,
      :composition
    ].freeze

    DELIMITER = ';'

    # Column titles of the header line, matching CHAPTERS.
    HEADER = [
      "Pharmacode",
      "Effects",
      "Contra Indication",
      "Pregnancy",
      "Usage",
      "Composition"
    ].freeze

    def initialize
      @lang = "de"
      @patinfo = []
      @pharmacode = []
      @rows = []
    end

    # Appends one CSV line per matched pharmacode to +rows+ and returns
    # +rows+. A header line is put in front when at least one line exists.
    #
    # A document without article codes, without a wanted code, or without
    # descriptions in the selected language is skipped. Codes are removed
    # from +pharmacode+ only when a line is actually written, so codes of
    # skipped documents stay in the "not converted" list instead of silently
    # disappearing (previously such a document raised NoMethodError).
    def to_csv
      @patinfo.each do |row|
        codes = row['article_codes']
        next if codes.nil?
        # uniq: a code listed twice in one document yields a single line
        wanted = codes.map { |code| code[:article_pcode] }.
          select { |pcode| @pharmacode.include?(pcode) }.uniq
        next if wanted.empty?
        chapters = extract_chapters(row)
        next if chapters.nil? || chapters.empty?
        wanted.each do |pcode|
          @pharmacode.delete(pcode)
          @rows << [quote(pcode), chapters].join(DELIMITER)
        end
      end
      @rows.unshift(HEADER.join(DELIMITER)) unless @rows.empty?
      @rows
    end

    # Returns the quoted, DELIMITER-joined text of all CHAPTERS of +row+ in
    # the selected language; a chapter without sections becomes an empty
    # quoted field. Returns nil when +row+ has no descriptions for the
    # language.
    def extract_chapters(row)
      desc = (row['descriptions'] || {})[@lang]
      return nil if desc.nil? || desc.empty?
      CHAPTERS.map { |chapter|
        quote(chapter_text(desc[chapter.to_s]))
      }.join(DELIMITER)
    end

    private

    # Concatenates, for every section of +chapter+, its subheading, its
    # non-empty paragraph texts (line breaks normalised to "\n", one per
    # line) and a separating "\n". One trailing newline is removed.
    # Missing subheadings, paragraphs or texts count as empty.
    def chapter_text(chapter)
      sections = chapter && chapter['sections']
      return '' if sections.nil?
      sections.map { |section|
        body = (section['paragraphs'] || []).
          map { |paragraph| paragraph['text'].to_s }.
          reject { |text| text.empty? }.
          map { |text| text.gsub(/\r\n|\r/, "\n") + "\n" }.
          join
        "#{section['subheading']}#{body}\n"
      }.join.chomp
    end

    # Wraps +value+ in double quotes, doubling embedded double quotes so the
    # field stays a single well-formed CSV field.
    def quote(value)
      %Q!"#{value.to_s.gsub('"', '""')}"!
    end
  end
end
|
data/lib/patinfo2csv.rb
ADDED
@@ -0,0 +1,91 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
# encoding: utf-8
|
3
|
+
|
4
|
+
require 'patinfo2csv/version'
|
5
|
+
require 'patinfo2csv/cli'
|
6
|
+
require 'patinfo2csv/converter'
|
7
|
+
|
8
|
+
module Patinfo2csv
|
9
|
+
# FIXME
|
10
|
+
# escaped backslash and utf-8 code map
|
11
|
+
# for patinfo.yaml (ASCII)
|
12
|
+
ESCAPED_STR_CODE_MAP = {
|
13
|
+
"\\x24" => "$",
|
14
|
+
"\\x25" => "%",
|
15
|
+
"\\x26" => "&",
|
16
|
+
"\\x2D" => "-",
|
17
|
+
"\\x5F" => "_",
|
18
|
+
"\\xC2\\xA0" => " ",
|
19
|
+
"\\xC2\\xAB" => "«",
|
20
|
+
"\\xC2\\xBB" => "»",
|
21
|
+
"\\xC2\\xAE" => "®",
|
22
|
+
"\\xC2\\xA9" => "©",
|
23
|
+
"\\xC2\\xB5" => "µ",
|
24
|
+
"\\xC2\\xBD" => "½",
|
25
|
+
"\\xC2\\xBC" => "¼",
|
26
|
+
"\\xC2\\xBE" => "¾",
|
27
|
+
"\\xC3\\x82" => "Â",
|
28
|
+
"\\xC3\\x83" => "Ã",
|
29
|
+
"\\xC3\\x84" => "Ä",
|
30
|
+
"\\xC3\\x85" => "Å",
|
31
|
+
"\\xC3\\x86" => "Æ",
|
32
|
+
"\\xC3\\x87" => "Ç",
|
33
|
+
"\\xC3\\x88" => "È",
|
34
|
+
"\\xC3\\x89" => "É",
|
35
|
+
"\\xC3\\x8A" => "Ê",
|
36
|
+
"\\xC3\\x8B" => "Ë",
|
37
|
+
"\\xC3\\x8C" => "Ì",
|
38
|
+
"\\xC3\\x8D" => "Í",
|
39
|
+
"\\xC3\\x8E" => "Î",
|
40
|
+
"\\xC3\\x8F" => "Ï",
|
41
|
+
"\\xC3\\x90" => "Ð",
|
42
|
+
"\\xC3\\x91" => "Ñ",
|
43
|
+
"\\xC3\\x92" => "Ò",
|
44
|
+
"\\xC3\\x93" => "Ó",
|
45
|
+
"\\xC3\\x94" => "Ô",
|
46
|
+
"\\xC3\\x95" => "Õ",
|
47
|
+
"\\xC3\\x96" => "Ö",
|
48
|
+
"\\xC3\\x97" => "×",
|
49
|
+
"\\xC3\\x98" => "Ø",
|
50
|
+
"\\xC3\\x99" => "Ù",
|
51
|
+
"\\xC3\\x9A" => "Ú",
|
52
|
+
"\\xC3\\x9B" => "Û",
|
53
|
+
"\\xC3\\x9C" => "Ü",
|
54
|
+
"\\xC3\\x9D" => "Ý",
|
55
|
+
"\\xC3\\x9E" => "Þ",
|
56
|
+
"\\xC3\\x9F" => "ß",
|
57
|
+
"\\xC3\\xA0" => "à",
|
58
|
+
"\\xC3\\xA1" => "á",
|
59
|
+
"\\xC3\\xA2" => "â",
|
60
|
+
"\\xC3\\xA3" => "ã",
|
61
|
+
"\\xC3\\xA4" => "ä",
|
62
|
+
"\\xC3\\xA5" => "å",
|
63
|
+
"\\xC3\\xA6" => "æ",
|
64
|
+
"\\xC3\\xA7" => "ç",
|
65
|
+
"\\xC3\\xA8" => "è",
|
66
|
+
"\\xC3\\xA9" => "é",
|
67
|
+
"\\xC3\\xAA" => "ê",
|
68
|
+
"\\xC3\\xAB" => "ë",
|
69
|
+
"\\xC3\\xAC" => "ì",
|
70
|
+
"\\xC3\\xAD" => "í",
|
71
|
+
"\\xC3\\xAE" => "î",
|
72
|
+
"\\xC3\\xAF" => "ï",
|
73
|
+
"\\xC3\\xB0" => "ð",
|
74
|
+
"\\xC3\\xB1" => "ñ",
|
75
|
+
"\\xC3\\xB2" => "ò",
|
76
|
+
"\\xC3\\xB3" => "ó",
|
77
|
+
"\\xC3\\xB4" => "ô",
|
78
|
+
"\\xC3\\xB5" => "õ",
|
79
|
+
"\\xC3\\xB6" => "ö",
|
80
|
+
"\\xC3\\xB7" => "÷",
|
81
|
+
"\\xC3\\xB8" => "ø",
|
82
|
+
"\\xC3\\xB9" => "ù",
|
83
|
+
"\\xC3\\xBA" => "ú",
|
84
|
+
"\\xC3\\xBB" => "û",
|
85
|
+
"\\xC3\\xBC" => "ü",
|
86
|
+
"\\xC3\\xBD" => "ý",
|
87
|
+
"\\xC3\\xBE" => "þ",
|
88
|
+
"\\xC3\\xBF" => "ÿ",
|
89
|
+
"\\xE2\\x80\\x93" => "–",
|
90
|
+
}
|
91
|
+
end
|
data/lib/version.rb
ADDED
metadata
ADDED
@@ -0,0 +1,99 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: patinfo2csv
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 1.0.0
|
5
|
+
prerelease:
|
6
|
+
platform: ruby
|
7
|
+
authors:
|
8
|
+
- Yasuhiro Asaka, Zeno R.R. Davatz
|
9
|
+
autorequire:
|
10
|
+
bindir: bin
|
11
|
+
cert_chain: []
|
12
|
+
date: 2012-03-22 00:00:00.000000000 Z
|
13
|
+
dependencies:
|
14
|
+
- !ruby/object:Gem::Dependency
|
15
|
+
name: rdoc
|
16
|
+
requirement: &25013040 !ruby/object:Gem::Requirement
|
17
|
+
none: false
|
18
|
+
requirements:
|
19
|
+
- - ~>
|
20
|
+
- !ruby/object:Gem::Version
|
21
|
+
version: '3.10'
|
22
|
+
type: :development
|
23
|
+
prerelease: false
|
24
|
+
version_requirements: *25013040
|
25
|
+
- !ruby/object:Gem::Dependency
|
26
|
+
name: hoe
|
27
|
+
requirement: &25012580 !ruby/object:Gem::Requirement
|
28
|
+
none: false
|
29
|
+
requirements:
|
30
|
+
- - ~>
|
31
|
+
- !ruby/object:Gem::Version
|
32
|
+
version: '2.13'
|
33
|
+
type: :development
|
34
|
+
prerelease: false
|
35
|
+
version_requirements: *25012580
|
36
|
+
description: ! 'Patinfo2csv extracts and converts patinfo.yaml to patinfo.csv;
|
37
|
+
|
38
|
+
only 5 chapters (de or fr, text) in patinfo.yaml are extracted.
|
39
|
+
|
40
|
+
|
41
|
+
* effects
|
42
|
+
|
43
|
+
* contra_indications
|
44
|
+
|
45
|
+
* pregnancy
|
46
|
+
|
47
|
+
* usage
|
48
|
+
|
49
|
+
* composition
|
50
|
+
|
51
|
+
|
52
|
+
You can specify the pharmacode, see `patinfo2csv --help`'
|
53
|
+
email:
|
54
|
+
- yasaka@ywesee.com, zdavatz@ywesee.com
|
55
|
+
executables:
|
56
|
+
- patinfo2csv
|
57
|
+
extensions: []
|
58
|
+
extra_rdoc_files:
|
59
|
+
- History.txt
|
60
|
+
- Manifest.txt
|
61
|
+
- README.txt
|
62
|
+
files:
|
63
|
+
- History.txt
|
64
|
+
- Manifest.txt
|
65
|
+
- README.txt
|
66
|
+
- Rakefile
|
67
|
+
- bin/patinfo2csv
|
68
|
+
- lib/patinfo2csv.rb
|
69
|
+
- lib/patinfo2csv/cli.rb
|
70
|
+
- lib/patinfo2csv/converter.rb
|
71
|
+
- lib/version.rb
|
72
|
+
homepage: http://scm.ywesee.com/?p=patinfo2csv/.git;a=summary
|
73
|
+
licenses: []
|
74
|
+
post_install_message:
|
75
|
+
rdoc_options:
|
76
|
+
- --main
|
77
|
+
- README.txt
|
78
|
+
require_paths:
|
79
|
+
- lib
|
80
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
81
|
+
none: false
|
82
|
+
requirements:
|
83
|
+
- - ! '>='
|
84
|
+
- !ruby/object:Gem::Version
|
85
|
+
version: '0'
|
86
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
87
|
+
none: false
|
88
|
+
requirements:
|
89
|
+
- - ! '>='
|
90
|
+
- !ruby/object:Gem::Version
|
91
|
+
version: '0'
|
92
|
+
requirements: []
|
93
|
+
rubyforge_project: patinfo2csv
|
94
|
+
rubygems_version: 1.8.15
|
95
|
+
signing_key:
|
96
|
+
specification_version: 3
|
97
|
+
summary: Patinfo2csv extracts and converts patinfo.yaml to patinfo.csv; only 5 chapters(de,
|
98
|
+
text) in patinfo.yaml are extracted
|
99
|
+
test_files: []
|