patinfo2csv 1.0.4 → 1.0.5
Sign up to get free protection for your applications and to get access to all the features.
- data/History.txt +4 -0
- data/lib/patinfo2csv.rb +64 -63
- data/lib/patinfo2csv/cli.rb +10 -21
- data/lib/patinfo2csv/converter.rb +35 -18
- data/lib/version.rb +1 -1
- metadata +6 -6
data/History.txt
CHANGED
data/lib/patinfo2csv.rb
CHANGED
@@ -3,13 +3,14 @@
|
|
3
3
|
|
4
4
|
require 'version'
|
5
5
|
require 'patinfo2csv/cli'
|
6
|
+
require 'patinfo2csv/loader'
|
6
7
|
require 'patinfo2csv/converter'
|
7
8
|
|
8
9
|
module Patinfo2csv
|
9
|
-
#
|
10
|
+
# NOTE
|
10
11
|
# escaped backslash and utf-8 code map
|
11
12
|
# for patinfo.yaml(ASCII)
|
12
|
-
|
13
|
+
ESCAPED_CHAR_CODE_MAP = {
|
13
14
|
/\\x24/ => "$",
|
14
15
|
/\\x25/ => "%",
|
15
16
|
/\\x26/ => "&",
|
@@ -26,67 +27,67 @@ module Patinfo2csv
|
|
26
27
|
/\\xC2\\xBE/ => "¾",
|
27
28
|
/\\xC2\\x[A-z0-9]{2}/ => "",
|
28
29
|
/\\xC3\\x82/ => "Â",
|
29
|
-
|
30
|
-
/\\xC3\\x84/ => "Ä",
|
31
|
-
|
32
|
-
|
33
|
-
/\\xC3\\x87/ => "Ç",
|
34
|
-
/\\xC3\\x88/ => "È",
|
35
|
-
/\\xC3\\x89/ => "É",
|
36
|
-
/\\xC3\\x8A/ => "Ê",
|
37
|
-
/\\xC3\\x8B/ => "Ë",
|
38
|
-
/\\xC3\\x8C/ => "Ì",
|
39
|
-
/\\xC3\\x8D/ => "Í",
|
40
|
-
/\\xC3\\x8E/ => "Î",
|
41
|
-
/\\xC3\\x8F/ => "Ï",
|
30
|
+
# /\\xC3\\x83/ => "Ã",
|
31
|
+
/\\xC3\\x84/ => "Ä",
|
32
|
+
# /\\xC3\\x85/ => "Å",
|
33
|
+
# /\\xC3\\x86/ => "Æ",
|
34
|
+
/\\xC3\\x87/ => "Ç",
|
35
|
+
/\\xC3\\x88/ => "È",
|
36
|
+
/\\xC3\\x89/ => "É",
|
37
|
+
/\\xC3\\x8A/ => "Ê",
|
38
|
+
/\\xC3\\x8B/ => "Ë",
|
39
|
+
/\\xC3\\x8C/ => "Ì",
|
40
|
+
/\\xC3\\x8D/ => "Í",
|
41
|
+
/\\xC3\\x8E/ => "Î",
|
42
|
+
/\\xC3\\x8F/ => "Ï",
|
42
43
|
/\\xC3\\x90/ => "Ð",
|
43
|
-
/\\xC3\\x91/ => "Ñ",
|
44
|
-
/\\xC3\\x92/ => "Ò",
|
45
|
-
/\\xC3\\x93/ => "Ó",
|
46
|
-
/\\xC3\\x94/ => "Ô",
|
47
|
-
/\\xC3\\x95/ => "Õ",
|
48
|
-
/\\xC3\\x96/ => "Ö",
|
49
|
-
/\\xC3\\x97/ => "×",
|
50
|
-
/\\xC3\\x98/ => "Ø",
|
51
|
-
/\\xC3\\x99/ => "Ù",
|
52
|
-
/\\xC3\\x9A/ => "Ú",
|
53
|
-
/\\xC3\\x9B/ => "Û",
|
54
|
-
/\\xC3\\x9C/ => "Ü",
|
55
|
-
/\\xC3\\x9D/ => "Ý",
|
56
|
-
/\\xC3\\x9E/ => "Þ",
|
57
|
-
/\\xC3\\x9F/ => "ß",
|
58
|
-
/\\xC3\\xA0/ => "à",
|
59
|
-
/\\xC3\\xA1/ => "á",
|
60
|
-
/\\xC3\\xA2/ => "â",
|
61
|
-
/\\xC3\\xA3/ => "ã",
|
62
|
-
/\\xC3\\xA4/ => "ä",
|
63
|
-
/\\xC3\\xA5/ => "å",
|
64
|
-
/\\xC3\\xA6/ => "æ",
|
65
|
-
/\\xC3\\xA7/ => "ç",
|
66
|
-
/\\xC3\\xA8/ => "è",
|
67
|
-
/\\xC3\\xA9/ => "é",
|
68
|
-
/\\xC3\\xAA/ => "ê",
|
69
|
-
/\\xC3\\xAB/ => "ë",
|
70
|
-
/\\xC3\\xAC/ => "ì",
|
71
|
-
/\\xC3\\xAD/ => "í",
|
72
|
-
/\\xC3\\xAE/ => "î",
|
73
|
-
/\\xC3\\xAF/ => "ï",
|
74
|
-
/\\xC3\\xB0/ => "ð",
|
75
|
-
/\\xC3\\xB1/ => "ñ",
|
76
|
-
/\\xC3\\xB2/ => "ò",
|
77
|
-
/\\xC3\\xB3/ => "ó",
|
78
|
-
/\\xC3\\xB4/ => "ô",
|
79
|
-
/\\xC3\\xB5/ => "õ",
|
80
|
-
/\\xC3\\xB6/ => "ö",
|
81
|
-
/\\xC3\\xB7/ => "÷",
|
82
|
-
/\\xC3\\xB8/ => "ø",
|
83
|
-
/\\xC3\\xB9/ => "ù",
|
84
|
-
/\\xC3\\xBA/ => "ú",
|
85
|
-
/\\xC3\\xBB/ => "û",
|
86
|
-
/\\xC3\\xBC/ => "ü",
|
87
|
-
/\\xC3\\xBD/ => "ý",
|
88
|
-
/\\xC3\\xBE/ => "þ",
|
89
|
-
/\\xC3\\xBF/ => "ÿ",
|
44
|
+
# /\\xC3\\x91/ => "Ñ",
|
45
|
+
/\\xC3\\x92/ => "Ò",
|
46
|
+
/\\xC3\\x93/ => "Ó",
|
47
|
+
/\\xC3\\x94/ => "Ô",
|
48
|
+
# /\\xC3\\x95/ => "Õ",
|
49
|
+
/\\xC3\\x96/ => "Ö",
|
50
|
+
/\\xC3\\x97/ => "×",
|
51
|
+
# /\\xC3\\x98/ => "Ø",
|
52
|
+
/\\xC3\\x99/ => "Ù",
|
53
|
+
/\\xC3\\x9A/ => "Ú",
|
54
|
+
/\\xC3\\x9B/ => "Û",
|
55
|
+
/\\xC3\\x9C/ => "Ü",
|
56
|
+
/\\xC3\\x9D/ => "Ý",
|
57
|
+
# /\\xC3\\x9E/ => "Þ",
|
58
|
+
/\\xC3\\x9F/ => "ß",
|
59
|
+
/\\xC3\\xA0/ => "à",
|
60
|
+
/\\xC3\\xA1/ => "á",
|
61
|
+
/\\xC3\\xA2/ => "â",
|
62
|
+
# /\\xC3\\xA3/ => "ã",
|
63
|
+
/\\xC3\\xA4/ => "ä",
|
64
|
+
# /\\xC3\\xA5/ => "å",
|
65
|
+
# /\\xC3\\xA6/ => "æ",
|
66
|
+
/\\xC3\\xA7/ => "ç",
|
67
|
+
/\\xC3\\xA8/ => "è",
|
68
|
+
/\\xC3\\xA9/ => "é",
|
69
|
+
/\\xC3\\xAA/ => "ê",
|
70
|
+
/\\xC3\\xAB/ => "ë",
|
71
|
+
/\\xC3\\xAC/ => "ì",
|
72
|
+
/\\xC3\\xAD/ => "í",
|
73
|
+
/\\xC3\\xAE/ => "î",
|
74
|
+
/\\xC3\\xAF/ => "ï",
|
75
|
+
# /\\xC3\\xB0/ => "ð",
|
76
|
+
# /\\xC3\\xB1/ => "ñ",
|
77
|
+
/\\xC3\\xB2/ => "ò",
|
78
|
+
/\\xC3\\xB3/ => "ó",
|
79
|
+
/\\xC3\\xB4/ => "ô",
|
80
|
+
# /\\xC3\\xB5/ => "õ",
|
81
|
+
/\\xC3\\xB6/ => "ö",
|
82
|
+
/\\xC3\\xB7/ => "÷",
|
83
|
+
# /\\xC3\\xB8/ => "ø",
|
84
|
+
/\\xC3\\xB9/ => "ù",
|
85
|
+
/\\xC3\\xBA/ => "ú",
|
86
|
+
/\\xC3\\xBB/ => "û",
|
87
|
+
/\\xC3\\xBC/ => "ü",
|
88
|
+
/\\xC3\\xBD/ => "ý",
|
89
|
+
# /\\xC3\\xBE/ => "þ",
|
90
|
+
/\\xC3\\xBF/ => "ÿ",
|
90
91
|
/\\xC3\\x[A-z0-9]{2}/ => "",
|
91
92
|
/\\xE2\\x80\\x90/ => "‐",
|
92
93
|
/\\xE2\\x80\\x91/ => "‑",
|
@@ -103,7 +104,7 @@ module Patinfo2csv
|
|
103
104
|
/\\xE2\\x80\\x9E/ => "„",
|
104
105
|
/\\xE2\\x80\\x9F/ => "‟",
|
105
106
|
/\\xE2\\x80\\xA6/ => "…",
|
106
|
-
/\\xE2\\x80\\xB0/ => "‰",
|
107
|
+
/\\xE2\\x80\\xB0/ => "‰",
|
107
108
|
/\\xE2\\x80\\xB1/ => "‱",
|
108
109
|
/\\xE2\\x80\\xB2/ => "′",
|
109
110
|
/\\xE2\\x80\\xB3/ => "″",
|
data/lib/patinfo2csv/cli.rb
CHANGED
@@ -12,32 +12,21 @@ module Patinfo2csv
|
|
12
12
|
# Param:: output_file patinfo.csv
|
13
13
|
# Param:: lang (de|fr)
|
14
14
|
def run(patinfo_yaml, code_txt, output_file, lang)
|
15
|
-
|
16
|
-
|
17
|
-
|
18
|
-
|
19
|
-
|
15
|
+
start = Time.now
|
16
|
+
loader = Patinfo2csv::Loader.new()
|
17
|
+
converter = Patinfo2csv::Converter.new()
|
18
|
+
converter.patinfos = loader.load_yaml(patinfo_yaml)
|
19
|
+
converter.lang = lang
|
20
|
+
converter.codes = self.parse_txt(code_txt)
|
21
|
+
rows = converter.to_csv
|
20
22
|
self.output_rows(rows, output_file)
|
21
|
-
report(rows,
|
22
|
-
|
23
|
-
def load_yaml(patinfo_yaml)
|
24
|
-
# before yaml loading, replace escaped chars
|
25
|
-
file = ''
|
26
|
-
File.open(patinfo_yaml, "r:ascii:utf-8") { |f|
|
27
|
-
while line = f.gets
|
28
|
-
ESCAPED_STR_CODE_MAP.each do |code, char|
|
29
|
-
line.gsub!(code, char)
|
30
|
-
end
|
31
|
-
file << line
|
32
|
-
end
|
33
|
-
}
|
34
|
-
fh = StringIO.new file
|
35
|
-
YAML.load_documents(fh) # utf-8
|
23
|
+
report(rows, converter.codes, lang)
|
24
|
+
puts "#{Time.now - start} sec."
|
36
25
|
end
|
37
26
|
def parse_txt(code_txt)
|
38
27
|
codes = []
|
39
28
|
File.open(code_txt, "r:utf-8") do |input|
|
40
|
-
while line=input.gets
|
29
|
+
while line = input.gets
|
41
30
|
codes << line.gsub(/[^\d]/, '').chomp
|
42
31
|
end
|
43
32
|
end
|
data/lib/patinfo2csv/converter.rb
CHANGED
@@ -1,6 +1,8 @@
|
|
1
1
|
#!/usr/bin/env ruby
|
2
2
|
# encoding: utf-8
|
3
3
|
|
4
|
+
require 'thread'
|
5
|
+
|
4
6
|
module Patinfo2csv
|
5
7
|
class Converter
|
6
8
|
attr_accessor :patinfos, :codes, :lang
|
@@ -13,33 +15,48 @@ module Patinfo2csv
|
|
13
15
|
:composition
|
14
16
|
]
|
15
17
|
DELIMITER = ';'
|
18
|
+
WORKERS = 2
|
16
19
|
def initialize
|
20
|
+
@mutex = Mutex.new
|
21
|
+
@q = Queue.new
|
17
22
|
@lang = "de"
|
18
23
|
@patinfos = []
|
19
24
|
@codes = []
|
20
25
|
@rows = []
|
21
26
|
end
|
22
27
|
def to_csv
|
23
|
-
|
24
|
-
|
25
|
-
|
26
|
-
|
27
|
-
|
28
|
-
|
29
|
-
|
30
|
-
|
31
|
-
|
32
|
-
|
33
|
-
|
28
|
+
@patinfos.each do |row|
|
29
|
+
@q << row
|
30
|
+
end
|
31
|
+
Array.new(WORKERS) do
|
32
|
+
@q << nil
|
33
|
+
Thread.new do
|
34
|
+
while row = @q.pop
|
35
|
+
target = []
|
36
|
+
next if row['article_codes'].nil?
|
37
|
+
row['article_codes'].each do |article|
|
38
|
+
@mutex.synchronize do
|
39
|
+
if @codes.include?(article[:article_ean13]) # EAN
|
40
|
+
target << article[:article_ean13]
|
41
|
+
@codes.delete(article[:article_ean13])
|
42
|
+
end
|
43
|
+
#elsif @code.include?(code[:article_pcode]) # Pharmacode
|
44
|
+
# target << code[:article_pcode]
|
45
|
+
# @code.delete(code[:article_pcode])
|
46
|
+
#end
|
47
|
+
end
|
48
|
+
end
|
49
|
+
next if target.empty?
|
50
|
+
chapters = extract_chapters(row)
|
51
|
+
next if chapters.empty?
|
52
|
+
target.each do |code|
|
53
|
+
@mutex.synchronize do
|
54
|
+
@rows << [%Q!"#{code}"!, chapters].flatten.join(DELIMITER)
|
55
|
+
end
|
56
|
+
end
|
34
57
|
end
|
35
58
|
end
|
36
|
-
|
37
|
-
chapters = extract_chapters(row)
|
38
|
-
next if chapters.empty?
|
39
|
-
target.each do |code|
|
40
|
-
@rows << [%Q!"#{code}"!, chapters].flatten.join(DELIMITER)
|
41
|
-
end
|
42
|
-
end
|
59
|
+
end.map(&:join)
|
43
60
|
unless @rows.empty? #header
|
44
61
|
@rows.unshift([
|
45
62
|
"EAN",
|
data/lib/version.rb
CHANGED
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: patinfo2csv
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 1.0.4
|
4
|
+
version: 1.0.5
|
5
5
|
prerelease:
|
6
6
|
platform: ruby
|
7
7
|
authors:
|
@@ -9,11 +9,11 @@ authors:
|
|
9
9
|
autorequire:
|
10
10
|
bindir: bin
|
11
11
|
cert_chain: []
|
12
|
-
date: 2012-03-
|
12
|
+
date: 2012-03-26 00:00:00.000000000 Z
|
13
13
|
dependencies:
|
14
14
|
- !ruby/object:Gem::Dependency
|
15
15
|
name: rdoc
|
16
|
-
requirement: &
|
16
|
+
requirement: &21274100 !ruby/object:Gem::Requirement
|
17
17
|
none: false
|
18
18
|
requirements:
|
19
19
|
- - ~>
|
@@ -21,10 +21,10 @@ dependencies:
|
|
21
21
|
version: '3.10'
|
22
22
|
type: :development
|
23
23
|
prerelease: false
|
24
|
-
version_requirements: *
|
24
|
+
version_requirements: *21274100
|
25
25
|
- !ruby/object:Gem::Dependency
|
26
26
|
name: hoe
|
27
|
-
requirement: &
|
27
|
+
requirement: &21273660 !ruby/object:Gem::Requirement
|
28
28
|
none: false
|
29
29
|
requirements:
|
30
30
|
- - ~>
|
@@ -32,7 +32,7 @@ dependencies:
|
|
32
32
|
version: '2.13'
|
33
33
|
type: :development
|
34
34
|
prerelease: false
|
35
|
-
version_requirements: *
|
35
|
+
version_requirements: *21273660
|
36
36
|
description: ! 'Patinfo2csv extracts and converts patinfo.yaml to patinfo.csv;
|
37
37
|
|
38
38
|
only 4 chapters(de, text) in patinfo.yaml are extracted.
|