patinfo2csv 1.0.4 → 1.0.5

This diff shows the changes between two publicly available versions of a package, as released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in their respective public registries.
data/History.txt CHANGED
@@ -1,3 +1,7 @@
1
+ === 1.0.5 / 26.03.2012
2
+
3
+ * Refactor character replacing and chapter choosing
4
+
1
5
  === 1.0.4 / 23.03.2012
2
6
 
3
7
  * Deleted last <br /> tag in chapter
data/lib/patinfo2csv.rb CHANGED
@@ -3,13 +3,14 @@
3
3
 
4
4
  require 'version'
5
5
  require 'patinfo2csv/cli'
6
+ require 'patinfo2csv/loader'
6
7
  require 'patinfo2csv/converter'
7
8
 
8
9
  module Patinfo2csv
9
- # FIXME
10
+ # NOTE
10
11
  # escaped backslash and utf-8 code map
11
12
  # for patinfo.yaml(ASCII)
12
- ESCAPED_STR_CODE_MAP = {
13
+ ESCAPED_CHAR_CODE_MAP = {
13
14
  /\\x24/ => "$",
14
15
  /\\x25/ => "%",
15
16
  /\\x26/ => "&",
@@ -26,67 +27,67 @@ module Patinfo2csv
26
27
  /\\xC2\\xBE/ => "¾",
27
28
  /\\xC2\\x[A-z0-9]{2}/ => "",
28
29
  /\\xC3\\x82/ => "Â",
29
- /\\xC3\\x83/ => "Ã",
30
- /\\xC3\\x84/ => "Ä",
31
- /\\xC3\\x85/ => "Å",
32
- /\\xC3\\x86/ => "Æ",
33
- /\\xC3\\x87/ => "Ç",
34
- /\\xC3\\x88/ => "È",
35
- /\\xC3\\x89/ => "É",
36
- /\\xC3\\x8A/ => "Ê",
37
- /\\xC3\\x8B/ => "Ë",
38
- /\\xC3\\x8C/ => "Ì",
39
- /\\xC3\\x8D/ => "Í",
40
- /\\xC3\\x8E/ => "Î",
41
- /\\xC3\\x8F/ => "Ï",
30
+ # /\\xC3\\x83/ => "Ã",
31
+ /\\xC3\\x84/ => "Ä",
32
+ # /\\xC3\\x85/ => "Å",
33
+ # /\\xC3\\x86/ => "Æ",
34
+ /\\xC3\\x87/ => "Ç",
35
+ /\\xC3\\x88/ => "È",
36
+ /\\xC3\\x89/ => "É",
37
+ /\\xC3\\x8A/ => "Ê",
38
+ /\\xC3\\x8B/ => "Ë",
39
+ /\\xC3\\x8C/ => "Ì",
40
+ /\\xC3\\x8D/ => "Í",
41
+ /\\xC3\\x8E/ => "Î",
42
+ /\\xC3\\x8F/ => "Ï",
42
43
  /\\xC3\\x90/ => "Ð",
43
- /\\xC3\\x91/ => "Ñ",
44
- /\\xC3\\x92/ => "Ò",
45
- /\\xC3\\x93/ => "Ó",
46
- /\\xC3\\x94/ => "Ô",
47
- /\\xC3\\x95/ => "Õ",
48
- /\\xC3\\x96/ => "Ö",
49
- /\\xC3\\x97/ => "×",
50
- /\\xC3\\x98/ => "Ø",
51
- /\\xC3\\x99/ => "Ù",
52
- /\\xC3\\x9A/ => "Ú",
53
- /\\xC3\\x9B/ => "Û",
54
- /\\xC3\\x9C/ => "Ü",
55
- /\\xC3\\x9D/ => "Ý",
56
- /\\xC3\\x9E/ => "Þ",
57
- /\\xC3\\x9F/ => "ß",
58
- /\\xC3\\xA0/ => "à",
59
- /\\xC3\\xA1/ => "á",
60
- /\\xC3\\xA2/ => "â",
61
- /\\xC3\\xA3/ => "ã",
62
- /\\xC3\\xA4/ => "ä",
63
- /\\xC3\\xA5/ => "å",
64
- /\\xC3\\xA6/ => "æ",
65
- /\\xC3\\xA7/ => "ç",
66
- /\\xC3\\xA8/ => "è",
67
- /\\xC3\\xA9/ => "é",
68
- /\\xC3\\xAA/ => "ê",
69
- /\\xC3\\xAB/ => "ë",
70
- /\\xC3\\xAC/ => "ì",
71
- /\\xC3\\xAD/ => "í",
72
- /\\xC3\\xAE/ => "î",
73
- /\\xC3\\xAF/ => "ï",
74
- /\\xC3\\xB0/ => "ð",
75
- /\\xC3\\xB1/ => "ñ",
76
- /\\xC3\\xB2/ => "ò",
77
- /\\xC3\\xB3/ => "ó",
78
- /\\xC3\\xB4/ => "ô",
79
- /\\xC3\\xB5/ => "õ",
80
- /\\xC3\\xB6/ => "ö",
81
- /\\xC3\\xB7/ => "÷",
82
- /\\xC3\\xB8/ => "ø",
83
- /\\xC3\\xB9/ => "ù",
84
- /\\xC3\\xBA/ => "ú",
85
- /\\xC3\\xBB/ => "û",
86
- /\\xC3\\xBC/ => "ü",
87
- /\\xC3\\xBD/ => "ý",
88
- /\\xC3\\xBE/ => "þ",
89
- /\\xC3\\xBF/ => "ÿ",
44
+ # /\\xC3\\x91/ => "Ñ",
45
+ /\\xC3\\x92/ => "Ò",
46
+ /\\xC3\\x93/ => "Ó",
47
+ /\\xC3\\x94/ => "Ô",
48
+ # /\\xC3\\x95/ => "Õ",
49
+ /\\xC3\\x96/ => "Ö",
50
+ /\\xC3\\x97/ => "×",
51
+ # /\\xC3\\x98/ => "Ø",
52
+ /\\xC3\\x99/ => "Ù",
53
+ /\\xC3\\x9A/ => "Ú",
54
+ /\\xC3\\x9B/ => "Û",
55
+ /\\xC3\\x9C/ => "Ü",
56
+ /\\xC3\\x9D/ => "Ý",
57
+ # /\\xC3\\x9E/ => "Þ",
58
+ /\\xC3\\x9F/ => "ß",
59
+ /\\xC3\\xA0/ => "à",
60
+ /\\xC3\\xA1/ => "á",
61
+ /\\xC3\\xA2/ => "â",
62
+ # /\\xC3\\xA3/ => "ã",
63
+ /\\xC3\\xA4/ => "ä",
64
+ # /\\xC3\\xA5/ => "å",
65
+ # /\\xC3\\xA6/ => "æ",
66
+ /\\xC3\\xA7/ => "ç",
67
+ /\\xC3\\xA8/ => "è",
68
+ /\\xC3\\xA9/ => "é",
69
+ /\\xC3\\xAA/ => "ê",
70
+ /\\xC3\\xAB/ => "ë",
71
+ /\\xC3\\xAC/ => "ì",
72
+ /\\xC3\\xAD/ => "í",
73
+ /\\xC3\\xAE/ => "î",
74
+ /\\xC3\\xAF/ => "ï",
75
+ # /\\xC3\\xB0/ => "ð",
76
+ # /\\xC3\\xB1/ => "ñ",
77
+ /\\xC3\\xB2/ => "ò",
78
+ /\\xC3\\xB3/ => "ó",
79
+ /\\xC3\\xB4/ => "ô",
80
+ # /\\xC3\\xB5/ => "õ",
81
+ /\\xC3\\xB6/ => "ö",
82
+ /\\xC3\\xB7/ => "÷",
83
+ # /\\xC3\\xB8/ => "ø",
84
+ /\\xC3\\xB9/ => "ù",
85
+ /\\xC3\\xBA/ => "ú",
86
+ /\\xC3\\xBB/ => "û",
87
+ /\\xC3\\xBC/ => "ü",
88
+ /\\xC3\\xBD/ => "ý",
89
+ # /\\xC3\\xBE/ => "þ",
90
+ /\\xC3\\xBF/ => "ÿ",
90
91
  /\\xC3\\x[A-z0-9]{2}/ => "",
91
92
  /\\xE2\\x80\\x90/ => "‐",
92
93
  /\\xE2\\x80\\x91/ => "‑",
@@ -103,7 +104,7 @@ module Patinfo2csv
103
104
  /\\xE2\\x80\\x9E/ => "„",
104
105
  /\\xE2\\x80\\x9F/ => "‟",
105
106
  /\\xE2\\x80\\xA6/ => "…",
106
- /\\xE2\\x80\\xB0/ => "‰",
107
+ /\\xE2\\x80\\xB0/ => "‰",
107
108
  /\\xE2\\x80\\xB1/ => "‱",
108
109
  /\\xE2\\x80\\xB2/ => "′",
109
110
  /\\xE2\\x80\\xB3/ => "″",
@@ -12,32 +12,21 @@ module Patinfo2csv
12
12
  # Param:: output_file patinfo.csv
13
13
  # Param:: lang (de|fr)
14
14
  def run(patinfo_yaml, code_txt, output_file, lang)
15
- cv = Patinfo2csv::Converter.new()
16
- cv.lang = lang
17
- cv.patinfos = self.load_yaml(patinfo_yaml)
18
- cv.codes = self.parse_txt(code_txt)
19
- rows = cv.to_csv
15
+ start = Time.now
16
+ loader = Patinfo2csv::Loader.new()
17
+ converter = Patinfo2csv::Converter.new()
18
+ converter.patinfos = loader.load_yaml(patinfo_yaml)
19
+ converter.lang = lang
20
+ converter.codes = self.parse_txt(code_txt)
21
+ rows = converter.to_csv
20
22
  self.output_rows(rows, output_file)
21
- report(rows, cv.codes, lang)
22
- end
23
- def load_yaml(patinfo_yaml)
24
- # before yaml loading, replace escaped chars
25
- file = ''
26
- File.open(patinfo_yaml, "r:ascii:utf-8") { |f|
27
- while line = f.gets
28
- ESCAPED_STR_CODE_MAP.each do |code, char|
29
- line.gsub!(code, char)
30
- end
31
- file << line
32
- end
33
- }
34
- fh = StringIO.new file
35
- YAML.load_documents(fh) # utf-8
23
+ report(rows, converter.codes, lang)
24
+ puts "#{Time.now - start} sec."
36
25
  end
37
26
  def parse_txt(code_txt)
38
27
  codes = []
39
28
  File.open(code_txt, "r:utf-8") do |input|
40
- while line=input.gets
29
+ while line = input.gets
41
30
  codes << line.gsub(/[^\d]/, '').chomp
42
31
  end
43
32
  end
@@ -1,6 +1,8 @@
1
1
  #!/usr/bin/env ruby
2
2
  # encoding: utf-8
3
3
 
4
+ require 'thread'
5
+
4
6
  module Patinfo2csv
5
7
  class Converter
6
8
  attr_accessor :patinfos, :codes, :lang
@@ -13,33 +15,48 @@ module Patinfo2csv
13
15
  :composition
14
16
  ]
15
17
  DELIMITER = ';'
18
+ WORKERS = 2
16
19
  def initialize
20
+ @mutex = Mutex.new
21
+ @q = Queue.new
17
22
  @lang = "de"
18
23
  @patinfos = []
19
24
  @codes = []
20
25
  @rows = []
21
26
  end
22
27
  def to_csv
23
- data = []
24
- @patinfos.each_with_index do |row, index|
25
- target = []
26
- next if row['article_codes'].nil?
27
- row['article_codes'].each do |article|
28
- if @codes.include?(article[:article_ean13]) # EAN
29
- target << article[:article_ean13]
30
- @codes.delete(article[:article_ean13])
31
- #elsif @code.include?(code[:article_pcode]) # Pharmacode
32
- # target << code[:article_pcode]
33
- # @code.delete(code[:article_pcode])
28
+ @patinfos.each do |row|
29
+ @q << row
30
+ end
31
+ Array.new(WORKERS) do
32
+ @q << nil
33
+ Thread.new do
34
+ while row = @q.pop
35
+ target = []
36
+ next if row['article_codes'].nil?
37
+ row['article_codes'].each do |article|
38
+ @mutex.synchronize do
39
+ if @codes.include?(article[:article_ean13]) # EAN
40
+ target << article[:article_ean13]
41
+ @codes.delete(article[:article_ean13])
42
+ end
43
+ #elsif @code.include?(code[:article_pcode]) # Pharmacode
44
+ # target << code[:article_pcode]
45
+ # @code.delete(code[:article_pcode])
46
+ #end
47
+ end
48
+ end
49
+ next if target.empty?
50
+ chapters = extract_chapters(row)
51
+ next if chapters.empty?
52
+ target.each do |code|
53
+ @mutex.synchronize do
54
+ @rows << [%Q!"#{code}"!, chapters].flatten.join(DELIMITER)
55
+ end
56
+ end
34
57
  end
35
58
  end
36
- next if target.empty?
37
- chapters = extract_chapters(row)
38
- next if chapters.empty?
39
- target.each do |code|
40
- @rows << [%Q!"#{code}"!, chapters].flatten.join(DELIMITER)
41
- end
42
- end
59
+ end.map(&:join)
43
60
  unless @rows.empty? #header
44
61
  @rows.unshift([
45
62
  "EAN",
data/lib/version.rb CHANGED
@@ -1,3 +1,3 @@
1
1
  module Patinfo2csv
2
- VERSION = "1.0.4"
2
+ VERSION = "1.0.5"
3
3
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: patinfo2csv
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.0.4
4
+ version: 1.0.5
5
5
  prerelease:
6
6
  platform: ruby
7
7
  authors:
@@ -9,11 +9,11 @@ authors:
9
9
  autorequire:
10
10
  bindir: bin
11
11
  cert_chain: []
12
- date: 2012-03-23 00:00:00.000000000 Z
12
+ date: 2012-03-26 00:00:00.000000000 Z
13
13
  dependencies:
14
14
  - !ruby/object:Gem::Dependency
15
15
  name: rdoc
16
- requirement: &7785560 !ruby/object:Gem::Requirement
16
+ requirement: &21274100 !ruby/object:Gem::Requirement
17
17
  none: false
18
18
  requirements:
19
19
  - - ~>
@@ -21,10 +21,10 @@ dependencies:
21
21
  version: '3.10'
22
22
  type: :development
23
23
  prerelease: false
24
- version_requirements: *7785560
24
+ version_requirements: *21274100
25
25
  - !ruby/object:Gem::Dependency
26
26
  name: hoe
27
- requirement: &7785140 !ruby/object:Gem::Requirement
27
+ requirement: &21273660 !ruby/object:Gem::Requirement
28
28
  none: false
29
29
  requirements:
30
30
  - - ~>
@@ -32,7 +32,7 @@ dependencies:
32
32
  version: '2.13'
33
33
  type: :development
34
34
  prerelease: false
35
- version_requirements: *7785140
35
+ version_requirements: *21273660
36
36
  description: ! 'Patinfo2csv extracts and converts patinfo.yaml to patinfo.csv;
37
37
 
38
38
  only 4 chapters(de, text) in patinfo.yaml are extracted.