patinfo2csv 1.0.4 → 1.0.5

Sign up to get free protection for your applications and to get access to all the features.
data/History.txt CHANGED
@@ -1,3 +1,7 @@
1
+ === 1.0.5 / 26.03.2012
2
+
3
+ * Refactor character replacing and chapter choosing
4
+
1
5
  === 1.0.4 / 23.03.2012
2
6
 
3
7
  * Deleted last <br /> tag in chapter
data/lib/patinfo2csv.rb CHANGED
@@ -3,13 +3,14 @@
3
3
 
4
4
  require 'version'
5
5
  require 'patinfo2csv/cli'
6
+ require 'patinfo2csv/loader'
6
7
  require 'patinfo2csv/converter'
7
8
 
8
9
  module Patinfo2csv
9
- # FIXME
10
+ # NOTE
10
11
  # escaped backslash and utf-8 code map
11
12
  # for patinfo.yaml(ASCII)
12
- ESCAPED_STR_CODE_MAP = {
13
+ ESCAPED_CHAR_CODE_MAP = {
13
14
  /\\x24/ => "$",
14
15
  /\\x25/ => "%",
15
16
  /\\x26/ => "&",
@@ -26,67 +27,67 @@ module Patinfo2csv
26
27
  /\\xC2\\xBE/ => "¾",
27
28
  /\\xC2\\x[A-z0-9]{2}/ => "",
28
29
  /\\xC3\\x82/ => "Â",
29
- /\\xC3\\x83/ => "Ã",
30
- /\\xC3\\x84/ => "Ä",
31
- /\\xC3\\x85/ => "Å",
32
- /\\xC3\\x86/ => "Æ",
33
- /\\xC3\\x87/ => "Ç",
34
- /\\xC3\\x88/ => "È",
35
- /\\xC3\\x89/ => "É",
36
- /\\xC3\\x8A/ => "Ê",
37
- /\\xC3\\x8B/ => "Ë",
38
- /\\xC3\\x8C/ => "Ì",
39
- /\\xC3\\x8D/ => "Í",
40
- /\\xC3\\x8E/ => "Î",
41
- /\\xC3\\x8F/ => "Ï",
30
+ # /\\xC3\\x83/ => "Ã",
31
+ /\\xC3\\x84/ => "Ä",
32
+ # /\\xC3\\x85/ => "Å",
33
+ # /\\xC3\\x86/ => "Æ",
34
+ /\\xC3\\x87/ => "Ç",
35
+ /\\xC3\\x88/ => "È",
36
+ /\\xC3\\x89/ => "É",
37
+ /\\xC3\\x8A/ => "Ê",
38
+ /\\xC3\\x8B/ => "Ë",
39
+ /\\xC3\\x8C/ => "Ì",
40
+ /\\xC3\\x8D/ => "Í",
41
+ /\\xC3\\x8E/ => "Î",
42
+ /\\xC3\\x8F/ => "Ï",
42
43
  /\\xC3\\x90/ => "Ð",
43
- /\\xC3\\x91/ => "Ñ",
44
- /\\xC3\\x92/ => "Ò",
45
- /\\xC3\\x93/ => "Ó",
46
- /\\xC3\\x94/ => "Ô",
47
- /\\xC3\\x95/ => "Õ",
48
- /\\xC3\\x96/ => "Ö",
49
- /\\xC3\\x97/ => "×",
50
- /\\xC3\\x98/ => "Ø",
51
- /\\xC3\\x99/ => "Ù",
52
- /\\xC3\\x9A/ => "Ú",
53
- /\\xC3\\x9B/ => "Û",
54
- /\\xC3\\x9C/ => "Ü",
55
- /\\xC3\\x9D/ => "Ý",
56
- /\\xC3\\x9E/ => "Þ",
57
- /\\xC3\\x9F/ => "ß",
58
- /\\xC3\\xA0/ => "à",
59
- /\\xC3\\xA1/ => "á",
60
- /\\xC3\\xA2/ => "â",
61
- /\\xC3\\xA3/ => "ã",
62
- /\\xC3\\xA4/ => "ä",
63
- /\\xC3\\xA5/ => "å",
64
- /\\xC3\\xA6/ => "æ",
65
- /\\xC3\\xA7/ => "ç",
66
- /\\xC3\\xA8/ => "è",
67
- /\\xC3\\xA9/ => "é",
68
- /\\xC3\\xAA/ => "ê",
69
- /\\xC3\\xAB/ => "ë",
70
- /\\xC3\\xAC/ => "ì",
71
- /\\xC3\\xAD/ => "í",
72
- /\\xC3\\xAE/ => "î",
73
- /\\xC3\\xAF/ => "ï",
74
- /\\xC3\\xB0/ => "ð",
75
- /\\xC3\\xB1/ => "ñ",
76
- /\\xC3\\xB2/ => "ò",
77
- /\\xC3\\xB3/ => "ó",
78
- /\\xC3\\xB4/ => "ô",
79
- /\\xC3\\xB5/ => "õ",
80
- /\\xC3\\xB6/ => "ö",
81
- /\\xC3\\xB7/ => "÷",
82
- /\\xC3\\xB8/ => "ø",
83
- /\\xC3\\xB9/ => "ù",
84
- /\\xC3\\xBA/ => "ú",
85
- /\\xC3\\xBB/ => "û",
86
- /\\xC3\\xBC/ => "ü",
87
- /\\xC3\\xBD/ => "ý",
88
- /\\xC3\\xBE/ => "þ",
89
- /\\xC3\\xBF/ => "ÿ",
44
+ # /\\xC3\\x91/ => "Ñ",
45
+ /\\xC3\\x92/ => "Ò",
46
+ /\\xC3\\x93/ => "Ó",
47
+ /\\xC3\\x94/ => "Ô",
48
+ # /\\xC3\\x95/ => "Õ",
49
+ /\\xC3\\x96/ => "Ö",
50
+ /\\xC3\\x97/ => "×",
51
+ # /\\xC3\\x98/ => "Ø",
52
+ /\\xC3\\x99/ => "Ù",
53
+ /\\xC3\\x9A/ => "Ú",
54
+ /\\xC3\\x9B/ => "Û",
55
+ /\\xC3\\x9C/ => "Ü",
56
+ /\\xC3\\x9D/ => "Ý",
57
+ # /\\xC3\\x9E/ => "Þ",
58
+ /\\xC3\\x9F/ => "ß",
59
+ /\\xC3\\xA0/ => "à",
60
+ /\\xC3\\xA1/ => "á",
61
+ /\\xC3\\xA2/ => "â",
62
+ # /\\xC3\\xA3/ => "ã",
63
+ /\\xC3\\xA4/ => "ä",
64
+ # /\\xC3\\xA5/ => "å",
65
+ # /\\xC3\\xA6/ => "æ",
66
+ /\\xC3\\xA7/ => "ç",
67
+ /\\xC3\\xA8/ => "è",
68
+ /\\xC3\\xA9/ => "é",
69
+ /\\xC3\\xAA/ => "ê",
70
+ /\\xC3\\xAB/ => "ë",
71
+ /\\xC3\\xAC/ => "ì",
72
+ /\\xC3\\xAD/ => "í",
73
+ /\\xC3\\xAE/ => "î",
74
+ /\\xC3\\xAF/ => "ï",
75
+ # /\\xC3\\xB0/ => "ð",
76
+ # /\\xC3\\xB1/ => "ñ",
77
+ /\\xC3\\xB2/ => "ò",
78
+ /\\xC3\\xB3/ => "ó",
79
+ /\\xC3\\xB4/ => "ô",
80
+ # /\\xC3\\xB5/ => "õ",
81
+ /\\xC3\\xB6/ => "ö",
82
+ /\\xC3\\xB7/ => "÷",
83
+ # /\\xC3\\xB8/ => "ø",
84
+ /\\xC3\\xB9/ => "ù",
85
+ /\\xC3\\xBA/ => "ú",
86
+ /\\xC3\\xBB/ => "û",
87
+ /\\xC3\\xBC/ => "ü",
88
+ /\\xC3\\xBD/ => "ý",
89
+ # /\\xC3\\xBE/ => "þ",
90
+ /\\xC3\\xBF/ => "ÿ",
90
91
  /\\xC3\\x[A-z0-9]{2}/ => "",
91
92
  /\\xE2\\x80\\x90/ => "‐",
92
93
  /\\xE2\\x80\\x91/ => "‑",
@@ -103,7 +104,7 @@ module Patinfo2csv
103
104
  /\\xE2\\x80\\x9E/ => "„",
104
105
  /\\xE2\\x80\\x9F/ => "‟",
105
106
  /\\xE2\\x80\\xA6/ => "…",
106
- /\\xE2\\x80\\xB0/ => "‰",
107
+ /\\xE2\\x80\\xB0/ => "‰",
107
108
  /\\xE2\\x80\\xB1/ => "‱",
108
109
  /\\xE2\\x80\\xB2/ => "′",
109
110
  /\\xE2\\x80\\xB3/ => "″",
@@ -12,32 +12,21 @@ module Patinfo2csv
12
12
  # Param:: output_file patinfo.csv
13
13
  # Param:: lang (de|fr)
14
14
  def run(patinfo_yaml, code_txt, output_file, lang)
15
- cv = Patinfo2csv::Converter.new()
16
- cv.lang = lang
17
- cv.patinfos = self.load_yaml(patinfo_yaml)
18
- cv.codes = self.parse_txt(code_txt)
19
- rows = cv.to_csv
15
+ start = Time.now
16
+ loader = Patinfo2csv::Loader.new()
17
+ converter = Patinfo2csv::Converter.new()
18
+ converter.patinfos = loader.load_yaml(patinfo_yaml)
19
+ converter.lang = lang
20
+ converter.codes = self.parse_txt(code_txt)
21
+ rows = converter.to_csv
20
22
  self.output_rows(rows, output_file)
21
- report(rows, cv.codes, lang)
22
- end
23
- def load_yaml(patinfo_yaml)
24
- # before yaml loading, replace escaped chars
25
- file = ''
26
- File.open(patinfo_yaml, "r:ascii:utf-8") { |f|
27
- while line = f.gets
28
- ESCAPED_STR_CODE_MAP.each do |code, char|
29
- line.gsub!(code, char)
30
- end
31
- file << line
32
- end
33
- }
34
- fh = StringIO.new file
35
- YAML.load_documents(fh) # utf-8
23
+ report(rows, converter.codes, lang)
24
+ puts "#{Time.now - start} sec."
36
25
  end
37
26
  def parse_txt(code_txt)
38
27
  codes = []
39
28
  File.open(code_txt, "r:utf-8") do |input|
40
- while line=input.gets
29
+ while line = input.gets
41
30
  codes << line.gsub(/[^\d]/, '').chomp
42
31
  end
43
32
  end
@@ -1,6 +1,8 @@
1
1
  #!/usr/bin/env ruby
2
2
  # encoding: utf-8
3
3
 
4
+ require 'thread'
5
+
4
6
  module Patinfo2csv
5
7
  class Converter
6
8
  attr_accessor :patinfos, :codes, :lang
@@ -13,33 +15,48 @@ module Patinfo2csv
13
15
  :composition
14
16
  ]
15
17
  DELIMITER = ';'
18
+ WORKERS = 2
16
19
  def initialize
20
+ @mutex = Mutex.new
21
+ @q = Queue.new
17
22
  @lang = "de"
18
23
  @patinfos = []
19
24
  @codes = []
20
25
  @rows = []
21
26
  end
22
27
  def to_csv
23
- data = []
24
- @patinfos.each_with_index do |row, index|
25
- target = []
26
- next if row['article_codes'].nil?
27
- row['article_codes'].each do |article|
28
- if @codes.include?(article[:article_ean13]) # EAN
29
- target << article[:article_ean13]
30
- @codes.delete(article[:article_ean13])
31
- #elsif @code.include?(code[:article_pcode]) # Pharmacode
32
- # target << code[:article_pcode]
33
- # @code.delete(code[:article_pcode])
28
+ @patinfos.each do |row|
29
+ @q << row
30
+ end
31
+ Array.new(WORKERS) do
32
+ @q << nil
33
+ Thread.new do
34
+ while row = @q.pop
35
+ target = []
36
+ next if row['article_codes'].nil?
37
+ row['article_codes'].each do |article|
38
+ @mutex.synchronize do
39
+ if @codes.include?(article[:article_ean13]) # EAN
40
+ target << article[:article_ean13]
41
+ @codes.delete(article[:article_ean13])
42
+ end
43
+ #elsif @code.include?(code[:article_pcode]) # Pharmacode
44
+ # target << code[:article_pcode]
45
+ # @code.delete(code[:article_pcode])
46
+ #end
47
+ end
48
+ end
49
+ next if target.empty?
50
+ chapters = extract_chapters(row)
51
+ next if chapters.empty?
52
+ target.each do |code|
53
+ @mutex.synchronize do
54
+ @rows << [%Q!"#{code}"!, chapters].flatten.join(DELIMITER)
55
+ end
56
+ end
34
57
  end
35
58
  end
36
- next if target.empty?
37
- chapters = extract_chapters(row)
38
- next if chapters.empty?
39
- target.each do |code|
40
- @rows << [%Q!"#{code}"!, chapters].flatten.join(DELIMITER)
41
- end
42
- end
59
+ end.map(&:join)
43
60
  unless @rows.empty? #header
44
61
  @rows.unshift([
45
62
  "EAN",
data/lib/version.rb CHANGED
@@ -1,3 +1,3 @@
1
1
  module Patinfo2csv
2
- VERSION = "1.0.4"
2
+ VERSION = "1.0.5"
3
3
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: patinfo2csv
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.0.4
4
+ version: 1.0.5
5
5
  prerelease:
6
6
  platform: ruby
7
7
  authors:
@@ -9,11 +9,11 @@ authors:
9
9
  autorequire:
10
10
  bindir: bin
11
11
  cert_chain: []
12
- date: 2012-03-23 00:00:00.000000000 Z
12
+ date: 2012-03-26 00:00:00.000000000 Z
13
13
  dependencies:
14
14
  - !ruby/object:Gem::Dependency
15
15
  name: rdoc
16
- requirement: &7785560 !ruby/object:Gem::Requirement
16
+ requirement: &21274100 !ruby/object:Gem::Requirement
17
17
  none: false
18
18
  requirements:
19
19
  - - ~>
@@ -21,10 +21,10 @@ dependencies:
21
21
  version: '3.10'
22
22
  type: :development
23
23
  prerelease: false
24
- version_requirements: *7785560
24
+ version_requirements: *21274100
25
25
  - !ruby/object:Gem::Dependency
26
26
  name: hoe
27
- requirement: &7785140 !ruby/object:Gem::Requirement
27
+ requirement: &21273660 !ruby/object:Gem::Requirement
28
28
  none: false
29
29
  requirements:
30
30
  - - ~>
@@ -32,7 +32,7 @@ dependencies:
32
32
  version: '2.13'
33
33
  type: :development
34
34
  prerelease: false
35
- version_requirements: *7785140
35
+ version_requirements: *21273660
36
36
  description: ! 'Patinfo2csv extracts and converts patinfo.yaml to patinfo.csv;
37
37
 
38
38
  only 4 chapters(de, text) in patinfo.yaml are extracted.