patinfo2csv 1.0.4 → 1.0.5
This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package versions as they appear in their respective public registries.
- data/History.txt +4 -0
- data/lib/patinfo2csv.rb +64 -63
- data/lib/patinfo2csv/cli.rb +10 -21
- data/lib/patinfo2csv/converter.rb +35 -18
- data/lib/version.rb +1 -1
- metadata +6 -6
    
        data/History.txt
    CHANGED
    
    
    
        data/lib/patinfo2csv.rb
    CHANGED
    
    | @@ -3,13 +3,14 @@ | |
| 3 3 |  | 
| 4 4 | 
             
            require 'version'
         | 
| 5 5 | 
             
            require 'patinfo2csv/cli'
         | 
| 6 | 
            +
            require 'patinfo2csv/loader'
         | 
| 6 7 | 
             
            require 'patinfo2csv/converter'
         | 
| 7 8 |  | 
| 8 9 | 
             
            module Patinfo2csv
         | 
| 9 | 
            -
              #  | 
| 10 | 
            +
              # NOTE
         | 
| 10 11 | 
             
              # escaped backslash and utf-8 code map
         | 
| 11 12 | 
             
              # for patinfo.yaml(ASCII)
         | 
| 12 | 
            -
               | 
| 13 | 
            +
              ESCAPED_CHAR_CODE_MAP = {
         | 
| 13 14 | 
             
                /\\x24/      => "$",
         | 
| 14 15 | 
             
                /\\x25/      => "%",
         | 
| 15 16 | 
             
                /\\x26/      => "&",
         | 
| @@ -26,67 +27,67 @@ module Patinfo2csv | |
| 26 27 | 
             
                /\\xC2\\xBE/ => "¾",
         | 
| 27 28 | 
             
                /\\xC2\\x[A-z0-9]{2}/ => "",
         | 
| 28 29 | 
             
                /\\xC3\\x82/ => "Â",
         | 
| 29 | 
            -
             | 
| 30 | 
            -
                /\\xC3\\x84/ => "Ä", | 
| 31 | 
            -
             | 
| 32 | 
            -
             | 
| 33 | 
            -
                /\\xC3\\x87/ => "Ç", | 
| 34 | 
            -
                /\\xC3\\x88/ => "È", | 
| 35 | 
            -
                /\\xC3\\x89/ => "É", | 
| 36 | 
            -
                /\\xC3\\x8A/ => "Ê", | 
| 37 | 
            -
                /\\xC3\\x8B/ => "Ë", | 
| 38 | 
            -
                /\\xC3\\x8C/ => "Ì", | 
| 39 | 
            -
                /\\xC3\\x8D/ => "Í", | 
| 40 | 
            -
                /\\xC3\\x8E/ => "Î", | 
| 41 | 
            -
                /\\xC3\\x8F/ => "Ï", | 
| 30 | 
            +
            #     /\\xC3\\x83/ => "Ã",
         | 
| 31 | 
            +
                /\\xC3\\x84/ => "Ä",
         | 
| 32 | 
            +
            #     /\\xC3\\x85/ => "Å",
         | 
| 33 | 
            +
            #     /\\xC3\\x86/ => "Æ",
         | 
| 34 | 
            +
                /\\xC3\\x87/ => "Ç",
         | 
| 35 | 
            +
                /\\xC3\\x88/ => "È",
         | 
| 36 | 
            +
                /\\xC3\\x89/ => "É",
         | 
| 37 | 
            +
                /\\xC3\\x8A/ => "Ê",
         | 
| 38 | 
            +
                /\\xC3\\x8B/ => "Ë",
         | 
| 39 | 
            +
                /\\xC3\\x8C/ => "Ì",
         | 
| 40 | 
            +
                /\\xC3\\x8D/ => "Í",
         | 
| 41 | 
            +
                /\\xC3\\x8E/ => "Î",
         | 
| 42 | 
            +
                /\\xC3\\x8F/ => "Ï",
         | 
| 42 43 | 
             
                /\\xC3\\x90/ => "Ð",
         | 
| 43 | 
            -
                /\\xC3\\x91/ => "Ñ", | 
| 44 | 
            -
                /\\xC3\\x92/ => "Ò", | 
| 45 | 
            -
                /\\xC3\\x93/ => "Ó", | 
| 46 | 
            -
                /\\xC3\\x94/ => "Ô", | 
| 47 | 
            -
                /\\xC3\\x95/ => "Õ", | 
| 48 | 
            -
                /\\xC3\\x96/ => "Ö", | 
| 49 | 
            -
                /\\xC3\\x97/ => "×", | 
| 50 | 
            -
                /\\xC3\\x98/ => "Ø", | 
| 51 | 
            -
                /\\xC3\\x99/ => "Ù", | 
| 52 | 
            -
                /\\xC3\\x9A/ => "Ú", | 
| 53 | 
            -
                /\\xC3\\x9B/ => "Û", | 
| 54 | 
            -
                /\\xC3\\x9C/ => "Ü", | 
| 55 | 
            -
                /\\xC3\\x9D/ => "Ý", | 
| 56 | 
            -
                /\\xC3\\x9E/ => "Þ", | 
| 57 | 
            -
                /\\xC3\\x9F/ => "ß", | 
| 58 | 
            -
                /\\xC3\\xA0/ => "à", | 
| 59 | 
            -
                /\\xC3\\xA1/ => "á", | 
| 60 | 
            -
                /\\xC3\\xA2/ => "â", | 
| 61 | 
            -
                /\\xC3\\xA3/ => "ã", | 
| 62 | 
            -
                /\\xC3\\xA4/ => "ä", | 
| 63 | 
            -
                /\\xC3\\xA5/ => "å", | 
| 64 | 
            -
                /\\xC3\\xA6/ => "æ", | 
| 65 | 
            -
                /\\xC3\\xA7/ => "ç", | 
| 66 | 
            -
                /\\xC3\\xA8/ => "è", | 
| 67 | 
            -
                /\\xC3\\xA9/ => "é", | 
| 68 | 
            -
                /\\xC3\\xAA/ => "ê", | 
| 69 | 
            -
                /\\xC3\\xAB/ => "ë", | 
| 70 | 
            -
                /\\xC3\\xAC/ => "ì", | 
| 71 | 
            -
                /\\xC3\\xAD/ => "í", | 
| 72 | 
            -
                /\\xC3\\xAE/ => "î", | 
| 73 | 
            -
                /\\xC3\\xAF/ => "ï", | 
| 74 | 
            -
                /\\xC3\\xB0/ => "ð", | 
| 75 | 
            -
                /\\xC3\\xB1/ => "ñ", 
         | 
| 76 | 
            -
                /\\xC3\\xB2/ => "ò", | 
| 77 | 
            -
                /\\xC3\\xB3/ => "ó", | 
| 78 | 
            -
                /\\xC3\\xB4/ => "ô", | 
| 79 | 
            -
                /\\xC3\\xB5/ => "õ", | 
| 80 | 
            -
                /\\xC3\\xB6/ => "ö", | 
| 81 | 
            -
                /\\xC3\\xB7/ => "÷", | 
| 82 | 
            -
                /\\xC3\\xB8/ => "ø", | 
| 83 | 
            -
                /\\xC3\\xB9/ => "ù", | 
| 84 | 
            -
                /\\xC3\\xBA/ => "ú", | 
| 85 | 
            -
                /\\xC3\\xBB/ => "û", | 
| 86 | 
            -
                /\\xC3\\xBC/ => "ü", | 
| 87 | 
            -
                /\\xC3\\xBD/ => "ý", | 
| 88 | 
            -
                /\\xC3\\xBE/ => "þ", | 
| 89 | 
            -
                /\\xC3\\xBF/ => "ÿ", | 
| 44 | 
            +
            #    /\\xC3\\x91/ => "Ñ",
         | 
| 45 | 
            +
                /\\xC3\\x92/ => "Ò",
         | 
| 46 | 
            +
                /\\xC3\\x93/ => "Ó",
         | 
| 47 | 
            +
                /\\xC3\\x94/ => "Ô",
         | 
| 48 | 
            +
            #    /\\xC3\\x95/ => "Õ",
         | 
| 49 | 
            +
                /\\xC3\\x96/ => "Ö",
         | 
| 50 | 
            +
                /\\xC3\\x97/ => "×",
         | 
| 51 | 
            +
            #    /\\xC3\\x98/ => "Ø",
         | 
| 52 | 
            +
                /\\xC3\\x99/ => "Ù",
         | 
| 53 | 
            +
                /\\xC3\\x9A/ => "Ú",
         | 
| 54 | 
            +
                /\\xC3\\x9B/ => "Û",
         | 
| 55 | 
            +
                /\\xC3\\x9C/ => "Ü",
         | 
| 56 | 
            +
                /\\xC3\\x9D/ => "Ý",
         | 
| 57 | 
            +
            #    /\\xC3\\x9E/ => "Þ",
         | 
| 58 | 
            +
                /\\xC3\\x9F/ => "ß",
         | 
| 59 | 
            +
                /\\xC3\\xA0/ => "à",
         | 
| 60 | 
            +
                /\\xC3\\xA1/ => "á",
         | 
| 61 | 
            +
                /\\xC3\\xA2/ => "â",
         | 
| 62 | 
            +
            #    /\\xC3\\xA3/ => "ã",
         | 
| 63 | 
            +
                /\\xC3\\xA4/ => "ä",
         | 
| 64 | 
            +
            #    /\\xC3\\xA5/ => "å",
         | 
| 65 | 
            +
            #    /\\xC3\\xA6/ => "æ",
         | 
| 66 | 
            +
                /\\xC3\\xA7/ => "ç",
         | 
| 67 | 
            +
                /\\xC3\\xA8/ => "è",
         | 
| 68 | 
            +
                /\\xC3\\xA9/ => "é",
         | 
| 69 | 
            +
                /\\xC3\\xAA/ => "ê",
         | 
| 70 | 
            +
                /\\xC3\\xAB/ => "ë",
         | 
| 71 | 
            +
                /\\xC3\\xAC/ => "ì",
         | 
| 72 | 
            +
                /\\xC3\\xAD/ => "í",
         | 
| 73 | 
            +
                /\\xC3\\xAE/ => "î",
         | 
| 74 | 
            +
                /\\xC3\\xAF/ => "ï",
         | 
| 75 | 
            +
            #    /\\xC3\\xB0/ => "ð",
         | 
| 76 | 
            +
            #    /\\xC3\\xB1/ => "ñ", 
         | 
| 77 | 
            +
                /\\xC3\\xB2/ => "ò",
         | 
| 78 | 
            +
                /\\xC3\\xB3/ => "ó",
         | 
| 79 | 
            +
                /\\xC3\\xB4/ => "ô",
         | 
| 80 | 
            +
            #    /\\xC3\\xB5/ => "õ",
         | 
| 81 | 
            +
                /\\xC3\\xB6/ => "ö",
         | 
| 82 | 
            +
                /\\xC3\\xB7/ => "÷",
         | 
| 83 | 
            +
            #    /\\xC3\\xB8/ => "ø",
         | 
| 84 | 
            +
                /\\xC3\\xB9/ => "ù",
         | 
| 85 | 
            +
                /\\xC3\\xBA/ => "ú",
         | 
| 86 | 
            +
                /\\xC3\\xBB/ => "û",
         | 
| 87 | 
            +
                /\\xC3\\xBC/ => "ü",
         | 
| 88 | 
            +
                /\\xC3\\xBD/ => "ý",
         | 
| 89 | 
            +
            #    /\\xC3\\xBE/ => "þ",
         | 
| 90 | 
            +
                /\\xC3\\xBF/ => "ÿ",
         | 
| 90 91 | 
             
                /\\xC3\\x[A-z0-9]{2}/ => "",
         | 
| 91 92 | 
             
                /\\xE2\\x80\\x90/ => "‐",
         | 
| 92 93 | 
             
                /\\xE2\\x80\\x91/ => "‑",
         | 
| @@ -103,7 +104,7 @@ module Patinfo2csv | |
| 103 104 | 
             
                /\\xE2\\x80\\x9E/ => "„",
         | 
| 104 105 | 
             
                /\\xE2\\x80\\x9F/ => "‟",
         | 
| 105 106 | 
             
                /\\xE2\\x80\\xA6/ => "…",
         | 
| 106 | 
            -
                /\\xE2\\x80\\xB0/ => "‰", | 
| 107 | 
            +
                /\\xE2\\x80\\xB0/ => "‰",
         | 
| 107 108 | 
             
                /\\xE2\\x80\\xB1/ => "‱",
         | 
| 108 109 | 
             
                /\\xE2\\x80\\xB2/ => "′",
         | 
| 109 110 | 
             
                /\\xE2\\x80\\xB3/ => "″",
         | 
    
        data/lib/patinfo2csv/cli.rb
    CHANGED
    
    | @@ -12,32 +12,21 @@ module Patinfo2csv | |
| 12 12 | 
             
                  # Param:: output_file  patinfo.csv
         | 
| 13 13 | 
             
                  # Param:: lang         (de|fr)
         | 
| 14 14 | 
             
                  def run(patinfo_yaml, code_txt, output_file, lang)
         | 
| 15 | 
            -
                     | 
| 16 | 
            -
                     | 
| 17 | 
            -
                     | 
| 18 | 
            -
                     | 
| 19 | 
            -
                     | 
| 15 | 
            +
                    start = Time.now
         | 
| 16 | 
            +
                    loader    = Patinfo2csv::Loader.new()
         | 
| 17 | 
            +
                    converter = Patinfo2csv::Converter.new()
         | 
| 18 | 
            +
                    converter.patinfos = loader.load_yaml(patinfo_yaml)
         | 
| 19 | 
            +
                    converter.lang     = lang
         | 
| 20 | 
            +
                    converter.codes    = self.parse_txt(code_txt)
         | 
| 21 | 
            +
                    rows = converter.to_csv
         | 
| 20 22 | 
             
                    self.output_rows(rows, output_file)
         | 
| 21 | 
            -
                    report(rows,  | 
| 22 | 
            -
             | 
| 23 | 
            -
                  def load_yaml(patinfo_yaml)
         | 
| 24 | 
            -
                    # before yaml loading, replace escaped chars
         | 
| 25 | 
            -
                    file = ''
         | 
| 26 | 
            -
                    File.open(patinfo_yaml, "r:ascii:utf-8") { |f|
         | 
| 27 | 
            -
                      while line = f.gets
         | 
| 28 | 
            -
                        ESCAPED_STR_CODE_MAP.each do |code, char|
         | 
| 29 | 
            -
                          line.gsub!(code, char)
         | 
| 30 | 
            -
                        end
         | 
| 31 | 
            -
                        file << line
         | 
| 32 | 
            -
                      end
         | 
| 33 | 
            -
                    }
         | 
| 34 | 
            -
                    fh = StringIO.new file
         | 
| 35 | 
            -
                    YAML.load_documents(fh) # utf-8
         | 
| 23 | 
            +
                    report(rows, converter.codes, lang)
         | 
| 24 | 
            +
                    puts "#{Time.now - start} sec."
         | 
| 36 25 | 
             
                  end
         | 
| 37 26 | 
             
                  def parse_txt(code_txt)
         | 
| 38 27 | 
             
                    codes = []
         | 
| 39 28 | 
             
                    File.open(code_txt, "r:utf-8") do |input|
         | 
| 40 | 
            -
                      while line=input.gets
         | 
| 29 | 
            +
                      while line = input.gets
         | 
| 41 30 | 
             
                        codes << line.gsub(/[^\d]/, '').chomp
         | 
| 42 31 | 
             
                      end
         | 
| 43 32 | 
             
                    end
         | 
    
        data/lib/patinfo2csv/converter.rb
    CHANGED
    
    | @@ -1,6 +1,8 @@ | |
| 1 1 | 
             
            #!/usr/bin/env ruby
         | 
| 2 2 | 
             
            # encoding: utf-8
         | 
| 3 3 |  | 
| 4 | 
            +
            require 'thread'
         | 
| 5 | 
            +
             | 
| 4 6 | 
             
            module Patinfo2csv
         | 
| 5 7 | 
             
              class Converter
         | 
| 6 8 | 
             
                attr_accessor :patinfos, :codes, :lang
         | 
| @@ -13,33 +15,48 @@ module Patinfo2csv | |
| 13 15 | 
             
                  :composition
         | 
| 14 16 | 
             
                ]
         | 
| 15 17 | 
             
                DELIMITER = ';'
         | 
| 18 | 
            +
                WORKERS = 2
         | 
| 16 19 | 
             
                def initialize
         | 
| 20 | 
            +
                  @mutex = Mutex.new
         | 
| 21 | 
            +
                  @q     = Queue.new
         | 
| 17 22 | 
             
                  @lang     = "de"
         | 
| 18 23 | 
             
                  @patinfos = []
         | 
| 19 24 | 
             
                  @codes    = []
         | 
| 20 25 | 
             
                  @rows     = []
         | 
| 21 26 | 
             
                end
         | 
| 22 27 | 
             
                def to_csv
         | 
| 23 | 
            -
                   | 
| 24 | 
            -
             | 
| 25 | 
            -
             | 
| 26 | 
            -
             | 
| 27 | 
            -
                     | 
| 28 | 
            -
             | 
| 29 | 
            -
             | 
| 30 | 
            -
                         | 
| 31 | 
            -
             | 
| 32 | 
            -
             | 
| 33 | 
            -
             | 
| 28 | 
            +
                  @patinfos.each do |row|
         | 
| 29 | 
            +
                    @q << row
         | 
| 30 | 
            +
                  end
         | 
| 31 | 
            +
                  Array.new(WORKERS) do
         | 
| 32 | 
            +
                    @q << nil
         | 
| 33 | 
            +
                    Thread.new do
         | 
| 34 | 
            +
                      while row = @q.pop
         | 
| 35 | 
            +
                        target = []
         | 
| 36 | 
            +
                        next if row['article_codes'].nil?
         | 
| 37 | 
            +
                        row['article_codes'].each do |article|
         | 
| 38 | 
            +
                          @mutex.synchronize do
         | 
| 39 | 
            +
                            if @codes.include?(article[:article_ean13]) # EAN
         | 
| 40 | 
            +
                              target << article[:article_ean13]
         | 
| 41 | 
            +
                              @codes.delete(article[:article_ean13])
         | 
| 42 | 
            +
                            end
         | 
| 43 | 
            +
                            #elsif @code.include?(code[:article_pcode]) # Pharmacode
         | 
| 44 | 
            +
                            #  target << code[:article_pcode]
         | 
| 45 | 
            +
                            #  @code.delete(code[:article_pcode])
         | 
| 46 | 
            +
                            #end
         | 
| 47 | 
            +
                          end
         | 
| 48 | 
            +
                        end
         | 
| 49 | 
            +
                        next if target.empty?
         | 
| 50 | 
            +
                        chapters = extract_chapters(row)
         | 
| 51 | 
            +
                        next if chapters.empty?
         | 
| 52 | 
            +
                        target.each do |code|
         | 
| 53 | 
            +
                          @mutex.synchronize do
         | 
| 54 | 
            +
                            @rows << [%Q!"#{code}"!, chapters].flatten.join(DELIMITER)
         | 
| 55 | 
            +
                          end
         | 
| 56 | 
            +
                        end
         | 
| 34 57 | 
             
                      end
         | 
| 35 58 | 
             
                    end
         | 
| 36 | 
            -
             | 
| 37 | 
            -
                    chapters = extract_chapters(row)
         | 
| 38 | 
            -
                    next if chapters.empty?
         | 
| 39 | 
            -
                    target.each do |code|
         | 
| 40 | 
            -
                      @rows << [%Q!"#{code}"!, chapters].flatten.join(DELIMITER)
         | 
| 41 | 
            -
                    end
         | 
| 42 | 
            -
                  end
         | 
| 59 | 
            +
                  end.map(&:join)
         | 
| 43 60 | 
             
                  unless @rows.empty? #header
         | 
| 44 61 | 
             
                    @rows.unshift([
         | 
| 45 62 | 
             
                      "EAN",
         | 
    
        data/lib/version.rb
    CHANGED
    
    
    
        metadata
    CHANGED
    
    | @@ -1,7 +1,7 @@ | |
| 1 1 | 
             
            --- !ruby/object:Gem::Specification
         | 
| 2 2 | 
             
            name: patinfo2csv
         | 
| 3 3 | 
             
            version: !ruby/object:Gem::Version
         | 
| 4 | 
            -
              version: 1.0. | 
| 4 | 
            +
              version: 1.0.5
         | 
| 5 5 | 
             
              prerelease: 
         | 
| 6 6 | 
             
            platform: ruby
         | 
| 7 7 | 
             
            authors:
         | 
| @@ -9,11 +9,11 @@ authors: | |
| 9 9 | 
             
            autorequire: 
         | 
| 10 10 | 
             
            bindir: bin
         | 
| 11 11 | 
             
            cert_chain: []
         | 
| 12 | 
            -
            date: 2012-03- | 
| 12 | 
            +
            date: 2012-03-26 00:00:00.000000000 Z
         | 
| 13 13 | 
             
            dependencies:
         | 
| 14 14 | 
             
            - !ruby/object:Gem::Dependency
         | 
| 15 15 | 
             
              name: rdoc
         | 
| 16 | 
            -
              requirement: & | 
| 16 | 
            +
              requirement: &21274100 !ruby/object:Gem::Requirement
         | 
| 17 17 | 
             
                none: false
         | 
| 18 18 | 
             
                requirements:
         | 
| 19 19 | 
             
                - - ~>
         | 
| @@ -21,10 +21,10 @@ dependencies: | |
| 21 21 | 
             
                    version: '3.10'
         | 
| 22 22 | 
             
              type: :development
         | 
| 23 23 | 
             
              prerelease: false
         | 
| 24 | 
            -
              version_requirements: * | 
| 24 | 
            +
              version_requirements: *21274100
         | 
| 25 25 | 
             
            - !ruby/object:Gem::Dependency
         | 
| 26 26 | 
             
              name: hoe
         | 
| 27 | 
            -
              requirement: & | 
| 27 | 
            +
              requirement: &21273660 !ruby/object:Gem::Requirement
         | 
| 28 28 | 
             
                none: false
         | 
| 29 29 | 
             
                requirements:
         | 
| 30 30 | 
             
                - - ~>
         | 
| @@ -32,7 +32,7 @@ dependencies: | |
| 32 32 | 
             
                    version: '2.13'
         | 
| 33 33 | 
             
              type: :development
         | 
| 34 34 | 
             
              prerelease: false
         | 
| 35 | 
            -
              version_requirements: * | 
| 35 | 
            +
              version_requirements: *21273660
         | 
| 36 36 | 
             
            description: ! 'Patinfo2csv extracts and converts patinfo.yaml to patinfo.csv;
         | 
| 37 37 |  | 
| 38 38 | 
             
              only 4 chapters(de, text) in patinfo.yaml are extracted.
         |