athena 0.1.3 → 0.1.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/COPYING +68 -81
- data/README +13 -13
- data/Rakefile +7 -8
- data/bin/athena +13 -15
- data/lib/athena.rb +9 -9
- data/lib/athena/formats.rb +22 -16
- data/lib/athena/formats/dbm.rb +29 -21
- data/lib/athena/formats/ferret.rb +19 -22
- data/lib/athena/formats/lingo.rb +45 -39
- data/lib/athena/formats/sisis.rb +22 -25
- data/lib/athena/formats/xml.rb +55 -82
- data/lib/athena/parser.rb +29 -28
- data/lib/athena/record.rb +24 -25
- data/lib/athena/util.rb +13 -10
- data/lib/athena/version.rb +20 -44
- metadata +47 -24
    
        data/lib/athena/formats/dbm.rb
    CHANGED
    
    | @@ -3,64 +3,72 @@ | |
| 3 3 | 
             
            #                                                                             #
         | 
| 4 4 | 
             
            # A component of athena, the database file converter.                         #
         | 
| 5 5 | 
             
            #                                                                             #
         | 
| 6 | 
            -
            # Copyright (C) 2007- | 
| 6 | 
            +
            # Copyright (C) 2007-2011 University of Cologne,                              #
         | 
| 7 7 | 
             
            #                         Albertus-Magnus-Platz,                              #
         | 
| 8 | 
            -
            #                          | 
| 8 | 
            +
            #                         50923 Cologne, Germany                              #
         | 
| 9 9 | 
             
            #                                                                             #
         | 
| 10 10 | 
             
            # Authors:                                                                    #
         | 
| 11 11 | 
             
            #     Jens Wille <jens.wille@uni-koeln.de>                                    #
         | 
| 12 12 | 
             
            #                                                                             #
         | 
| 13 13 | 
             
            # athena is free software; you can redistribute it and/or modify it under the #
         | 
| 14 | 
            -
            # terms of the GNU General Public License as published by the Free | 
| 15 | 
            -
            # Foundation; either version 3 of the License, or (at your option) | 
| 16 | 
            -
            # version. | 
| 14 | 
            +
            # terms of the GNU Affero General Public License as published by the Free     #
         | 
| 15 | 
            +
            # Software Foundation; either version 3 of the License, or (at your option)   #
         | 
| 16 | 
            +
            # any later version.                                                          #
         | 
| 17 17 | 
             
            #                                                                             #
         | 
| 18 18 | 
             
            # athena is distributed in the hope that it will be useful, but WITHOUT ANY   #
         | 
| 19 19 | 
             
            # WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS   #
         | 
| 20 | 
            -
            # FOR A PARTICULAR PURPOSE. See the GNU General Public License for | 
| 21 | 
            -
            # details. | 
| 20 | 
            +
            # FOR A PARTICULAR PURPOSE. See the GNU Affero General Public License for     #
         | 
| 21 | 
            +
            # more details.                                                               #
         | 
| 22 22 | 
             
            #                                                                             #
         | 
| 23 | 
            -
            # You should have received a copy of the GNU General Public License | 
| 24 | 
            -
            # with athena. If not, see <http://www.gnu.org/licenses/>. | 
| 23 | 
            +
            # You should have received a copy of the GNU Affero General Public License    #
         | 
| 24 | 
            +
            # along with athena. If not, see <http://www.gnu.org/licenses/>.              #
         | 
| 25 25 | 
             
            #                                                                             #
         | 
| 26 26 | 
             
            ###############################################################################
         | 
| 27 27 | 
             
            #++
         | 
| 28 28 |  | 
| 29 29 | 
             
            require 'iconv'
         | 
| 30 30 |  | 
| 31 | 
            -
            module Athena | 
| 31 | 
            +
            module Athena
         | 
| 32 | 
            +
              module Formats
         | 
| 32 33 |  | 
| 33 34 | 
             
              class DBM < Base
         | 
| 34 35 |  | 
| 35 | 
            -
                CRLF = "\015\012"
         | 
| 36 | 
            -
             | 
| 37 | 
            -
                ICONV_TO_LATIN1 = Iconv.new('latin1//TRANSLIT//IGNORE', 'utf-8')
         | 
| 38 | 
            -
             | 
| 39 36 | 
             
                VALUE_SEPARATOR  = '|'
         | 
| 40 37 | 
             
                RECORD_SEPARATOR = '&&&'
         | 
| 41 38 |  | 
| 39 | 
            +
                ICONV_TO_LATIN1 = Iconv.new('latin1//TRANSLIT//IGNORE', 'utf-8')
         | 
| 40 | 
            +
             | 
| 42 41 | 
             
                register_format :out, 'midos'
         | 
| 43 42 |  | 
| 44 43 | 
             
                def convert(record)
         | 
| 45 44 | 
             
                  dbm = ["ID:#{record.id}"]
         | 
| 46 45 |  | 
| 47 46 | 
             
                  record.struct.each { |field, struct|
         | 
| 48 | 
            -
                     | 
| 49 | 
            -
             | 
| 50 | 
            -
             | 
| 51 | 
            -
             | 
| 47 | 
            +
                    struct_values = struct[:values]
         | 
| 48 | 
            +
                    struct_values.default = []
         | 
| 49 | 
            +
             | 
| 50 | 
            +
                    strings = struct[:elements].map { |element|
         | 
| 51 | 
            +
                      values = []
         | 
| 52 52 |  | 
| 53 | 
            -
                       | 
| 53 | 
            +
                      struct_values[element].each { |value|
         | 
| 54 | 
            +
                        if value
         | 
| 55 | 
            +
                          value = value.strip.gsub(CRLF_RE, ' ')
         | 
| 56 | 
            +
                          values << value unless value.empty?
         | 
| 57 | 
            +
                        end
         | 
| 58 | 
            +
                      }
         | 
| 59 | 
            +
             | 
| 60 | 
            +
                      values.empty? ? struct[:empty] : values.join(VALUE_SEPARATOR)
         | 
| 54 61 | 
             
                    }
         | 
| 55 62 |  | 
| 56 63 | 
             
                    dbm << "#{field.to_s.upcase}:#{ICONV_TO_LATIN1.iconv(struct[:string] % strings)}"
         | 
| 57 64 | 
             
                  }
         | 
| 58 65 |  | 
| 59 | 
            -
                  dbm << RECORD_SEPARATOR
         | 
| 66 | 
            +
                  dbm << RECORD_SEPARATOR << CRLF
         | 
| 60 67 |  | 
| 61 | 
            -
                  dbm.join(CRLF) | 
| 68 | 
            +
                  dbm.join(CRLF)
         | 
| 62 69 | 
             
                end
         | 
| 63 70 |  | 
| 64 71 | 
             
              end
         | 
| 65 72 |  | 
| 73 | 
            +
              end
         | 
| 66 74 | 
             
            end
         | 
| @@ -3,38 +3,42 @@ | |
| 3 3 | 
             
            #                                                                             #
         | 
| 4 4 | 
             
            # A component of athena, the database file converter.                         #
         | 
| 5 5 | 
             
            #                                                                             #
         | 
| 6 | 
            -
            # Copyright (C) 2007- | 
| 6 | 
            +
            # Copyright (C) 2007-2011 University of Cologne,                              #
         | 
| 7 7 | 
             
            #                         Albertus-Magnus-Platz,                              #
         | 
| 8 | 
            -
            #                          | 
| 8 | 
            +
            #                         50923 Cologne, Germany                              #
         | 
| 9 9 | 
             
            #                                                                             #
         | 
| 10 10 | 
             
            # Authors:                                                                    #
         | 
| 11 11 | 
             
            #     Jens Wille <jens.wille@uni-koeln.de>                                    #
         | 
| 12 12 | 
             
            #                                                                             #
         | 
| 13 13 | 
             
            # athena is free software; you can redistribute it and/or modify it under the #
         | 
| 14 | 
            -
            # terms of the GNU General Public License as published by the Free | 
| 15 | 
            -
            # Foundation; either version 3 of the License, or (at your option) | 
| 16 | 
            -
            # version. | 
| 14 | 
            +
            # terms of the GNU Affero General Public License as published by the Free     #
         | 
| 15 | 
            +
            # Software Foundation; either version 3 of the License, or (at your option)   #
         | 
| 16 | 
            +
            # any later version.                                                          #
         | 
| 17 17 | 
             
            #                                                                             #
         | 
| 18 18 | 
             
            # athena is distributed in the hope that it will be useful, but WITHOUT ANY   #
         | 
| 19 19 | 
             
            # WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS   #
         | 
| 20 | 
            -
            # FOR A PARTICULAR PURPOSE. See the GNU General Public License for | 
| 21 | 
            -
            # details. | 
| 20 | 
            +
            # FOR A PARTICULAR PURPOSE. See the GNU Affero General Public License for     #
         | 
| 21 | 
            +
            # more details.                                                               #
         | 
| 22 22 | 
             
            #                                                                             #
         | 
| 23 | 
            -
            # You should have received a copy of the GNU General Public License | 
| 24 | 
            -
            # with athena. If not, see <http://www.gnu.org/licenses/>. | 
| 23 | 
            +
            # You should have received a copy of the GNU Affero General Public License    #
         | 
| 24 | 
            +
            # along with athena. If not, see <http://www.gnu.org/licenses/>.              #
         | 
| 25 25 | 
             
            #                                                                             #
         | 
| 26 26 | 
             
            ###############################################################################
         | 
| 27 27 | 
             
            #++
         | 
| 28 28 |  | 
| 29 | 
            -
             | 
| 29 | 
            +
            if ferret_version = ENV['FERRET_VERSION']
         | 
| 30 | 
            +
              require 'rubygems'
         | 
| 31 | 
            +
              gem 'ferret', ferret_version
         | 
| 32 | 
            +
            end
         | 
| 30 33 |  | 
| 31 34 | 
             
            begin
         | 
| 32 | 
            -
              gem 'ferret', ENV['FERRET_VERSION'] if ENV['FERRET_VERSION']
         | 
| 33 35 | 
             
              require 'ferret'
         | 
| 34 | 
            -
            rescue LoadError
         | 
| 36 | 
            +
            rescue LoadError => err
         | 
| 37 | 
            +
              warn "ferret#{" #{ferret_version}" if ferret_version} not available (#{err})"
         | 
| 35 38 | 
             
            end
         | 
| 36 39 |  | 
| 37 | 
            -
            module Athena | 
| 40 | 
            +
            module Athena
         | 
| 41 | 
            +
              module Formats
         | 
| 38 42 |  | 
| 39 43 | 
             
              class Ferret < Base
         | 
| 40 44 |  | 
| @@ -87,7 +91,7 @@ module Athena::Formats | |
| 87 91 | 
             
                    unless index.deleted?(i)
         | 
| 88 92 | 
             
                      doc = index[i]
         | 
| 89 93 |  | 
| 90 | 
            -
                       | 
| 94 | 
            +
                      Record.new(doc[record_element], block) { |record|
         | 
| 91 95 | 
             
                        config.each { |element, field_config|
         | 
| 92 96 | 
             
                          record.update(element, doc[element], field_config)
         | 
| 93 97 | 
             
                        }
         | 
| @@ -98,14 +102,7 @@ module Athena::Formats | |
| 98 102 | 
             
                  index.num_docs
         | 
| 99 103 | 
             
                end
         | 
| 100 104 |  | 
| 101 | 
            -
                private
         | 
| 102 | 
            -
             | 
| 103 | 
            -
                class NoRecordElementError < StandardError
         | 
| 104 | 
            -
                end
         | 
| 105 | 
            -
             | 
| 106 | 
            -
                class IllegalRecordElementError < StandardError
         | 
| 107 | 
            -
                end
         | 
| 108 | 
            -
             | 
| 109 105 | 
             
              end
         | 
| 110 106 |  | 
| 107 | 
            +
              end
         | 
| 111 108 | 
             
            end
         | 
    
        data/lib/athena/formats/lingo.rb
    CHANGED
    
    | @@ -3,25 +3,25 @@ | |
| 3 3 | 
             
            #                                                                             #
         | 
| 4 4 | 
             
            # A component of athena, the database file converter.                         #
         | 
| 5 5 | 
             
            #                                                                             #
         | 
| 6 | 
            -
            # Copyright (C) 2007- | 
| 6 | 
            +
            # Copyright (C) 2007-2011 University of Cologne,                              #
         | 
| 7 7 | 
             
            #                         Albertus-Magnus-Platz,                              #
         | 
| 8 | 
            -
            #                          | 
| 8 | 
            +
            #                         50923 Cologne, Germany                              #
         | 
| 9 9 | 
             
            #                                                                             #
         | 
| 10 10 | 
             
            # Authors:                                                                    #
         | 
| 11 11 | 
             
            #     Jens Wille <jens.wille@uni-koeln.de>                                    #
         | 
| 12 12 | 
             
            #                                                                             #
         | 
| 13 13 | 
             
            # athena is free software; you can redistribute it and/or modify it under the #
         | 
| 14 | 
            -
            # terms of the GNU General Public License as published by the Free | 
| 15 | 
            -
            # Foundation; either version 3 of the License, or (at your option) | 
| 16 | 
            -
            # version. | 
| 14 | 
            +
            # terms of the GNU Affero General Public License as published by the Free     #
         | 
| 15 | 
            +
            # Software Foundation; either version 3 of the License, or (at your option)   #
         | 
| 16 | 
            +
            # any later version.                                                          #
         | 
| 17 17 | 
             
            #                                                                             #
         | 
| 18 18 | 
             
            # athena is distributed in the hope that it will be useful, but WITHOUT ANY   #
         | 
| 19 19 | 
             
            # WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS   #
         | 
| 20 | 
            -
            # FOR A PARTICULAR PURPOSE. See the GNU General Public License for | 
| 21 | 
            -
            # details. | 
| 20 | 
            +
            # FOR A PARTICULAR PURPOSE. See the GNU Affero General Public License for     #
         | 
| 21 | 
            +
            # more details.                                                               #
         | 
| 22 22 | 
             
            #                                                                             #
         | 
| 23 | 
            -
            # You should have received a copy of the GNU General Public License | 
| 24 | 
            -
            # with athena. If not, see <http://www.gnu.org/licenses/>. | 
| 23 | 
            +
            # You should have received a copy of the GNU Affero General Public License    #
         | 
| 24 | 
            +
            # along with athena. If not, see <http://www.gnu.org/licenses/>.              #
         | 
| 25 25 | 
             
            #                                                                             #
         | 
| 26 26 | 
             
            ###############################################################################
         | 
| 27 27 | 
             
            #++
         | 
| @@ -29,18 +29,37 @@ | |
| 29 29 | 
             
            require 'iconv'
         | 
| 30 30 | 
             
            require 'enumerator'
         | 
| 31 31 |  | 
| 32 | 
            -
            module Athena | 
| 32 | 
            +
            module Athena
         | 
| 33 | 
            +
              module Formats
         | 
| 33 34 |  | 
| 34 35 | 
             
              class Lingo < Base
         | 
| 35 36 |  | 
| 37 | 
            +
                KV_SEPARATOR = '*'
         | 
| 38 | 
            +
                WC_SEPARATOR = ','
         | 
| 39 | 
            +
                MV_SEPARATOR = ';'
         | 
| 40 | 
            +
             | 
| 36 41 | 
             
                def convert(record)
         | 
| 37 | 
            -
                   | 
| 38 | 
            -
             | 
| 39 | 
            -
             | 
| 40 | 
            -
             | 
| 41 | 
            -
             | 
| 42 | 
            +
                  terms = []
         | 
| 43 | 
            +
             | 
| 44 | 
            +
                  record.struct.each { |field, struct|
         | 
| 45 | 
            +
                    struct_values = struct[:values]
         | 
| 46 | 
            +
                    struct_values.default = []
         | 
| 47 | 
            +
             | 
| 48 | 
            +
                    values = []
         | 
| 49 | 
            +
             | 
| 50 | 
            +
                    struct[:elements].each { |element|
         | 
| 51 | 
            +
                      struct_values[element].each { |value|
         | 
| 52 | 
            +
                        if value
         | 
| 53 | 
            +
                          value = value.strip.gsub(CRLF_RE, ' ')
         | 
| 54 | 
            +
                          values << value unless value.empty?
         | 
| 55 | 
            +
                        end
         | 
| 56 | 
            +
                      }
         | 
| 42 57 | 
             
                    }
         | 
| 58 | 
            +
             | 
| 59 | 
            +
                    terms << values
         | 
| 43 60 | 
             
                  }
         | 
| 61 | 
            +
             | 
| 62 | 
            +
                  terms
         | 
| 44 63 | 
             
                end
         | 
| 45 64 |  | 
| 46 65 | 
             
                def deferred?
         | 
| @@ -49,23 +68,15 @@ module Athena::Formats | |
| 49 68 |  | 
| 50 69 | 
             
                private
         | 
| 51 70 |  | 
| 52 | 
            -
                def  | 
| 53 | 
            -
                   | 
| 54 | 
            -
             | 
| 55 | 
            -
                  msg = "wrong number of arguments for #{self} (#{actual} for #{expected})"
         | 
| 56 | 
            -
             | 
| 57 | 
            -
                  if blow
         | 
| 58 | 
            -
                    raise FormatArgumentError, msg
         | 
| 71 | 
            +
                def check_args(expected, actual, &block)
         | 
| 72 | 
            +
                  if block ? block[actual] : expected == actual
         | 
| 73 | 
            +
                    true
         | 
| 59 74 | 
             
                  else
         | 
| 60 | 
            -
                    warn  | 
| 61 | 
            -
                     | 
| 75 | 
            +
                    warn "wrong number of arguments for #{self} (#{actual} for #{expected})"
         | 
| 76 | 
            +
                    false
         | 
| 62 77 | 
             
                  end
         | 
| 63 78 | 
             
                end
         | 
| 64 79 |  | 
| 65 | 
            -
                def check_number_of_arguments!(expected, actual, &block)
         | 
| 66 | 
            -
                  check_number_of_arguments(expected, actual, true, &block)
         | 
| 67 | 
            -
                end
         | 
| 68 | 
            -
             | 
| 69 80 | 
             
                # "Nasenbär\n"
         | 
| 70 81 | 
             
                register_format! :out, 'lingo/single_word' do
         | 
| 71 82 |  | 
| @@ -80,9 +91,7 @@ module Athena::Formats | |
| 80 91 |  | 
| 81 92 | 
             
                  def convert(record)
         | 
| 82 93 | 
             
                    super.map { |terms|
         | 
| 83 | 
            -
                       | 
| 84 | 
            -
             | 
| 85 | 
            -
                      terms.join('*')
         | 
| 94 | 
            +
                      terms.join(KV_SEPARATOR) if check_args(2, terms.size)
         | 
| 86 95 | 
             
                    }.compact
         | 
| 87 96 | 
             
                  end
         | 
| 88 97 |  | 
| @@ -93,13 +102,11 @@ module Athena::Formats | |
| 93 102 |  | 
| 94 103 | 
             
                  def convert(record)
         | 
| 95 104 | 
             
                    super.map { |terms|
         | 
| 96 | 
            -
                       | 
| 105 | 
            +
                      [ terms.shift,
         | 
| 106 | 
            +
                        terms.to_enum(:each_slice, 2).map { |w, c| "#{w} ##{c}" }.join(' ')
         | 
| 107 | 
            +
                      ].join(WC_SEPARATOR) if check_args('odd, > 1', terms.size) { |actual|
         | 
| 97 108 | 
             
                        actual > 1 && actual % 2 == 1
         | 
| 98 109 | 
             
                      }
         | 
| 99 | 
            -
             | 
| 100 | 
            -
                      [terms.shift, terms.to_enum(:each_slice, 2).map { |form, wc|
         | 
| 101 | 
            -
                        "#{form} ##{wc}"
         | 
| 102 | 
            -
                      }.join(' ')].join(',')
         | 
| 103 110 | 
             
                    }.compact
         | 
| 104 111 | 
             
                  end
         | 
| 105 112 |  | 
| @@ -110,11 +117,9 @@ module Athena::Formats | |
| 110 117 |  | 
| 111 118 | 
             
                  def convert(record)
         | 
| 112 119 | 
             
                    super.map { |terms|
         | 
| 113 | 
            -
                       | 
| 120 | 
            +
                      terms.join(MV_SEPARATOR) if check_args('> 1', terms.size) { |actual|
         | 
| 114 121 | 
             
                        actual > 1
         | 
| 115 122 | 
             
                      }
         | 
| 116 | 
            -
             | 
| 117 | 
            -
                      terms.join(';')
         | 
| 118 123 | 
             
                    }.compact
         | 
| 119 124 | 
             
                  end
         | 
| 120 125 |  | 
| @@ -122,4 +127,5 @@ module Athena::Formats | |
| 122 127 |  | 
| 123 128 | 
             
              end
         | 
| 124 129 |  | 
| 130 | 
            +
              end
         | 
| 125 131 | 
             
            end
         | 
    
        data/lib/athena/formats/sisis.rb
    CHANGED
    
    | @@ -3,33 +3,36 @@ | |
| 3 3 | 
             
            #                                                                             #
         | 
| 4 4 | 
             
            # A component of athena, the database file converter.                         #
         | 
| 5 5 | 
             
            #                                                                             #
         | 
| 6 | 
            -
            # Copyright (C) 2007- | 
| 6 | 
            +
            # Copyright (C) 2007-2011 University of Cologne,                              #
         | 
| 7 7 | 
             
            #                         Albertus-Magnus-Platz,                              #
         | 
| 8 | 
            -
            #                          | 
| 8 | 
            +
            #                         50923 Cologne, Germany                              #
         | 
| 9 9 | 
             
            #                                                                             #
         | 
| 10 10 | 
             
            # Authors:                                                                    #
         | 
| 11 11 | 
             
            #     Jens Wille <jens.wille@uni-koeln.de>                                    #
         | 
| 12 12 | 
             
            #                                                                             #
         | 
| 13 13 | 
             
            # athena is free software; you can redistribute it and/or modify it under the #
         | 
| 14 | 
            -
            # terms of the GNU General Public License as published by the Free | 
| 15 | 
            -
            # Foundation; either version 3 of the License, or (at your option) | 
| 16 | 
            -
            # version. | 
| 14 | 
            +
            # terms of the GNU Affero General Public License as published by the Free     #
         | 
| 15 | 
            +
            # Software Foundation; either version 3 of the License, or (at your option)   #
         | 
| 16 | 
            +
            # any later version.                                                          #
         | 
| 17 17 | 
             
            #                                                                             #
         | 
| 18 18 | 
             
            # athena is distributed in the hope that it will be useful, but WITHOUT ANY   #
         | 
| 19 19 | 
             
            # WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS   #
         | 
| 20 | 
            -
            # FOR A PARTICULAR PURPOSE. See the GNU General Public License for | 
| 21 | 
            -
            # details. | 
| 20 | 
            +
            # FOR A PARTICULAR PURPOSE. See the GNU Affero General Public License for     #
         | 
| 21 | 
            +
            # more details.                                                               #
         | 
| 22 22 | 
             
            #                                                                             #
         | 
| 23 | 
            -
            # You should have received a copy of the GNU General Public License | 
| 24 | 
            -
            # with athena. If not, see <http://www.gnu.org/licenses/>. | 
| 23 | 
            +
            # You should have received a copy of the GNU Affero General Public License    #
         | 
| 24 | 
            +
            # along with athena. If not, see <http://www.gnu.org/licenses/>.              #
         | 
| 25 25 | 
             
            #                                                                             #
         | 
| 26 26 | 
             
            ###############################################################################
         | 
| 27 27 | 
             
            #++
         | 
| 28 28 |  | 
| 29 | 
            -
            module Athena | 
| 29 | 
            +
            module Athena
         | 
| 30 | 
            +
              module Formats
         | 
| 30 31 |  | 
| 31 32 | 
             
              class Sisis < Base
         | 
| 32 33 |  | 
| 34 | 
            +
                RECORD_RE = %r{(\d+).*?:\s*(.*)}
         | 
| 35 | 
            +
             | 
| 33 36 | 
             
                register_format :in do
         | 
| 34 37 |  | 
| 35 38 | 
             
                  attr_reader :record_element, :config, :parser
         | 
| @@ -56,15 +59,14 @@ module Athena::Formats | |
| 56 59 | 
             
                  record, num = nil, 0
         | 
| 57 60 |  | 
| 58 61 | 
             
                  source.each { |line|
         | 
| 59 | 
            -
                    element, value = line.match( | 
| 60 | 
            -
             | 
| 61 | 
            -
                     | 
| 62 | 
            -
                       | 
| 63 | 
            -
             | 
| 64 | 
            -
             | 
| 65 | 
            -
             | 
| 66 | 
            -
                       | 
| 67 | 
            -
                        record.update(element, value, config[element])
         | 
| 62 | 
            +
                    element, value = line.match(RECORD_RE)[1, 2]
         | 
| 63 | 
            +
             | 
| 64 | 
            +
                    if element == record_element
         | 
| 65 | 
            +
                      record.close if record
         | 
| 66 | 
            +
                      record = Record.new(value, block)
         | 
| 67 | 
            +
                      num += 1
         | 
| 68 | 
            +
                    else
         | 
| 69 | 
            +
                      record.update(element, value, config[element])
         | 
| 68 70 | 
             
                    end
         | 
| 69 71 | 
             
                  }
         | 
| 70 72 |  | 
| @@ -73,12 +75,7 @@ module Athena::Formats | |
| 73 75 | 
             
                  num
         | 
| 74 76 | 
             
                end
         | 
| 75 77 |  | 
| 76 | 
            -
                class NoRecordElementError < StandardError
         | 
| 77 | 
            -
                end
         | 
| 78 | 
            -
             | 
| 79 | 
            -
                class IllegalRecordElementError < StandardError
         | 
| 80 | 
            -
                end
         | 
| 81 | 
            -
             | 
| 82 78 | 
             
              end
         | 
| 83 79 |  | 
| 80 | 
            +
              end
         | 
| 84 81 | 
             
            end
         | 
    
        data/lib/athena/formats/xml.rb
    CHANGED
    
    | @@ -3,46 +3,45 @@ | |
| 3 3 | 
             
            #                                                                             #
         | 
| 4 4 | 
             
            # A component of athena, the database file converter.                         #
         | 
| 5 5 | 
             
            #                                                                             #
         | 
| 6 | 
            -
            # Copyright (C) 2007- | 
| 6 | 
            +
            # Copyright (C) 2007-2011 University of Cologne,                              #
         | 
| 7 7 | 
             
            #                         Albertus-Magnus-Platz,                              #
         | 
| 8 | 
            -
            #                          | 
| 8 | 
            +
            #                         50923 Cologne, Germany                              #
         | 
| 9 9 | 
             
            #                                                                             #
         | 
| 10 10 | 
             
            # Authors:                                                                    #
         | 
| 11 11 | 
             
            #     Jens Wille <jens.wille@uni-koeln.de>                                    #
         | 
| 12 12 | 
             
            #                                                                             #
         | 
| 13 13 | 
             
            # athena is free software; you can redistribute it and/or modify it under the #
         | 
| 14 | 
            -
            # terms of the GNU General Public License as published by the Free | 
| 15 | 
            -
            # Foundation; either version 3 of the License, or (at your option) | 
| 16 | 
            -
            # version. | 
| 14 | 
            +
            # terms of the GNU Affero General Public License as published by the Free     #
         | 
| 15 | 
            +
            # Software Foundation; either version 3 of the License, or (at your option)   #
         | 
| 16 | 
            +
            # any later version.                                                          #
         | 
| 17 17 | 
             
            #                                                                             #
         | 
| 18 18 | 
             
            # athena is distributed in the hope that it will be useful, but WITHOUT ANY   #
         | 
| 19 19 | 
             
            # WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS   #
         | 
| 20 | 
            -
            # FOR A PARTICULAR PURPOSE. See the GNU General Public License for | 
| 21 | 
            -
            # details. | 
| 20 | 
            +
            # FOR A PARTICULAR PURPOSE. See the GNU Affero General Public License for     #
         | 
| 21 | 
            +
            # more details.                                                               #
         | 
| 22 22 | 
             
            #                                                                             #
         | 
| 23 | 
            -
            # You should have received a copy of the GNU General Public License | 
| 24 | 
            -
            # with athena. If not, see <http://www.gnu.org/licenses/>. | 
| 23 | 
            +
            # You should have received a copy of the GNU Affero General Public License    #
         | 
| 24 | 
            +
            # along with athena. If not, see <http://www.gnu.org/licenses/>.              #
         | 
| 25 25 | 
             
            #                                                                             #
         | 
| 26 26 | 
             
            ###############################################################################
         | 
| 27 27 | 
             
            #++
         | 
| 28 28 |  | 
| 29 29 | 
             
            require 'forwardable'
         | 
| 30 30 |  | 
| 31 | 
            -
            require 'rubygems'
         | 
| 32 | 
            -
             | 
| 33 31 | 
             
            require 'builder'
         | 
| 34 32 | 
             
            require 'xmlstreamin'
         | 
| 35 33 | 
             
            require 'nuggets/hash/insert'
         | 
| 36 34 |  | 
| 37 | 
            -
            module Athena | 
| 35 | 
            +
            module Athena
         | 
| 36 | 
            +
              module Formats
         | 
| 38 37 |  | 
| 39 38 | 
             
              class XML < Base
         | 
| 40 39 |  | 
| 41 | 
            -
                include  | 
| 40 | 
            +
                include Util
         | 
| 42 41 |  | 
| 43 42 | 
             
                # <http://www.w3.org/TR/2006/REC-xml-20060816/#NT-Name>
         | 
| 44 | 
            -
                 | 
| 45 | 
            -
                 | 
| 43 | 
            +
                ELEMENT_START_RE    = %r{\A[a-zA-Z_:]}
         | 
| 44 | 
            +
                NON_ELEMENT_CHAR_RE = %r{[^\w:.-]}
         | 
| 46 45 |  | 
| 47 46 | 
             
                VALUE_SEPARATOR = '|'
         | 
| 48 47 |  | 
| @@ -87,12 +86,14 @@ module Athena::Formats | |
| 87 86 |  | 
| 88 87 | 
             
                  def convert(record)
         | 
| 89 88 | 
             
                    super { |field, struct|
         | 
| 90 | 
            -
                      strings =  | 
| 89 | 
            +
                      strings = []
         | 
| 90 | 
            +
             | 
| 91 | 
            +
                      struct[:elements].each { |element|
         | 
| 91 92 | 
             
                        values = (struct[:values][element] || []).map { |v|
         | 
| 92 93 | 
             
                          (v || '').strip
         | 
| 93 94 | 
             
                        }.reject { |v| v.empty? }
         | 
| 94 95 |  | 
| 95 | 
            -
                         | 
| 96 | 
            +
                        strings << (values.empty? ? struct[:empty] : values.join(VALUE_SEPARATOR))
         | 
| 96 97 | 
             
                      }
         | 
| 97 98 |  | 
| 98 99 | 
             
                      builder.tag!(field, struct[:string] % strings)
         | 
| @@ -125,8 +126,8 @@ module Athena::Formats | |
| 125 126 | 
             
                    def builder.method_missing(sym, *args, &block)
         | 
| 126 127 | 
             
                      elem = sym.to_s
         | 
| 127 128 |  | 
| 128 | 
            -
                      elem.insert(0, '_') unless elem =~  | 
| 129 | 
            -
                      elem.gsub!( | 
| 129 | 
            +
                      elem.insert(0, '_') unless elem =~ ELEMENT_START_RE
         | 
| 130 | 
            +
                      elem.gsub!(NON_ELEMENT_CHAR_RE, '_')
         | 
| 130 131 |  | 
| 131 132 | 
             
                      super(elem, *args, &block)
         | 
| 132 133 | 
             
                    end
         | 
| @@ -183,9 +184,7 @@ module Athena::Formats | |
| 183 184 | 
             
                    spec.default!(prev_spec)
         | 
| 184 185 | 
             
                  }
         | 
| 185 186 |  | 
| 186 | 
            -
                  verbose(:spec, BaseSpec)  | 
| 187 | 
            -
                    spec.inspect_spec
         | 
| 188 | 
            -
                  end
         | 
| 187 | 
            +
                  verbose(:spec, BaseSpec) { spec.inspect_spec }
         | 
| 189 188 |  | 
| 190 189 | 
             
                  XMLStreamin::XMLStreamListener.new(spec)
         | 
| 191 190 | 
             
                end
         | 
| @@ -194,10 +193,8 @@ module Athena::Formats | |
| 194 193 | 
             
                  spec = ElementSpec.new(element, field, config)
         | 
| 195 194 |  | 
| 196 195 | 
             
                  case arg
         | 
| 197 | 
            -
                    when Hash
         | 
| 198 | 
            -
             | 
| 199 | 
            -
                    else
         | 
| 200 | 
            -
                      spec.default!(SubElementSpec.new(spec))
         | 
| 196 | 
            +
                    when Hash then spec.specs!(arg)
         | 
| 197 | 
            +
                    else spec.default!(SubElementSpec.new(spec))
         | 
| 201 198 | 
             
                  end
         | 
| 202 199 |  | 
| 203 200 | 
             
                  spec
         | 
| @@ -216,69 +213,51 @@ module Athena::Formats | |
| 216 213 |  | 
| 217 214 | 
             
                class BaseSpec < XMLStreamin::XMLSpec
         | 
| 218 215 |  | 
| 219 | 
            -
                  include  | 
| 216 | 
            +
                  include Util
         | 
| 220 217 |  | 
| 221 218 | 
             
                  @level = 0
         | 
| 222 219 |  | 
| 223 220 | 
             
                  def start(context, name, attrs)
         | 
| 224 | 
            -
                    verbose(:xml)  | 
| 225 | 
            -
                      spit "#{indent(level)}<#{name}>"
         | 
| 226 | 
            -
                       | 
| 227 | 
            -
             | 
| 228 | 
            -
                      attrs.each { |attr|
         | 
| 229 | 
            -
                        spit "#{indent(level + 1)}[#{attr[0]} = #{attr[1]}]"
         | 
| 230 | 
            -
                      }
         | 
| 231 | 
            -
                    end
         | 
| 221 | 
            +
                    verbose(:xml) {
         | 
| 222 | 
            +
                      spit "#{indent(level)}<#{name}>"; step :down
         | 
| 223 | 
            +
                      attrs.each { |attr| spit "#{indent(level + 1)}[#{attr[0]} = #{attr[1]}]" }
         | 
| 224 | 
            +
                    }
         | 
| 232 225 |  | 
| 233 | 
            -
                     | 
| 226 | 
            +
                    context
         | 
| 234 227 | 
             
                  end
         | 
| 235 228 |  | 
| 236 229 | 
             
                  def text(context, data)
         | 
| 237 | 
            -
                    verbose(:xml)  | 
| 238 | 
            -
             | 
| 239 | 
            -
                      spit "#{indent(level)}#{content}" unless content.empty?
         | 
| 240 | 
            -
                    end
         | 
| 241 | 
            -
             | 
| 242 | 
            -
                    return context
         | 
| 230 | 
            +
                    verbose(:xml) { spit "#{indent(level)}#{data.strip}" unless data.strip.empty?  }
         | 
| 231 | 
            +
                    context
         | 
| 243 232 | 
             
                  end
         | 
| 244 233 |  | 
| 245 234 | 
             
                  def done(context, name)
         | 
| 246 | 
            -
                    verbose(:xml)  | 
| 247 | 
            -
             | 
| 248 | 
            -
                      spit "#{indent(level)}</#{name}>"
         | 
| 249 | 
            -
                    end
         | 
| 250 | 
            -
             | 
| 251 | 
            -
                    return context
         | 
| 235 | 
            +
                    verbose(:xml) { step :up; spit "#{indent(level)}</#{name}>" }
         | 
| 236 | 
            +
                    context
         | 
| 252 237 | 
             
                  end
         | 
| 253 238 |  | 
| 254 239 | 
             
                  def empty(context)
         | 
| 255 | 
            -
                    verbose(:xml)  | 
| 256 | 
            -
             | 
| 257 | 
            -
                    end
         | 
| 258 | 
            -
             | 
| 259 | 
            -
                    return context
         | 
| 240 | 
            +
                    verbose(:xml) { step :up }
         | 
| 241 | 
            +
                    context
         | 
| 260 242 | 
             
                  end
         | 
| 261 243 |  | 
| 262 244 | 
             
                  def inspect_spec(element = nil, level = 0)
         | 
| 263 245 | 
             
                    if respond_to?(:field)
         | 
| 264 246 | 
             
                      msg = "#{indent(level)}[#{element}] #{field.to_s.upcase} -> #{name}"
         | 
| 265 247 | 
             
                      respond_to?(:spit) ? spit(msg) : warn(msg)
         | 
| 266 | 
            -
             | 
| 267 | 
            -
             | 
| 268 | 
            -
                      }
         | 
| 248 | 
            +
             | 
| 249 | 
            +
                      inspect_specs(level + 1)
         | 
| 269 250 | 
             
                    else
         | 
| 270 | 
            -
                       | 
| 271 | 
            -
                        specs.default.inspect_spec('?', level)
         | 
| 272 | 
            -
                      else
         | 
| 273 | 
            -
                        specs.each { |e, s|
         | 
| 274 | 
            -
                          s.inspect_spec(e, level)
         | 
| 275 | 
            -
                        }
         | 
| 276 | 
            -
                      end
         | 
| 251 | 
            +
                      specs.empty? ? specs.default.inspect_spec('?', level) : inspect_specs(level)
         | 
| 277 252 | 
             
                    end
         | 
| 278 253 | 
             
                  end
         | 
| 279 254 |  | 
| 280 255 | 
             
                  private
         | 
| 281 256 |  | 
| 257 | 
            +
                  def inspect_specs(level = 0)
         | 
| 258 | 
            +
                    specs.each { |element, spec| spec.inspect_spec(element, level) }
         | 
| 259 | 
            +
                  end
         | 
| 260 | 
            +
             | 
| 282 261 | 
             
                  def level
         | 
| 283 262 | 
             
                    BaseSpec.instance_variable_get(:@level)
         | 
| 284 263 | 
             
                  end
         | 
| @@ -298,19 +277,19 @@ module Athena::Formats | |
| 298 277 | 
             
                  def initialize(&block)
         | 
| 299 278 | 
             
                    super()
         | 
| 300 279 |  | 
| 301 | 
            -
                    @block | 
| 280 | 
            +
                    @block = block
         | 
| 302 281 | 
             
                  end
         | 
| 303 282 |  | 
| 304 283 | 
             
                  def start(context, name, attrs)
         | 
| 305 | 
            -
                    super
         | 
| 306 | 
            -
             | 
| 307 | 
            -
                     | 
| 284 | 
            +
                    context = super
         | 
| 285 | 
            +
                    self.record = Record.new(nil, block, true)
         | 
| 286 | 
            +
                    context
         | 
| 308 287 | 
             
                  end
         | 
| 309 288 |  | 
| 310 289 | 
             
                  def done(context, name)
         | 
| 311 | 
            -
                    super
         | 
| 312 | 
            -
             | 
| 290 | 
            +
                    context = super
         | 
| 313 291 | 
             
                    record.close
         | 
| 292 | 
            +
                    context
         | 
| 314 293 | 
             
                  end
         | 
| 315 294 |  | 
| 316 295 | 
             
                end
         | 
| @@ -329,15 +308,15 @@ module Athena::Formats | |
| 329 308 | 
             
                  end
         | 
| 330 309 |  | 
| 331 310 | 
             
                  def start(context, name, attrs)
         | 
| 332 | 
            -
                    super
         | 
| 333 | 
            -
             | 
| 334 | 
            -
                     | 
| 311 | 
            +
                    context = super
         | 
| 312 | 
            +
                    self.record = Record[field, config]
         | 
| 313 | 
            +
                    context
         | 
| 335 314 | 
             
                  end
         | 
| 336 315 |  | 
| 337 316 | 
             
                  def text(context, data)
         | 
| 338 | 
            -
                    super
         | 
| 339 | 
            -
             | 
| 317 | 
            +
                    context = super
         | 
| 340 318 | 
             
                    record.update(name, data)
         | 
| 319 | 
            +
                    context
         | 
| 341 320 | 
             
                  end
         | 
| 342 321 |  | 
| 343 322 | 
             
                end
         | 
| @@ -353,19 +332,13 @@ module Athena::Formats | |
| 353 332 | 
             
                    super()
         | 
| 354 333 |  | 
| 355 334 | 
             
                    @parent = parent
         | 
| 335 | 
            +
             | 
| 356 336 | 
             
                    default!(self)
         | 
| 357 337 | 
             
                  end
         | 
| 358 338 |  | 
| 359 339 | 
             
                end
         | 
| 360 340 |  | 
| 361 | 
            -
                ConfigError = Athena::Parser::ConfigError
         | 
| 362 | 
            -
             | 
| 363 | 
            -
                class NoRecordElementError < ConfigError
         | 
| 364 | 
            -
                end
         | 
| 365 | 
            -
             | 
| 366 | 
            -
                class IllegalRecordElementError < ConfigError
         | 
| 367 | 
            -
                end
         | 
| 368 | 
            -
             | 
| 369 341 | 
             
              end
         | 
| 370 342 |  | 
| 343 | 
            +
              end
         | 
| 371 344 | 
             
            end
         |