biointerchange 0.1.3 → 0.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/README.md +17 -0
- data/VERSION +1 -1
- data/generators/GOxrefify.rb +41 -0
- data/generators/rdfxml.rb +6 -4
- data/lib/biointerchange/core.rb +94 -20
- data/lib/biointerchange/genomics/gff3_feature_set.rb +11 -3
- data/lib/biointerchange/genomics/gff3_pragmas.rb +3 -3
- data/lib/biointerchange/genomics/gff3_rdf_ntriples.rb +217 -12
- data/lib/biointerchange/genomics/gff3_reader.rb +78 -20
- data/lib/biointerchange/genomics/gvf_reader.rb +9 -3
- data/lib/biointerchange/gff3o.rb +69 -55
- data/lib/biointerchange/goxref.rb +867 -0
- data/lib/biointerchange/gvf1o.rb +546 -82
- data/lib/biointerchange/textmining/text_mining_reader.rb +9 -0
- data/spec/gff3_rdfwriter_spec.rb +1 -1
- data/spec/gvf_rdfwriter_spec.rb +1 -1
- data/spec/text_mining_pdfx_xml_reader_spec.rb +3 -0
- data/spec/text_mining_pubannos_json_reader_spec.rb +4 -1
- data/supplemental/java/biointerchange/pom.xml +1 -1
- data/supplemental/java/biointerchange/src/main/java/org/biointerchange/vocabulary/GFF3O.java +93 -125
- data/supplemental/java/biointerchange/src/main/java/org/biointerchange/vocabulary/GVF1O.java +304 -205
- data/supplemental/java/biointerchange/src/main/java/org/biointerchange/vocabulary/SIO.java +4044 -4290
- data/supplemental/java/biointerchange/src/main/java/org/biointerchange/vocabulary/SOFA.java +3 -3
- data/supplemental/python/biointerchange/gff3o.py +1 -89
- data/supplemental/python/biointerchange/gvf1o.py +129 -147
- data/supplemental/python/biointerchange/sio.py +817 -46
- data/supplemental/python/biointerchange/sofa.py +543 -543
- data/supplemental/python/setup.py +1 -1
- data/web/ontologies.html +1 -3
- metadata +7 -2
    
        data/README.md
    CHANGED
    
    | @@ -274,6 +274,12 @@ Building a new version of the Ruby vocabulary classes for GFF3, SIO, SOFA (requi | |
| 274 274 | 
             
                ruby generators/rdfxml.rb <path-to-rdf/xml-version-of-sofa> SOFA >> lib/biointerchange/sofa.rb
         | 
| 275 275 | 
             
                echo -e "\nend" >> lib/biointerchange/sofa.rb
         | 
| 276 276 |  | 
| 277 | 
            +
            A Gene Ontology external reference (GOxref) vocabulary can be created by directly downloading the latest version of `GO.xrf_abbs`:
         | 
| 278 | 
            +
             | 
| 279 | 
            +
                echo -e "module BioInterchange\n" > lib/biointerchange/goxref.rb
         | 
| 280 | 
            +
                curl ftp://ftp.geneontology.org/pub/go/doc/GO.xrf_abbs | ruby generators/GOxrefify.rb
         | 
| 281 | 
            +
                echo -e "\nend" >> lib/biointerchange/goxref.rb
         | 
| 282 | 
            +
             | 
| 277 283 | 
             
            #### Python Vocabulary Classes
         | 
| 278 284 |  | 
| 279 285 | 
             
            The source-code generation can be skipped, if none of the ontologies that are used by BioInterchange have been changed. Otherwise, the existing Python vocabulary class wrappers can be generated as follows:
         | 
| @@ -282,6 +288,7 @@ The source-code generation can be skipped, if none of the ontologies that are us | |
| 282 288 | 
             
                ruby generators/rdfxml.rb <path-to-rdf/xml-version-of-gvf1o> GVF1O | ruby generators/pythonify.rb > supplemental/python/biointerchange/gvf1o.py
         | 
| 283 289 | 
             
                ruby generators/rdfxml.rb <path-to-rdf/xml-version-of-sio> SIO | ruby generators/pythonify.rb > supplemental/python/biointerchange/sio.py
         | 
| 284 290 | 
             
                ruby generators/rdfxml.rb <path-to-rdf/xml-version-of-sofa> SOFA | ruby generators/pythonify.rb > supplemental/python/biointerchange/sofa.py
         | 
| 291 | 
            +
                curl ftp://ftp.geneontology.org/pub/go/doc/GO.xrf_abbs | ruby generators/GOxrefify.rb | ruby generators/pythonify.rb > supplemental/python/biointerchange/goxref.py
         | 
| 285 292 |  | 
| 286 293 | 
             
            Generate the BioInterchange Python vocabulary egg:
         | 
| 287 294 |  | 
| @@ -302,6 +309,7 @@ The source-code generation can be skipped, if none of the ontologies that are us | |
| 302 309 | 
             
                ruby generators/rdfxml.rb <path-to-rdf/xml-version-of-gvf1o> GVF1O | ruby generators/javaify.rb > supplemental/java/biointerchange/src/main/java/org/biointerchange/vocabulary/GVF1O.java
         | 
| 303 310 | 
             
                ruby generators/rdfxml.rb <path-to-rdf/xml-version-of-sio> SIO | ruby generators/javaify.rb > supplemental/java/biointerchange/src/main/java/org/biointerchange/vocabulary/SIO.java
         | 
| 304 311 | 
             
                ruby generators/rdfxml.rb <path-to-rdf/xml-version-of-sofa> SOFA | ruby generators/javaify.rb "http://purl.obolibrary.org/obo/" > supplemental/java/biointerchange/src/main/java/org/biointerchange/vocabulary/SOFA.java
         | 
| 312 | 
            +
                curl ftp://ftp.geneontology.org/pub/go/doc/GO.xrf_abbs | ruby generators/GOxrefify.rb | ruby generators/javaify.rb > supplemental/java/biointerchange/src/main/java/org/biointerchange/vocabulary/GOXRef.java
         | 
| 305 313 |  | 
| 306 314 | 
             
            Generate the BioInterchange Java vocabulary artifact:
         | 
| 307 315 |  | 
| @@ -346,6 +354,15 @@ A more verbose is produced by calling `rspec` directly: | |
| 346 354 |  | 
| 347 355 | 
             
                bundle exec rake rdoc
         | 
| 348 356 |  | 
| 357 | 
            +
            ### Deploying on Rubygems
         | 
| 358 | 
            +
             | 
| 359 | 
            +
            _Note:_ Only BioInterchange package maintainers can deploy the 'biointerchange' gem on Rubygems.
         | 
| 360 | 
            +
             | 
| 361 | 
            +
                bundle exec rake version:bump:(major | minor | patch)
         | 
| 362 | 
            +
                bundle exec rake gemspec
         | 
| 363 | 
            +
                bundle exec gem build biointerchange.gemspec
         | 
| 364 | 
            +
                bundle exec gem push biointerchange-VERSION.gem
         | 
| 365 | 
            +
             | 
| 349 366 | 
             
            ### Troubleshooting
         | 
| 350 367 |  | 
| 351 368 | 
             
            #### GCC: No such file or directory
         | 
    
        data/VERSION
    CHANGED
    
    | @@ -1 +1 @@ | |
| 1 | 
            -
            0. | 
| 1 | 
            +
            0.2.0
         | 
| @@ -0,0 +1,41 @@ | |
| 1 | 
            +
            #!/usr/bin/ruby
         | 
| 2 | 
            +
             | 
| 3 | 
            +
            def record(id, description, uri)
         | 
| 4 | 
            +
              puts "  # Returns the link-out URI for objects of \"#{description}\"."
         | 
| 5 | 
            +
              puts "  def self.#{id}"
         | 
| 6 | 
            +
              puts "    RDF::URI.new(\"#{uri}\")"
         | 
| 7 | 
            +
              puts '  end'
         | 
| 8 | 
            +
              puts ''
         | 
| 9 | 
            +
            end
         | 
| 10 | 
            +
             | 
| 11 | 
            +
            puts 'class GOXRef'
         | 
| 12 | 
            +
            puts ''
         | 
| 13 | 
            +
             | 
| 14 | 
            +
            in_record = false
         | 
| 15 | 
            +
             | 
| 16 | 
            +
            id = nil
         | 
| 17 | 
            +
            description = nil
         | 
| 18 | 
            +
            uri = nil
         | 
| 19 | 
            +
             | 
| 20 | 
            +
            STDIN.each { |line|
         | 
| 21 | 
            +
              line.chomp!
         | 
| 22 | 
            +
             | 
| 23 | 
            +
              if line.empty? then
         | 
| 24 | 
            +
                record(id, description, uri) if uri and not uri.match(/\[.*\]/)
         | 
| 25 | 
            +
                uri = nil
         | 
| 26 | 
            +
                in_record = false
         | 
| 27 | 
            +
              end
         | 
| 28 | 
            +
             | 
| 29 | 
            +
              if line.start_with?('abbreviation:') and not in_record then
         | 
| 30 | 
            +
                id = line.sub(/^abbreviation: /, '').gsub(/[-\/]/, '_')
         | 
| 31 | 
            +
                in_record = true
         | 
| 32 | 
            +
              end
         | 
| 33 | 
            +
              
         | 
| 34 | 
            +
              description = line.sub(/^database: /, '') if line.start_with?('database:') and in_record
         | 
| 35 | 
            +
              uri = line.sub(/^url_syntax: /, '').sub(/\[example_id\]$/, '') if line.start_with?('url_syntax:') and in_record
         | 
| 36 | 
            +
            }
         | 
| 37 | 
            +
             | 
| 38 | 
            +
            record(id, description, uri) if uri
         | 
| 39 | 
            +
             | 
| 40 | 
            +
            puts 'end'
         | 
| 41 | 
            +
             | 
    
        data/generators/rdfxml.rb
    CHANGED
    
    | @@ -16,8 +16,10 @@ OBO_DEF = RDF::URI.new('http://purl.obolibrary.org/obo/def') | |
| 16 16 | 
             
            # For handling synonyms in SIO:
         | 
| 17 17 | 
             
            SIO_SYN = RDF::URI.new('http://semanticscience.org/resource/synonym')
         | 
| 18 18 |  | 
| 19 | 
            -
             | 
| 20 | 
            -
             | 
| 19 | 
            +
            # This label conversion also appears in:
         | 
| 20 | 
            +
            #   +lib/biointerchange/core.rb+
         | 
| 21 | 
            +
            def make_safe_label(label)
         | 
| 22 | 
            +
              label.gsub(/[ '-.<>\/]/, '_').gsub(/\([^\)]*?\)/, '').sub(/^(\d+)/, "a_#{$1}").gsub(/^_+|_+$/, '').gsub(/_+/, '_')
         | 
| 21 23 | 
             
            end
         | 
| 22 24 |  | 
| 23 25 | 
             
            reader = RDF::RDFXML::Reader.open(ARGV[0])
         | 
| @@ -55,7 +57,7 @@ model.keys.each { |key| | |
| 55 57 | 
             
              next unless type
         | 
| 56 58 |  | 
| 57 59 | 
             
              label = entry[RDF::RDFS.label].to_s
         | 
| 58 | 
            -
              next if  | 
| 60 | 
            +
              next if make_safe_label(label).empty?
         | 
| 59 61 | 
             
              uri = key.to_s
         | 
| 60 62 |  | 
| 61 63 | 
             
              # Only deal with URI sub-classes/sub-properties, whilst ignoring restrictions, etc.
         | 
| @@ -84,7 +86,7 @@ model.keys.each { |key| | |
| 84 86 | 
             
                set = combined_uris[label_or_synonym]
         | 
| 85 87 | 
             
                set = [] unless set
         | 
| 86 88 | 
             
                combined_uris[label_or_synonym] = set | [ uri ]
         | 
| 87 | 
            -
                generated_labels[label_or_synonym] =  | 
| 89 | 
            +
                generated_labels[label_or_synonym] = make_safe_label(label_or_synonym)
         | 
| 88 90 | 
             
              }
         | 
| 89 91 |  | 
| 90 92 | 
             
              object_properties[uri] = true if type == RDF::OWL.ObjectProperty
         | 
    
        data/lib/biointerchange/core.rb
    CHANGED
    
    | @@ -5,11 +5,23 @@ | |
| 5 5 | 
             
            # of it as a gem in your own Ruby implementation.
         | 
| 6 6 | 
             
            module BioInterchange
         | 
| 7 7 |  | 
| 8 | 
            +
              ### Global behaviour settings, which can be altered programmatically or via the CLI:
         | 
| 9 | 
            +
             | 
| 10 | 
            +
              # If true, then RDF::Graph's "insert" function will be overwritten so that it
         | 
| 11 | 
            +
              # immediately outputs N-Triples. This reduces memory requirements (since no RDF
         | 
| 12 | 
            +
              # graph is kept in memory) and improves performance (since no looping through an RDF graph
         | 
| 13 | 
            +
              # is necessary).
         | 
| 14 | 
            +
              @@skip_rdf_graph = true
         | 
| 15 | 
            +
              def self.skip_rdf_graph
         | 
| 16 | 
            +
                @@skip_rdf_graph
         | 
| 17 | 
            +
              end
         | 
| 18 | 
            +
             | 
| 8 19 | 
             
              # Custom Exceptions and Errors
         | 
| 9 20 | 
             
              require 'biointerchange/exceptions'
         | 
| 10 21 |  | 
| 11 22 | 
             
              # Ontologies (besides the ones from the 'rdf' gem)
         | 
| 12 23 | 
             
              require 'biointerchange/gff3o'
         | 
| 24 | 
            +
              require 'biointerchange/goxref'
         | 
| 13 25 | 
             
              require 'biointerchange/gvf1o'
         | 
| 14 26 | 
             
              require 'biointerchange/sio'
         | 
| 15 27 | 
             
              require 'biointerchange/sofa'
         | 
| @@ -78,14 +90,17 @@ module BioInterchange | |
| 78 90 | 
             
                  opt = Getopt::Long.getopts(
         | 
| 79 91 | 
             
                    ["--help", "-h", Getopt::BOOLEAN],
         | 
| 80 92 | 
             
                    ["--debug", "-d", Getopt::BOOLEAN],  # set debug mode => print stack traces
         | 
| 93 | 
            +
                    ["--no_rdf_graph_optimization", "-n", Getopt::BOOLEAN], # set self.skip_rdf_graph to false
         | 
| 94 | 
            +
                    ["--batchsize", "-b", Getopt::OPTIONAL], # batchsize for readers/writers that support +postponed?+
         | 
| 81 95 | 
             
                    ["--input", "-i", Getopt::REQUIRED], # input file format
         | 
| 82 96 | 
             
                    ["--rdf", "-r", Getopt::REQUIRED], # output file format
         | 
| 83 | 
            -
                    ["-- | 
| 84 | 
            -
                    ["-- | 
| 85 | 
            -
                    ["-- | 
| 86 | 
            -
                    ["-- | 
| 97 | 
            +
                    ["--annotate_name", Getopt::OPTIONAL], # name of resource/tool/person
         | 
| 98 | 
            +
                    ["--annotate_name_id", Getopt::OPTIONAL], # uri of resource/tool/person
         | 
| 99 | 
            +
                    ["--annotate_date", Getopt::OPTIONAL], # date of processing/annotation
         | 
| 100 | 
            +
                    ["--annotate_version", Getopt::OPTIONAL], # version number of resource
         | 
| 87 101 | 
             
                    ["--file", "-f", Getopt::OPTIONAL], # file to read, will read from STDIN if not supplied
         | 
| 88 | 
            -
                    ["--out", "-o", Getopt::OPTIONAL] # output file, will out to STDOUT if not supplied
         | 
| 102 | 
            +
                    ["--out", "-o", Getopt::OPTIONAL], # output file, will out to STDOUT if not supplied
         | 
| 103 | 
            +
                    ["--version", "-v", Getopt::OPTIONAL] # output the version number of the gem and exit
         | 
| 89 104 | 
             
                  )
         | 
| 90 105 |  | 
| 91 106 | 
             
                  if opt['help'] or not opt['input'] or not opt['rdf'] then
         | 
| @@ -115,26 +130,38 @@ module BioInterchange | |
| 115 130 | 
             
                    puts '  Input: dbcls.catanns.json, uk.ac.man.pdfx'
         | 
| 116 131 | 
             
                    puts '  Output: rdf.bh12.sio'
         | 
| 117 132 | 
             
                    puts '  Options:'
         | 
| 118 | 
            -
                    puts '     | 
| 119 | 
            -
                    puts '     | 
| 120 | 
            -
                    puts '    -- | 
| 121 | 
            -
                    puts '    -- | 
| 133 | 
            +
                    puts '    --annotate_date <date>           : date of processing/annotation (optional)'
         | 
| 134 | 
            +
                    puts '    --annotate_version <version>     : version number of resource (optional)'
         | 
| 135 | 
            +
                    puts '    --annotate_name <name>           : name of resource/tool/person (required)'
         | 
| 136 | 
            +
                    puts '    --annotate_name_id <id>          : URI of resource/tool/person (required)'
         | 
| 122 137 | 
             
                    puts ''
         | 
| 123 138 | 
             
                    puts 'Input-/RDF-format specific options:'
         | 
| 124 139 | 
             
                    puts '  Input: biointerchange.gff3 or biointerchange.gvf'
         | 
| 125 140 | 
             
                    puts '  Output: rdf.biointerchange.gff3 or rdf.biointerchange.gvf'
         | 
| 126 141 | 
             
                    puts '  Options:'
         | 
| 142 | 
            +
                    puts '    -b <size>/--batchsize <size>     : process features in batches of the given size (optional)'
         | 
| 127 143 | 
             
                    puts '    -t <date>/--date <date>          : date when the GFF3/GVF file was created (optional)'
         | 
| 128 144 | 
             
                    puts '    --name <name>                    : name of the GFF3/GVF file creator (optional)'
         | 
| 129 145 | 
             
                    puts '    --name_id <id>                   : email address of the GFF3/GVF file creator (optional)'
         | 
| 130 146 | 
             
                    puts ''
         | 
| 131 147 | 
             
                    puts 'Other options:'
         | 
| 148 | 
            +
                    puts '  -v / --version                     : print the Gem\'s version number and exit'
         | 
| 132 149 | 
             
                    puts '  -d / --debug                       : turn on debugging output (for stacktraces)'
         | 
| 133 150 | 
             
                    puts '  -h  --help                         : this message'
         | 
| 134 151 |  | 
| 135 152 | 
             
                    exit 1
         | 
| 136 153 | 
             
                  end
         | 
| 137 154 |  | 
| 155 | 
            +
                  # Print version number and exit:
         | 
| 156 | 
            +
                  if opt['version'] then
         | 
| 157 | 
            +
                    puts 'BioInterchange 0.1.4'
         | 
| 158 | 
            +
                    exit
         | 
| 159 | 
            +
                  end
         | 
| 160 | 
            +
             | 
| 161 | 
            +
                  # Turn off optimization, if requested. This will generate an RDF graph in memory and
         | 
| 162 | 
            +
                  # at least double memory requirements and runtime.
         | 
| 163 | 
            +
                  @@skip_rdf_graph = false if opt['no_rdf_graph_optimization']
         | 
| 164 | 
            +
             | 
| 138 165 | 
             
                  # Check if the input/rdf options are supported:
         | 
| 139 166 | 
             
                  if opt['input'] == 'dbcls.catanns.json' or opt['input'] == 'uk.ac.man.pdfx' then
         | 
| 140 167 | 
             
                    if opt['rdf'] == 'rdf.bh12.sio' then
         | 
| @@ -158,27 +185,27 @@ module BioInterchange | |
| 158 185 | 
             
                    unsupported_combination
         | 
| 159 186 | 
             
                  end
         | 
| 160 187 |  | 
| 161 | 
            -
                  opt[' | 
| 162 | 
            -
             | 
| 163 | 
            -
                  
         | 
| 188 | 
            +
                  wrong_type('batchsize', 'a positive integer') if opt['batchsize'] and not opt['batchsize'].match(/^[1-9][0-9]*$/)
         | 
| 189 | 
            +
             | 
| 190 | 
            +
                  opt['batchsize'] = opt['batchsize'].to_i if opt['batchsize']
         | 
| 191 | 
            +
             | 
| 164 192 | 
             
                  # Generate model from file (deserialization).
         | 
| 165 193 | 
             
                  # Note: if-clauses are lexicographically ordered. 
         | 
| 166 194 | 
             
                  reader = nil
         | 
| 167 195 | 
             
                  if opt['input'] == 'biointerchange.gff3' then
         | 
| 168 | 
            -
                    reader = BioInterchange::Genomics::GFF3Reader.new(opt[' | 
| 196 | 
            +
                    reader = BioInterchange::Genomics::GFF3Reader.new(opt['annotate_name'], opt['annotate_name_id'], opt['annotate_date'], opt['batchsize'])
         | 
| 169 197 | 
             
                  elsif opt['input'] == 'biointerchange.gvf' then
         | 
| 170 | 
            -
                    reader = BioInterchange::Genomics::GVFReader.new(opt[' | 
| 198 | 
            +
                    reader = BioInterchange::Genomics::GVFReader.new(opt['annotate_name'], opt['annotate_name_id'], opt['annotate_date'], opt['batchsize'])
         | 
| 171 199 | 
             
                  elsif opt['input'] == 'dbcls.catanns.json' then
         | 
| 172 | 
            -
                    reader = BioInterchange::TextMining::PubannosJsonReader.new(opt[' | 
| 200 | 
            +
                    reader = BioInterchange::TextMining::PubannosJsonReader.new(opt['annotate_name'], opt['annotate_name_id'], opt['annotate_date'], BioInterchange::TextMining::Process::UNSPECIFIED, opt['version'])
         | 
| 173 201 | 
             
                  elsif opt['input'] == 'uk.ac.man.pdfx' then
         | 
| 174 | 
            -
                    reader = BioInterchange::TextMining::PdfxXmlReader.new(opt[' | 
| 202 | 
            +
                    reader = BioInterchange::TextMining::PdfxXmlReader.new(opt['annotate_name'], opt['annotate_name_id'], opt['annotate_date'], BioInterchange::TextMining::Process::UNSPECIFIED, opt['annotate_version'])
         | 
| 175 203 | 
             
                  end
         | 
| 176 204 |  | 
| 177 | 
            -
                  model = nil
         | 
| 178 205 | 
             
                  if opt["file"]
         | 
| 179 | 
            -
                     | 
| 206 | 
            +
                    input_source = File.new(opt["file"],'r')
         | 
| 180 207 | 
             
                  else
         | 
| 181 | 
            -
                     | 
| 208 | 
            +
                    input_source = STDIN
         | 
| 182 209 | 
             
                  end
         | 
| 183 210 |  | 
| 184 211 | 
             
                  # Generate rdf from model (serialization).
         | 
| @@ -193,7 +220,10 @@ module BioInterchange | |
| 193 220 | 
             
                    writer = BioInterchange::Genomics::RDFWriter.new(STDOUT) unless opt['out']
         | 
| 194 221 | 
             
                  end
         | 
| 195 222 |  | 
| 196 | 
            -
                   | 
| 223 | 
            +
                  begin
         | 
| 224 | 
            +
                    model = reader.deserialize(input_source)
         | 
| 225 | 
            +
                    writer.serialize(model)
         | 
| 226 | 
            +
                  end while reader.postponed?
         | 
| 197 227 |  | 
| 198 228 | 
             
                rescue ArgumentError => e
         | 
| 199 229 | 
             
                  $stderr.puts e.message
         | 
| @@ -228,11 +258,55 @@ module BioInterchange | |
| 228 258 | 
             
                }
         | 
| 229 259 | 
             
              end
         | 
| 230 260 |  | 
| 261 | 
            +
              # Returns a "safe" version of a label that can be used as a Ruby method name.
         | 
| 262 | 
            +
              #
         | 
| 263 | 
            +
              # +label+:: string that should be converted into a "safe" string that can be used as a Ruby method name
         | 
| 264 | 
            +
              def self.make_safe_label(label)
         | 
| 265 | 
            +
                label.gsub(/[ '-.<>\/]/, '_').gsub(/\([^\)]*?\)/, '').sub(/^(\d+)/, "a_#{$1}").gsub(/^_+|_+$/, '').gsub(/_+/, '_')
         | 
| 266 | 
            +
              end
         | 
| 267 | 
            +
             | 
| 231 268 | 
             
            private
         | 
| 232 269 |  | 
| 233 270 | 
             
              def self.unsupported_combination
         | 
| 234 271 | 
             
                raise ArgumentError, 'This input/output format combination is not supported.'
         | 
| 235 272 | 
             
              end
         | 
| 236 273 |  | 
| 274 | 
            +
              def self.wrong_type(parameter, expected_type)
         | 
| 275 | 
            +
                raise ArgumentError, "The parameter '#{parameter}' needs to be #{expected_type}."
         | 
| 276 | 
            +
              end
         | 
| 277 | 
            +
             | 
| 278 | 
            +
            end
         | 
| 279 | 
            +
             | 
| 280 | 
            +
            # Overwrite RDF::Graph implementation, in case we do not want to keep
         | 
| 281 | 
            +
            # the complete graph in memory. If the implementing writer does not
         | 
| 282 | 
            +
            # set an output stream via +fast_ostream+, then fall back to the original
         | 
| 283 | 
            +
            # implementation.
         | 
| 284 | 
            +
            module RDF
         | 
| 285 | 
            +
             | 
| 286 | 
            +
            class Graph
         | 
| 287 | 
            +
              # DO NOT keep old insert implementation due to infinite recursion caused by module loading dependencies!
         | 
| 288 | 
            +
              # alias_method :graph_building_insert, :insert
         | 
| 289 | 
            +
             | 
| 290 | 
            +
              # Set an output stream for writing in +insert+.
         | 
| 291 | 
            +
              #
         | 
| 292 | 
            +
              # +ostream+:: Output stream that is populated by +insert+, if optimization can be carried out.
         | 
| 293 | 
            +
              def fast_ostream(ostream)
         | 
| 294 | 
            +
                @ostream = ostream
         | 
| 295 | 
            +
              end
         | 
| 296 | 
            +
             | 
| 297 | 
            +
              # Alternative implementation to +insert+, which can immediately output N-Triples instead
         | 
| 298 | 
            +
              # of building an in-memory graph first.
         | 
| 299 | 
            +
              #
         | 
| 300 | 
            +
              # +statement+:: RDF statement that should be serialized.
         | 
| 301 | 
            +
              def insert(statement)
         | 
| 302 | 
            +
                if BioInterchange::skip_rdf_graph and @ostream then
         | 
| 303 | 
            +
                  @ostream.puts(statement.to_ntriples)
         | 
| 304 | 
            +
                else
         | 
| 305 | 
            +
                  insert_statement(statement)
         | 
| 306 | 
            +
                end
         | 
| 307 | 
            +
              end
         | 
| 308 | 
            +
             | 
| 309 | 
            +
            end
         | 
| 310 | 
            +
             | 
| 237 311 | 
             
            end
         | 
| 238 312 |  | 
| @@ -38,11 +38,11 @@ class GFF3FeatureSet | |
| 38 38 | 
             
                @pragmas.keys
         | 
| 39 39 | 
             
              end
         | 
| 40 40 |  | 
| 41 | 
            -
              # Returns an URI for this particular feature set, which is a SHA1 hash over the  | 
| 41 | 
            +
              # Returns a URI for this particular feature set, which is a SHA1 hash over the concatenated names and values of its pragmas.
         | 
| 42 42 | 
             
              def uri
         | 
| 43 43 | 
             
                clob = ''
         | 
| 44 | 
            -
                 | 
| 45 | 
            -
                  clob << "#{ | 
| 44 | 
            +
                pragmas.each { |pragma_name|
         | 
| 45 | 
            +
                  clob << "#{pragma_name}\t#{pragma(pragma_name).to_s}\n"
         | 
| 46 46 | 
             
                }
         | 
| 47 47 | 
             
                "biointerchange://gff3/featureset/self/#{Digest::SHA1.hexdigest(clob)}"
         | 
| 48 48 | 
             
              end
         | 
| @@ -62,6 +62,14 @@ class GFF3FeatureSet | |
| 62 62 | 
             
                 # TODO Should throw exception if name is not a string.
         | 
| 63 63 | 
             
                 @pragmas[name] = value
         | 
| 64 64 | 
             
               end
         | 
| 65 | 
            +
             | 
| 66 | 
            +
               # Removes all features from the set, but keeps the pragmas. This enables
         | 
| 67 | 
            +
               # batched processing, since the URI for the set is only determined by the
         | 
| 68 | 
            +
               # pragma statement contents.
         | 
| 69 | 
            +
               def prune
         | 
| 70 | 
            +
                 @set.clear
         | 
| 71 | 
            +
               end
         | 
| 72 | 
            +
             | 
| 65 73 | 
             
            end
         | 
| 66 74 |  | 
| 67 75 | 
             
            end
         | 
| @@ -1,15 +1,15 @@ | |
| 1 1 |  | 
| 2 2 | 
             
            module BioInterchange::Genomics
         | 
| 3 3 |  | 
| 4 | 
            -
            # Represents a named region, which is defined by the pragma statement 'sequence-region'.
         | 
| 5 | 
            -
            class  | 
| 4 | 
            +
            # Represents a named region, a.k.a. landmark, which is defined by the pragma statement 'sequence-region'.
         | 
| 5 | 
            +
            class GFF3Landmark
         | 
| 6 6 |  | 
| 7 7 | 
             
              # Create a new instance of a named region.
         | 
| 8 8 | 
             
              #
         | 
| 9 9 | 
             
              # +seqid+:: unique identifier (in the GFF3 file context) that identifies this region
         | 
| 10 10 | 
             
              # +start_coordinate+:: genomic start coordinate of the region
         | 
| 11 11 | 
             
              # +end_coordinate+:: genomic end coordinate of the region
         | 
| 12 | 
            -
              def initialize(seqid, start_coordinate, end_coordinate)
         | 
| 12 | 
            +
              def initialize(seqid, start_coordinate = nil, end_coordinate = nil)
         | 
| 13 13 | 
             
                @seqid = seqid
         | 
| 14 14 | 
             
                @start_coordinate = start_coordinate
         | 
| 15 15 | 
             
                @end_coordinate = end_coordinate
         | 
| @@ -46,7 +46,16 @@ protected | |
| 46 46 | 
             
              #
         | 
| 47 47 | 
             
              # +model+:: an instance of +BioInterchange::Genomics::GFF3FeatureSet+
         | 
| 48 48 | 
             
              def serialize_model(model)
         | 
| 49 | 
            +
                # We record landmarks, because they can either be written when their "##sequence-region"
         | 
| 50 | 
            +
                # pragma statement appears, or otherwise, when the first feature with said landmark is
         | 
| 51 | 
            +
                # being serialized.
         | 
| 52 | 
            +
                @landmarks = {}
         | 
| 53 | 
            +
             | 
| 54 | 
            +
                # Record written variants in order to avoid writing out RDF.type multiple times.
         | 
| 55 | 
            +
                @variants = {}
         | 
| 56 | 
            +
             | 
| 49 57 | 
             
                graph = RDF::Graph.new
         | 
| 58 | 
            +
                graph.fast_ostream(@ostream) if BioInterchange::skip_rdf_graph
         | 
| 50 59 | 
             
                set_uri = RDF::URI.new(model.uri)
         | 
| 51 60 | 
             
                graph.insert(RDF::Statement.new(set_uri, RDF.type, @base.Set))
         | 
| 52 61 | 
             
                model.pragmas.each { |pragma_name|
         | 
| @@ -57,21 +66,32 @@ protected | |
| 57 66 | 
             
                }
         | 
| 58 67 | 
             
                RDF::NTriples::Writer.dump(graph, @ostream)
         | 
| 59 68 | 
             
                # TODO Figure out why the following is very slow. Use with 'rdf-raptor'.
         | 
| 69 | 
            +
                #      Having said that, Jena's rdfcat is very good for converting formats
         | 
| 70 | 
            +
                #      anyway, so perhaps it is not worth investigating the following.
         | 
| 60 71 | 
             
                # RDF::RDFXML::Writer.dump(graph, @ostream)
         | 
| 61 72 | 
             
              end
         | 
| 62 73 |  | 
| 63 74 | 
             
              # Serializes pragmas for a given feature set URI.
         | 
| 75 | 
            +
              #
         | 
| 64 76 | 
             
              # +graph+:: RDF graph to which the pragmas are added
         | 
| 65 77 | 
             
              # +set_uri+:: the feature set URI to which the pragmas belong
         | 
| 66 78 | 
             
              # +pragma+:: an object representing a pragma statement
         | 
| 67 79 | 
             
              def serialize_pragma(graph, set_uri, pragma)
         | 
| 68 80 | 
             
                if pragma.kind_of?(Hash) then
         | 
| 69 | 
            -
                  if pragma.has_key?(' | 
| 81 | 
            +
                  if (pragma.has_key?('attribute-method') or pragma.has_key?('data-source') or pragma.has_key?('score-method') or pragma.has_key?('source-method') or pragma.has_key?('technology-platform')) and @base == BioInterchange::GVF1O then
         | 
| 82 | 
            +
                    serialize_structured_attribute(graph, set_uri, pragma)
         | 
| 83 | 
            +
                  elsif pragma.has_key?('gff-version') and @base == BioInterchange::GFF3O then
         | 
| 70 84 | 
             
                    graph.insert(RDF::Statement.new(set_uri, @base.version, RDF::Literal.new(pragma['gff-version'], :datatype => RDF::XSD.float )))
         | 
| 71 85 | 
             
                  elsif pragma.has_key?('gff-version') and @base == BioInterchange::GVF1O then
         | 
| 72 86 | 
             
                    graph.insert(RDF::Statement.new(set_uri, @base.gff_version, RDF::Literal.new(pragma['gff-version'], :datatype => RDF::XSD.float )))
         | 
| 73 87 | 
             
                  elsif pragma.has_key?('gvf-version') and @base == BioInterchange::GVF1O then
         | 
| 74 88 | 
             
                    graph.insert(RDF::Statement.new(set_uri, @base.gvf_version, RDF::Literal.new(pragma['gvf-version'], :datatype => RDF::XSD.float )))
         | 
| 89 | 
            +
                  elsif pragma.has_key?('sequence-region') then
         | 
| 90 | 
            +
                    pragma['sequence-region'].keys.each { |seqid|
         | 
| 91 | 
            +
                      serialize_landmark(graph, set_uri, pragma['sequence-region'][seqid])
         | 
| 92 | 
            +
                    }
         | 
| 93 | 
            +
                  elsif pragma.has_key?('species') then
         | 
| 94 | 
            +
                    graph.insert(RDF::Statement.new(set_uri, @base.species, RDF::URI.new(pragma['species'])))
         | 
| 75 95 | 
             
                  end
         | 
| 76 96 | 
             
                else
         | 
| 77 97 | 
             
                  # TODO
         | 
| @@ -87,17 +107,18 @@ protected | |
| 87 107 | 
             
                # TODO Make sure there is only one value in the 'ID' list.
         | 
| 88 108 | 
             
                feature_uri = RDF::URI.new("#{set_uri.to_s}/feature/#{feature.sequence_id},#{feature.source},#{feature.type.to_s.sub(/^[^:]+:\/\//, '')},#{feature.start_coordinate},#{feature.end_coordinate},#{feature.strand},#{feature.phase}") unless feature.attributes.has_key?('ID')
         | 
| 89 109 | 
             
                feature_uri = RDF::URI.new("#{set_uri.to_s}/feature/#{feature.attributes['ID'][0]}") if feature.attributes.has_key?('ID')
         | 
| 90 | 
            -
                 | 
| 110 | 
            +
                feature_datatype_properties = @base.feature_properties.select { |uri| @base.is_datatype_property?(uri) }[0]
         | 
| 111 | 
            +
                feature_object_properties = @base.feature_properties.select { |uri| @base.is_object_property?(uri) }[0]
         | 
| 91 112 | 
             
                graph.insert(RDF::Statement.new(set_uri, @base.contains, feature_uri))
         | 
| 92 113 | 
             
                graph.insert(RDF::Statement.new(feature_uri, RDF.type, @base.Feature))
         | 
| 93 | 
            -
                graph | 
| 114 | 
            +
                serialize_landmark(graph, set_uri, GFF3Landmark.new(feature.sequence_id)) unless @landmarks.has_key?(feature.sequence_id)
         | 
| 115 | 
            +
                graph.insert(RDF::Statement.new(feature_uri, @base.with_parent([ @base.seqid ].flatten, feature_object_properties)[0], @landmarks[feature.sequence_id]))
         | 
| 94 116 | 
             
                graph.insert(RDF::Statement.new(feature_uri, @base.source, RDF::Literal.new(feature.source)))
         | 
| 95 | 
            -
                graph.insert(RDF::Statement.new(feature_uri, @base.type,  | 
| 96 | 
            -
                graph.insert(RDF::Statement.new(feature_uri, @base.with_parent(@base.start,  | 
| 97 | 
            -
                graph.insert(RDF::Statement.new(feature_uri, @base.with_parent(@base.end,  | 
| 117 | 
            +
                graph.insert(RDF::Statement.new(feature_uri, @base.type, feature.type))
         | 
| 118 | 
            +
                graph.insert(RDF::Statement.new(feature_uri, @base.with_parent(@base.start, feature_datatype_properties)[0], RDF::Literal.new(feature.start_coordinate)))
         | 
| 119 | 
            +
                graph.insert(RDF::Statement.new(feature_uri, @base.with_parent(@base.end, feature_datatype_properties)[0], RDF::Literal.new(feature.end_coordinate)))
         | 
| 98 120 | 
             
                graph.insert(RDF::Statement.new(feature_uri, @base.score, RDF::Literal.new(feature.score))) if feature.score
         | 
| 99 | 
            -
                 | 
| 100 | 
            -
                strand_uri = @base.with_parent(@base.strand, feature_properties)[0]
         | 
| 121 | 
            +
                strand_uri = @base.with_parent(@base.strand, feature_object_properties)[0]
         | 
| 101 122 | 
             
                case feature.strand
         | 
| 102 123 | 
             
                when BioInterchange::Genomics::GFF3Feature::NOT_STRANDED
         | 
| 103 124 | 
             
                  graph.insert(RDF::Statement.new(feature_uri, strand_uri, @base.NotStranded))
         | 
| @@ -108,13 +129,27 @@ protected | |
| 108 129 | 
             
                when BioInterchange::Genomics::GFF3Feature::NEGATIVE
         | 
| 109 130 | 
             
                  graph.insert(RDF::Statement.new(feature_uri, strand_uri, @base.Negative))
         | 
| 110 131 | 
             
                else
         | 
| 111 | 
            -
                  raise  | 
| 132 | 
            +
                  raise BioInterchange::Exceptions::InputFormatError, 'Strand of feature is set to an unknown constant.'
         | 
| 112 133 | 
             
                end
         | 
| 113 134 | 
             
                graph.insert(RDF::Statement.new(feature_uri, @base.phase, RDF::Literal.new(feature.phase))) if feature.phase
         | 
| 114 135 |  | 
| 115 136 | 
             
                serialize_attributes(graph, set_uri, feature_uri, feature.attributes) unless feature.attributes.keys.empty?
         | 
| 116 137 | 
             
              end
         | 
| 117 138 |  | 
| 139 | 
            +
              # Serializes a genomic feature landmark ("seqid").
         | 
| 140 | 
            +
              #
         | 
| 141 | 
            +
              # +graph+:: RDF graph to which the landmark is added
         | 
| 142 | 
            +
              # +set_uri+:: the feature set URI to which the landmark belongs
         | 
| 143 | 
            +
              # +landmark+:: encapsulated landmark data
         | 
| 144 | 
            +
              def serialize_landmark(graph, set_uri, landmark)
         | 
| 145 | 
            +
                return if @landmarks.has_key?(landmark.seqid)
         | 
| 146 | 
            +
                landmark_uri = RDF::URI.new("#{set_uri.to_s}/landmark/#{landmark.seqid}")
         | 
| 147 | 
            +
                @landmarks[landmark.seqid] = landmark_uri
         | 
| 148 | 
            +
                graph.insert(RDF::Statement.new(landmark_uri, @base.with_parent([ @base.id ].flatten, @base.landmark_properties)[0], RDF::Literal.new(landmark.seqid)))
         | 
| 149 | 
            +
                graph.insert(RDF::Statement.new(landmark_uri, @base.with_parent([ @base.start ].flatten, @base.landmark_properties)[0], RDF::Literal.new(landmark.start_coordinate))) if landmark.start_coordinate
         | 
| 150 | 
            +
                graph.insert(RDF::Statement.new(landmark_uri, @base.with_parent([ @base.end ].flatten, @base.landmark_properties)[0], RDF::Literal.new(landmark.end_coordinate))) if landmark.end_coordinate
         | 
| 151 | 
            +
              end
         | 
| 152 | 
            +
             | 
| 118 153 | 
             
              # Serializes the attributes of a feature.
         | 
| 119 154 | 
             
              #
         | 
| 120 155 | 
             
              # +graph+:: RDF graph to which the feature is added
         | 
| @@ -123,24 +158,194 @@ protected | |
| 123 158 | 
             
              # +attributes+:: a map of tag/value pairs
         | 
| 124 159 | 
             
              def serialize_attributes(graph, set_uri, feature_uri, attributes)
         | 
| 125 160 | 
             
                attributes.each_pair { |tag, list|
         | 
| 126 | 
            -
                  if  | 
| 161 | 
            +
                  # Check for defined tags (in alphabetical order), if not matched, serialize as generic Attribute:
         | 
| 162 | 
            +
                  if tag == 'Alias' then
         | 
| 163 | 
            +
                    list.each { |value|
         | 
| 164 | 
            +
                      graph.insert(RDF::Statement.new(feature_uri, @base.alias, RDF::Literal.new(value)))
         | 
| 165 | 
            +
                    }
         | 
| 166 | 
            +
                  elsif tag == 'Dbxref' then
         | 
| 167 | 
            +
                    feature_properties = @base.feature_properties.select { |uri| @base.is_object_property?(uri) }[0]
         | 
| 168 | 
            +
                    list.each { |value|
         | 
| 169 | 
            +
                      begin
         | 
| 170 | 
            +
                        if value.match(/^dbSNP(_\d+)?:rs\d+$/) then
         | 
| 171 | 
            +
                          graph.insert(RDF::Statement.new(feature_uri, @base.with_parent([ @base.dbxref ].flatten, feature_properties)[0], RDF::URI.new("http://www.ncbi.nlm.nih.gov/projects/SNP/snp_ref.cgi?rs=#{value.split(/:/)[1].sub(/^rs/, '')}")))
         | 
| 172 | 
            +
                        elsif value.match(/^COSMIC(_\d+)?:COSM\d+$/) then
         | 
| 173 | 
            +
                          graph.insert(RDF::Statement.new(feature_uri, @base.with_parent([ @base.dbxref ].flatten, feature_properties)[0], RDF::URI.new("http://cancer.sanger.ac.uk/cosmic/mutation/overview?id=#{value.split(/:/)[1].sub(/^COSM/, '')}")))
         | 
| 174 | 
            +
                        else
         | 
| 175 | 
            +
                          abbreviation, id = value.split(':', 2)
         | 
| 176 | 
            +
                          graph.insert(RDF::Statement.new(feature_uri, @base.with_parent([ @base.dbxref ].flatten, feature_properties)[0], RDF::URI.new(BioInterchange::GOXRef.send(BioInterchange.make_safe_label(abbreviation)).to_s + id)))
         | 
| 177 | 
            +
                        end
         | 
| 178 | 
            +
                      rescue NoMethodError
         | 
| 179 | 
            +
                        raise BioInterchange::Exceptions::InputFormatError, 'Attribute Dbxref link-out is not resolvable, i.e. the name cannot be turned into an URL.'
         | 
| 180 | 
            +
                      end
         | 
| 181 | 
            +
                    }
         | 
| 182 | 
            +
                  elsif tag == 'Derives_from' and @base == BioInterchange::GFF3O then
         | 
| 183 | 
            +
                    list.each { |value|
         | 
| 184 | 
            +
                      graph.insert(RDF::Statement.new(feature_uri, @base.derives_from, RDF::URI.new("#{set_uri.to_s}/feature/#{value}")))
         | 
| 185 | 
            +
                    }
         | 
| 186 | 
            +
                  elsif tag == 'Gap' and @base == BioInterchange::GFF3O then
         | 
| 187 | 
            +
                    graph.insert(RDF::Statement.new(feature_uri, @base.gap, RDF::Literal.new(list.join(','))))
         | 
| 188 | 
            +
                  elsif tag == 'ID' then
         | 
| 189 | 
            +
                    # Do nothing. The feature ID is the URI of the feature. It is not relevant as information anymore.
         | 
| 190 | 
            +
                  elsif tag == 'Is_circular' and @base == BioInterchange::GFF3O then
         | 
| 191 | 
            +
                    value = list.join(',')
         | 
| 192 | 
            +
                    graph.insert(RDF::Statement.new(feature_uri, @base.is_circular, true)) if value == 'true'
         | 
| 193 | 
            +
                    graph.insert(RDF::Statement.new(feature_uri, @base.is_circular, false)) if value == 'false'
         | 
| 194 | 
            +
                    # TODO Report invalid value.
         | 
| 195 | 
            +
                  elsif tag == 'Name' and @base == BioInterchange::GFF3O then
         | 
| 196 | 
            +
                    graph.insert(RDF::Statement.new(feature_uri, @base.name, RDF::Literal.new(list.join(','))))
         | 
| 197 | 
            +
                  elsif tag == 'Note' and @base == BioInterchange::GFF3O then
         | 
| 198 | 
            +
                    list.each { |value|
         | 
| 199 | 
            +
                      graph.insert(RDF::Statement.new(feature_uri, @base.note, RDF::Literal.new(value)))
         | 
| 200 | 
            +
                    }
         | 
| 201 | 
            +
                  elsif tag == 'Ontology_term' and @base == BioInterchange::GFF3O then
         | 
| 202 | 
            +
                    list.each { |value|
         | 
| 203 | 
            +
                      # TODO Sanitize values that are either not in GO xrf_abbs or need conversion to match
         | 
| 204 | 
            +
                      #      their associated Ruby method.
         | 
| 205 | 
            +
                      namespace, accession = value.split(/:/, 2)
         | 
| 206 | 
            +
                      graph.insert(RDF::Statement.new(feature_uri, @base.ontology_term, RDF::URI.new("#{BioInterchange::GOXRef.send(namespace).to_s}#{accession}")))
         | 
| 207 | 
            +
                    }
         | 
| 208 | 
            +
                  elsif tag == 'Parent' then
         | 
| 127 209 | 
             
                    list.each { |parent_id|
         | 
| 128 210 | 
             
                      graph.insert(RDF::Statement.new(feature_uri, @base.parent, RDF::URI.new("#{set_uri.to_s}/feature/#{parent_id}")))
         | 
| 129 211 | 
             
                    }
         | 
| 212 | 
            +
                  elsif tag == 'Reference_seq' then
         | 
| 213 | 
            +
                    list.each { |value|
         | 
| 214 | 
            +
                      graph.insert(RDF::Statement.new(feature_uri, @base.reference_seq, RDF::Literal.new(value)))
         | 
| 215 | 
            +
                    }
         | 
| 216 | 
            +
                  elsif tag == 'Target' then
         | 
| 217 | 
            +
                    target_id, start_coordinate, end_coordinate, strand = list.join(',').split(/\s+/, 4)
         | 
| 218 | 
            +
                    target_datatype_properties = @base.target_properties.select { |uri| @base.is_datatype_property?(uri) }[0]
         | 
| 219 | 
            +
                    target_object_properties = @base.target_properties.select { |uri| @base.is_object_property?(uri) }[0]
         | 
| 220 | 
            +
                    target_uri = RDF::URI.new("#{feature_uri.to_s}/target/#{target_id}")
         | 
| 221 | 
            +
                    graph.insert(RDF::Statement.new(target_uri, RDF.type, @base.Target))
         | 
| 222 | 
            +
                    graph.insert(RDF::Statement.new(target_uri, @base.target_id, RDF::Literal.new(target_id)))
         | 
| 223 | 
            +
                    graph.insert(RDF::Statement.new(target_uri, @base.with_parent([ @base.start ].flatten, target_datatype_properties)[0], RDF::Literal.new(start_coordinate.to_i)))
         | 
| 224 | 
            +
                    graph.insert(RDF::Statement.new(target_uri, @base.with_parent([ @base.end ].flatten, target_datatype_properties)[0], RDF::Literal.new(end_coordinate.to_i)))
         | 
| 225 | 
            +
                    graph.insert(RDF::Statement.new(target_uri, @base.with_parent([ @base.end ].flatten, target_object_properties)[0], @base.Positive)) if strand and strand == '+'
         | 
| 226 | 
            +
                    graph.insert(RDF::Statement.new(target_uri, @base.with_parent([ @base.end ].flatten, target_object_properties)[0], @base.Negative)) if strand and strand == '-'
         | 
| 227 | 
            +
                    graph.insert(RDF::Statement.new(feature_uri, @base.target, target_uri))
         | 
| 228 | 
            +
                  elsif tag == 'Variant_seq' and @base == BioInterchange::GVF1O then
         | 
| 229 | 
            +
                    serialize_variant_seqs(graph, set_uri, feature_uri, list)
         | 
| 130 230 | 
             
                  else
         | 
| 231 | 
            +
                    # TODO Report unknown upper case letters here? That would be a spec. validation...
         | 
| 232 | 
            +
                    #      Well, or it would show that this implementation is incomplete. Could be either.
         | 
| 233 | 
            +
                    attribute_properties = @base.attribute_properties
         | 
| 234 | 
            +
                    attribute_properties = attribute_properties.select { |uri| @base.is_datatype_property?(uri) }[0] if attribute_properties.kind_of?(Array)
         | 
| 235 | 
            +
                    feature_properties = @base.feature_properties.select { |uri| @base.is_object_property?(uri) }[0]
         | 
| 131 236 | 
             
                    list.each_index { |index|
         | 
| 132 237 | 
             
                      value = list[index]
         | 
| 133 238 | 
             
                      attribute_uri = RDF::URI.new("#{feature_uri.to_s}/attribute/#{tag}") if list.size == 1
         | 
| 134 239 | 
             
                      attribute_uri = RDF::URI.new("#{feature_uri.to_s}/attribute/#{tag}-#{index + 1}") unless list.size == 1
         | 
| 135 | 
            -
                      graph.insert(RDF::Statement.new(feature_uri, @base.attributes, attribute_uri))
         | 
| 240 | 
            +
                      graph.insert(RDF::Statement.new(feature_uri, @base.with_parent([ @base.attributes ].flatten, feature_properties)[0], attribute_uri))
         | 
| 136 241 | 
             
                      graph.insert(RDF::Statement.new(attribute_uri, RDF.type, @base.Attribute))
         | 
| 137 | 
            -
                      graph.insert(RDF::Statement.new(attribute_uri, @base.tag, RDF::Literal.new("#{tag}")))
         | 
| 242 | 
            +
                      graph.insert(RDF::Statement.new(attribute_uri, @base.with_parent([ @base.tag ].flatten, attribute_properties)[0], RDF::Literal.new("#{tag}")))
         | 
| 138 243 | 
             
                      graph.insert(RDF::Statement.new(attribute_uri, RDF.value, RDF::Literal.new(value)))
         | 
| 139 244 | 
             
                    }
         | 
| 140 245 | 
             
                  end
         | 
| 141 246 | 
             
                }
         | 
| 142 247 | 
             
              end
         | 
| 143 248 |  | 
| 249 | 
            +
              # Serializes a structured attribute (given as a pragma statement), which later
         | 
| 250 | 
            +
              # can be referred to from feature instances.
         | 
| 251 | 
            +
              #
         | 
| 252 | 
            +
              # +graph+:: RDF graph to which the structured attribute is added
         | 
| 253 | 
            +
              # +set_uri+:: the feature set URI to which the structured attribute belongs
         | 
| 254 | 
            +
              # +pragma+:: a map that encapsulates the structured attribute data
         | 
| 255 | 
            +
              def serialize_structured_attribute(graph, set_uri, pragma)
         | 
| 256 | 
            +
                attribute_uri = RDF::URI.new("#{set_uri.to_s}/structured_attribute/#{pragma.object_id}")
         | 
| 257 | 
            +
                attributes = nil
         | 
| 258 | 
            +
                class_type = nil
         | 
| 259 | 
            +
                if pragma.has_key?('attribute-method') then
         | 
| 260 | 
            +
                  attributes = pragma['attribute-method'][0]
         | 
| 261 | 
            +
                  class_type = @base.Method
         | 
| 262 | 
            +
                elsif pragma.has_key?('data-source') then
         | 
| 263 | 
            +
                  attributes = pragma['data-source'][0]
         | 
| 264 | 
            +
                  class_type = @base.DataSource
         | 
| 265 | 
            +
                elsif pragma.has_key?('score-method') then
         | 
| 266 | 
            +
                  attributes = pragma['score-method'][0]
         | 
| 267 | 
            +
                  class_type = @base.Method
         | 
| 268 | 
            +
                elsif pragma.has_key?('source-method') then
         | 
| 269 | 
            +
                  attributes = pragma['source-method'][0]
         | 
| 270 | 
            +
                  class_type = @base.Method
         | 
| 271 | 
            +
                elsif pragma.has_key?('technology-platform') then
         | 
| 272 | 
            +
                  attributes = pragma['technology-platform'][0]
         | 
| 273 | 
            +
                  class_type = @base.TechnologyPlatform
         | 
| 274 | 
            +
                else
         | 
| 275 | 
            +
                  # TODO Error.
         | 
| 276 | 
            +
                end
         | 
| 277 | 
            +
                graph.insert(RDF::Statement.new(attribute_uri, RDF.type, class_type))
         | 
| 278 | 
            +
                if class_type == @base.DataSource and attributes.has_key?('Data_type') then
         | 
| 279 | 
            +
                  data_type_individual = nil
         | 
| 280 | 
            +
                  attributes['Data_type'] = attributes['Data_type'][0] # TODO Make sure array is of length 1.
         | 
| 281 | 
            +
                  if attributes['Data_type'] == 'Array_CGH' then
         | 
| 282 | 
            +
                    data_type_individual = @base.ArrayComparativeGenomicHybridization
         | 
| 283 | 
            +
                  elsif attributes['Data_type'] == 'DNA_microarray' then
         | 
| 284 | 
            +
                    data_type_individual = @base.DNAMicroarray
         | 
| 285 | 
            +
                  elsif attributes['Data_type'] == 'DNA_sequence' then
         | 
| 286 | 
            +
                    data_type_individual = @base.DNASequence
         | 
| 287 | 
            +
                  elsif attributes['Data_type'] == 'RNA_sequence' then
         | 
| 288 | 
            +
                    data_type_individual = @base.RNASequence
         | 
| 289 | 
            +
                  else
         | 
| 290 | 
            +
                    # TODO Error.
         | 
| 291 | 
            +
                  end
         | 
| 292 | 
            +
                  graph.insert(RDF::Statement.new(attribute_uri, @base.data_type, data_type_individual))
         | 
| 293 | 
            +
                elsif class_type == @base.TechnologyPlatform then
         | 
| 294 | 
            +
                  if attributes.has_key?('Average_coverage') then
         | 
| 295 | 
            +
                    graph.insert(RDF::Statement.new(attribute_uri, @base.average_coverage, RDF::Literal.new(attributes['Average_coverage'][0].to_i)))
         | 
| 296 | 
            +
                  end
         | 
| 297 | 
            +
                  if attributes.has_key?('Platform_class') then
         | 
| 298 | 
            +
                    graph.insert(RDF::Statement.new(attribute_uri, @base.platform_class, RDF::Literal.new(attributes['Platform_class'][0])))
         | 
| 299 | 
            +
                  end
         | 
| 300 | 
            +
                  if attributes.has_key?('Platform_name') then
         | 
| 301 | 
            +
                    graph.insert(RDF::Statement.new(attribute_uri, @base.platform_name, RDF::Literal.new(attributes['Platform_name'][0])))
         | 
| 302 | 
            +
                  end
         | 
| 303 | 
            +
                  if attributes.has_key?('Read_length') then
         | 
| 304 | 
            +
                    graph.insert(RDF::Statement.new(attribute_uri, @base.read_length, RDF::Literal.new(attributes['Read_length'][0].to_i)))
         | 
| 305 | 
            +
                  end
         | 
| 306 | 
            +
                  if attributes.has_key?('Read_pair_span') then
         | 
| 307 | 
            +
                    graph.insert(RDF::Statement.new(attribute_uri, @base.read_pair_span, RDF::Literal.new(attributes['Read_pair_span'][0].to_i)))
         | 
| 308 | 
            +
                  end
         | 
| 309 | 
            +
                  if attributes.has_key?('Read_type') then
         | 
| 310 | 
            +
                    read_type_individual = nil
         | 
| 311 | 
            +
                    attributes['Read_type'] = attributes['Read_type'][0] # TODO Make sure array is of length 1.
         | 
| 312 | 
            +
                    if attributes['Read_type'] == 'fragment' then
         | 
| 313 | 
            +
                      read_type_individual = @base.Fragment
         | 
| 314 | 
            +
                    elsif attributes['Read_type'] == 'pair' then
         | 
| 315 | 
            +
                      read_type_individual = @base.Pair
         | 
| 316 | 
            +
                    else
         | 
| 317 | 
            +
                      # TODO Error.
         | 
| 318 | 
            +
                    end
         | 
| 319 | 
            +
                    graph.insert(RDF::Statement.new(attribute_uri, @base.read_type, read_type_individual))
         | 
| 320 | 
            +
                  end
         | 
| 321 | 
            +
                end
         | 
| 322 | 
            +
                structuredpragma_properties = @base.structuredpragma_properties.select { |uri| @base.is_object_property?(uri) }[0]
         | 
| 323 | 
            +
                attributes.keys.each { |tag|
         | 
| 324 | 
            +
                  if tag.match(/^[a-z]/) then
         | 
| 325 | 
            +
                    custom_attribute_uri = RDF::URI.new("#{attribute_uri.to_s}/attribute/#{tag}")
         | 
| 326 | 
            +
                    graph.insert(RDF::Statement.new(custom_attribute_uri, RDF.type, @base.StructuredAttribute))
         | 
| 327 | 
            +
                    graph.insert(RDF::Statement.new(custom_attribute_uri, @base.with_parent([ @base.tag ].flatten, @base.structuredattribute_properties)[0], tag))
         | 
| 328 | 
            +
                    graph.insert(RDF::Statement.new(custom_attribute_uri, RDF.value, RDF::Literal.new(attributes[tag].join(','))))
         | 
| 329 | 
            +
                    graph.insert(RDF::Statement.new(attribute_uri, @base.with_parent([ @base.attributes ].flatten, structuredpragma_properties)[0], custom_attribute_uri))
         | 
| 330 | 
            +
                  end
         | 
| 331 | 
            +
                }
         | 
| 332 | 
            +
              end
         | 
| 333 | 
            +
             | 
| 334 | 
            +
              # Serializes a list of variant sequences.
         | 
| 335 | 
            +
              #
         | 
| 336 | 
            +
              # +graph+:: RDF graph to which the variant sequences are added
         | 
| 337 | 
            +
              # +set_uri+:: the feature set URI to which the feature belongs
         | 
| 338 | 
            +
              # +feature_uri+:: the feature URI to the feature that is annotated with variant data
         | 
| 339 | 
            +
              # +list+:: list of variant values
         | 
| 340 | 
            +
              def serialize_variant_seqs(graph, set_uri, feature_uri, list)
         | 
| 341 | 
            +
                list.each_index { |index|
         | 
| 342 | 
            +
                  value = list[index]
         | 
| 343 | 
            +
                  variant_uri = RDF::URI.new("#{feature_uri.to_s}/variant/#{index}")
         | 
| 344 | 
            +
                  graph.insert(RDF::Statement.new(variant_uri, RDF.type, @base.Variant)) unless @variants.has_key?(variant_uri.to_s)
         | 
| 345 | 
            +
                  @variants[variant_uri.to_s] = true
         | 
| 346 | 
            +
                  graph.insert(RDF::Statement.new(variant_uri, @base.variant_seq, RDF::Literal.new(value)))
         | 
| 347 | 
            +
                }
         | 
| 348 | 
            +
              end
         | 
| 144 349 | 
             
            end
         | 
| 145 350 |  | 
| 146 351 | 
             
            end
         |