biointerchange 1.0.4 → 1.0.5

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: ac318ac0a65f19b3d411a9e478227dbfe80503d3
4
- data.tar.gz: f821af2f98f214f44f9053227e46cf06a187383f
3
+ metadata.gz: 740da15b29a3a3da4b012e17b16e4ce3c5cef989
4
+ data.tar.gz: fb7508cdacae1aa18d5d142cebed3717b4c66e91
5
5
  SHA512:
6
- metadata.gz: b30d9a4bbf5684f5fcd131934b2ae06233c7bab5c680be35ce554996b27bb7a2b43a6a2d6c79093acdca4dadcce20e7c653c48e615aecb4d68fd2f015ce2f93e
7
- data.tar.gz: cd3308acbed7bcaaa763a8f1abd65cd597b9622e8cb36d8dc0fc5bc40d71a47f808082fdcc5b9a5a70c1cd5fd753121353c9ac2585e8616daebaf2aee4218e50
6
+ metadata.gz: 9cb7e6845df394b0a7d5e54310456841471baa54c2a081c18d6a7fb3c5a6a929074acc42690101c083d6f16f8d02e1f995a12806017a45c5b5e3ead24e75c5d7
7
+ data.tar.gz: 7921f0f48abbb8bdc8e0c2d0c47251b40b70468cde3210d9f39edb98bc94e21c778c49d79a5740101df1bae1dbeedf9e88cdac7bb08e95d642409d4915e78f12
data/README.md CHANGED
@@ -107,7 +107,7 @@ The following list provides information on the origin of the example-data files
107
107
 
108
108
  The following lists data that has been used in testing BioInterchange's implementation, but was not included in the GitHub repository due to its size:
109
109
 
110
- * `mgp.v3.indels.rsIDdbSNPv137.vcf.gz`: Gzipped VCF file of M. musculus indels by the Sanger Institute. Downloaded from [ftp://ftp-mouse.sanger.ac.uk/current_snps/mgp.v3.indels.rsIDdbSNPv137.vcf.gz](ftp://ftp-mouse.sanger.ac.uk/current_snps/mgp.v3.indels.rsIDdbSNPv137.vcf.gz)
110
+ * `mgp.v4.indels.dbSNP.vcf.gz`: Gzipped VCF file of M. musculus indels by the Sanger Institute. Downloaded from [ftp://ftp-mouse.sanger.ac.uk/REL-1410-SNPs_Indels/mgp.v4.indels.dbSNP.vcf.gz](ftp://ftp-mouse.sanger.ac.uk/REL-1410-SNPs_Indels/mgp.v4.indels.dbSNP.vcf.gz)
111
111
 
112
112
  ### Application Programming Interface
113
113
 
data/VERSION CHANGED
@@ -1 +1 @@
1
- 1.0.4
1
+ 1.0.5
@@ -255,26 +255,24 @@ protected
255
255
  bin_uri = RDF::URI.new("bin://#{feature.sequence_id}/#{BioInterchange::Genomics::Locations.reg2bin(feature.start_coordinate, feature.end_coordinate)}")
256
256
  create_triple(bin_uri, RDF::URI.new('bin://contains'), feature_uri)
257
257
  end
258
- create_triple(region_uri, BioInterchange::FALDO.begin, start_position_uri)
259
- create_triple(region_uri, BioInterchange::FALDO.end, end_position_uri)
260
- case feature.strand
261
- when BioInterchange::Genomics::GFF3Feature::NOT_STRANDED
262
- create_triple(start_position_uri, RDF.type, BioInterchange::FALDO.Position)
263
- create_triple(end_position_uri, RDF.type, BioInterchange::FALDO.Position)
264
- when BioInterchange::Genomics::GFF3Feature::UNKNOWN
265
- create_triple(start_position_uri, RDF.type, BioInterchange::FALDO.Position)
266
- create_triple(end_position_uri, RDF.type, BioInterchange::FALDO.Position)
267
- when BioInterchange::Genomics::GFF3Feature::POSITIVE
268
- create_triple(start_position_uri, RDF.type, BioInterchange::FALDO.Positive_strand)
269
- create_triple(end_position_uri, RDF.type, BioInterchange::FALDO.Positive_strand)
270
- when BioInterchange::Genomics::GFF3Feature::NEGATIVE
271
- create_triple(start_position_uri, RDF.type, BioInterchange::FALDO.Negative_strand)
272
- create_triple(end_position_uri, RDF.type, BioInterchange::FALDO.Negative_strand)
273
- else
274
- raise BioInterchange::Exceptions::InputFormatError, 'Strand of feature is set to an unknown constant.'
275
- end
276
- create_triple(start_position_uri, BioInterchange::FALDO.position, feature.start_coordinate)
277
- create_triple(end_position_uri, BioInterchange::FALDO.position, feature.end_coordinate)
258
+ [ [ start_position_uri, BioInterchange::FALDO.begin, feature.start_coordinate ],
259
+ [ end_position_uri, BioInterchange::FALDO.end, feature.end_coordinate ] ].each { |uri_relation_coordinate|
260
+ position_uri, faldo_relation, coordinate = uri_relation_coordinate
261
+ create_triple(region_uri, faldo_relation, position_uri)
262
+ case feature.strand
263
+ when BioInterchange::Genomics::GFF3Feature::NOT_STRANDED
264
+ create_triple(position_uri, RDF.type, BioInterchange::FALDO.Position)
265
+ when BioInterchange::Genomics::GFF3Feature::UNKNOWN
266
+ create_triple(position_uri, RDF.type, BioInterchange::FALDO.Position)
267
+ when BioInterchange::Genomics::GFF3Feature::POSITIVE
268
+ create_triple(position_uri, RDF.type, BioInterchange::FALDO.Positive_strand)
269
+ when BioInterchange::Genomics::GFF3Feature::NEGATIVE
270
+ create_triple(position_uri, RDF.type, BioInterchange::FALDO.Negative_strand)
271
+ else
272
+ raise BioInterchange::Exceptions::InputFormatError, 'Strand of feature is set to an unknown constant.'
273
+ end
274
+ create_triple(position_uri, BioInterchange::FALDO.position, coordinate) # use the loop's position_uri so the end coordinate lands on the end position, not on start_position_uri
275
+ }
278
276
  if feature.score then
279
277
  create_triple(feature_uri, @base.has_attribute, RDF::URI.new("#{feature_uri}/score"))
280
278
  if @format == :gvf or @format == :vcf then
@@ -32,6 +32,7 @@ class GFF3Reader < BioInterchange::Reader
32
32
  @name_uri = name_uri
33
33
  @date = date
34
34
  @batch_size = batch_size
35
+ @linenumber = 0
35
36
  end
36
37
 
37
38
  # Reads a GFF3 file from the input stream and returns an associated model.
@@ -79,6 +80,7 @@ protected
79
80
  begin
80
81
  line = gff3.readline
81
82
  line.chomp!
83
+ @linenumber += 1
82
84
 
83
85
  if line.start_with?('#') and not line.start_with?('##') then
84
86
  add_comment(@feature_set, line[1..-1].strip)
@@ -90,6 +92,9 @@ protected
90
92
  next
91
93
  end
92
94
 
95
+ # Empty lines. They happen. Mostly at the end of files.
96
+ next if line.strip.empty?
97
+
93
98
  if fasta_block then
94
99
  if line.start_with?('>') and line.length > 1 then
95
100
  @feature_set.add(BioInterchange::Genomics::GFF3FeatureSequence.new(fasta_id, fasta_sequence, fasta_comment)) if fasta_id and not fasta_sequence.empty?()
@@ -139,7 +144,9 @@ protected
139
144
  type = BioInterchange::SO.send(BioInterchange.make_safe_label(type))
140
145
  end
141
146
  rescue NoMethodError
142
- raise BioInterchange::Exceptions::InputFormatError, "Type of feature is set to an unknown SOFA term: \"#{type}\""
147
+ # If the type is neither a SO accession nor a SO/SOFA term, then raise an error. The GFF3 specification
148
+ # is very clear about type being either one of the two.
149
+ raise BioInterchange::Exceptions::InputFormatError, "Line #{@linenumber}. Type of feature is set to an unknown SO/SOFA term: \"#{type}\""
143
150
  end
144
151
 
145
152
  # String to numeric value conversions:
@@ -218,7 +225,15 @@ protected
218
225
  # +attribute_string+:: key/value string (column 9) as seen in a GFF3/GVF file
219
226
  def split_attributes(attribute_string)
220
227
  attributes = {}
221
- attribute_string.split(';').map { |assignment| match = assignment.match(/([^=]+)=(.+)/) ; { match[1].strip => match[2].split(',').map { |value| URI.decode(value.strip) } } }.map { |hash| hash.each_pair { |tag,list| attributes[tag] = list } }
228
+ hashes = attribute_string.split(';').map { |assignment|
229
+ match = assignment.match(/([^=]+)=(.+)/) ;
230
+ { match[1].strip => match[2].split(',').map { |value| URI.decode(value.strip) } }
231
+ }
232
+ hashes.map { |hash|
233
+ hash.each_pair { |tag,list|
234
+ attributes[tag] = list
235
+ }
236
+ }
222
237
  attributes
223
238
  end
224
239
 
@@ -0,0 +1,43 @@
1
+
2
+ require 'rubygems'
3
+ require 'rspec'
4
+
5
+ require 'biointerchange'
6
+
7
+ describe BioInterchange::Genomics::GFF3Reader do
8
+ describe 'reading GFF3 data' do
9
+
10
+ describe 'reading GFF3 into a model' do
11
+ before :all do
12
+ @test_gff3 = """##gff-version 3
13
+ ##sequence-region chr1 1001 400200
14
+ ##species http://www.ncbi.nlm.nih.gov/Taxonomy/Browser/wwwtax.cgi?id=9606
15
+ # A comment line.
16
+ chr1 Test Center gene 2030 3100 . + . ID=Gene1;Name=firstgene
17
+ chr1 . gene 5010 10029 . - . ID=Gene2;Name=secondgene
18
+ chr1 . exon 5090 6000 . - . ID=Exon2.1;Parent=Gene2
19
+
20
+ """
21
+ # Initialize with the most basic form of a GFF3 reader, i.e.
22
+ # no name, name_uri, date, etc., information is provided.
23
+ @reader = BioInterchange::Genomics::GFF3Reader.new()
24
+ end
25
+ it 'creates correct GFF3 model instance' do
26
+ model = @reader.deserialize(@test_gff3)
27
+
28
+ model.should be_an_instance_of BioInterchange::Genomics::GFF3FeatureSet
29
+ end
30
+ it 'creates a model with the right number of genomic features' do
31
+ model = @reader.deserialize(@test_gff3)
32
+
33
+ model.contents.length.should eql 3
34
+ end
35
+ it 'creates a model with the right number of pragma statements' do
36
+ model = @reader.deserialize(@test_gff3)
37
+
38
+ model.pragmas.length.should eql 3
39
+ end
40
+ end
41
+ end
42
+ end
43
+
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: biointerchange
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.0.4
4
+ version: 1.0.5
5
5
  platform: ruby
6
6
  authors:
7
7
  - Joachim Baran
@@ -12,7 +12,7 @@ authors:
12
12
  autorequire:
13
13
  bindir: bin
14
14
  cert_chain: []
15
- date: 2014-11-13 00:00:00.000000000 Z
15
+ date: 2014-11-25 00:00:00.000000000 Z
16
16
  dependencies:
17
17
  - !ruby/object:Gem::Dependency
18
18
  name: rdf
@@ -226,6 +226,7 @@ files:
226
226
  - make.sh
227
227
  - spec/exceptions_spec.rb
228
228
  - spec/gff3_rdfwriter_spec.rb
229
+ - spec/gff3_reader_spec.rb
229
230
  - spec/gvf_rdfwriter_spec.rb
230
231
  - spec/phylogenetics_spec.rb
231
232
  - spec/text_mining_pdfx_xml_reader_spec.rb
@@ -304,7 +305,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
304
305
  version: '0'
305
306
  requirements: []
306
307
  rubyforge_project:
307
- rubygems_version: 2.0.14
308
+ rubygems_version: 2.0.3
308
309
  signing_key:
309
310
  specification_version: 4
310
311
  summary: An open source framework for transforming heterogeneous data formats into