biointerchange 1.0.0 → 1.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,36 @@
1
+ module BioInterchange::Genomics
2
+
3
+ # Represents a sequence of a genomic feature of a GFF3 file.
4
+ class GFF3FeatureSequence
5
+
6
+ # Creates a new feature sequence representation. A feature sequence is described by two or more
7
+ # lines in a GFF3 file that follow a '##FASTA' pragma statement.
8
+ #
9
+ # +feature_id+:: ID of the feature whose sequence is stored
10
+ # +sequence+:: sequence of the feature
11
+ def initialize(feature_id, sequence, comment = nil)
12
+ @feature_id = feature_id
13
+ @sequence = sequence
14
+ @comment = comment
15
+ end
16
+
17
+ # Returns the ID of the feature whose sequence is represented by the object.
18
+ def feature_id
19
+ @feature_id
20
+ end
21
+
22
+ # Returns the sequence of the feature.
23
+ def sequence
24
+ @sequence
25
+ end
26
+
27
+ # Returns additional comments -- if provided -- that are associated with the feature ID.
28
+ # If no comment was provided, then `nil` is returned.
29
+ def comment
30
+ @comment
31
+ end
32
+
33
+ end
34
+
35
+ end
36
+
@@ -11,24 +11,23 @@ module BioInterchange::Genomics
11
11
  # - biointerchange.gvf
12
12
  #
13
13
  # Outputs:
14
- # - rdf.biointerchange.gff3
15
- # - rdf.biointerchange.gvf
14
+ # - rdf.biointerchange.gfvo
16
15
  class RDFWriter < BioInterchange::Writer
17
16
 
18
17
  # Register writers:
19
18
  BioInterchange::Registry.register_writer(
20
- 'rdf.biointerchange.gff3',
19
+ 'rdf.biointerchange.gfvo',
21
20
  BioInterchange::Genomics::RDFWriter,
22
21
  [ 'biointerchange.gff3' ],
23
22
  true,
24
- 'Generic Feature Format Version 3 Ontology (GFF3O) based RDFization'
23
+ 'Genomic Feature and Variation Ontology (GFVO) based RDFization'
25
24
  )
26
25
  BioInterchange::Registry.register_writer(
27
- 'rdf.biointerchange.gvf',
26
+ 'rdf.biointerchange.gfvo',
28
27
  BioInterchange::Genomics::RDFWriter,
29
28
  [ 'biointerchange.gvf' ],
30
29
  true,
31
- 'Genome Variation Format Version 1 Ontology (GVF1O) based RDFization'
30
+ 'Genomic Feature and Variation Ontology (GFVO) based RDFization'
32
31
  )
33
32
 
34
33
  # Creates a new instance of a RDFWriter that will use the provided output stream to serialize RDF.
@@ -42,18 +41,19 @@ class RDFWriter < BioInterchange::Writer
42
41
  # Serialize a model as RDF.
43
42
  #
44
43
  # +model+:: a generic representation of input data that is derived from BioInterchange::Genomics::GFF3FeatureSet
45
- def serialize(model)
44
+ # +uri_prefix+:: optional URI prefix that replaces the default URI prefix for all set/feature/annotation URIs
45
+ def serialize(model, uri_prefix = nil)
46
46
  if model.instance_of?(BioInterchange::Genomics::GFF3FeatureSet) then
47
- @base = BioInterchange::GFF3O
48
- serialize_model(model)
47
+ @format = :gff3
49
48
  elsif model.instance_of?(BioInterchange::Genomics::GVFFeatureSet) then
50
- @base = BioInterchange::GVF1O
51
- serialize_model(model)
49
+ @format = :gvf
52
50
  else
53
51
  raise BioInterchange::Exceptions::ImplementationWriterError, 'The provided model cannot be serialized. ' +
54
52
  'This writer supports serialization for BioInterchange::Genomics::GFF3FeatureSet and '
55
53
  'BioInterchange::Genomics::GVFFeatureSet.'
56
54
  end
55
+ @base = BioInterchange::GFVO
56
+ serialize_model(model, uri_prefix)
57
57
  end
58
58
 
59
59
  protected
@@ -61,7 +61,8 @@ protected
61
61
  # Serializes RDF for a feature set representation.
62
62
  #
63
63
  # +model+:: an instance of +BioInterchange::Genomics::GFF3FeatureSet+
64
- def serialize_model(model)
64
+ # +set_uri+:: optional URI prefix that should be used for a set instance (and hence, all its dependents -- features, annotations, etc.)
65
+ def serialize_model(model, set_uri)
65
66
  # We record landmarks, because they can either be written when their "##sequence-region"
66
67
  # pragma statement appears, or otherwise, when the first feature with said landmark is
67
68
  # being serialized.
@@ -70,17 +71,25 @@ protected
70
71
  # Record written variants in order to avoid writing out RDF.type multiple times.
71
72
  @variants = {}
72
73
 
73
- graph = RDF::Graph.new
74
- graph.fast_ostream(@ostream) if BioInterchange::skip_rdf_graph
75
- set_uri = RDF::URI.new(model.uri)
76
- graph.insert(RDF::Statement.new(set_uri, RDF.type, @base.Set))
74
+ # Create a URI prefix that applies to the set, all features in the set, and all the features' annotations.
75
+ # Then register the prefix with the writer to have a concise Turtle output.
76
+ set_uri = set_uri[0..-2] if set_uri and set_uri.end_with?('/')
77
+ set_uri = RDF::URI.new(model.uri) unless set_uri
78
+ set_base(set_uri + '/')
79
+
80
+ create_triple(set_uri, RDF.type, @base.Set)
77
81
  model.pragmas.each { |pragma_name|
78
- serialize_pragma(graph, set_uri, model.pragma(pragma_name))
82
+ serialize_pragma(set_uri, model.pragma(pragma_name))
79
83
  }
80
84
  model.contents.each { |feature|
81
- serialize_feature(graph, set_uri, feature)
85
+ if feature.instance_of?(BioInterchange::Genomics::GFF3FeatureSequence) then
86
+ serialize_feature_sequence(set_uri, feature)
87
+ else
88
+ serialize_feature(set_uri, feature)
89
+ end
82
90
  }
83
- RDF::NTriples::Writer.dump(graph, @ostream)
91
+ close
92
+ #RDF::NTriples::Writer.dump(graph, @ostream)
84
93
  # TODO Figure out why the following is very slow. Use with 'rdf-raptor'.
85
94
  # Having said that, Jena's rdfcat is very good for converting formats
86
95
  # anyway, so perhaps it is not worth investigating the following.
@@ -89,25 +98,22 @@ protected
89
98
 
90
99
  # Serializes pragmas for a given feature set URI.
91
100
  #
92
- # +graph+:: RDF graph to which the pragmas are added
93
101
  # +set_uri+:: the feature set URI to which the pragmas belong
94
102
  # +pragma+:: an object representing a pragma statement
95
- def serialize_pragma(graph, set_uri, pragma)
103
+ def serialize_pragma(set_uri, pragma)
96
104
  if pragma.kind_of?(Hash) then
97
- if (pragma.has_key?('attribute-method') or pragma.has_key?('data-source') or pragma.has_key?('score-method') or pragma.has_key?('source-method') or pragma.has_key?('technology-platform')) and @base == BioInterchange::GVF1O then
98
- serialize_structured_attribute(graph, set_uri, pragma)
99
- elsif pragma.has_key?('gff-version') and @base == BioInterchange::GFF3O then
100
- graph.insert(RDF::Statement.new(set_uri, @base.version, RDF::Literal.new(pragma['gff-version'], :datatype => RDF::XSD.float )))
101
- elsif pragma.has_key?('gff-version') and @base == BioInterchange::GVF1O then
102
- graph.insert(RDF::Statement.new(set_uri, @base.gff_version, RDF::Literal.new(pragma['gff-version'], :datatype => RDF::XSD.float )))
103
- elsif pragma.has_key?('gvf-version') and @base == BioInterchange::GVF1O then
104
- graph.insert(RDF::Statement.new(set_uri, @base.gvf_version, RDF::Literal.new(pragma['gvf-version'], :datatype => RDF::XSD.float )))
105
+ if (pragma.has_key?('attribute-method') or pragma.has_key?('data-source') or pragma.has_key?('score-method') or pragma.has_key?('source-method') or pragma.has_key?('technology-platform')) then
106
+ serialize_structured_attribute(set_uri, pragma)
107
+ elsif pragma.has_key?('gff-version') then
108
+ create_triple(set_uri, @base.gff_version, pragma['gff-version'], RDF::XSD.float)
109
+ elsif pragma.has_key?('gvf-version') then
110
+ create_triple(set_uri, @base.gvf_version, pragma['gvf-version'], RDF::XSD.float)
105
111
  elsif pragma.has_key?('sequence-region') then
106
112
  pragma['sequence-region'].keys.each { |seqid|
107
- serialize_landmark(graph, set_uri, pragma['sequence-region'][seqid])
113
+ serialize_landmark(set_uri, pragma['sequence-region'][seqid])
108
114
  }
109
115
  elsif pragma.has_key?('species') then
110
- graph.insert(RDF::Statement.new(set_uri, @base.species, RDF::URI.new(pragma['species'])))
116
+ create_triple(set_uri, @base.species, RDF::URI.new(pragma['species']))
111
117
  end
112
118
  else
113
119
  # TODO
@@ -116,77 +122,97 @@ protected
116
122
 
117
123
  # Serializes a +GFF3Feature+ object for a given feature set URI.
118
124
  #
119
- # +graph+:: RDF graph to which the feature is added
120
125
  # +set_uri+:: the feature set URI to which the feature belongs
121
126
  # +feature+:: a +GFF3Feature+ instance
122
- def serialize_feature(graph, set_uri, feature)
127
+ def serialize_feature(set_uri, feature)
123
128
  # TODO Make sure there is only one value in the 'ID' list.
124
129
  feature_uri = RDF::URI.new("#{set_uri.to_s}/feature/#{feature.sequence_id},#{feature.source},#{feature.type.to_s.sub(/^[^:]+:\/\//, '')},#{feature.start_coordinate},#{feature.end_coordinate},#{feature.strand},#{feature.phase}") unless feature.attributes.has_key?('ID')
125
130
  feature_uri = RDF::URI.new("#{set_uri.to_s}/feature/#{feature.attributes['ID'][0]}") if feature.attributes.has_key?('ID')
126
- feature_datatype_properties = @base.feature_properties.select { |uri| @base.is_datatype_property?(uri) }[0]
127
- feature_object_properties = @base.feature_properties.select { |uri| @base.is_object_property?(uri) }[0]
128
- graph.insert(RDF::Statement.new(set_uri, @base.contains, feature_uri))
129
- graph.insert(RDF::Statement.new(feature_uri, RDF.type, @base.Feature))
130
- serialize_landmark(graph, set_uri, GFF3Landmark.new(feature.sequence_id)) unless @landmarks.has_key?(feature.sequence_id)
131
- graph.insert(RDF::Statement.new(feature_uri, @base.with_parent([ @base.seqid ].flatten, feature_object_properties)[0], @landmarks[feature.sequence_id]))
132
- graph.insert(RDF::Statement.new(feature_uri, @base.source, RDF::Literal.new(feature.source)))
133
- graph.insert(RDF::Statement.new(feature_uri, @base.type, feature.type))
134
- graph.insert(RDF::Statement.new(feature_uri, @base.with_parent(@base.start, feature_datatype_properties)[0], RDF::Literal.new(feature.start_coordinate)))
135
- graph.insert(RDF::Statement.new(feature_uri, @base.with_parent(@base.end, feature_datatype_properties)[0], RDF::Literal.new(feature.end_coordinate)))
136
- graph.insert(RDF::Statement.new(feature_uri, @base.score, RDF::Literal.new(feature.score))) if feature.score
137
- strand_uri = @base.with_parent(@base.strand, feature_object_properties)[0]
131
+ create_triple(set_uri, @base.contains, feature_uri)
132
+ create_triple(feature_uri, RDF.type, @base.Feature)
133
+ serialize_landmark(set_uri, GFF3Landmark.new(feature.sequence_id)) unless @landmarks.has_key?(feature.sequence_id)
134
+ create_triple(feature_uri, @base.seqid, @landmarks[feature.sequence_id])
135
+ create_triple(feature_uri, @base.source, feature.source)
136
+ create_triple(feature_uri, @base.type, feature.type)
137
+ create_triple(feature_uri, @base.phase, feature.phase) if feature.phase
138
+
139
+ serialize_coordinate(set_uri, feature_uri, feature)
140
+ serialize_attributes(set_uri, feature_uri, feature.attributes) unless feature.attributes.keys.empty?
141
+ end
142
+
143
+ def serialize_coordinate(set_uri, feature_uri, feature)
144
+ region_uri = RDF::URI.new("#{feature_uri.to_s}/region")
145
+ start_position_uri = RDF::URI.new("#{feature_uri.to_s}/region/start")
146
+ end_position_uri = RDF::URI.new("#{feature_uri.to_s}/region/end")
147
+ #feature_object_properties = @base.feature_properties.select { |uri| @base.is_object_property?(uri) }[0]
148
+ ##graph.insert(RDF::Statement.new(feature_uri, @base.with_parent([ @base.region ].flatten, feature_object_properties), region_uri))
149
+ create_triple(feature_uri, @base.locus, region_uri)
150
+ create_triple(region_uri, RDF.type, BioInterchange::FALDO.Region)
151
+ # Binning triples (disabled: the `if false` guard below never executes):
152
+ if false then
153
+ bin_uri = RDF::URI.new("bin://#{feature.sequence_id}/#{BioInterchange::Genomics::Locations.reg2bin(feature.start_coordinate, feature.end_coordinate)}")
154
+ create_triple(bin_uri, RDF::URI.new('bin://contains'), feature_uri)
155
+ end
156
+ create_triple(region_uri, BioInterchange::FALDO.begin, start_position_uri)
157
+ create_triple(region_uri, BioInterchange::FALDO.end, end_position_uri)
138
158
  case feature.strand
139
159
  when BioInterchange::Genomics::GFF3Feature::NOT_STRANDED
140
- graph.insert(RDF::Statement.new(feature_uri, strand_uri, @base.NotStranded))
160
+ create_triple(start_position_uri, RDF.type, BioInterchange::FALDO.Position)
161
+ create_triple(end_position_uri, RDF.type, BioInterchange::FALDO.Position)
141
162
  when BioInterchange::Genomics::GFF3Feature::UNKNOWN
142
- graph.insert(RDF::Statement.new(feature_uri, strand_uri, @base.UnknownStrand))
163
+ create_triple(start_position_uri, RDF.type, BioInterchange::FALDO.Position)
164
+ create_triple(end_position_uri, RDF.type, BioInterchange::FALDO.Position)
143
165
  when BioInterchange::Genomics::GFF3Feature::POSITIVE
144
- graph.insert(RDF::Statement.new(feature_uri, strand_uri, @base.Positive))
166
+ create_triple(start_position_uri, RDF.type, BioInterchange::FALDO.Positive_strand)
167
+ create_triple(end_position_uri, RDF.type, BioInterchange::FALDO.Positive_strand)
145
168
  when BioInterchange::Genomics::GFF3Feature::NEGATIVE
146
- graph.insert(RDF::Statement.new(feature_uri, strand_uri, @base.Negative))
169
+ create_triple(start_position_uri, RDF.type, BioInterchange::FALDO.Negative_strand)
170
+ create_triple(end_position_uri, RDF.type, BioInterchange::FALDO.Negative_strand)
147
171
  else
148
172
  raise BioInterchange::Exceptions::InputFormatError, 'Strand of feature is set to an unknown constant.'
149
173
  end
150
- graph.insert(RDF::Statement.new(feature_uri, @base.phase, RDF::Literal.new(feature.phase))) if feature.phase
151
-
152
- serialize_attributes(graph, set_uri, feature_uri, feature.attributes) unless feature.attributes.keys.empty?
174
+ create_triple(start_position_uri, BioInterchange::FALDO.position, feature.start_coordinate)
175
+ create_triple(end_position_uri, BioInterchange::FALDO.position, feature.end_coordinate)
176
+ create_triple(feature_uri, @base.score, feature.score) if feature.score
153
177
  end
154
178
 
155
179
  # Serializes a genomic feature landmark ("seqid").
156
180
  #
157
- # +graph+:: RDF graph to which the landmark is added
158
181
  # +set_uri+:: the feature set URI to which the landmark belongs
159
182
  # +landmark+:: encapsulated landmark data
160
- def serialize_landmark(graph, set_uri, landmark)
183
+ def serialize_landmark(set_uri, landmark)
161
184
  return if @landmarks.has_key?(landmark.seqid)
162
185
  landmark_uri = RDF::URI.new("#{set_uri.to_s}/landmark/#{landmark.seqid}")
186
+ region_uri = RDF::URI.new("#{landmark_uri.to_s}/region")
163
187
  @landmarks[landmark.seqid] = landmark_uri
164
- graph.insert(RDF::Statement.new(landmark_uri, RDF.type, @base.Landmark))
165
- graph.insert(RDF::Statement.new(landmark_uri, @base.with_parent([ @base.id ].flatten, @base.landmark_properties)[0], RDF::Literal.new(landmark.seqid)))
166
- graph.insert(RDF::Statement.new(landmark_uri, @base.with_parent([ @base.start ].flatten, @base.landmark_properties)[0], RDF::Literal.new(landmark.start_coordinate))) if landmark.start_coordinate
167
- graph.insert(RDF::Statement.new(landmark_uri, @base.with_parent([ @base.end ].flatten, @base.landmark_properties)[0], RDF::Literal.new(landmark.end_coordinate))) if landmark.end_coordinate
188
+ create_triple(landmark_uri, RDF.type, @base.Landmark)
189
+ create_triple(landmark_uri, @base.id, landmark.seqid)
190
+ create_triple(landmark_uri, @base.locus, region_uri)
191
+ if landmark.start_coordinate then
192
+ start_position_uri = RDF::URI.new("#{landmark_uri.to_s}/region/start")
193
+ create_triple(region_uri, BioInterchange::FALDO.begin, start_position_uri)
194
+ create_triple(start_position_uri, BioInterchange::FALDO.position, landmark.start_coordinate)
195
+ end
196
+ if landmark.start_coordinate then
197
+ end_position_uri = RDF::URI.new("#{landmark_uri.to_s}/region/end")
198
+ create_triple(region_uri, BioInterchange::FALDO.end, end_position_uri)
199
+ create_triple(end_position_uri, BioInterchange::FALDO.position, landmark.end_coordinate)
200
+ end
168
201
  end
169
202
 
170
203
  # Serializes the attributes of a feature.
171
204
  #
172
- # +graph+:: RDF graph to which the feature is added
173
205
  # +set_uri+:: URI of the set these attributes belong to (implicit due to feature)
174
206
  # +feature_uri+:: the feature URI to which the attributes belong
175
207
  # +attributes+:: a map of tag/value pairs
176
- def serialize_attributes(graph, set_uri, feature_uri, attributes)
208
+ def serialize_attributes(set_uri, feature_uri, attributes)
177
209
  attributes.each_pair { |tag, list|
178
210
  # Check for defined tags (in alphabetical order), if not matched, serialize as generic Attribute:
179
211
  if tag == 'Alias' then
180
212
  list.each { |value|
181
- graph.insert(RDF::Statement.new(feature_uri, @base.alias, RDF::Literal.new(value)))
213
+ create_triple(feature_uri, @base.alias, value)
182
214
  }
183
215
  elsif tag == 'Dbxref' then
184
- feature_properties = nil
185
- if @base == BioInterchange::GFF3O then
186
- feature_properties = @base.feature_properties.select { |uri| @base.is_datatype_property?(uri) }[0]
187
- else
188
- feature_properties = @base.feature_properties.select { |uri| @base.is_object_property?(uri) }[0]
189
- end
190
216
  list.each { |value|
191
217
  begin
192
218
  linkout = nil
@@ -200,79 +226,143 @@ protected
200
226
  linkout = BioInterchange::GOXRef.send(BioInterchange.make_safe_label(abbreviation)).to_s + id
201
227
  end
202
228
  # Second, and finally: add a triple to the graph in the right representative format depending on the ontology used
203
- if @base == BioInterchange::GFF3O then
204
- graph.insert(RDF::Statement.new(feature_uri, @base.with_parent([ @base.dbxref ].flatten, feature_properties)[0], RDF::Literal.new(linkout, :datatype => RDF::XSD.anyURI )))
205
- else
206
- graph.insert(RDF::Statement.new(feature_uri, @base.with_parent([ @base.dbxref ].flatten, feature_properties)[0], RDF::URI.new(linkout))) if @base == BioInterchange::GVF1O
207
- end
229
+ create_triple(feature_uri, @base.dbxref, linkout)
208
230
  rescue NoMethodError
209
- raise BioInterchange::Exceptions::InputFormatError, 'Attribute Dbxref link-out is not resolvable, i.e. the name cannot be turned into an URL.'
231
+ # Preserve the Dbxref as a Literal:
232
+ @dbxref = 0 if @dbxref == nil
233
+ literal_uri = RDF::URI.new("#{feature_uri.to_s}/attribute/dbxref/#{@dbxref}")
234
+ @dbxref += 1
235
+ create_triple(feature_uri, @base.dbxref, literal_uri)
236
+ create_triple(literal_uri, RDF.type, RDF::RDFS.Literal)
237
+ create_triple(literal_uri, RDF.value, value)
210
238
  end
211
239
  }
212
- elsif tag == 'Derives_from' and @base == BioInterchange::GFF3O then
240
+ elsif tag == 'Derives_from' then
213
241
  list.each { |value|
214
- graph.insert(RDF::Statement.new(feature_uri, @base.derives_from, RDF::URI.new("#{set_uri.to_s}/feature/#{value}")))
242
+ create_triple(feature_uri, @base.derivesFrom, RDF::URI.new("#{set_uri.to_s}/feature/#{value}"))
215
243
  }
216
- elsif tag == 'Gap' and @base == BioInterchange::GFF3O then
217
- graph.insert(RDF::Statement.new(feature_uri, @base.gap, RDF::Literal.new(list.join(','))))
244
+ elsif tag == 'Gap' then
245
+ # Handled by 'Target', because 'Gap' requires 'Target' to be present.
218
246
  elsif tag == 'ID' then
219
- # Do nothing. The feature ID is the URI of the feature. It is not relevant as information anymore.
220
- elsif tag == 'Is_circular' and @base == BioInterchange::GFF3O then
247
+ list.each { |value|
248
+ create_triple(feature_uri, @base.id, value)
249
+ }
250
+ elsif tag == 'Is_circular' then
221
251
  value = list.join(',')
222
- graph.insert(RDF::Statement.new(feature_uri, @base.is_circular, true)) if value == 'true'
223
- graph.insert(RDF::Statement.new(feature_uri, @base.is_circular, false)) if value == 'false'
224
- # TODO Report invalid value.
225
- elsif tag == 'Name' and @base == BioInterchange::GFF3O then
226
- graph.insert(RDF::Statement.new(feature_uri, @base.name, RDF::Literal.new(list.join(','))))
227
- elsif tag == 'Note' and @base == BioInterchange::GFF3O then
252
+ if value == 'true' then
253
+ create_triple(feature_uri, @base.isCircular, true) if value == 'true'
254
+ elsif value == 'false' then
255
+ create_triple(feature_uri, @base.isCircular, false) if value == 'false'
256
+ else
257
+ create_triple(feature_uri, RDF::RDFS.comment, "Is_circular non-truth value: #{value}")
258
+ end
259
+ elsif tag == 'Name' then
260
+ list.each { |value|
261
+ create_triple(feature_uri, @base.name, value)
262
+ }
263
+ elsif tag == 'Note' then
228
264
  list.each { |value|
229
- graph.insert(RDF::Statement.new(feature_uri, @base.note, RDF::Literal.new(value)))
265
+ create_triple(feature_uri, RDF::RDFS.comment, value)
230
266
  }
231
267
  elsif tag == 'Ontology_term' then
232
268
  list.each { |value|
233
269
  # TODO Sanitize values that are either not in GO xrf_abbs or need conversion to match
234
270
  # match their associated Ruby method.
235
271
  namespace, accession = value.split(/:/, 2)
236
- graph.insert(RDF::Statement.new(feature_uri, @base.ontology_term, RDF::Literal.new("#{BioInterchange::GOXRef.send(namespace).to_s}#{accession}", :datatype => RDF::XSD.anyURI )))
272
+ create_triple(feature_uri, @base.ontology_term, "#{BioInterchange::GOXRef.send(namespace).to_s}#{accession}")
237
273
  }
238
274
  elsif tag == 'Parent' then
239
275
  list.each { |parent_id|
240
- graph.insert(RDF::Statement.new(feature_uri, @base.parent, RDF::URI.new("#{set_uri.to_s}/feature/#{parent_id}")))
276
+ create_triple(feature_uri, @base.parent, RDF::URI.new("#{set_uri.to_s}/feature/#{parent_id}"))
241
277
  }
242
278
  elsif tag == 'Reference_seq' then
243
279
  list.each { |value|
244
- graph.insert(RDF::Statement.new(feature_uri, @base.reference_seq, RDF::Literal.new(value)))
280
+ reference_uri = RDF::URI.new("#{feature_uri.to_s}/reference/#{value}")
281
+ create_triple(feature_uri, @base.sequence_annotation, reference_uri)
282
+ create_triple(reference_uri, RDF.type, @base.Reference)
283
+ create_triple(reference_uri, @base.sequence, value)
245
284
  }
246
285
  elsif tag == 'Target' then
247
286
  target_id, start_coordinate, end_coordinate, strand = list.join(',').split(/\s+/, 4)
248
- target_datatype_properties = @base.target_properties.select { |uri| @base.is_datatype_property?(uri) }[0]
249
- target_object_properties = @base.target_properties.select { |uri| @base.is_object_property?(uri) }[0]
250
287
  target_uri = RDF::URI.new("#{feature_uri.to_s}/target/#{target_id}")
251
- graph.insert(RDF::Statement.new(target_uri, RDF.type, @base.Target))
252
- graph.insert(RDF::Statement.new(target_uri, @base.target_id, RDF::Literal.new(target_id)))
253
- graph.insert(RDF::Statement.new(target_uri, @base.with_parent([ @base.start ].flatten, target_datatype_properties)[0], RDF::Literal.new(start_coordinate.to_i)))
254
- graph.insert(RDF::Statement.new(target_uri, @base.with_parent([ @base.end ].flatten, target_datatype_properties)[0], RDF::Literal.new(end_coordinate.to_i)))
255
- graph.insert(RDF::Statement.new(target_uri, @base.with_parent([ @base.end ].flatten, target_object_properties)[0], @base.Positive)) if strand and strand == '+'
256
- graph.insert(RDF::Statement.new(target_uri, @base.with_parent([ @base.end ].flatten, target_object_properties)[0], @base.Negative)) if strand and strand == '-'
257
- graph.insert(RDF::Statement.new(feature_uri, @base.target, target_uri))
258
- elsif tag == 'Variant_effect' and @base == BioInterchange::GVF1O then
259
- serialize_variant_effects(graph, set_uri, feature_uri, list)
260
- elsif tag == 'Variant_seq' and @base == BioInterchange::GVF1O then
261
- serialize_variant_seqs(graph, set_uri, feature_uri, list)
288
+ create_triple(feature_uri, @base.target, target_uri)
289
+ create_triple(target_uri, RDF.type, @base.Target)
290
+ create_triple(target_uri, @base.id, target_id)
291
+ region_uri = RDF::URI.new("#{target_uri.to_s}/region")
292
+ start_position_uri = RDF::URI.new("#{region_uri.to_s}/start")
293
+ end_position_uri = RDF::URI.new("#{region_uri.to_s}/end")
294
+ create_triple(target_uri, @base.locus, region_uri)
295
+ create_triple(region_uri, @base.locus, start_position_uri)
296
+ create_triple(region_uri, @base.locus, end_position_uri)
297
+ if strand == '+' then
298
+ create_triple(start_position_uri, RDF.type, BioInterchange::FALDO.Positive_strand)
299
+ create_triple(end_position_uri, RDF.type, BioInterchange::FALDO.Positive_strand)
300
+ create_triple(start_position_uri, BioInterchange::FALDO.position, start_coordinate)
301
+ create_triple(end_position_uri, BioInterchange::FALDO.position, end_coordinate)
302
+ elsif strand == '-' then
303
+ create_triple(start_position_uri, RDF.type, BioInterchange::FALDO.Negative_strand)
304
+ create_triple(end_position_uri, RDF.type, BioInterchange::FALDO.Negative_strand)
305
+ # Reverse start/end coordinates on the negative strand; FALDO requirement:
306
+ create_triple(start_position_uri, BioInterchange::FALDO.position, end_coordinate)
307
+ create_triple(end_position_uri, BioInterchange::FALDO.position, start_coordinate)
308
+ else
309
+ create_triple(start_position_uri, RDF.type, BioInterchange::FALDO.Position)
310
+ create_triple(end_position_uri, RDF.type, BioInterchange::FALDO.Position)
311
+ create_triple(start_position_uri, BioInterchange::FALDO.position, start_coordinate)
312
+ create_triple(end_position_uri, BioInterchange::FALDO.position, end_coordinate)
313
+ end
314
+
315
+ # Describe a possible alignment between the feature and target:
316
+ if attributes.has_key?('Gap') then
317
+ attributes['Gap'].each_index { |gap_no|
318
+ cigar_line = attributes['Gap'][gap_no].split(/\s+/)
319
+ cigar_line.each_index { |alignment_no|
320
+ alignment_uri = RDF::URI.new("#{feature_uri.to_s}/alignment/#{gap_no}/#{alignment_no}")
321
+ create_triple(feature_uri, @base.alignment, alignment_uri) if alignment_no == 0
322
+ operation = cigar_line[alignment_no].gsub(/[^MIDFR]/, '')
323
+ operation = nil unless operation.length == 1
324
+ span = cigar_line[alignment_no].gsub(/[^0-9]/, '')
325
+ span = nil unless span.length > 0
326
+ if operation == 'M' then
327
+ create_triple(alignment_uri, RDF.type, @base.Match)
328
+ elsif operation == 'I' then
329
+ create_triple(alignment_uri, RDF.type, @base.Reference_Sequence_Gap)
330
+ elsif operation == 'D' then
331
+ create_triple(alignment_uri, RDF.type, @base.Target_Sequence_Gap)
332
+ elsif operation == 'F' then
333
+ create_triple(alignment_uri, RDF.type, @base.Forward_Reference_Sequence_Frameshift)
334
+ elsif operation == 'R' then
335
+ create_triple(alignment_uri, RDF.type, @base.Reverse_Reference_Sequence_Frameshift)
336
+ else
337
+ # Fallback: operation is outside of the specification
338
+ create_triple(alignment_uri, RDF.type, @base.Alignment_Operation)
339
+ create_triple(alignment_uri, RDF::RDFS.comment, "Alignment operation: #{operation}") if operation and not operation.empty?
340
+ end
341
+ create_triple(alignment_uri, @base.span, span.to_i) if span
342
+ create_triple(alignment_uri, RDF.first, alignment_uri)
343
+ if alignment_no + 1 < cigar_line.length then
344
+ create_triple(alignment_uri, RDF.rest, RDF::URI.new("#{feature_uri.to_s}/alignment/#{gap_no}/#{alignment_no + 1}"))
345
+ else
346
+ create_triple(alignment_uri, RDF.rest, RDF.nil)
347
+ end
348
+ }
349
+ }
350
+ end
351
+ elsif tag == 'Variant_effect' then
352
+ serialize_variant_effects(set_uri, feature_uri, list)
353
+ elsif tag == 'Variant_seq' then
354
+ serialize_variant_seqs(set_uri, feature_uri, list)
262
355
  else
263
356
  # TODO Report unknown upper case letters here? That would be a spec. validation...
264
357
  # Well, or it would show that this implementation is incomplete. Could be either.
265
- attribute_properties = @base.attribute_properties
266
- attribute_properties = attribute_properties.select { |uri| @base.is_datatype_property?(uri) }[0] if attribute_properties.kind_of?(Array)
267
- feature_properties = @base.feature_properties.select { |uri| @base.is_object_property?(uri) }[0]
268
358
  list.each_index { |index|
269
359
  value = list[index]
270
360
  attribute_uri = RDF::URI.new("#{feature_uri.to_s}/attribute/#{tag}") if list.size == 1
271
361
  attribute_uri = RDF::URI.new("#{feature_uri.to_s}/attribute/#{tag}-#{index + 1}") unless list.size == 1
272
- graph.insert(RDF::Statement.new(feature_uri, @base.with_parent([ @base.attributes ].flatten, feature_properties)[0], attribute_uri))
273
- graph.insert(RDF::Statement.new(attribute_uri, RDF.type, @base.Attribute))
274
- graph.insert(RDF::Statement.new(attribute_uri, @base.with_parent([ @base.tag ].flatten, attribute_properties)[0], RDF::Literal.new("#{tag}")))
275
- graph.insert(RDF::Statement.new(attribute_uri, RDF.value, RDF::Literal.new(value)))
362
+ create_triple(feature_uri, @base.attribute, attribute_uri)
363
+ create_triple(attribute_uri, RDF.type, @base.Attribute)
364
+ create_triple(attribute_uri, @base.tag, "#{tag}")
365
+ create_triple(attribute_uri, RDF.value, value)
276
366
  }
277
367
  end
278
368
  }
@@ -281,10 +371,9 @@ protected
281
371
  # Serializes a structured attribute (given as a pragma statement), which later
282
372
  # can be referred to from feature instances.
283
373
  #
284
- # +graph+:: RDF graph to which the structured attribute is added
285
374
  # +set_uri+:: the feature set URI to which the structured attribute belongs
286
375
  # +pragma+:: a map that encapsulates the structured attribute data
287
- def serialize_structured_attribute(graph, set_uri, pragma)
376
+ def serialize_structured_attribute(set_uri, pragma)
288
377
  attribute_uri = RDF::URI.new("#{set_uri.to_s}/structured_attribute/#{pragma.object_id}")
289
378
  attributes = nil
290
379
  class_type = nil
@@ -306,114 +395,132 @@ protected
306
395
  else
307
396
  # TODO Error.
308
397
  end
309
- graph.insert(RDF::Statement.new(attribute_uri, RDF.type, class_type))
310
398
  if class_type == @base.DataSource and attributes.has_key?('Data_type') then
311
- data_type_individual = nil
312
399
  attributes['Data_type'] = attributes['Data_type'][0] # TODO Make sure array is of length 1.
313
400
  if attributes['Data_type'] == 'Array_CGH' then
314
- data_type_individual = @base.ArrayComparativeGenomicHybridization
401
+ class_type = @base.ArrayComparativeGenomicHybridization
315
402
  elsif attributes['Data_type'] == 'DNA_microarray' then
316
- data_type_individual = @base.DNAMicroarray
403
+ class_type = @base.DNAMicroarray
317
404
  elsif attributes['Data_type'] == 'DNA_sequence' then
318
- data_type_individual = @base.DNASequence
405
+ class_type = @base.DNASequence
319
406
  elsif attributes['Data_type'] == 'RNA_sequence' then
320
- data_type_individual = @base.RNASequence
407
+ class_type = @base.RNASequence
321
408
  else
322
409
  # TODO Error.
323
410
  end
324
- graph.insert(RDF::Statement.new(attribute_uri, @base.data_type, data_type_individual))
325
411
  elsif class_type == @base.TechnologyPlatform then
326
412
  if attributes.has_key?('Average_coverage') then
327
- graph.insert(RDF::Statement.new(attribute_uri, @base.average_coverage, RDF::Literal.new(attributes['Average_coverage'][0].to_i)))
413
+ create_triple(attribute_uri, @base.averageCoverage, attributes['Average_coverage'][0].to_i)
328
414
  end
329
415
  if attributes.has_key?('Platform_class') then
330
- graph.insert(RDF::Statement.new(attribute_uri, @base.platform_class, RDF::Literal.new(attributes['Platform_class'][0])))
416
+ create_triple(attribute_uri, @base.platformClass, attributes['Platform_class'][0])
331
417
  end
332
418
  if attributes.has_key?('Platform_name') then
333
- graph.insert(RDF::Statement.new(attribute_uri, @base.platform_name, RDF::Literal.new(attributes['Platform_name'][0])))
419
+ create_triple(attribute_uri, @base.platformName, attributes['Platform_name'][0])
334
420
  end
335
421
  if attributes.has_key?('Read_length') then
336
- graph.insert(RDF::Statement.new(attribute_uri, @base.read_length, RDF::Literal.new(attributes['Read_length'][0].to_i)))
422
+ create_triple(attribute_uri, @base.readLength, attributes['Read_length'][0].to_i)
337
423
  end
338
424
  if attributes.has_key?('Read_pair_span') then
339
- graph.insert(RDF::Statement.new(attribute_uri, @base.read_pair_span, RDF::Literal.new(attributes['Read_pair_span'][0].to_i)))
425
+ create_triple(attribute_uri, @base.readPairSpan, attributes['Read_pair_span'][0].to_i)
340
426
  end
341
427
  if attributes.has_key?('Read_type') then
342
- read_type_individual = nil
343
428
  attributes['Read_type'] = attributes['Read_type'][0] # TODO Make sure array is of length 1.
344
429
  if attributes['Read_type'] == 'fragment' then
345
- read_type_individual = @base.Fragment
430
+ class_type = @base.FragmentReadPlatform
346
431
  elsif attributes['Read_type'] == 'pair' then
347
- read_type_individual = @base.Pair
432
+ class_type = @base.PairedEndReadPlatform
348
433
  else
349
434
  # TODO Error.
350
435
  end
351
- graph.insert(RDF::Statement.new(attribute_uri, @base.read_type, read_type_individual))
352
436
  end
353
437
  end
354
- structuredpragma_properties = @base.structuredpragma_properties.select { |uri| @base.is_object_property?(uri) }[0]
438
+ create_triple(attribute_uri, RDF.type, class_type)
355
439
  attributes.keys.each { |tag|
356
440
  if tag.match(/^[a-z]/) then
357
441
  custom_attribute_uri = RDF::URI.new("#{attribute_uri.to_s}/attribute/#{tag}")
358
- graph.insert(RDF::Statement.new(custom_attribute_uri, RDF.type, @base.StructuredAttribute))
359
- graph.insert(RDF::Statement.new(custom_attribute_uri, @base.with_parent([ @base.tag ].flatten, @base.structuredattribute_properties)[0], tag))
360
- graph.insert(RDF::Statement.new(custom_attribute_uri, RDF.value, RDF::Literal.new(attributes[tag].join(','))))
361
- graph.insert(RDF::Statement.new(attribute_uri, @base.with_parent([ @base.attributes ].flatten, structuredpragma_properties)[0], custom_attribute_uri))
442
+ create_triple(custom_attribute_uri, RDF.type, @base.StructuredAttribute)
443
+ create_triple(custom_attribute_uri, @base.tag, tag)
444
+ attributes[tag].each { |value|
445
+ create_triple(custom_attribute_uri, RDF.value, value)
446
+ }
447
+ create_triple(attribute_uri, @base.attribute, custom_attribute_uri)
448
+ else
449
+ # TODO
362
450
  end
363
451
  }
364
452
  end
365
453
 
366
454
  # Serializes a list of variant effects.
367
455
  #
368
- # +graph+:: RDF graph to which the structured attribute is added
369
456
  # +set_uri+:: the feature set URI to which the feature belongs
370
457
  # +feature_uri+:: the feature URI to the feature that is annotated with variant data
371
458
  # +list+:: list of variant values
372
def serialize_variant_effects(set_uri, feature_uri, list)
  # NOTE: +set_uri+ is accepted for signature parity with the other serializers,
  # but it is not referenced in this method.

  # Resolves a label to its term via BioInterchange::SO (label is sanitized first).
  so_term = lambda { |label| BioInterchange::SO.send(BioInterchange.make_safe_label(label)) }

  list.each_with_index do |effect, index|
    # Each effect is split on spaces into at most four fields; the last field
    # holds the remaining space-separated feature IDs.
    sequence_variant, variant_index, feature_type, remainder = effect.split(' ', 4)
    feature_ids = remainder.split(' ')

    effect_uri = RDF::URI.new("#{feature_uri.to_s}/variant/#{variant_index}/effect/#{index}")
    variant_uri = RDF::URI.new("#{feature_uri.to_s}/variant/#{variant_index}")

    # Link the variant to this effect, then describe the effect itself.
    serialize_variant_triple(feature_uri, variant_uri, @base.effect, effect_uri)
    create_triple(effect_uri, RDF.type, @base.Effect)
    create_triple(effect_uri, @base.sequenceVariant, so_term.call(sequence_variant))
    create_triple(effect_uri, @base.featureType, so_term.call(feature_type))

    # One triple per affected feature ID.
    feature_ids.each do |feature_id|
      create_triple(effect_uri, @base.feature, feature_id)
    end
  end
end
387
474
 
388
475
  # Serializes a list of variant sequences.
389
476
  #
390
- # +graph+:: RDF graph to which the structured attribute is added
391
477
  # +set_uri+:: the feature set URI to which the feature belongs
392
478
  # +feature_uri+:: the feature URI to the feature that is annotated with variant data
393
479
  # +list+:: list of variant values
394
def serialize_variant_seqs(set_uri, feature_uri, list)
  # NOTE: +set_uri+ is not referenced in this method.

  # Emit one variant per listed sequence; the variant URI is keyed by list position.
  list.each_with_index do |value, index|
    variant_uri = RDF::URI.new("#{feature_uri.to_s}/variant/#{index}")
    serialize_variant_triple(feature_uri, variant_uri, @base.sequence, RDF::Literal.new(value))
  end

  # Return the variant type based on the present sequence(s). Zygosity is only
  # decided when exactly two sequences are given.
  return @base.Variant if list.length != 2

  # Both entries must contain at least one letter to count as a sequence.
  # The previous pattern /a-zA-Z/ matched only the literal text "a-zA-Z",
  # so this branch was never taken; a character class is required.
  letters = /[a-zA-Z]/
  if list[0] =~ letters && list[1] =~ letters
    return @base.HomozygousVariant if list[0] == list[1]
    return @base.HeterozygousVariant
  end

  @base.Variant
end
401
495
 
402
496
  # Adds a variant to the graph; tracks the variant's URI so that RDF.type is only written out once.
403
497
  #
404
- # +graph+:: RDF graph to which the variant is added
405
498
  # +feature_uri+:: the feature URI to the feature that is annotated with variant data
406
499
  # +variant_uri+:: URI that identifies the feature in question ("subject", if you like)
407
500
  # +predicate+:: predicate that describes the data being serialized
408
501
  # +object+:: data to be serialized
409
def serialize_variant_triple(feature_uri, variant_uri, predicate, object)
  variant_key = variant_uri.to_s
  first_sighting = !@variants.has_key?(variant_key)

  # The feature link and the type statement are written only the first time
  # a given variant URI is encountered.
  if first_sighting
    create_triple(feature_uri, @base.sequence_annotation, variant_uri)
    create_triple(variant_uri, RDF.type, @base.Variant)
  end

  # Remember the URI, then emit the statement that was actually requested.
  @variants[variant_key] = true
  create_triple(variant_uri, predicate, object)
end
510
+
511
+ # Serializes a +GFF3FeatureSequence+ object that contains the sequence for a feature object.
512
+ #
513
+ # +set_uri+:: the feature set URI to which the feature belongs
514
+ # +feature_sequence+:: a +GFF3FeatureSequence+ instance
515
def serialize_feature_sequence(set_uri, feature_sequence)
  # The annotation node lives directly beneath the feature's own URI.
  feature_path = "#{set_uri.to_s}/feature/#{feature_sequence.feature_id}"
  feature_uri = RDF::URI.new(feature_path)
  annotation_uri = RDF::URI.new("#{feature_uri.to_s}/sequence")

  create_triple(feature_uri, @base.sequence_annotation, annotation_uri)
  create_triple(annotation_uri, RDF.type, @base.Sequence_Annotation)

  # The comment is optional; it is only written when one is present.
  note = feature_sequence.comment
  create_triple(annotation_uri, RDF::RDFS.comment, note) if note

  create_triple(annotation_uri, @base.sequence, feature_sequence.sequence)
end
523
+
417
524
  end
418
525
 
419
526
  end