genomer-plugin-validate 0.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/.gitignore +4 -0
- data/Gemfile +4 -0
- data/Rakefile +9 -0
- data/VERSION +1 -0
- data/features/annotations/bad-product-field.feature +91 -0
- data/features/annotations/command-line-interface.feature +19 -0
- data/features/annotations/duplicate_id.feature +144 -0
- data/features/annotations/identical_locations.feature +74 -0
- data/features/annotations/incorrect-attributes.feature +135 -0
- data/features/annotations/missing_attributes.feature +75 -0
- data/features/annotations/name.feature +40 -0
- data/features/command-line-interface.feature +36 -0
- data/features/support/env.rb +13 -0
- data/genomer-plugin-validate.gemspec +28 -0
- data/lib/extensions/string.rb +12 -0
- data/lib/genomer-plugin-validate.rb +33 -0
- data/lib/genomer-plugin-validate/group.rb +17 -0
- data/lib/genomer-plugin-validate/group/annotations.rb +20 -0
- data/lib/genomer-plugin-validate/validator.rb +27 -0
- data/lib/genomer-plugin-validate/validator/bad_product_field.rb +45 -0
- data/lib/genomer-plugin-validate/validator/duplicate_coordinates.rb +12 -0
- data/lib/genomer-plugin-validate/validator/duplicate_id.rb +11 -0
- data/lib/genomer-plugin-validate/validator/gff3_attributes.rb +16 -0
- data/lib/genomer-plugin-validate/validator/missing_id.rb +13 -0
- data/lib/genomer-plugin-validate/validator/no_name_or_product.rb +13 -0
- data/lib/genomer-plugin-validate/validator/uppercase_name.rb +13 -0
- data/lib/genomer-plugin-validate/validator/view_attributes.rb +16 -0
- data/man/genomer-validate.ronn +100 -0
- data/spec/genomer-plugin-validate/group/annotations_spec.rb +18 -0
- data/spec/genomer-plugin-validate/group_spec.rb +24 -0
- data/spec/genomer-plugin-validate/validator/bad_product_field_spec.rb +93 -0
- data/spec/genomer-plugin-validate/validator/duplicate_coordinates_spec.rb +24 -0
- data/spec/genomer-plugin-validate/validator/duplicate_id_spec.rb +34 -0
- data/spec/genomer-plugin-validate/validator/gff_attributes_spec.rb +32 -0
- data/spec/genomer-plugin-validate/validator/missing_id_spec.rb +27 -0
- data/spec/genomer-plugin-validate/validator/no_name_or_product_spec.rb +28 -0
- data/spec/genomer-plugin-validate/validator/uppercase_name_spec.rb +22 -0
- data/spec/genomer-plugin-validate/validator/view_attributes_spec.rb +31 -0
- data/spec/genomer-plugin-validate/validator_spec.rb +107 -0
- data/spec/genomer-plugin-validate_spec.rb +92 -0
- data/spec/spec_helper.rb +35 -0
- data/spec/validator_run_matcher.rb +25 -0
- metadata +244 -0
@@ -0,0 +1,16 @@
|
|
1
|
+
# Validator that reports annotation attribute keys which start with a lower
# case letter but are not listed in #valid_view_attributes.
class GenomerPluginValidate::Validator::ViewAttributes < Genomer::Plugin

  # The lower case attribute keys this validator accepts.
  def valid_view_attributes
    ['product', 'ec_number', 'function', 'feature_type']
  end

  # Walks every attribute key of every annotation and returns an Array with
  # one error message per offending key, in annotation order. Keys that do
  # not begin with a-z are ignored by this validator.
  def run
    allowed = valid_view_attributes
    errors  = []

    annotations.each do |record|
      record.attributes.each do |(key, _value)|
        next unless key =~ (/^[a-z]/)
        next if allowed.include?(key)

        errors << "Illegal view attribute '#{key}' for '#{record.id}'"
      end
    end

    errors
  end

end
|
@@ -0,0 +1,100 @@
|
|
1
|
+
genomer-validate(1) -- Test assembly files for errors
|
2
|
+
=====================================================
|
3
|
+
|
4
|
+
## SYNOPSIS
|
5
|
+
|
6
|
+
`genomer validate` <type>
|
7
|
+
|
8
|
+
## DESCRIPTION
|
9
|
+
|
10
|
+
**Genomer-validate** tests the assembly files for errors. Any detected errors
|
11
|
+
are reported to the terminal.
|
12
|
+
|
13
|
+
## OPTIONS
|
14
|
+
|
15
|
+
* `--no_view_validations`
|
16
|
+
The GFF ninth-column attributes are validated to ensure that all attributes
|
17
|
+
beginning with a lower case character match those used by
|
18
|
+
genomer-plugin-view to generate GenBank annotation table output. This
|
19
|
+
argument can be used to disable these validations.
|
20
|
+
|
21
|
+
## FILE TYPES
|
22
|
+
|
23
|
+
### ANNOTATIONS
|
24
|
+
|
25
|
+
Test the annotation file for errors. The default location for this file is
|
26
|
+
**assembly/annotations.gff**. The following validations are performed:
|
27
|
+
|
28
|
+
* `Identical locations`:
|
29
|
+
Flag annotations of the same type with identical start and end coordinates.
|
30
|
+
|
31
|
+
* `Identical IDs`:
|
32
|
+
Flag two or more annotations with identical ID attributes.
|
33
|
+
|
34
|
+
* `Missing ID`:
|
35
|
+
Flag annotations without an ID attribute.
|
36
|
+
|
37
|
+
* `Missing either Name or product`:
|
38
|
+
Flag annotations without either a Name or product attribute.
|
39
|
+
|
40
|
+
* `Capitalised Name attribute`:
|
41
|
+
Flag annotations with Name attributes beginning with an uppercase letter.
|
42
|
+
|
43
|
+
* `Invalid GFF attributes`:
|
44
|
+
Flag annotations with capitalised attribute keys outside of the GFF3
|
45
|
+
vocabulary. See below for a description of this vocabulary.
|
46
|
+
|
47
|
+
* `Invalid genomer plugin view attributes`:
|
48
|
+
Flag annotations with attribute keys outside of the genomer-plugin-view
|
49
|
+
vocabulary. This validation can be disabled using the `--no_view_validations`
|
50
|
+
command line flag.
|
51
|
+
|
52
|
+
* `Bad product fields`: NCBI requires that annotation product fields not
|
53
|
+
contain terms such as "putative" or end with "-like." This validator checks
|
54
|
+
that product fields do not contain the more common types of disallowed terms.
|
55
|
+
|
56
|
+
The [GFF3 specification](http://www.sequenceontology.org/gff3.shtml) restricts
|
57
|
+
attributes beginning with an upper case character (A-Z) in the ninth column to
|
58
|
+
a controlled vocabulary. Any attribute keys appearing in the ninth column not
|
59
|
+
included in this vocabulary will raise a validation error. The allowed keys are
|
60
|
+
listed below. The ID field is considered mandatory in this software and will
|
61
|
+
raise an error if not present.
|
62
|
+
|
63
|
+
* ID
|
64
|
+
* Name
|
65
|
+
* Note
|
66
|
+
* Alias
|
67
|
+
* Parent
|
68
|
+
* Target
|
69
|
+
* Gap
|
70
|
+
* Derives\_from
|
71
|
+
* Dbxref
|
72
|
+
* Ontology\_term
|
73
|
+
* Is\_circular
|
74
|
+
|
75
|
+
Any fields beginning with a lower case character (a-z) are allowed in the GFF3
|
76
|
+
ninth column. The genomer-plugin-view however uses these lower case fields to
|
77
|
+
populate a GenBank annotation table and a validation is performed for this.
|
78
|
+
The optional `--no_view_validations` flag can be used to disable this
|
79
|
+
validation. The allowed genomer-plugin-view attribute keys are as follows:
|
80
|
+
|
81
|
+
* product
|
82
|
+
* ec\_number
|
83
|
+
* function
|
84
|
+
* feature\_type
|
85
|
+
|
86
|
+
## EXAMPLES
|
87
|
+
|
88
|
+
Test the annotation file for errors.
|
89
|
+
|
90
|
+
$ genomer validate annotations
|
91
|
+
|
92
|
+
## BUGS
|
93
|
+
|
94
|
+
**Genomer-validate** is written in Ruby and depends on the genomer gem. See the
|
95
|
+
Gemfile in the genomer-plugin-validate gem install directory for version
|
96
|
+
details.
|
97
|
+
|
98
|
+
## COPYRIGHT
|
99
|
+
|
100
|
+
**Genomer** is Copyright (C) 2012 Michael Barton <http://michaelbarton.me.uk>
|
@@ -0,0 +1,18 @@
|
|
1
|
+
require 'spec_helper'
|
2
|
+
require 'genomer-plugin-validate/group/annotations'
|
3
|
+
|
4
|
+
# Checks the class-level description of the annotations group and the list
# of validators it declares.
describe GenomerPluginValidate::Group::Annotations do

  its("class.description") do
    should == "Validate GFF3 annotations file"
  end

  its("class.validators") do
    should be_instance_of(Array)
  end

  # Each of these validator names must appear in the declared list.
  [
    :duplicate_id,
    :missing_id,
    :no_name_or_product,
    :gff3_attributes,
    :view_attributes,
    :duplicate_coordinates,
    :uppercase_name,
    :bad_product_field
  ].each do |validator_name|
    its("class.validators") do
      should include(validator_name)
    end
  end

end
|
@@ -0,0 +1,24 @@
|
|
1
|
+
require 'spec_helper'
|
2
|
+
|
3
|
+
# Checks that Group.groups returns the class defined under the Group
# namespace when looked up with the key 'example'.
describe GenomerPluginValidate::Group do

  # Define a throwaway Group::Example class for each example and stub out
  # `require` on the described class.
  before do
    @example = Class.new
    GenomerPluginValidate::Group.const_set('Example', @example)
    stub(described_class).require(anything)
  end

  # Remove the temporary constant again so examples stay independent.
  after do
    GenomerPluginValidate::Group.send(:remove_const, 'Example')
  end

  describe "#groups" do

    subject { described_class.groups }

    its(['example']) do
      should == @example
    end

  end

end
|
@@ -0,0 +1,93 @@
|
|
1
|
+
require 'spec_helper'
|
2
|
+
require 'genomer-plugin-validate/validator/bad_product_field'
|
3
|
+
|
4
|
+
# Examples for the BadProductField validator. Each example passes a list of
# annotations to the validator class and states the error messages expected
# back.
describe GenomerPluginValidate::Validator::BadProductField do

  subject { described_class }

  describe "where there are no annotations" do
    it do
      should return_no_errors_for([])
    end
  end

  describe "where there is valid annotation product field" do
    it do
      should return_no_errors_for([annotation_with_product("transmembrane protein")])
    end
  end

  describe "where the product field begins with hypothetical" do
    # Lower case and capitalised spellings expect the same message.
    ["hypothetical something", "Hypothetical somthing"].each do |product|
      it do
        should return_errors_for(
          [annotation_with_product(product)],
          ["Bad product field for '1:' start with 'putative' instead of 'hypothetical.'"]
        )
      end
    end
  end

  describe "where the product field contains only 'hypothetical protein'" do
    ["hypothetical protein", "Hypothetical protein."].each do |product|
      it do
        should return_no_errors_for([annotation_with_product(product)])
      end
    end
  end

  describe "where the product ends with with like/domain/related" do
    message_prefix = "Bad product field for '1:' "

    # For every word, try both cases, both separators (space and hyphen) and
    # both endings (with and without a trailing full stop).
    ['like', 'related', 'domain'].each do |word|
      [word, word.capitalize].each do |cased|
        [' ', '-'].each do |separator|
          ['.', ''].each do |ending|
            suffix = separator + cased + ending

            it do
              should return_errors_for(
                [annotation_with_product("membrane" + suffix)],
                [message_prefix + "products ending with '#{word}' are not allowed."]
              )
            end
          end
        end
      end
    end
  end

  describe "where the product field contains n-term" do
    ["n-term membrane region", "N terminal membrane region"].each do |product|
      it do
        should return_errors_for(
          [annotation_with_product(product)],
          ["Bad product field for '1:' 'N-terminal' or variations are not allowed."]
        )
      end
    end
  end

  describe "where the product field contains similiar to 'n terminal'" do
    it do
      should return_no_errors_for([annotation_with_product("transcription termination")])
    end
  end

  describe "where the product field is in all caps" do
    ["PROTEIN", "MEMBRANE PROTEIN"].each do |product|
      it do
        should return_errors_for(
          [annotation_with_product(product)],
          ["Bad product field for '1:' all caps product fields are not allowed."]
        )
      end
    end
  end

end
|
@@ -0,0 +1,24 @@
|
|
1
|
+
require 'spec_helper'
|
2
|
+
require 'genomer-plugin-validate/validator/duplicate_coordinates'
|
3
|
+
|
4
|
+
# Examples for the DuplicateCoordinates validator: annotations sharing the
# same start and end are expected to be reported together by ID.
describe GenomerPluginValidate::Validator::DuplicateCoordinates do

  subject { described_class }

  describe "where there are no annotations" do
    it do
      should return_no_errors_for([])
    end
  end

  describe "where there are two annotations with different coordinates" do
    distinct = [[1, 3], [4, 6]].map do |(first, last)|
      annotation(:start => first, :end => last)
    end

    it do
      should return_no_errors_for(distinct)
    end
  end

  describe "where there are two annotations with the same coordinates" do
    # Same start/end, distinguished only by their ID attribute.
    colliding = ['1', '2'].map do |id|
      annotation(:start => 1, :end => 3, :attributes => {'ID' => id})
    end
    expected = ["Identical locations for '1', '2'"]

    it do
      should return_errors_for(colliding, expected)
    end
  end

end
|
@@ -0,0 +1,34 @@
|
|
1
|
+
require 'spec_helper'
|
2
|
+
require 'genomer-plugin-validate/validator/duplicate_id'
|
3
|
+
|
4
|
+
# Examples for the DuplicateID validator: one error is expected per ID value
# that is used by more than one annotation.
describe GenomerPluginValidate::Validator::DuplicateID do

  subject { described_class }

  describe "where there are no annotations" do
    it do
      should return_no_errors_for([])
    end
  end

  describe "where there are two annotations with different IDs" do
    it do
      should return_no_errors_for([annotation_with_id(1), annotation_with_id(2)])
    end
  end

  describe "where there two annotations with missing IDs" do
    # Records built without any ID attribute are expected to yield no errors here.
    without_ids = Array.new(2) { Annotation.new.to_gff3_record }

    it do
      should return_no_errors_for(without_ids)
    end
  end

  describe "where there are two annotations with the same ID" do
    records  = [1, 1].map { |id| annotation_with_id(id) }
    expected = ["Duplicate ID '1'"]

    it do
      should return_errors_for(records, expected)
    end
  end

  describe "where there are two sets of annotations with the same ID" do
    records  = [1, 1, 2, 2].map { |id| annotation_with_id(id) }
    expected = ["Duplicate ID '1'", "Duplicate ID '2'"]

    it do
      should return_errors_for(records, expected)
    end
  end

end
|
@@ -0,0 +1,32 @@
|
|
1
|
+
require 'spec_helper'
|
2
|
+
require 'genomer-plugin-validate/validator/gff3_attributes'
|
3
|
+
|
4
|
+
# Examples for the Gff3Attributes validator: the listed capitalised keys and
# any lower case key are accepted, other capitalised keys are reported.
describe GenomerPluginValidate::Validator::Gff3Attributes do

  subject { described_class }

  describe "where there are no annotations" do
    it do
      should return_no_errors_for([])
    end
  end

  describe "where there are valid GFF3 attributes" do
    permitted = ['ID', 'Name', 'Alias', 'Parent', 'Target', 'Gap',
                 'Derives_from', 'Note', 'Dbxref', 'Ontology_term',
                 'Is_circular']

    # One example per permitted key.
    permitted.each do |key|
      records = [annotation({:attributes => {key => 'something'}})]

      it do
        should return_no_errors_for(records)
      end
    end
  end

  describe "where there are lower case attribute keys" do
    records = [annotation({:attributes => {'unknown_term' => 'something'}})]

    it do
      should return_no_errors_for(records)
    end
  end

  describe "where there is an unknown capitalised attribute key" do
    records  = [annotation({:attributes => {'Unknown_term' => 'something', 'ID' => 1}})]
    expected = ["Illegal GFF3 attribute 'Unknown_term' for '1'"]

    it do
      should return_errors_for(records, expected)
    end
  end

end
|
@@ -0,0 +1,27 @@
|
|
1
|
+
require 'spec_helper'
|
2
|
+
require 'genomer-plugin-validate/validator/missing_id'
|
3
|
+
|
4
|
+
# Examples for the MissingID validator: a single error message is expected
# no matter how many annotations lack an ID attribute.
describe GenomerPluginValidate::Validator::MissingID do

  subject { described_class }

  describe "where there are no annotations" do
    it do
      should return_no_errors_for([])
    end
  end

  describe "where there no annotations with missing IDs" do
    it do
      should return_no_errors_for([annotation_with_id(1), annotation_with_id(2)])
    end
  end

  describe "where there an annotation with a missing ID" do
    expected = ["Annotations found with missing ID attribute"]

    it do
      should return_errors_for([Annotation.new.to_gff3_record], expected)
    end
  end

  describe "where there are multiple annotations with missing IDs" do
    expected    = ["Annotations found with missing ID attribute"]
    without_ids = Array.new(2) { Annotation.new.to_gff3_record }

    it do
      should return_errors_for(without_ids, expected)
    end
  end

end
|
@@ -0,0 +1,28 @@
|
|
1
|
+
require 'spec_helper'
|
2
|
+
require 'genomer-plugin-validate/validator/no_name_or_product'
|
3
|
+
|
4
|
+
# Examples for the NoNameOrProduct validator: an annotation carrying either
# a 'Name' or a 'product' attribute passes, one with neither is reported.
describe GenomerPluginValidate::Validator::NoNameOrProduct do

  subject { described_class }

  describe "where there are no annotations" do
    it do
      should return_no_errors_for([])
    end
  end

  describe "where an annotation has a Name attribute" do
    records = [annotation({:attributes => {'Name' => 'something'}})]

    it do
      should return_no_errors_for(records)
    end
  end

  describe "where an annotation has a Product attribute" do
    records = [annotation({:attributes => {'product' => 'something'}})]

    it do
      should return_no_errors_for(records)
    end
  end

  describe "where an annotation has neither a Name nor a product attribute" do
    records  = [annotation({:attributes => {'ID' => '1'}})]
    expected = ["No 'Name' or 'product' attribute for annotation '1'"]

    it do
      should return_errors_for(records, expected)
    end
  end

end
|
@@ -0,0 +1,22 @@
|
|
1
|
+
require 'spec_helper'
|
2
|
+
require 'genomer-plugin-validate/validator/uppercase_name'
|
3
|
+
|
4
|
+
# Examples for the UppercaseName validator: a Name attribute starting with a
# lower case letter passes, one starting with an upper case letter is
# reported together with the annotation ID.
describe GenomerPluginValidate::Validator::UppercaseName do

  subject{ described_class }

  describe "where there are no annotations" do
    it{ should return_no_errors_for [] }
  end

  describe "where there is an annotation with a lowercase name" do
    it{ should return_no_errors_for [annotation(:attributes => {'Name' => 'something'})]}
  end

  # The fixture holds a single annotation, so the description says so; the
  # previous wording claimed two annotations.
  describe "where there is an annotation with an uppercase name" do
    attns  = [annotation(:attributes => {'Name' => 'Something', 'ID' => 1})]
    errors = ["Illegal capitalised Name attribute 'Something' for '1'"]
    it{ should return_errors_for attns, errors}
  end

end
|
@@ -0,0 +1,31 @@
|
|
1
|
+
require 'spec_helper'
|
2
|
+
require 'genomer-plugin-validate/validator/view_attributes'
|
3
|
+
|
4
|
+
# Examples for the ViewAttributes validator: capitalised keys are ignored,
# the four listed lower case keys pass, any other lower case key is reported.
describe GenomerPluginValidate::Validator::ViewAttributes do

  subject { described_class }

  describe "where there are no annotations" do
    it do
      should return_no_errors_for([])
    end
  end

  describe "where there are capitalized attribute keys" do
    records = [annotation({:attributes => {'Unknown_term' => 'something'}})]

    it do
      should return_no_errors_for(records)
    end
  end

  describe "where there are valid view attributes" do
    # One example per accepted key.
    ['product', 'ec_number', 'function', 'feature_type'].each do |key|
      records = [annotation({:attributes => {key => 'something'}})]

      it do
        should return_no_errors_for(records)
      end
    end
  end

  describe "where there is an unknown lower case attribute key" do
    records  = [annotation({:attributes => {'unknown_term' => 'something', 'ID' => 1}})]
    expected = ["Illegal view attribute 'unknown_term' for '1'"]

    it do
      should return_errors_for(records, expected)
    end
  end

end
|