genomer-plugin-validate 0.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (43) hide show
  1. data/.gitignore +4 -0
  2. data/Gemfile +4 -0
  3. data/Rakefile +9 -0
  4. data/VERSION +1 -0
  5. data/features/annotations/bad-product-field.feature +91 -0
  6. data/features/annotations/command-line-interface.feature +19 -0
  7. data/features/annotations/duplicate_id.feature +144 -0
  8. data/features/annotations/identical_locations.feature +74 -0
  9. data/features/annotations/incorrect-attributes.feature +135 -0
  10. data/features/annotations/missing_attributes.feature +75 -0
  11. data/features/annotations/name.feature +40 -0
  12. data/features/command-line-interface.feature +36 -0
  13. data/features/support/env.rb +13 -0
  14. data/genomer-plugin-validate.gemspec +28 -0
  15. data/lib/extensions/string.rb +12 -0
  16. data/lib/genomer-plugin-validate.rb +33 -0
  17. data/lib/genomer-plugin-validate/group.rb +17 -0
  18. data/lib/genomer-plugin-validate/group/annotations.rb +20 -0
  19. data/lib/genomer-plugin-validate/validator.rb +27 -0
  20. data/lib/genomer-plugin-validate/validator/bad_product_field.rb +45 -0
  21. data/lib/genomer-plugin-validate/validator/duplicate_coordinates.rb +12 -0
  22. data/lib/genomer-plugin-validate/validator/duplicate_id.rb +11 -0
  23. data/lib/genomer-plugin-validate/validator/gff3_attributes.rb +16 -0
  24. data/lib/genomer-plugin-validate/validator/missing_id.rb +13 -0
  25. data/lib/genomer-plugin-validate/validator/no_name_or_product.rb +13 -0
  26. data/lib/genomer-plugin-validate/validator/uppercase_name.rb +13 -0
  27. data/lib/genomer-plugin-validate/validator/view_attributes.rb +16 -0
  28. data/man/genomer-validate.ronn +100 -0
  29. data/spec/genomer-plugin-validate/group/annotations_spec.rb +18 -0
  30. data/spec/genomer-plugin-validate/group_spec.rb +24 -0
  31. data/spec/genomer-plugin-validate/validator/bad_product_field_spec.rb +93 -0
  32. data/spec/genomer-plugin-validate/validator/duplicate_coordinates_spec.rb +24 -0
  33. data/spec/genomer-plugin-validate/validator/duplicate_id_spec.rb +34 -0
  34. data/spec/genomer-plugin-validate/validator/gff_attributes_spec.rb +32 -0
  35. data/spec/genomer-plugin-validate/validator/missing_id_spec.rb +27 -0
  36. data/spec/genomer-plugin-validate/validator/no_name_or_product_spec.rb +28 -0
  37. data/spec/genomer-plugin-validate/validator/uppercase_name_spec.rb +22 -0
  38. data/spec/genomer-plugin-validate/validator/view_attributes_spec.rb +31 -0
  39. data/spec/genomer-plugin-validate/validator_spec.rb +107 -0
  40. data/spec/genomer-plugin-validate_spec.rb +92 -0
  41. data/spec/spec_helper.rb +35 -0
  42. data/spec/validator_run_matcher.rb +25 -0
  43. metadata +244 -0
@@ -0,0 +1,16 @@
1
+ class GenomerPluginValidate::Validator::ViewAttributes < Genomer::Plugin
2
+
3
+ def valid_view_attributes
4
+ %w|product ec_number function feature_type|
5
+ end
6
+
7
+ def run
8
+ annotations.
9
+ map{|attn| attn.attributes.map{|(k,v)| [k,attn] }}.
10
+ flatten(1).
11
+ select{|(term,_)| term =~ (/^[a-z]/) }.
12
+ reject{|(term,_)| valid_view_attributes.include? term }.
13
+ map{|(term,attn)| "Illegal view attribute '#{term}' for '#{attn.id}'"}
14
+ end
15
+
16
+ end
@@ -0,0 +1,100 @@
1
+ genomer-validate(1) -- Test assembly files for errors
2
+ =====================================================
3
+
4
+ ## SYNOPSIS
5
+
6
+ `genomer validate` <type>
7
+
8
+ ## DESCRIPTION
9
+
10
+ **Genomer-validate** tests the assembly files for errors. Any detected errors
11
+ are reported to the terminal.
12
+
13
+ ## OPTIONS
14
+
15
+ * `--no_view_validations`
16
+ The GFF ninth-column attributes are validated to ensure that all attributes
17
+ beginning with a lower case character match those used by
18
+ genomer-plugin-view to generate GenBank annotation table output. This
19
+ argument can be used to disable these validations.
20
+
21
+ ## FILE TYPES
22
+
23
+ ### ANNOTATIONS
24
+
25
+ Test the annotation file for errors. The default location for this file is
26
+ **assembly/annotations.gff**. The following validations are performed:
27
+
28
+ * `Identical locations`:
29
+ Flag annotations of the same type with identical start and end coordinates.
30
+
31
+ * `Identical IDs`:
32
+ Flag two or more annotations with identical ID attributes.
33
+
34
+ * `Missing ID`:
35
+ Flag annotations without an ID attribute.
36
+
37
+ * `Missing either Name or product`:
38
+ Flag annotations without either a Name or product attribute.
39
+
40
+ * `Capitalised Name attribute`:
41
+ Flag annotations with Name attributes beginning with an uppercase letter.
42
+
43
+ * `Invalid GFF attributes`:
44
+ Flag annotations with capitalised attribute keys outside of the GFF3
45
+ vocabulary. See below for a description of this vocabulary.
46
+
47
+ * `Invalid genomer plugin view attributes`:
48
+ Flag annotations with attribute keys outside of the genomer-plugin-view
49
+ vocabulary. This validation can be disabled using the `--no_view_validations`
50
+ command line flag.
51
+
52
+ * `Bad product fields`: NCBI requires that annotation product fields not
53
+ contain terms such as "putative" or end with "-like." This validator checks
54
+ that product fields do not contain the more common types of disallowed terms.
55
+
56
+ The [GFF3 specification](http://www.sequenceontology.org/gff3.shtml) restricts
57
+ attributes beginning with an upper case character (A-Z) in the ninth column to
58
+ a controlled vocabulary. Any attribute keys appearing in the ninth column not
59
+ included in this vocabulary will raise a validation error. The allowed keys are
60
+ listed below. The ID field is considered mandatory in this software and will
61
+ raise an error if not present.
62
+
63
+ * ID
64
+ * Name
65
+ * Note
66
+ * Alias
67
+ * Parent
68
+ * Target
69
+ * Gap
70
+ * Derives\_from
71
+ * Dbxref
72
+ * Ontology\_term
73
+ * Is\_circular
74
+
75
+ Any fields beginning with a lower case character (a-z) are allowed in the GFF3
76
+ ninth column. The genomer-plugin-view however uses these lower case fields to
77
+ populate a GenBank annotation table and a validation is performed for this.
78
+ The optional `--no_view_validations` flag can be used to disable this
79
+ validation. The allowed genomer-plugin-view attribute keys are as follows:
80
+
81
+ * product
82
+ * ec\_number
83
+ * function
84
+ * feature\_type
85
+
86
+ ## EXAMPLES
87
+
88
+ Test the annotation file for errors.
89
+
90
+ $ genomer validate annotations
91
+
92
+ ## BUGS
93
+
94
+ **Genomer-validate** is written in Ruby and depends on the genomer gem. See the
95
+ Gemfile in the genomer-plugin-validate gem install directory for version
96
+ details.
97
+
98
+ ## COPYRIGHT
99
+
100
+ **Genomer** is Copyright (C) 2012 Michael Barton <http://michaelbarton.me.uk>
@@ -0,0 +1,18 @@
1
+ require 'spec_helper'
2
+ require 'genomer-plugin-validate/group/annotations'
3
+
4
+ describe GenomerPluginValidate::Group::Annotations do
5
+
6
+ its("class.description"){ should == "Validate GFF3 annotations file" }
7
+
8
+ its("class.validators"){ should be_instance_of Array }
9
+ its("class.validators"){ should include :duplicate_id }
10
+ its("class.validators"){ should include :missing_id }
11
+ its("class.validators"){ should include :no_name_or_product }
12
+ its("class.validators"){ should include :gff3_attributes }
13
+ its("class.validators"){ should include :view_attributes }
14
+ its("class.validators"){ should include :duplicate_coordinates }
15
+ its("class.validators"){ should include :uppercase_name }
16
+ its("class.validators"){ should include :bad_product_field }
17
+
18
+ end
@@ -0,0 +1,24 @@
1
+ require 'spec_helper'
2
+
3
+ describe GenomerPluginValidate::Group do
4
+
5
+ before do
6
+ @example = GenomerPluginValidate::Group::Example = Class.new
7
+ stub(described_class).require(anything)
8
+ end
9
+
10
+ after do
11
+ GenomerPluginValidate::Group.send(:remove_const,'Example')
12
+ end
13
+
14
+ describe "#groups" do
15
+
16
+ subject do
17
+ described_class.groups
18
+ end
19
+
20
+ its(['example']){should == @example}
21
+
22
+ end
23
+
24
+ end
@@ -0,0 +1,93 @@
1
+ require 'spec_helper'
2
+ require 'genomer-plugin-validate/validator/bad_product_field'
3
+
4
+ describe GenomerPluginValidate::Validator::BadProductField do
5
+
6
+ subject{ described_class }
7
+
8
+ describe "where there are no annotations" do
9
+ it{ should return_no_errors_for [] }
10
+ end
11
+
12
+ describe "where there is valid annotation product field" do
13
+ it{ should return_no_errors_for [annotation_with_product("transmembrane protein")]}
14
+ end
15
+
16
+ describe "where the product field begins with hypothetical" do
17
+ it do
18
+ should return_errors_for(
19
+ [annotation_with_product("hypothetical something")],
20
+ ["Bad product field for '1:' start with 'putative' instead of 'hypothetical.'"]
21
+ )
22
+ end
23
+
24
+ it do
25
+ should return_errors_for(
26
+ [annotation_with_product("Hypothetical somthing")],
27
+ ["Bad product field for '1:' start with 'putative' instead of 'hypothetical.'"]
28
+ )
29
+ end
30
+ end
31
+
32
+ describe "where the product field contains only 'hypothetical protein'" do
33
+ it{ should return_no_errors_for [annotation_with_product("hypothetical protein")]}
34
+ it{ should return_no_errors_for [annotation_with_product("Hypothetical protein.")]}
35
+ end
36
+
37
+ describe "where the product ends with with like/domain/related" do
38
+ start = "Bad product field for '1:' "
39
+ ['like','related','domain'].each do |word|
40
+ [word, word.capitalize].each do |cased|
41
+ [' '+cased, '-'+cased].each do |prefixed|
42
+ [prefixed+'.', prefixed].each do |formatted|
43
+ it do
44
+ should return_errors_for(
45
+ [annotation_with_product("membrane" + formatted)],
46
+ [start + "products ending with '#{word}' are not allowed."]
47
+ )
48
+ end
49
+ end
50
+ end
51
+ end
52
+ end
53
+ end
54
+
55
+ describe "where the product field contains n-term" do
56
+ it do
57
+ should return_errors_for(
58
+ [annotation_with_product("n-term membrane region")],
59
+ ["Bad product field for '1:' 'N-terminal' or variations are not allowed."]
60
+ )
61
+ end
62
+
63
+ it do
64
+ should return_errors_for(
65
+ [annotation_with_product("N terminal membrane region")],
66
+ ["Bad product field for '1:' 'N-terminal' or variations are not allowed."]
67
+ )
68
+ end
69
+ end
70
+
71
+ describe "where the product field contains similiar to 'n terminal'" do
72
+ it do
73
+ should return_no_errors_for [annotation_with_product("transcription termination")]
74
+ end
75
+ end
76
+
77
+ describe "where the product field is in all caps" do
78
+ it do
79
+ should return_errors_for(
80
+ [annotation_with_product("PROTEIN")],
81
+ ["Bad product field for '1:' all caps product fields are not allowed."]
82
+ )
83
+ end
84
+
85
+ it do
86
+ should return_errors_for(
87
+ [annotation_with_product("MEMBRANE PROTEIN")],
88
+ ["Bad product field for '1:' all caps product fields are not allowed."]
89
+ )
90
+ end
91
+ end
92
+
93
+ end
@@ -0,0 +1,24 @@
1
+ require 'spec_helper'
2
+ require 'genomer-plugin-validate/validator/duplicate_coordinates'
3
+
4
+ describe GenomerPluginValidate::Validator::DuplicateCoordinates do
5
+
6
+ subject{ described_class }
7
+
8
+ describe "where there are no annotations" do
9
+ it{ should return_no_errors_for [] }
10
+ end
11
+
12
+ describe "where there are two annotations with different coordinates" do
13
+ attns = [annotation(:start => 1, :end => 3), annotation(:start => 4, :end => 6)]
14
+ it{ should return_no_errors_for attns}
15
+ end
16
+
17
+ describe "where there are two annotations with the same coordinates" do
18
+ attns = [annotation(:start => 1, :end => 3, :attributes => {'ID' => '1'}),
19
+ annotation(:start => 1, :end => 3, :attributes => {'ID' => '2'})]
20
+ errors = ["Identical locations for '1', '2'"]
21
+ it{ should return_errors_for attns, errors}
22
+ end
23
+
24
+ end
@@ -0,0 +1,34 @@
1
+ require 'spec_helper'
2
+ require 'genomer-plugin-validate/validator/duplicate_id'
3
+
4
+ describe GenomerPluginValidate::Validator::DuplicateID do
5
+
6
+ subject{ described_class }
7
+
8
+ describe "where there are no annotations" do
9
+ it{ should return_no_errors_for [] }
10
+ end
11
+
12
+ describe "where there are two annotations with different IDs" do
13
+ it{ should return_no_errors_for [annotation_with_id(1), annotation_with_id(2)]}
14
+ end
15
+
16
+ describe "where there two annotations with missing IDs" do
17
+ attns = [Annotation.new.to_gff3_record, Annotation.new.to_gff3_record]
18
+ it{ should return_no_errors_for attns}
19
+ end
20
+
21
+ describe "where there are two annotations with the same ID" do
22
+ attns = [annotation_with_id(1), annotation_with_id(1)]
23
+ errors = ["Duplicate ID '1'"]
24
+ it{ should return_errors_for attns, errors}
25
+ end
26
+
27
+ describe "where there are two sets of annotations with the same ID" do
28
+ attns = [annotation_with_id(1), annotation_with_id(1),
29
+ annotation_with_id(2), annotation_with_id(2)]
30
+ errors = ["Duplicate ID '1'", "Duplicate ID '2'"]
31
+ it{ should return_errors_for attns, errors}
32
+ end
33
+
34
+ end
@@ -0,0 +1,32 @@
1
+ require 'spec_helper'
2
+ require 'genomer-plugin-validate/validator/gff3_attributes'
3
+
4
+ describe GenomerPluginValidate::Validator::Gff3Attributes do
5
+
6
+ subject{ described_class }
7
+
8
+ describe "where there are no annotations" do
9
+ it{ should return_no_errors_for [] }
10
+ end
11
+
12
+ describe "where there are valid GFF3 attributes" do
13
+ attrs = %w|ID Name Alias Parent Target Gap Derives_from
14
+ Note Dbxref Ontology_term Is_circular|
15
+ attrs.each do |attr|
16
+ attns = [annotation({:attributes => {attr => 'something'}})]
17
+ it{ should return_no_errors_for attns}
18
+ end
19
+ end
20
+
21
+ describe "where there are lower case attribute keys" do
22
+ attns = [annotation({:attributes => {'unknown_term' => 'something'}})]
23
+ it{ should return_no_errors_for attns}
24
+ end
25
+
26
+ describe "where there is an unknown capitalised attribute key" do
27
+ attns = [annotation({:attributes => {'Unknown_term' => 'something','ID' => 1}})]
28
+ errors = ["Illegal GFF3 attribute 'Unknown_term' for '1'"]
29
+ it{ should return_errors_for attns, errors}
30
+ end
31
+
32
+ end
@@ -0,0 +1,27 @@
1
+ require 'spec_helper'
2
+ require 'genomer-plugin-validate/validator/missing_id'
3
+
4
+ describe GenomerPluginValidate::Validator::MissingID do
5
+
6
+ subject{ described_class }
7
+
8
+ describe "where there are no annotations" do
9
+ it{ should return_no_errors_for [] }
10
+ end
11
+
12
+ describe "where there no annotations with missing IDs" do
13
+ it{ should return_no_errors_for [annotation_with_id(1), annotation_with_id(2)] }
14
+ end
15
+
16
+ describe "where there an annotation with a missing ID" do
17
+ errors = ["Annotations found with missing ID attribute"]
18
+ it{ should return_errors_for [Annotation.new.to_gff3_record], errors}
19
+ end
20
+
21
+ describe "where there are multiple annotations with missing IDs" do
22
+ errors = ["Annotations found with missing ID attribute"]
23
+ attns = [Annotation.new.to_gff3_record, Annotation.new.to_gff3_record]
24
+ it{ should return_errors_for attns, errors}
25
+ end
26
+
27
+ end
@@ -0,0 +1,28 @@
1
+ require 'spec_helper'
2
+ require 'genomer-plugin-validate/validator/no_name_or_product'
3
+
4
+ describe GenomerPluginValidate::Validator::NoNameOrProduct do
5
+
6
+ subject{ described_class }
7
+
8
+ describe "where there are no annotations" do
9
+ it{ should return_no_errors_for [] }
10
+ end
11
+
12
+ describe "where an annotation has a Name attribute" do
13
+ attns = [annotation({:attributes => {'Name' => 'something'}})]
14
+ it{ should return_no_errors_for attns}
15
+ end
16
+
17
+ describe "where an annotation has a Product attribute" do
18
+ attns = [annotation({:attributes => {'product' => 'something'}})]
19
+ it{ should return_no_errors_for attns}
20
+ end
21
+
22
+ describe "where an annotation has neither a Name nor a product attribute" do
23
+ attns = [annotation({:attributes => {'ID' => '1'}})]
24
+ errors = ["No 'Name' or 'product' attribute for annotation '1'"]
25
+ it{ should return_errors_for attns, errors}
26
+ end
27
+
28
+ end
@@ -0,0 +1,22 @@
1
+ require 'spec_helper'
2
+ require 'genomer-plugin-validate/validator/uppercase_name'
3
+
4
+ describe GenomerPluginValidate::Validator::UppercaseName do
5
+
6
+ subject{ described_class }
7
+
8
+ describe "where there are no annotations" do
9
+ it{ should return_no_errors_for [] }
10
+ end
11
+
12
+ describe "where there is an annotations with a lowercase name" do
13
+ it{ should return_no_errors_for [annotation(:attributes => {'Name' => 'something'})]}
14
+ end
15
+
16
+ describe "where there two annotations with an uppercase name" do
17
+ attns = [annotation(:attributes => {'Name' => 'Something', 'ID' => 1})]
18
+ errors = ["Illegal capitalised Name attribute 'Something' for '1'"]
19
+ it{ should return_errors_for attns, errors}
20
+ end
21
+
22
+ end
@@ -0,0 +1,31 @@
1
+ require 'spec_helper'
2
+ require 'genomer-plugin-validate/validator/view_attributes'
3
+
4
+ describe GenomerPluginValidate::Validator::ViewAttributes do
5
+
6
+ subject{ described_class }
7
+
8
+ describe "where there are no annotations" do
9
+ it{ should return_no_errors_for [] }
10
+ end
11
+
12
+ describe "where there are capitalized attribute keys" do
13
+ attns = [annotation({:attributes => {'Unknown_term' => 'something'}})]
14
+ it{ should return_no_errors_for attns}
15
+ end
16
+
17
+ describe "where there are valid view attributes" do
18
+ attrs = %w|product ec_number function feature_type|
19
+ attrs.each do |attr|
20
+ attns = [annotation({:attributes => {attr => 'something'}})]
21
+ it{ should return_no_errors_for attns}
22
+ end
23
+ end
24
+
25
+ describe "where there is an unknown lower case attribute key" do
26
+ attns = [annotation({:attributes => {'unknown_term' => 'something','ID' => 1}})]
27
+ errors = ["Illegal view attribute 'unknown_term' for '1'"]
28
+ it{ should return_errors_for attns, errors}
29
+ end
30
+
31
+ end