fasta_read 1.1.0 → 2.0.1

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 9d46389f8c604841492cdfc937c0ac0c4ac69082
4
- data.tar.gz: 79e57cab5fe9d2e5325eb8a8aa9e05ec7393e821
3
+ metadata.gz: 0610fba4a055d83670705afed69fd6a98603749b
4
+ data.tar.gz: 5203a61fa111d3c29f73fc4396cec5f3d9a43070
5
5
  SHA512:
6
- metadata.gz: 06d11757e322c49a9c9607f37212e2dfe4350f9cfbb9498d29aecf89f672b6e7dc6dd6d2c2ef5ea4959800f3745a1a9dc9b0da5f0e5aa63467ecdba08b849795
7
- data.tar.gz: a98f489a0d17f7123af3411578c27f716a5c7660f0bdb94adf55f025e337ac0796ebb836dcc44754125327c3f6087a3f75f97a482fe3ff07646b28b9ead4f2eb
6
+ metadata.gz: a6bf8413263aeab223c0afd9e3e52a491d7c12c3f007438bb4d8ee4d39c19b626ef6275831f658ef2ab8203b8a7818c9e62626e7e275e2b2ab544f7c819dbf60
7
+ data.tar.gz: a1d0463b0a048cbfbce9b9196f0cce7ca73f94737e226092efe3db9ae91a74fb35f4d862aa3f747bfc4646d3c0d32e48e79a0e2782dac8dbaa7d073b49c18f7e
data/.travis.yml CHANGED
@@ -1,4 +1,4 @@
1
1
  language: ruby
2
2
  rvm:
3
- - 2.1.0
3
+ - 2.1.1
4
4
  - 1.9.3
data/Gemfile.lock CHANGED
@@ -1,7 +1,7 @@
1
1
  PATH
2
2
  remote: .
3
3
  specs:
4
- fasta_read (1.1.0)
4
+ fasta_read (2.0.1)
5
5
  methadone (>= 1.3.2)
6
6
  rake (>= 0.9.2)
7
7
 
data/README.md CHANGED
@@ -24,7 +24,7 @@ Or install it yourself as:
24
24
  ## Example
25
25
 
26
26
  fasta_read hg19 chr12 112123514 112123790 --output=out.txt
27
-
27
+
28
28
  Options:
29
29
  -h, --help Show command line help
30
30
  -o, --output OUTPUTFILE outputs sequence to a file
@@ -39,7 +39,7 @@ Or install it yourself as:
39
39
  assembly
40
40
  assembly name (hg19, mm10, etc.)
41
41
  chromosome
42
- id of chromosome (1-22 or X/Y)
42
+ id of chromosome (chr1-chr22 or chrX/chrY)
43
43
  cstart
44
44
  start coordinate (inclusive) within the chromosome
45
45
  cend
@@ -51,30 +51,40 @@ stdout: Extracted sequence (only)
51
51
 
52
52
  stderr: Any errors.
53
53
 
54
- using --output option export the sequence to a file
54
+ Using the --output option exports the sequence to a file
55
55
 
56
56
  ## Supporting Requirements
57
57
 
58
58
  The program depends on being run at the top of a directory tree containing .fa files. The .fa files should be of the form where each file maps to a single chromosome.
59
-
60
59
  The directory tree will have separate branches for SNPs and unmasked files.
61
60
 
62
- /fasta/hg19/unmasked/chr1.fa
63
- /fasta/hg19/unmasked/chr2.fa
64
- /fasta/hg19/unmasked/chr3.fa
65
- ........
66
- /fasta/hg19/snp/chr1.subst.fa
67
- /fasta/hg19/snp/chr2.subst.fa
68
- /fasta/hg19/snp/chr3.subst.fa
69
- .......
70
- /fasta/mm10/unmasked/chr1.fa
71
- /fasta/mm10/unmasked/chr2.fa
72
- /fasta/mm10/unmasked/chr3.fa
73
- ........
74
- /fasta/mm10/snp/chr1.subst.fa
75
- /fasta/mm10/snp/chr2.subst.fa
76
- /fasta/mm10/snp/chr3.subst.fa
77
- .......
61
+ For example, given the following tree, the command expects to be run inside the 'fasta' directory:
62
+
63
+ fasta
64
+ ├── hg19
65
+ │   ├── snp
66
+ │   │   ├── chr1.subst.fa
67
+ | │   ├── chr2.subst.fa
68
+ | │   ├── chr3.subst.fa
69
+ | │   └── ....
70
+ │   └── unmasked
71
+ │      ├── chr1.fa
72
+ |    ├── chr2.fa
73
+ |    ├── chr3.fa
74
+ |    └── ....
75
+ └── mm10
76
+ ├── snp
77
+ │   ├── chr1.subst.fa
78
+ │   ├── chr2.subst.fa
79
+ │   ├── chr3.subst.fa
80
+ │   └── ....
81
+ └── unmasked
82
+       ├── chr1.fa
83
+    ├── chr2.fa
84
+    ├── chr3.fa
85
+    └── ....
86
+
87
+ Fasta files can be downloaded at:
78
88
 
79
89
  http://hgdownload.cse.ucsc.edu/goldenPath/hg19/chromosomes/
80
90
  http://hgdownload.cse.ucsc.edu/goldenPath/hg19/snp138Mask/
data/bin/fasta_read CHANGED
@@ -9,12 +9,10 @@ class App
9
9
  include Methadone::CLILogging
10
10
 
11
11
  main do |assembly, chromosome, cstart, cend|
12
- snp = masked_unmasked(options)[0]
13
- subst = masked_unmasked(options)[1]
14
- path = "fasta/#{assembly}/#{snp}/chr#{chromosome + subst}.fa"
12
+ path = "#{assembly}/#{snp_or_unmasked(options)[0]}/#{chromosome + snp_or_unmasked(options)[1]}.fa"
15
13
  exit_now_with!(assembly_or_chromosome(assembly, chromosome)) unless File.exist?(path)
16
14
  fasta = IO.read(path)
17
- debug("About to read #{assembly} #{snp} chr#{chromosome}:#{cstart}-#{cend}")
15
+ debug("About to read #{assembly} #{snp_or_unmasked(options)[0]} chr#{chromosome}:#{cstart}-#{cend}")
18
16
  sequence = FastaRead::Sequence.new(fasta, chromosome, cstart, cend).process
19
17
  if options[:output]
20
18
  File.open(options[:output], "w") do |file|
@@ -26,12 +24,12 @@ class App
26
24
  info "#{sequence.length} base pairs"
27
25
  end
28
26
 
29
- def self.masked_unmasked(options)
27
+ def self.snp_or_unmasked(options)
30
28
  options[:snp] ? ["snp", ".subst"] : ["unmasked", ""]
31
29
  end
32
30
 
33
31
  def self.assembly_or_chromosome(assembly, chromosome)
34
- Dir.exist?("fasta/#{assembly}") ? [chromosome, "chromosome"] : [assembly, "assembly"]
32
+ Dir.exist?("#{assembly}") ? [chromosome, "chromosome"] : [assembly, "assembly"]
35
33
  end
36
34
 
37
35
  def self.exit_now_with!(value_and_argument)
@@ -41,17 +39,47 @@ class App
41
39
 
42
40
  # Declare command-line interface here
43
41
 
44
- description "Extract DNA Fasta sequence from assembly files."
42
+ description <<-EOF
43
+ Extract DNA Fasta sequence from assembly files.
44
+
45
+ The program depends on being run at the top of a directory tree containing .fa files. The .fa files should be of the form where each file maps to a single chromosome.
46
+ The directory tree will have separate branches for SNPs and unmasked files.
47
+
48
+ For example, given the following tree, the command expects to be run inside the 'fasta' directory:
49
+
50
+ └── fasta
51
+ ├── hg19
52
+ │   ├── snp
53
+ │   │   ├── chr1.subst.fa
54
+ | │   ├── chr2.subst.fa
55
+ | │   ├── chr3.subst.fa
56
+ | │   └── ....
57
+ │   └── unmasked
58
+ │      ├── chr1.fa
59
+ |    ├── chr2.fa
60
+ |    ├── chr3.fa
61
+ |    └── ....
62
+ └── mm10
63
+ ├── snp
64
+ │   ├── chr1.subst.fa
65
+ │   ├── chr2.subst.fa
66
+ │   ├── chr3.subst.fa
67
+ │   └── ....
68
+ └── unmasked
69
+       ├── chr1.fa
70
+    ├── chr2.fa
71
+    ├── chr3.fa
72
+    └── ....
73
+ EOF
74
+
45
75
  #
46
76
  # Accept flags via:
47
77
  # on("--flag VAL","Some flag")
48
78
  # options[flag] will contain VAL
49
- options['multiple-lines'] = true
50
79
  #
51
80
  # Specify switches via:
52
81
  on "-o OUTPUTFILE", "--output", "outputs sequence to a file"
53
82
  on "--snp", "return the sequence from the SNP-masked assembly"
54
- on "--multiple-lines", "return the sequence from the SNP-masked assembly"
55
83
 
56
84
  # on("--[no-]switch","Some switch")
57
85
  #
@@ -59,7 +87,7 @@ class App
59
87
  #
60
88
  # Require an argument
61
89
  arg :assembly, "assembly name (hg19, mm10, etc.)"
62
- arg :chromosome, "id of chromosome (1-22 or X/Y)"
90
+ arg :chromosome, "id of chromosome (chr1-chr22 or chrX/chrY)"
63
91
  arg :cstart, "start coordinate (inclusive) within the chromosome"
64
92
  arg :cend, "end coordinate within the chromosome"
65
93
  #
@@ -4,7 +4,7 @@ Feature: My bootstrapped app kinda works
4
4
  So I don't have to do it myself
5
5
 
6
6
  Background:
7
- Given a file named "fasta/hg19/unmasked/chr1.fa" with:
7
+ Given a file named "hg19/unmasked/chr1.fa" with:
8
8
  """
9
9
  >chr1
10
10
  AATCACACGTGCAGGAACCCTTTTCCAAAGGAGGGTCACGCTCACAGCTT
@@ -13,7 +13,7 @@ Feature: My bootstrapped app kinda works
13
13
  GTCTGTCTCCACCAGCACTTTGTGGGTGGGCTCTGTCCCCAGGAAATGCT
14
14
  C
15
15
  """
16
- And a file named "fasta/hg19/snp/chr1.subst.fa" with:
16
+ And a file named "hg19/snp/chr1.subst.fa" with:
17
17
  """
18
18
  AATCACACGTGCAGGAACCCTTTTCCAAAGGAGGGTCACGCTCACAGCTT
19
19
  CTGGAGTAGGACATGGACTTGTCTTTTTGGAGGCCCATCCTCAACGCACC
@@ -26,7 +26,6 @@ Feature: My bootstrapped app kinda works
26
26
  When I get help for "fasta_read"
27
27
  Then the exit status should be 0
28
28
  And the banner should be present
29
- And there should be a one line summary of what the app does
30
29
  And the banner should include the version
31
30
  And the banner should document that this app takes options
32
31
  And the following options should be documented:
@@ -41,7 +40,7 @@ Feature: My bootstrapped app kinda works
41
40
  |cend |which is required|
42
41
 
43
42
  Scenario Outline: files with one line stream
44
- Given a file named "fasta/hg19/unmasked/chr1.fa" with:
43
+ Given a file named "hg19/unmasked/chr1.fa" with:
45
44
  """
46
45
  >chr1
47
46
  AATCACACGTGCAGGAACCCTTTTCCAAAGGAGGGTCACGCTCACAGCTT
@@ -50,7 +49,7 @@ Feature: My bootstrapped app kinda works
50
49
  GTCTGTCTCCACCAGCACTTTGTGGGTGGGCTCTGTCCCCAGGAAATGCT
51
50
  C
52
51
  """
53
- And a file named "fasta/hg19/snp/chr1.subst.fa" with:
52
+ And a file named "hg19/snp/chr1.subst.fa" with:
54
53
  """
55
54
  >chr1
56
55
  AATCACACGTGCAGGAACCCTTTTCCAAAGGAGGGTCACGCTCACAGCTT
@@ -61,17 +60,16 @@ Feature: My bootstrapped app kinda works
61
60
  """
62
61
  When I successfully run `fasta_read <options>`
63
62
  Then the output should contain "<output>"
64
- Then the output should not contain "<noutput>"
65
63
 
66
64
  Scenarios: unmasked
67
- |options |output |noutput|
68
- |hg19 1 0 200|AATCACACGTGCAGGAACCCTTTTCCAAAGGAGGGTCACGCTCACAGCTTCTGGAGTAGGACATGGACTTGTCTTTTTGGAGGCCCATCCTCAACGCACCACAGTTGACTACATCAAGGTCTGCCTCTGATCTGGTGGGAGTGCTGGGTGGTCTGTCTCCACCAGCACTTTGTGGGTGGGCTCTGTCCCCAGGAAATGCT|ccaga |
65
+ |options |output |
66
+ |hg19 chr1 0 200|AATCACACGTGCAGGAACCCTTTTCCAAAGGAGGGTCACGCTCACAGCTTCTGGAGTAGGACATGGACTTGTCTTTTTGGAGGCCCATCCTCAACGCACCACAGTTGACTACATCAAGGTCTGCCTCTGATCTGGTGGGAGTGCTGGGTGGTCTGTCTCCACCAGCACTTTGTGGGTGGGCTCTGTCCCCAGGAAATGCT|
69
67
  Scenarios: snp
70
- |options |output |noutput|
71
- |hg19 1 0 200 --snp|AATCACACGTGCAGGAACCCTTTTCCAAAGGAGGGTCACGCTCACAGCTTCTGGAGTAGGACATGGACTTGTCTTTTTGGAGGCCCATCCTCAACGCACCACAGTTGACTACATCAAGGTCTGCCTCTGATCTGGTGGGAGTGCTGGGTGGTCTGTCTCCACCAGCACTTTGTGGGTGGGCTCTGTCCCCAGGAAATGCT|ccaga |
68
+ |options |output |
69
+ |hg19 chr1 0 200 --snp|AATCACACGTGCAGGAACCCTTTTCCAAAGGAGGGTCACGCTCACAGCTTCTGGAGTAGGACATGGACTTGTCTTTTTGGAGGCCCATCCTCAACGCACCACAGTTGACTACATCAAGGTCTGCCTCTGATCTGGTGGGAGTGCTGGGTGGTCTGTCTCCACCAGCACTTTGTGGGTGGGCTCTGTCCCCAGGAAATGCT|
72
70
 
73
71
  Scenario: Export to file
74
- When I successfully run `fasta_read hg19 1 0 200 --output=out.txt`
72
+ When I successfully run `fasta_read hg19 chr1 0 200 --output=out.txt`
75
73
  Then the file "out.txt" should contain:
76
74
  """
77
75
  AATCACACGTGCAGGAACCCTTTTCCAAAGGAGGGTCACGCTCACAGCTTCTGGAGTAGGACATGGACTTGTCTTTTTGGAGGCCCATCCTCAACGCACCACAGTTGACTACATCAAGGTCTGCCTCTGATCTGGTGGGAGTGCTGGGTGGTCTGTCTCCACCAGCACTTTGTGGGTGGGCTCTGTCCCCAGGAAATGCT
@@ -82,5 +80,5 @@ Feature: My bootstrapped app kinda works
82
80
  Then the stderr should contain "<output>"
83
81
  Scenarios: incorrect assembly/chromosome
84
82
  |options |output |
85
- |foo 12 0 3 --log-level=debug |the 'foo' assembly doesn't exist in directory structure |
86
- |hg19 99 0 3 --log-level=debug|the '99' chromosome doesn't exist in directory structure|
83
+ |foo chr12 0 3 --log-level=debug |the 'foo' assembly doesn't exist in directory structure |
84
+ |hg19 chr99 0 3 --log-level=debug|the 'chr99' chromosome doesn't exist in directory structure|
@@ -5,11 +5,9 @@ module FastaRead
5
5
  include Methadone::Main
6
6
  include Methadone::CLILogging
7
7
 
8
- attr_reader :separate_lines
9
-
10
8
  def initialize(fasta, chromosome, cstart, cend)
11
9
  @fasta = fasta.gsub("\n", "")
12
- @chromosome = ">chr#{chromosome}"
10
+ @chromosome = ">#{chromosome}"
13
11
  @cstart = cstart.to_i
14
12
  @cend = cend.to_i
15
13
  end
@@ -1,3 +1,3 @@
1
1
  module FastaRead
2
- VERSION = "1.1.0"
2
+ VERSION = "2.0.1"
3
3
  end
@@ -5,13 +5,13 @@ module FastaRead
5
5
  let(:fasta) {">chr1\nAATCACACGTGCAGGAACCCTTTTCCAAAGGAGGGTCACGCTCACAGCTT\nCTGGAGTAGGACATGGACTTGTCTTTTTGGAGGCCCATCCTCAACGCACC\nACAGTTGACTACATCAAGGTCTGCCTCTGATCTGGTGGGAGTGCTGGGTG\nGTCTGTCTCCACCAGCACTTTGTGGGTGGGCTCTGTCCCCAGGAAATGCT\nC"}
6
6
  describe "#real_cstart" do
7
7
  context "when @cstart is > 0" do
8
- subject {Sequence.new(fasta, "12", "5", "200").real_cstart}
8
+ subject {Sequence.new(fasta, "chr12", "5", "200").real_cstart}
9
9
  it "should add file header string length" do
10
10
  expect(subject).to eq 10
11
11
  end
12
12
  end
13
13
  context "when @cstart is 0" do
14
- subject {Sequence.new(fasta, "1", "0", "200").real_cstart}
14
+ subject {Sequence.new(fasta, "chr1", "0", "200").real_cstart}
15
15
  it "should not subtract 1" do
16
16
  expect(subject).to eq 5
17
17
  end
@@ -19,14 +19,14 @@ module FastaRead
19
19
  end
20
20
 
21
21
  describe "#real_cend" do
22
- subject {Sequence.new(fasta, "1", "0", "200").real_cend}
22
+ subject {Sequence.new(fasta, "chr1", "0", "200").real_cend}
23
23
  it "should subtract 1 and add file header string length" do
24
24
  expect(subject).to eq 204
25
25
  end
26
26
  end
27
27
 
28
28
  describe "#process" do
29
- subject {Sequence.new(fasta, "1", "0", "200").process}
29
+ subject {Sequence.new(fasta, "chr1", "0", "200").process}
30
30
  describe "with a continuous character stream" do
31
31
  it "builds a character sequence" do
32
32
  expect(subject).to eq "AATCACACGTGCAGGAACCCTTTTCCAAAGGAGGGTCACGCTCACAGCTTCTGGAGTAGGACATGGACTTGTCTTTTTGGAGGCCCATCCTCAACGCACCACAGTTGACTACATCAAGGTCTGCCTCTGATCTGGTGGGAGTGCTGGGTGGTCTGTCTCCACCAGCACTTTGTGGGTGGGCTCTGTCCCCAGGAAATGCT"
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: fasta_read
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.1.0
4
+ version: 2.0.1
5
5
  platform: ruby
6
6
  authors:
7
7
  - Andrea D'Amico
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2014-04-05 00:00:00.000000000 Z
11
+ date: 2014-04-07 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: methadone