dna_sequence_aligner 0.0.2
Sign up to get free protection for your applications and to get access to all the features.
- data/.gitignore +8 -0
- data/History +10 -0
- data/LICENSE +22 -0
- data/README.rdoc +58 -0
- data/Rakefile +45 -0
- data/VERSION +1 -0
- data/bin/dna_sequence_aligner +61 -0
- data/bin/dna_translator.rb +59 -0
- data/lib/bio/alignment/dna_sequence.rb +313 -0
- data/older/align_all.rb +160 -0
- data/older/align_to_template.rb +143 -0
- data/reference/clustalw_opts.txt +73 -0
- data/script/fasta_compile_annotated.rb +19 -0
- data/spec/align_spec.rb +67 -0
- data/spec/spec_helper.rb +6 -0
- data/spec/testfiles/HA-mKSR1-KSRF1.txt +19 -0
- data/spec/testfiles/HA-mKSR1-KSRF2.txt +20 -0
- data/spec/testfiles/HA-mKSR1-KSRF3.txt +20 -0
- data/spec/testfiles/HA-mKSR1-KSRF4.txt +20 -0
- data/spec/testfiles/HA-mKSR1-KSRF5.txt +20 -0
- data/spec/testfiles/HA-mKSR1-OXL33.txt +20 -0
- data/spec/testfiles/KSR1_mouse_NM_013571_in_HA_pREX.ANNOTATED.fasta +77 -0
- data/spec/testfiles/testcase.fasta +55 -0
- metadata +99 -0
data/spec/spec_helper.rb
ADDED
@@ -0,0 +1,19 @@
|
|
1
|
+
>100122-36_G07_HA-mKSR1-KSRF1.ab1 950 [cut-JTP]
|
2
|
+
GNNNNNCCAGAGATGCTGCGGCGCTGGGGGGCCAGCACGGAGGAGTGCAG
|
3
|
+
CCGCCTACAGCAAGCCCTTACCTGCCTTCGGAAGGTGACTGGCCTGGGAG
|
4
|
+
GGGAGCACAAAATGGACTCAGGTTGGAGTTCAACAGATGCTCGAGACAGT
|
5
|
+
AGCTTGGGGCCTCCCATGGACATGCTTTCCTCGCTGGGCAGAGCGGGTGC
|
6
|
+
CAGCACTCAGGGACCCCGTTCCATCTCCGTGTCCGCCCTGCCTGCCTCAG
|
7
|
+
ACTCTCCGGTCCCCGGCCTCAGTGAGGGCCTCTCGGACTCCTGTATCCCC
|
8
|
+
TTGCACACCAGCGGCCGGCTGACCCCCCGGGCCCTGCACAGCTTCATCAC
|
9
|
+
GCCCCCTACCACACCCCAGCTACGACGGCACGCCAAGCTGAAGCCACCAA
|
10
|
+
GGACACCCCCACCGCCAAGCCGCAAGGTCTTCCAGCTGCTCCCCAGCTTC
|
11
|
+
CCCACACTCACACGGAGCAAGTCCCACGAGTCCCAGCTGGGAAACCGAAT
|
12
|
+
CGACGACGTCACCCCGATGAAGTTTGAACTCCCTCATGGATCCCCACAGC
|
13
|
+
TGGTACGAAGGGATATCGGGCTCTCGGTGACGCACAGGTTCTCCACAAAG
|
14
|
+
TCATGGTTGTCACAGGTGTGCAACGTGTGCCAGAAGAGCATGATTTTTGG
|
15
|
+
CGTGAAGTGCAAACACTGCAGGTTAAAATGCCATAACAAGTGCACAAAGG
|
16
|
+
AAGCTCCCGCCTGCAGGATCACCTTCCTCCCACTGGCCAGGCTTCGGAGG
|
17
|
+
ACAGAGTCTGTCCCGTCAGATATCAACAACCCAGTGGACAGAGCAGCAGA
|
18
|
+
GCCCCATTTTGGAACCCTTCCCAAGGCCCTGACAAAGAAGGAGCACCCTC
|
19
|
+
CAGCCATGAACCTGGACTCCAGCAGCAACCCATCCTCC
|
@@ -0,0 +1,20 @@
|
|
1
|
+
>100122-36_I07_HA-mKSR1-KSRF2.ab1 950
|
2
|
+
NNTNANCGCANGTCTTCCAGCTGCTCCCCAGCTTCCCCACACTCACACGG
|
3
|
+
AGCAAGTCCCACGAGTCCCAGCTGGGAAACCGAATCGACGACGTCACCCC
|
4
|
+
GATGAAGTTTGAACTCCCTCATGGATCCCCACAGCTGGTACGAAGGGATA
|
5
|
+
TCGGGCTCTCGGTGACGCACAGGTTCTCCACAAAGTCATGGTTGTCACAG
|
6
|
+
GTGTGCAACGTGTGCCAGAAGAGCATGATTTTTGGCGTGAAGTGCAAACA
|
7
|
+
CTGCAGGTTAAAATGCCATAACAAGTGCACAAAGGAAGCTCCCGCCTGCA
|
8
|
+
GGATCACCTTCCTCCCACTGGCCAGGCTTCGGAGGACAGAGTCTGTCCCG
|
9
|
+
TCAGATATCAACAACCCAGTGGACAGAGCAGCAGAGCCCCATTTTGGAAC
|
10
|
+
CCTTCCCAAGGCCCTGACAAAGAAGGAGCACCCTCCAGCCATGAACCTGG
|
11
|
+
ACTCCAGCAGCAACCCATCCTCCACCACGTCCTCCACACCCTCATCGCCG
|
12
|
+
GCACCTTTCCTGACCTCATCTAATCCCTCCAGTGCCACCACGCCTCCCAA
|
13
|
+
CCCGTCACCTGGCCAGCGGGACAGCAGGTTCAGCTTCCCAGACATTTCAG
|
14
|
+
CCTGTTCTCAGGCAGCCCCGCTGTCCAGCACAGCCGACAGTACACGGCTC
|
15
|
+
GACGACCAGCCCAAAACAGATGTGCTAGGTGTTCACGAAGCAGAGGCTGA
|
16
|
+
GGAGCCTGAGGCTGGCAAGTCAGAGGCAGAGGATGACGAGGAGGATGAGG
|
17
|
+
TGGACGACCTCCCCAGCTCCCGCCGGCCCTGGAGGGGCCCCATCTCTCGA
|
18
|
+
AAGGCCAGCCAGACCAGCGTTTACCTGCAAGAGTGGGACATCCCCTTTGA
|
19
|
+
ACAGGTGGAACTGGGCGAGCCCATTGGACAGGGTCGCTGGGGCCGGGTGC
|
20
|
+
ACCGAGCCGTTGGCATGGCGAGGTGGNATTCGGCTGCTGGAGATGGANGN
|
@@ -0,0 +1,20 @@
|
|
1
|
+
>100122-36_K07_HA-mKSR1-KSRF3.ab1 950
|
2
|
+
NNNNNAGNCTGGGCANTCAGAGGCAGAGGANGACGAGGAGGATGAGGTGG
|
3
|
+
ACGACCTCCCCAGCTCCCGCCGGCCCTGGAGGGGCCCCATCTCTCGAAAG
|
4
|
+
GCCAGCCAGACCAGCGTTTACCTGCAAGAGTGGGACATCCCCTTTGAACA
|
5
|
+
GGTGGAACTGGGCGAGCCCATTGGACAGGGTCGCTGGGGCCGGGTGCACC
|
6
|
+
GAGGCCGTTGGCATGGCGAGGTGGCCATTCGGCTGCTGGAGATGGACGGC
|
7
|
+
CACAATCAGGACCACCTGAAGCTGTTCAAGAAAGAGGTGATGAACTACCG
|
8
|
+
GCAGACGCGGCATGAGAACGTGGTGCTCTTCATGGGGGCCTGCATGAACC
|
9
|
+
CACCTCACCTGGCCATTATCACCAGCTTCTGCAAGGGGCGGACATTGCAT
|
10
|
+
TCATTCGTGAGGGACCCCAAGACGTCTCTGGACATCAATAAGACTAGGCA
|
11
|
+
GATCGCCCAGGAGATCATCAAGGGCATGGGTTATCTTCATGCAAAAGGCA
|
12
|
+
TCGTGCACAAGGACCTCAAGTCCAAGAATGTCTTCTATGACAACGGCAAA
|
13
|
+
GTGGTCATCACAGACTTCGGGCTGTTTGGGATCTCGGGTGTGGTCCGAGA
|
14
|
+
GGAACGGCGCGAGAACCAACTGAAACTGTCACATGACTGGCTGTGCTACC
|
15
|
+
TGGCCCCCGAGATCGTACGAGAAATGATCCCAGGGCGGGACGAGGACCAG
|
16
|
+
CTGCCCTTCTCCAAAGCAGCCGATGTCTATGCATTCGGGACTGTGTGGTA
|
17
|
+
TGAACTACAGGCAAGAGACTGGCCCTTTAAGCACCAGCCTGCTGAGGCCT
|
18
|
+
TGATCTGGCAGATTGGAAGTGGGGAAGGAGTACGGCGCGTCCTGCATCCG
|
19
|
+
TCAGCCTGGGGAAGGAGTCGGCGAGATCCTGTCTGCCTGCTGGGCTTTCG
|
20
|
+
ATCTGCAGGAGAGACCCAGCTTCAGCCTGCTGATGGACATGCTGGAGAAG
|
@@ -0,0 +1,20 @@
|
|
1
|
+
>100122-36_M07_HA-mKSR1-KSRF4.ab1 950
|
2
|
+
CNNNNNAAANAAANTTGTTCCGGGGGGGGGAGGAGAANGGCTCCCTTTCT
|
3
|
+
CAAACCACCAAATTTTTATGCTTTGGGGAGGGGGGGGAAAAAACAACGGG
|
4
|
+
AAAAAGAGGGCCTTTTAAACCCCCCCCCTGGTGAGGTTTTTTTGGGAAAA
|
5
|
+
ATGGAAGGGGGGGAGGGAGTGGGGCGCGCCCTGCCCCCCGTCCGCGGGGG
|
6
|
+
AAAGGAAGGGGGAAAGTTCTTGTGTGCGGGGGGGTTTTTCTTTCGGGAGA
|
7
|
+
AAACCCCCTCTTCCCCCCTGTTGAGGAACTTGGGGAAGGGGCTCCCAAAG
|
8
|
+
CTGACCGGGCGCTTCCCCCCCCGGGGGCTTTTTGGAAGGTGGGCTAATTT
|
9
|
+
AAACACCAAAAAATTCTCCCCCCTTTTAAAAGGGTTGGCCGGGGGCCCCG
|
10
|
+
GGGACCCGGGAAACCAAAAATTGTGGGGCGCCCCCCACCAGGGGGCCCGG
|
11
|
+
GGTTTCGGGGGGGGCAATTTCCCCCCCTCCCCCCCCAACCCCTCCCCGCC
|
12
|
+
CGTTTTAATAATTTTGGGGGAAAACCCCGTTAGAAATAGGGCGGGGGTGT
|
13
|
+
TTTTTTTTTTTTTTTTTTTCCCCCCATTTGGCCTTTTTGGGAAAGGGGGG
|
14
|
+
GGGCCGGAAAACGGGCCCTTTTTTTTTGGAAAATTTTCTGGGGGTTTTTC
|
15
|
+
CCCCTCTCCAAAAAGAAATGCAGGGTTTGTAAATTGTCATGAAAAAATCT
|
16
|
+
TTTCCTCTGAAATTTTTTTAAAAAAAAACAACTTCTGTAGCGACTTTTTG
|
17
|
+
CAGGCAGCGAAACCCCCCCCTGGCGACGGGTCCTTCTGCGGAAAAACCCA
|
18
|
+
TGTGTAAAAATACNCGGGAAAGCGCGGNACACCNNNTCGCATGTNNTGAT
|
19
|
+
TTGTATATTNNAGAAAAAAAGAAAGTGTCTCTCCTCGAGCTTATTAAAGA
|
20
|
+
GGGGNTANGGGATGCCNAGAGGCCCCCNNTGTANNGNNTCTGTCGGGGGN
|
@@ -0,0 +1,20 @@
|
|
1
|
+
>100122-36_O07_HA-mKSR1-KSRF5.ab1 950
|
2
|
+
GGTTTTNTNACTGGCTGTGCTACCTGGCCCCGAGATCGTACGAGAAATGA
|
3
|
+
TCCCAGGGCGGGACGAGGACCAGCTGCCCTTCTCCAAAGCAGCCGATGTC
|
4
|
+
TATGCATTCGGGACTGTGTGGTATGAACTACAGGCAAGAGACTGGCCCTT
|
5
|
+
TAAGCACCAGCCTGCTGAGGCCTTGATCTGGCAGATTGGAAGTGGGGAAG
|
6
|
+
GAGTACGGCGCGTCCTGGCATCCGTCAGCCTGGGGAAGGAAGTCGGCGAG
|
7
|
+
ATCCTGTCTGCCTGCTGGGCTTTCGATCTGCAGGAGAGACCCAGCTTCAG
|
8
|
+
CCTGCTGATGGACATGCTGGAGAGGCTGCCCAAGCTGAACCGGCGGCTCT
|
9
|
+
CCCACCCTGGGCACTTTTGGAAGTCGGCTGACATTAACAGCAGCAAAGTC
|
10
|
+
ATGCCCCGCTTTGAAAGGTTTGGCCTGGGGACCCTGGAGTCCGGTAATCC
|
11
|
+
AAAGATGTAGTGCGGCCGCCAGCACAGTGGCCATGGCATCTAGGGCGGCC
|
12
|
+
AATTCCGCCCTCNTTCCCCCCCCACCCTCTCCCTCCCCCCTGTAACGTTA
|
13
|
+
CTGGCCGAAGCCGCTTGGAATAAGGCCGGTGTGCGTTTGTCTATATGTTA
|
14
|
+
TTTTCCACCATATTGCCGTCTTTTGGCAATGTGAGGGCCCGGAAACCTGG
|
15
|
+
CCCTGTCTTCTTGACGAGCATTCCTAGGGGTCTTTCCCCTCTCGCCAAAG
|
16
|
+
GAATGCAAGGTCTGTTGAATGTCGTGAAGGAAGCAGTTCCTCTGGAAGCT
|
17
|
+
TCTTGAAGACAAACAACGTCTGTAGCGACCCTTTGCAGGCAGCGGAACCC
|
18
|
+
CCCACCTGGCGACAGGTGCCTCTGCGGCCAAAAGCCACGTGTATAAGATA
|
19
|
+
CACCTGCAAAGGCGGCACAACCCCAGTGCCACGTTGTGAGTTGGATAGTT
|
20
|
+
GTGGAAAGAGTCAAATGGCTCTCCCTCAAGCGTATTCAACAAGGGGCTGA
|
@@ -0,0 +1,20 @@
|
|
1
|
+
>100122-36_E07_HA-mKSR1-OXL33.ab1 950
|
2
|
+
CNCNCNNGNNCNNCCGCCCACGTGAAGGCTGCCGACCCCGGGGGTGGACCA
|
3
|
+
TCCTCTAGACTGCCGGATCCCAGTGTGGTGGTACGGGAATTATGTACCCA
|
4
|
+
TACGATGTTCCAGATTACGCTCTTATGGCCATGGAGGCCCGAATTCAAAT
|
5
|
+
GGATAGAGCGGCGTTGCGCGCGGCAGCGATGGGCGAGAAAAAGGAGGGCG
|
6
|
+
GCGGCGGGGGCGCCGCGGCGGATGGGGGCGCAGGGGCCGCCGTCAGCCGG
|
7
|
+
GCGCTGCAGCAGTGCGGCCAGCTGCAGAAGCTCATCGATATCTCCATCGG
|
8
|
+
CAGTCTGCGCGGGCTGCGCACCAAGTGCTCAGTGTCTAACGACCTCACAC
|
9
|
+
AGCAGGAGATCCGGACCCTAGAGGCAAAGCTGGTGAAATACATTTGCAAG
|
10
|
+
CAGCAGCAGAGCAAGCTTAGTGTGACCCCAAGCGACAGGACCGCCGAGCT
|
11
|
+
CAACAGCTACCCACGCTTCAGTGACTGGCTGTACATCTTCAACGTGAGGC
|
12
|
+
CTGAGGTGGTGCAGGAGATCCCCCAAGAGCTCACACTGGATGCTCTGCTG
|
13
|
+
GAGATGGACGAGGCCAAAGCCAAGGAGATGCTGCGGCGCTGGGGGGCCAG
|
14
|
+
CACGGAGGAGTGCAGCCGCCTACAGCAAGCCCTTACCTGCCTTCGGAAGG
|
15
|
+
TGACTGGCCTGGGAGGGGAGCACAAAATGGACTCAGGTTGGAGTTCAACA
|
16
|
+
GATGCTCGAGACAGTAGCTTGGGGCCTCCCATGGACATGCTTTCCTCGCT
|
17
|
+
GGGCAGAGCGGGTGCCAGCACTCAGGGACCCCGTTCCATCTCCGTGTCCG
|
18
|
+
CCCTGCCTGCCTCAGACTCTCCGGTCCCCGGCCTCAGTGAGGGCCTCTCG
|
19
|
+
GACTCCTGTATCCCCTTGCACACCAGCGGCCGGCTGACCCCCCGGGCCCT
|
20
|
+
GCACAGCTTCATCACGCCCCCTACCACACCCCAGCTACGACAGCACGCCA
|
@@ -0,0 +1,77 @@
|
|
1
|
+
>HA-mKSR1-pREX-CD2 NM_013571.2
|
2
|
+
# this is the leader found in the pRex vector
|
3
|
+
GCTTGGATACACGCCGCCCACGTGAAGGCTGCCGACCCCGGGGGTGGACCATCCTCTAGACTGCC
|
4
|
+
# BamHI
|
5
|
+
GGATCC
|
6
|
+
CAGTGTGGTGGTAGGGAATTA
|
7
|
+
# this is the HA tag itself
|
8
|
+
ATGTACCCATACGATGTTCCAGATTACGCT
|
9
|
+
CTTATGGCCATGGAGGCCCGAATTC
|
10
|
+
# not sure how to account for these two A's but there they are
|
11
|
+
AA
|
12
|
+
# EXTRA BAGGAGE:
|
13
|
+
#GGTCGACCGAGATCTCTCGAGGTACC
|
14
|
+
# NotI cleavage site
|
15
|
+
#GCGGCCGC
|
16
|
+
#CAGCACAGTGGCCATGGCATCTAG
|
17
|
+
#This is the 5' UTR in mouse
|
18
|
+
#CTCGGGGCTTTCCTGCCGAGGCGCCCGTGTCCCCGGGCTCCTCGCCTCGGCCCCCAGCGGCCCCGATGCCGAGGC
|
19
|
+
#This is where translation actually begins:
|
20
|
+
ATGGATAGAGCGGCGTTGCGCGCGGCAGCGATGGGCGAGAAAAAGGAGGGCGGCGGCGGGGGCGC
|
21
|
+
CGCGGCGGACGGGGGCGCAGGGGCCGCCGTCAGCCGGGCGCTGCAGCAGTGCGGCCAGCTGCAGAAGCTC
|
22
|
+
ATCGATATCTCCATCGGCAGTCTGCGCGGGCTGCGCACCAAGTGCTCAGTGTCTAACGACCTCACACAGC
|
23
|
+
AGGAGATCCGGACCCTAGAGGCAAAGCTGGTGAAATACATTTGCAAGCAGCAGCAGAGCAAGCTTAGTGT
|
24
|
+
GACCCCAAGCGACAGGACCGCCGAGCTCAACAGCTACCCACGCTTCAGTGACTGGCTGTACATCTTCAAC
|
25
|
+
GTGAGGCCTGAGGTGGTGCAGGAGATCCCCCAAGAGCTCACACTGGATGCTCTGCTGGAGATGGACGAGG
|
26
|
+
CCAAAGCCAAGGAGATGCTGCGGCGCTGGGGGGCCAGCACGGAGGAGTGCAGCCGCCTACAGCAAGCCCT
|
27
|
+
TACCTGCCTTCGGAAGGTGACTGGCCTGGGAGGGGAGCACAAAATGGACTCAGGTTGGAGTTCAACAGAT
|
28
|
+
GCTCGAGACAGTAGCTTGGGGCCTCCCATGGACATGCTTTCCTCGCTGGGCAGAGCGGGTGCCAGCACTC
|
29
|
+
AGGGACCCCGTTCCATCTCCGTGTCCGCCCTGCCTGCCTCAGACTCTCCGGTCCCCGGCCTCAGTGAGGG
|
30
|
+
CCTCTCGGACTCCTGTATCCCCTTGCACACCAGCGGCCGGCTGACCCCCCGGGCCCTGCACAGCTTCATC
|
31
|
+
ACGCCCCCTACCACACCCCAGCTACGACGGCACGCCAAGCTGAAGCCACCAAGGACACCCCCACCGCCAA
|
32
|
+
GCCGCAAGGTCTTCCAGCTGCTCCCCAGCTTCCCCACACTCACACGGAGCAAGTCCCACGAGTCCCAGCT
|
33
|
+
GGGAAACCGAATCGACGACGTCACCCCGATGAAGTTTGAACTCCCTCATGGATCCCCACAGCTGGTACGA
|
34
|
+
AGGGATATCGGGCTCTCGGTGACGCACAGGTTCTCCACAAAGTCATGGTTGTCACAGGTGTGCAACGTGT
|
35
|
+
GCCAGAAGAGCATGATTTTTGGCGTGAAGTGCAAACACTGCAGGTTAAAATGCCATAACAAGTGCACAAA
|
36
|
+
GGAAGCTCCCGCCTGCAGGATCACCTTCCTCCCACTGGCCAGGCTTCGGAGGACAGAGTCTGTCCCGTCA
|
37
|
+
GATATCAACAACCCAGTGGACAGAGCAGCAGAGCCCCATTTTGGAACCCTTCCCAAGGCCCTGACAAAGA
|
38
|
+
AGGAGCACCCTCCAGCCATGAACCTGGACTCCAGCAGCAACCCATCCTCCACCACGTCCTCCACACCCTC
|
39
|
+
ATCGCCGGCACCTTTCCTGACCTCATCTAATCCCTCCAGTGCCACCACGCCTCCCAACCCGTCACCTGGC
|
40
|
+
CAGCGGGACAGCAGGTTCAGCTTCCCAGACATTTCAGCCTGTTCTCAGGCAGCCCCGCTGTCCAGCACAG
|
41
|
+
CCGACAGTACACGGCTCGACGACCAGCCCAAAACAGATGTGCTAGGTGTTCACGAAGCAGAGGCTGAGGA
|
42
|
+
GCCTGAGGCTGGCAAGTCAGAGGCAGAGGATGACGAGGAGGATGAGGTGGACGACCTCCCCAGCTCCCGC
|
43
|
+
CGGCCCTGGAGGGGCCCCATCTCTCGAAAGGCCAGCCAGACCAGCGTTTACCTGCAAGAGTGGGACATCC
|
44
|
+
CCTTTGAACAGGTGGAACTGGGCGAGCCCATTGGACAGGGTCGCTGGGGCCGGGTGCACCGAGGCCGTTG
|
45
|
+
GCATGGCGAGGTGGCCATTCGGCTGCTGGAGATGGACGGCCACAATCAGGACCACCTGAAGCTGTTCAAG
|
46
|
+
AAAGAGGTGATGAACTACCGGCAGACGCGGCATGAGAACGTGGTGCTCTTCATGGGGGCCTGCATGAACC
|
47
|
+
CACCTCACCTGGCCATTATCACCAGCTTCTGCAAGGGGCGGACATTGCATTCATTCGTGAGGGACCCCAA
|
48
|
+
GACGTCTCTGGACATCAATAAGACTAGGCAGATCGCCCAGGAGATCATCAAGGGCATGGGTTATCTTCAT
|
49
|
+
GCAAAAGGCATCGTGCACAAGGACCTCAAGTCCAAGAATGTCTTCTATGACAACGGCAAAGTGGTCATCA
|
50
|
+
CAGACTTCGGGCTGTTTGGGATCTCGGGTGTGGTCCGAGAGGAACGGCGCGAGAACCAACTGAAACTGTC
|
51
|
+
ACATGACTGGCTGTGCTACCTGGCCCCCGAGATCGTACGAGAAATGATCCCGGGGCGGGACGAGGACCAG
|
52
|
+
CTGCCCTTCTCCAAAGCAGCCGATGTCTATGCATTCGGGACTGTGTGGTATGAACTACAGGCAAGAGACT
|
53
|
+
GGCCCTTTAAGCACCAGCCTGCTGAGGCCTTGATCTGGCAGATTGGAAGTGGGGAAGGAGTACGGCGCGT
|
54
|
+
CCTGGCATCCGTCAGCCTGGGGAAGGAAGTCGGCGAGATCCTGTCTGCCTGCTGGGCTTTCGATCTGCAG
|
55
|
+
GAGAGACCCAGCTTCAGCCTGCTGATGGACATGCTGGAGAGGCTGCCCAAGCTGAACCGGCGGCTCTCCC
|
56
|
+
ACCCTGGGCACTTTTGGAAGTCGGCTGACATTAACAGCAGCAAAGTCATGCCCCGCTTTGAAAGGTTTGG
|
57
|
+
CCTGGGGACCCTGGAGTCCGGTAATCCAAAGATGTAGCCAGCCCTGCACGTTCATGCAGAGAGTGTCTTC
|
58
|
+
CTTTCGAAAACATGATCACGAAACATGCAGACCACCACCTCAAGGAATCAGAAGCATTGCATCCCAAGCT
|
59
|
+
GCGGACTGGGAGCGTGTCTCCTCCCTAAAGGACGTGCGTGCGTGCGTGCGTGCGTGCGTGCGTGCGTGCG
|
60
|
+
TCACCAAGGTGTGTGGAGCTCAGGATCGCAGCCATACACGCAACTCCAGATGATACCACTACCGCCAGTG
|
61
|
+
TTTACACAGAGGTTTCTGCCTGGCAAGCTTGGTATTTTACAGTAGGTGAAGATCATTCTGCAGAAGGGTG
|
62
|
+
CTGGCACAGTGGAGCAGCACGGATGTCCCCAGCCCCCGTTCTGGAAGACCCTACAGCTGTGAGAGGCCCA
|
63
|
+
GGGTTGAGCCAGATGAAAGAAAAGCTGCGTGGGTGTGGGCTGTACCCGGAAAAGGGCAGGTGGCAGGAGG
|
64
|
+
TTTGCCTTGGCCTGTGCTTGGGCCGAGAACCACACTAAGGAGCAGCAGCCTGAGTTAGGAATCTATCTGG
|
65
|
+
ATTACGGGGATCAGAGTTCCTGGAGAGTGGACTCAGTTTCTGCTCTGATCCAGGCCTGTTGTGCTTTTTT
|
66
|
+
TTTTTCCCCCTTAAAAAAAAAAAAGTACAGACAGAATCTCAGCGGCTTCTAGACTGATCTGATGGATCTT
|
67
|
+
AGCCCGGCTTCTACTGCGGGGGGGAGGGGGGGAGGGATAGCCACATATCTGTGGAGACACCCACTTCTTT
|
68
|
+
ATCTGAGGCCTCCAGGTAGGCACAAAGGCTGTGGAACTCAGCCTCTATCATCAGACACCCCCCCCCAATG
|
69
|
+
CCTCATTGACCCCCTTCCCCCAGAGCCAAGGGCTAGCCCATCGGGTGTGTGTACAGTAAGTTCTTGGTGA
|
70
|
+
AGGAGAACAGGGACGTTGGCAGAAGCAGTTTGCAGTGGCCCTAGCATCTTAAAACCCATTGTCTGTCACA
|
71
|
+
CCAGAAGGTTCTAGACCTACCACCACTTCCCTTCCCCATCTCATGGAAACCTTTTAGCCCATTCTGACCC
|
72
|
+
CTGTGTGTGCTCTGAGCTCAGATCGGGTTATGAGACCGCCCAGGCACATCAGTCAGGGAGGCTCTGATGT
|
73
|
+
GAGCCGCAGACCTCTGTGTTCATTCCTATGAGCTGGAGGGGCTGGACTGGGTGGGGTCAGATGTGCTTGG
|
74
|
+
CAGGAACTGTCAGCTGCTGAGCAGGGTGGTCCCTGAGCGGAGGATAAGCAGCATCAGACTCCACAACCAG
|
75
|
+
AGGAAGAAAGAAATGGGGATGGAGCGGAGACCCACGGGCTGAGTCCCGCTGTGGAGTGGCCTTGCAGCTC
|
76
|
+
CCTCTCAGTTAAAACTCCCAGTAAAGCCACAGTTCTCCGAGCACCCAAGTCTGCTCCAGCCGTCTCTTAA
|
77
|
+
AACAGGCCACTCTCTGAGAAGGAATTC
|
@@ -0,0 +1,55 @@
|
|
1
|
+
>HA-mKSR1-pREX-CD2 NM_013571.2
|
2
|
+
# this is the leader found in the pRex vector
|
3
|
+
GCTTGGATACACGCCGCCCACGTGAAGGCTGCCGACCCCGGGGGTGGACCATCCTCTAGACTGCC
|
4
|
+
# BamHI
|
5
|
+
GGATCC
|
6
|
+
CAGTGTGGTGGTAGGGAATTA
|
7
|
+
# this is the HA tag itself
|
8
|
+
ATGTACCCATACGATGTTCCAGATTACGCT
|
9
|
+
CTTATGGCCATGGAGGCCCGAATTC
|
10
|
+
# not sure how to account for these two A's but there they are
|
11
|
+
AA
|
12
|
+
# EXTRA BAGGAGE:
|
13
|
+
#GGTCGACCGAGATCTCTCGAGGTACC
|
14
|
+
# NotI cleavage site
|
15
|
+
#GCGGCCGC
|
16
|
+
#CAGCACAGTGGCCATGGCATCTAG
|
17
|
+
#This is the 5' UTR in mouse
|
18
|
+
#CTCGGGGCTTTCCTGCCGAGGCGCCCGTGTCCCCGGGCTCCTCGCCTCGGCCCCCAGCGGCCCCGATGCCGAGGC
|
19
|
+
#This is where translation actually begins:
|
20
|
+
ATGGATAGAGCGGCGTTGCGCGCGGCAGCGATGGGCGAGAAAAAGGAGGGCGGCGGCGGGGGCGC
|
21
|
+
CGCGGCGGACGGGGGCGCAGGGGCCGCCGTCAGCCGGGCGCTGCAGCAGTGCGGCCAGCTGCAGAAGCTC
|
22
|
+
ATCGATATCTCCATCGGCAGTCTGCGCGGGCTGCGCACCAAGTGCTCAGTGTCTAACGACCTCACACAGC
|
23
|
+
AGGAGATCCGGACCCTAGAGGCAAAGCTGGTGAAATACATTTGCAAGCAGCAGCAGAGCAAGCTTAGTGT
|
24
|
+
GACCCCAAGCGACAGGACCGCCGAGCTCAACAGCTACCCACGCTTCAGTGACTGGCTGTACATCTTCAAC
|
25
|
+
GTGAGGCCTGAGGTGGTGCAGGAGATCCCCCAAGAGCTCACACTGGATGCTCTGCTGGAGATGGACGAGG
|
26
|
+
CCAAAGCCAAGGAGATGCTGCGGCGCTGGGGGGCCAGCACGGAGGAGTGCAGCCGCCTACAGCAAGCCCT
|
27
|
+
TACCTGCCTTCGGAAGGTGACTGGCCTGGGAGGGGAGCACAAAATGGACTCAGGTTGGAGTTCAACAGAT
|
28
|
+
GCTCGAGACAGTAGCTTGGGGCCTCCCATGGACATGCTTTCCTCGCTGGGCAGAGCGGGTGCCAGCACTC
|
29
|
+
AGGGACCCCGTTCCATCTCCGTGTCCGCCCTGCCTGCCTCAGACTCTCCGGTCCCCGGCCTCAGTGAGGG
|
30
|
+
CCTCTCGGACTCCTGTATCCCCTTGCACACCAGCGGCCGGCTGACCCCCCGGGCCCTGCACAGCTTCATC
|
31
|
+
ACGCCCCCTACCACACCCCAGCTACGACGGCACGCCAAGCTGAAGCCACCAAGGACACCCCCACCGCCAA
|
32
|
+
GCCGCAAGGTCTTCCAGCTGCTCCCCAGCTTCCCCACACTCACACGGAGCAAGTCCCACGAGTCCCAGCT
|
33
|
+
GGGAAACCGAATCGACGACGTCACCCCGATGAAGTTTGAACTCCCTCATGGATCCCCACAGCTGGTACGA
|
34
|
+
AGGGATATCGGGCTCTCGGTGACGCACAGGTTCTCCACAAAGTCATGGTTGTCACAGGTGTGCAACGTGT
|
35
|
+
GCCAGAAGAGCATGATTTTTGGCGTGAAGTGCAAACACTGCAGGTTAAAATGCCATAACAAGTGCACAAA
|
36
|
+
>100122-36_E07_HA-mKSR1-OXL33.ab1 950
|
37
|
+
CNCNCNNGNNCNNCCGCCCACGTGAAGGCTGCCGACCCCGGGGGTGGACCA
|
38
|
+
TCCTCTAGACTGCCGGATCCCAGTGTGGTGGTACGGGAATTATGTACCCA
|
39
|
+
TACGATGTTCCAGATTACGCTCTTATGGCCATGGAGGCCCGAATTCAAAT
|
40
|
+
GGATAGAGCGGCGTTGCGCGCGGCAGCGATGGGCGAGAAAAAGGAGGGCG
|
41
|
+
GCGGCGGGGGCGCCGCGGCGGATGGGGGCGCAGGGGCCGCCGTCAGCCGG
|
42
|
+
GCGCTGCAGCAGTGCGGCCAGCTGCAGAAGCTCATCGATATCTCCATCGG
|
43
|
+
CAGTCTGCGCGGGCTGCGCACCAAGTGCTCAGTGTCTAACGACCTCACAC
|
44
|
+
AGCAGGAGATCCGGACCCTAGAGGCAAAGCTGGTGAAATACATTTGCAAG
|
45
|
+
CAGCAGCAGAGCAAGCTTAGTGTGACCCCAAGCGACAGGACCGACCGAGCT
|
46
|
+
CAACAGCTACCCACGCTTCAGTGACTGGCTGTACATCTTCAACGTGAGGC
|
47
|
+
CTGAGGTGGTGCAGGAGATCCCCCAAGAGCTCACACTGGATGCTCTGCTG
|
48
|
+
GAGATGGACGAGGCCAAAGCCAAGGAGATGCTGCGGCGCTGGGGGGCCAG
|
49
|
+
CACGGAGGAGTGCAGCCGCCTACAGCAAGCCCTTACCTGCCTTCGGAAGG
|
50
|
+
TGACTGGCCTGGGAGGGGAGCACAAAATGGACTCAGGTTGGAGTTCAACA
|
51
|
+
GATGCTCGAGACAGTAGCTTGGGGCCTCCCATGGACATGCTTTCCTCGCT
|
52
|
+
GGGCAGAGCGGGTGCCAGCACTCAGGGACCCCGTTCCATCTCCGTGTCCG
|
53
|
+
CCCTGCCTGCCTCAGACTCTCCGGTCCCCGGCCTCAGTGAGGGCCTCTCG
|
54
|
+
GACTCCTGTATCCCCTTGCACACCAGCGGCCGGCTGACCCCCCGGGCCCT
|
55
|
+
GCACAGCTTCATCACGCCCCCTACCACACCCCAGCTACGACAGCACGCCA
|
metadata
ADDED
@@ -0,0 +1,99 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: dna_sequence_aligner
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 0.0.2
|
5
|
+
platform: ruby
|
6
|
+
authors:
|
7
|
+
- John T. Prince
|
8
|
+
autorequire:
|
9
|
+
bindir: bin
|
10
|
+
cert_chain: []
|
11
|
+
|
12
|
+
date: 2010-02-12 00:00:00 -07:00
|
13
|
+
default_executable:
|
14
|
+
dependencies:
|
15
|
+
- !ruby/object:Gem::Dependency
|
16
|
+
name: bio
|
17
|
+
type: :runtime
|
18
|
+
version_requirement:
|
19
|
+
version_requirements: !ruby/object:Gem::Requirement
|
20
|
+
requirements:
|
21
|
+
- - ">="
|
22
|
+
- !ruby/object:Gem::Version
|
23
|
+
version: "0"
|
24
|
+
version:
|
25
|
+
- !ruby/object:Gem::Dependency
|
26
|
+
name: spec-more
|
27
|
+
type: :development
|
28
|
+
version_requirement:
|
29
|
+
version_requirements: !ruby/object:Gem::Requirement
|
30
|
+
requirements:
|
31
|
+
- - ">="
|
32
|
+
- !ruby/object:Gem::Version
|
33
|
+
version: "0"
|
34
|
+
version:
|
35
|
+
description: does high pairwise alignment of sequencing reads with a template using bioruby and clustalw. gives template-centric output.
|
36
|
+
email: jtprince@gmail.com
|
37
|
+
executables:
|
38
|
+
- dna_sequence_aligner
|
39
|
+
- dna_translator.rb
|
40
|
+
extensions: []
|
41
|
+
|
42
|
+
extra_rdoc_files:
|
43
|
+
- LICENSE
|
44
|
+
- README.rdoc
|
45
|
+
files:
|
46
|
+
- .gitignore
|
47
|
+
- History
|
48
|
+
- LICENSE
|
49
|
+
- README.rdoc
|
50
|
+
- Rakefile
|
51
|
+
- VERSION
|
52
|
+
- bin/dna_sequence_aligner
|
53
|
+
- bin/dna_translator.rb
|
54
|
+
- lib/bio/alignment/dna_sequence.rb
|
55
|
+
- older/align_all.rb
|
56
|
+
- older/align_to_template.rb
|
57
|
+
- reference/clustalw_opts.txt
|
58
|
+
- script/fasta_compile_annotated.rb
|
59
|
+
- spec/align_spec.rb
|
60
|
+
- spec/spec_helper.rb
|
61
|
+
- spec/testfiles/HA-mKSR1-KSRF1.txt
|
62
|
+
- spec/testfiles/HA-mKSR1-KSRF2.txt
|
63
|
+
- spec/testfiles/HA-mKSR1-KSRF3.txt
|
64
|
+
- spec/testfiles/HA-mKSR1-KSRF4.txt
|
65
|
+
- spec/testfiles/HA-mKSR1-KSRF5.txt
|
66
|
+
- spec/testfiles/HA-mKSR1-OXL33.txt
|
67
|
+
- spec/testfiles/KSR1_mouse_NM_013571_in_HA_pREX.ANNOTATED.fasta
|
68
|
+
- spec/testfiles/testcase.fasta
|
69
|
+
has_rdoc: true
|
70
|
+
homepage: http://jtprince.github.com/dna_sequence_aligner
|
71
|
+
licenses: []
|
72
|
+
|
73
|
+
post_install_message:
|
74
|
+
rdoc_options:
|
75
|
+
- --charset=UTF-8
|
76
|
+
require_paths:
|
77
|
+
- lib
|
78
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
79
|
+
requirements:
|
80
|
+
- - ">="
|
81
|
+
- !ruby/object:Gem::Version
|
82
|
+
version: "0"
|
83
|
+
version:
|
84
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
85
|
+
requirements:
|
86
|
+
- - ">="
|
87
|
+
- !ruby/object:Gem::Version
|
88
|
+
version: "0"
|
89
|
+
version:
|
90
|
+
requirements: []
|
91
|
+
|
92
|
+
rubyforge_project:
|
93
|
+
rubygems_version: 1.3.5
|
94
|
+
signing_key:
|
95
|
+
specification_version: 3
|
96
|
+
summary: does high pairwise alignment of sequencing reads with a template
|
97
|
+
test_files:
|
98
|
+
- spec/align_spec.rb
|
99
|
+
- spec/spec_helper.rb
|