egor 0.0.5 → 0.9.0
Sign up to get free protection for your applications and to get access to all the features.
- data/History.txt +28 -26
- data/Manifest.txt +12 -8
- data/README.rdoc +206 -54
- data/Rakefile +9 -9
- data/egor.gemspec +13 -15
- data/lib/egor.rb +1 -1
- data/lib/egor/cli.rb +535 -168
- data/lib/egor/environment.rb +34 -0
- data/lib/egor/environment_class_hash.rb +20 -0
- data/lib/egor/environment_feature.rb +26 -0
- data/lib/egor/environment_feature_array.rb +12 -0
- data/lib/egor/heatmap_array.rb +111 -0
- data/lib/narray_extensions.rb +3 -2
- data/lib/nmatrix_extensions.rb +227 -6
- data/lib/string_extensions.rb +17 -0
- data/test/egor/test_cli.rb +9 -0
- data/test/egor/test_environment_class_hash.rb +25 -0
- data/test/egor/test_environment_feature.rb +29 -0
- data/test/test_math_extensions.rb +11 -0
- data/test/test_narray_extensions.rb +14 -0
- data/test/test_string_extensions.rb +11 -0
- data/website/index.html +5 -76
- data/website/index.txt +183 -18
- data/website/stylesheets/screen.css +0 -1
- metadata +27 -20
- data/lib/enumerable_extensions.rb +0 -11
- data/lib/environment.rb +0 -58
- data/lib/environment_class_hash.rb +0 -18
- data/lib/environment_feature.rb +0 -14
- data/lib/environment_feature_array.rb +0 -10
- data/test/test_egor_cli.rb +0 -8
- data/test/test_enumerable_extensions.rb +0 -16
- data/test/test_environment_feature.rb +0 -11
@@ -0,0 +1,17 @@
|
|
1
|
+
module StringExtensions
|
2
|
+
|
3
|
+
def remove_internal_spaces
|
4
|
+
gsub(/[\n|\r|\s]+/, '')
|
5
|
+
end
|
6
|
+
|
7
|
+
def rgb_to_integer
|
8
|
+
if self.length == 7 # '#FF00FF'
|
9
|
+
Integer(self.gsub('#', '0x'))
|
10
|
+
else
|
11
|
+
raise "#{self} doesn't seem to be a proper RGB code."
|
12
|
+
end
|
13
|
+
end
|
14
|
+
end
|
15
|
+
|
16
|
+
String.send :include, StringExtensions
|
17
|
+
|
@@ -0,0 +1,25 @@
|
|
1
|
+
$:.unshift File.join(File.dirname(__FILE__), '..', '..', 'lib', 'egor')
|
2
|
+
|
3
|
+
require 'test/unit'
|
4
|
+
require 'environment_class_hash'
|
5
|
+
|
6
|
+
class TestEnvironmentClassHash < Test::Unit::TestCase
|
7
|
+
|
8
|
+
include Egor
|
9
|
+
|
10
|
+
def setup
|
11
|
+
@env_cls = EnvironmentClassHash.new
|
12
|
+
end
|
13
|
+
|
14
|
+
def test_group_by_non_residue_labels
|
15
|
+
assert(true)
|
16
|
+
end
|
17
|
+
|
18
|
+
def test_groups_sorted_by_residue_labels
|
19
|
+
assert(true)
|
20
|
+
end
|
21
|
+
|
22
|
+
def group_size
|
23
|
+
assert(true)
|
24
|
+
end
|
25
|
+
end
|
@@ -0,0 +1,29 @@
|
|
1
|
+
$:.unshift File.join(File.dirname(__FILE__), '..', '..', 'lib', 'egor')
|
2
|
+
|
3
|
+
require 'test/unit'
|
4
|
+
require 'environment_feature'
|
5
|
+
|
6
|
+
class TestEnvironmentFeature < Test::Unit::TestCase
|
7
|
+
|
8
|
+
include Egor
|
9
|
+
|
10
|
+
def setup
|
11
|
+
@env_ftr = EnvironmentFeature.new('Secondary Structure',
|
12
|
+
'HEPC'.split(''),
|
13
|
+
'HEPC'.split(''),
|
14
|
+
'T',
|
15
|
+
'F')
|
16
|
+
end
|
17
|
+
|
18
|
+
def test_to_s
|
19
|
+
assert_equal('Secondary Structure;HEPC;HEPC;T;F', @env_ftr.to_s)
|
20
|
+
end
|
21
|
+
|
22
|
+
def test_constrained?
|
23
|
+
assert(@env_ftr.constrained?)
|
24
|
+
end
|
25
|
+
|
26
|
+
def silent?
|
27
|
+
assert(!@env_ftr.silent?)
|
28
|
+
end
|
29
|
+
end
|
@@ -0,0 +1,14 @@
|
|
1
|
+
$:.unshift File.join(File.dirname(__FILE__), "..", "lib")
|
2
|
+
|
3
|
+
require "test/unit"
|
4
|
+
require "narray_extensions"
|
5
|
+
|
6
|
+
class TestArrayExtensions < Test::Unit::TestCase
|
7
|
+
|
8
|
+
def test_pretty_string(opts={})
|
9
|
+
m = NArray.float(3).indgen
|
10
|
+
result ="# A B C\n" +
|
11
|
+
"VAL 0.00 1.00 2.00"
|
12
|
+
assert_equal(result, m.pretty_string(:col_header => %w[A B C], :row_header => 'VAL'))
|
13
|
+
end
|
14
|
+
end
|
@@ -0,0 +1,11 @@
|
|
1
|
+
$:.unshift File.join(File.dirname(__FILE__), "..", "lib")
|
2
|
+
|
3
|
+
require "test/unit"
|
4
|
+
require "string_extensions"
|
5
|
+
|
6
|
+
class TestStringExtensions < Test::Unit::TestCase
|
7
|
+
|
8
|
+
def test_remove_internal_spaces
|
9
|
+
assert_equal("hellosemin", "he ll o\r\n sem in\r \n".remove_internal_spaces)
|
10
|
+
end
|
11
|
+
end
|
data/website/index.html
CHANGED
@@ -2,86 +2,15 @@
|
|
2
2
|
"http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">
|
3
3
|
<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en">
|
4
4
|
<head>
|
5
|
-
<
|
6
|
-
<meta http-equiv="Content-Type" content="text/html; charset=utf-8" />
|
5
|
+
<meta http-equiv="Refresh" content="5; URL=http://github.com/semin/egor">
|
7
6
|
<title>
|
8
|
-
egor:
|
7
|
+
egor: Esst GeneratOR, a program for calculating environment-specific substitution tables
|
9
8
|
</title>
|
10
|
-
<script src="javascripts/rounded_corners_lite.inc.js" type="text/javascript"></script>
|
11
|
-
<style>
|
12
|
-
|
13
|
-
</style>
|
14
|
-
<script type="text/javascript">
|
15
|
-
window.onload = function() {
|
16
|
-
settings = {
|
17
|
-
tl: { radius: 10 },
|
18
|
-
tr: { radius: 10 },
|
19
|
-
bl: { radius: 10 },
|
20
|
-
br: { radius: 10 },
|
21
|
-
antiAlias: true,
|
22
|
-
autoPad: true,
|
23
|
-
validTags: ["div"]
|
24
|
-
}
|
25
|
-
var versionBox = new curvyCorners(settings, document.getElementById("version"));
|
26
|
-
versionBox.applyCornersToAll();
|
27
|
-
}
|
28
|
-
</script>
|
29
9
|
</head>
|
30
10
|
<body>
|
31
|
-
<
|
32
|
-
|
33
|
-
<h1>egor: Environment-specific substitution table GeneratOR</h1>
|
34
|
-
<div class="sidebar">
|
35
|
-
<div id="version" class="clickable" onclick='document.location = "http://rubyforge.org/projects/egor"; return false'>
|
36
|
-
<p>Get Version</p>
|
37
|
-
<a href="http://rubyforge.org/projects/egor" class="numbers">0.0.5</a>
|
38
|
-
</div>
|
39
|
-
</div>
|
40
|
-
<h2>What</h2>
|
41
|
-
<p>‘egor’ is a program for calculating environment-specific substitution tables</p>
|
42
|
-
<h2>Features</h2>
|
43
|
-
<ul>
|
44
|
-
<li>No more segmentation fault</li>
|
45
|
-
<li>Fast enough not to leave your place</li>
|
46
|
-
<li>Slow enough to check your emails or have some chats with your colleagues next you</li>
|
47
|
-
<li>Full smoothing supported</li>
|
48
|
-
<li>In theory, infinite number of environment features can be handled</li>
|
49
|
-
</ul>
|
50
|
-
<h2>Installation</h2>
|
51
|
-
<p><pre class='syntax'><span class="global">$ </span><span class="ident">sudo</span> <span class="ident">gem</span> <span class="ident">install</span> <span class="ident">egor</span></pre></p>
|
52
|
-
<h2>Demonstration of usage</h2>
|
53
|
-
<p>It’s pretty much the same as Kenji’s subst, so in most cases, you can swap ‘subst’ with ‘egor’.</p>
|
54
|
-
<pre>$ egor -l TEMLIST-file -c classdef.dat</pre>
|
55
|
-
or
|
56
|
-
<pre>$ egor -l TEM-file -c classdef.dat</pre>
|
57
|
-
<h2>Repository</h2>
|
58
|
-
<p>You can download a pre-built RubyGems package from</p>
|
59
|
-
<ul>
|
60
|
-
<li>rubyforge: <a href="http://rubyforge.org/projects/egor">http://rubyforge.org/projects/egor</a></li>
|
61
|
-
</ul>
|
62
|
-
<p>or, You can fetch the source from</p>
|
63
|
-
<ul>
|
64
|
-
<li>github: <a href="http://github.com/semin/egor/tree/master">http://github.com/semin/egor/tree/master</a></li>
|
65
|
-
</ul>
|
66
|
-
<pre>$ git clone git://github.com/semin/egor.git</pre>
|
67
|
-
<h2>License</h2>
|
68
|
-
<p>This code is free to use under the terms of the <span class="caps">MIT</span> license.</p>
|
69
|
-
<h2>Contact</h2>
|
70
|
-
<p>Comments are welcome, please send an email to me (seminlee at gmail dot com).</p>
|
71
|
-
<p class="coda">
|
72
|
-
Semin Lee, 8th December 2008<br>
|
73
|
-
Theme extended from <a href="http://rb2js.rubyforge.org/">Paul Battley</a>
|
11
|
+
<p>
|
12
|
+
An official web site for Egor will be available when our new web server is ready. Until then, this page will be redirected to its Git repository, <a href="http://github.com/semin/egor">http://github.com/semin/egor</a> in 5 seconds.
|
74
13
|
</p>
|
75
|
-
|
76
|
-
|
77
|
-
<!-- insert site tracking codes here, like Google Urchin -->
|
78
|
-
<script type="text/javascript">
|
79
|
-
var gaJsHost = (("https:" == document.location.protocol) ? "https://ssl." : "http://www.");
|
80
|
-
document.write(unescape("%3Cscript src='" + gaJsHost + "google-analytics.com/ga.js' type='text/javascript'%3E%3C/script%3E"));
|
81
|
-
</script>
|
82
|
-
<script type="text/javascript">
|
83
|
-
var pageTracker = _gat._getTracker("UA-6291956-1");
|
84
|
-
pageTracker._trackPageview();
|
85
|
-
</script>
|
14
|
+
- Semin
|
86
15
|
</body>
|
87
16
|
</html>
|
data/website/index.txt
CHANGED
@@ -1,32 +1,178 @@
|
|
1
|
-
h1. egor:
|
1
|
+
h1. egor: Esst GeneratOR, a program for calculating environment-specific substitution tables
|
2
2
|
|
3
|
+
h2. Description
|
3
4
|
|
4
|
-
|
5
|
-
|
6
|
-
'egor' is a program for calculating environment-specific substitution tables
|
5
|
+
'egor' is a program for calculating environment-specific substitution tables from user-provided environmental class definitions and sequence alignments with the annotations of the environment classes.
|
7
6
|
|
8
7
|
|
9
8
|
h2. Features
|
10
9
|
|
11
|
-
*
|
12
|
-
*
|
13
|
-
*
|
14
|
-
*
|
15
|
-
* In theory, infinite number of environment features can be handled
|
10
|
+
* Environment-specific substitution table generation based on user-provided environmental class definitions
|
11
|
+
* Entropy-based smoothing procedures to cope with sparse data problem
|
12
|
+
* BLOSUM-like weighting procedures using PID threshold
|
13
|
+
* Both unidirectional and bidirectional substitution matrices can be generated
|
16
14
|
|
17
15
|
|
18
16
|
h2. Installation
|
19
17
|
|
20
|
-
<pre
|
18
|
+
<pre>
|
19
|
+
~user $ sudo gem install egor
|
20
|
+
</pre>
|
21
|
+
|
22
|
+
|
23
|
+
h2. Requirements
|
24
|
+
|
25
|
+
* ruby 1.8.7 or above (http://www.ruby-lang.org)
|
26
|
+
* rubygems 1.2.0 or above (http://rubyforge.org/projects/rubygems/)
|
21
27
|
|
28
|
+
The following RubyGems will be installed automatically if you have rubygems installed on your machine
|
22
29
|
|
23
|
-
|
30
|
+
* narray (http://narray.rubyforge.org/)
|
31
|
+
* facets (http://facets.rubyforge.org/)
|
32
|
+
* bio (http://bioruby.open-bio.org/)
|
33
|
+
* simple_memoize (http://github.com/JackDanger/simple_memoize/tree/master)
|
24
34
|
|
25
|
-
It's pretty much the same as Kenji's subst, so in most cases, you can swap 'subst' with 'egor'.
|
26
35
|
|
27
|
-
|
36
|
+
h2. Basic Usage
|
37
|
+
|
38
|
+
It's pretty much the same as Kenji's subst (http://www-cryst.bioc.cam.ac.uk/~kenji/subst/), so in most cases, you can swap 'subst' with 'egor'.
|
39
|
+
|
40
|
+
<pre>
|
41
|
+
~user $ egor -l TEMLIST-file -c classdef.dat
|
28
42
|
or
|
29
|
-
|
43
|
+
~user $ egor -l TEM-file -c classdef.dat
|
44
|
+
</pre>
|
45
|
+
|
46
|
+
|
47
|
+
h2. Options
|
48
|
+
|
49
|
+
<pre>
|
50
|
+
--tem-file (-f) FILE: a tem file
|
51
|
+
--tem-list (-l) FILE: a list for tem files
|
52
|
+
--classdef (-c) FILE: a file for the definition of environments (default: 'classdef.dat')
|
53
|
+
--outfile (-o) FILE: output filename (default 'allmat.dat')
|
54
|
+
--weight (-w) INTEGER: clustering level (PID) for the BLOSUM-like weighting (default: 60)
|
55
|
+
--noweight: calculate substitution count with no weights
|
56
|
+
--smooth (-s) INTEGER:
|
57
|
+
0 for partial smoothing (default)
|
58
|
+
1 for full smoothing
|
59
|
+
--p1smooth: perform smoothing for p1 probability calculation when partial smoothing
|
60
|
+
--nosmooth: perform no smoothing operation
|
61
|
+
--cys (-y) INTEGER:
|
62
|
+
0 for using C and J only for structure (default)
|
63
|
+
1 for both structure and sequence
|
64
|
+
2 for using only C for both (must be set when you have no 'disulphide' or 'disulfide' annotation in templates)
|
65
|
+
--output INTEGER:
|
66
|
+
0 for raw count (no smoothing performed)
|
67
|
+
1 for probabilities
|
68
|
+
2 for log odds ratios (default)
|
69
|
+
--noroundoff: do not round off log odds ratio
|
70
|
+
--scale INTEGER: log odds ratio matrices in 1/n bit units (default 3)
|
71
|
+
--sigma DOUBLE: change the sigma value for smoothing (default 5.0)
|
72
|
+
--autosigma: automatically adjust the sigma value for smoothing
|
73
|
+
--add DOUBLE: add this value to raw count when deriving log odds ratios without smoothing (default 1/#classes)
|
74
|
+
--penv: use environment-dependent frequencies for log odds ratio calculation (default false) (NOT implemented yet!!!)
|
75
|
+
--pidmin DOUBLE: count substitutions only for pairs with PID equal to or greater than this value (default none)
|
76
|
+
--pidmax DOUBLE: count substitutions only for pairs with PID smaller than this value (default none)
|
77
|
+
--verbose (-v) INTEGER
|
78
|
+
0 for ERROR level
|
79
|
+
1 for WARN or above level (default)
|
80
|
+
2 for INFO or above level
|
81
|
+
3 for DEBUG or above level
|
82
|
+
--version: print version
|
83
|
+
--help (-h): show help
|
84
|
+
</pre>
|
85
|
+
|
86
|
+
|
87
|
+
h2. Usage
|
88
|
+
|
89
|
+
h4. 1. Prepare an environmental class definition file.
|
90
|
+
|
91
|
+
<pre>
|
92
|
+
~user $ cat classdef.dat
|
93
|
+
#
|
94
|
+
# name of feature (string); values adopted in .tem file (string); class labels assigned for each value (string);\
|
95
|
+
# constrained or not (T or F); silent (used as masks)? (T or F)
|
96
|
+
#
|
97
|
+
secondary structure and phi angle;HEPC;HEPC;T;F
|
98
|
+
solvent accessibility;TF;Aa;F;F
|
99
|
+
hydrogen bond to other sidechain/heterogen;TF;Ss;F;F
|
100
|
+
hydrogen bond to mainchain CO;TF;Oo;F;F
|
101
|
+
hydrogen bond to mainchain NH;TF;Nn;F;F
|
102
|
+
</pre>
|
103
|
+
|
104
|
+
h4. 2. Prepare structural alignments and their annotations of the above environmental classes in PIR format.
|
105
|
+
|
106
|
+
<pre>
|
107
|
+
~user $ cat sample1.tem
|
108
|
+
>P1;1mnma
|
109
|
+
sequence
|
110
|
+
QKERRKIEIKFIENKTRRHVTFSKRKHGIMKKAFELSVLTGTQVLLLVVSETGLVYTFSTPKFEPIVTQQEGRNL
|
111
|
+
IQACLNAPDD*
|
112
|
+
>P1;1egwa
|
113
|
+
sequence
|
114
|
+
--GRKKIQITRIMDERNRQVTFTKRKFGLMKKAYELSVLCDCEIALIIFNSSNKLFQYASTDMDKVLLKYTEY--
|
115
|
+
----------*
|
116
|
+
>P1;1mnma
|
117
|
+
secondary structure and phi angle
|
118
|
+
CPCCCCCCCCCCCCHHHHHHHHHHHHHHHHHHHHHHHHHHPCCCEEEEECCCPCEEEEECCCCCHHHHCHHHHHH
|
119
|
+
HHHHHCCCCP*
|
120
|
+
>P1;1egwa
|
121
|
+
secondary structure and phi angle
|
122
|
+
--CCCCCCCCCCCCHHHHHHHHHHHHHHHHHHHHHHHHHCPCCCEEEEECCCPCEEEEECCCHHHHHHHHHHC--
|
123
|
+
----------*
|
124
|
+
>P1;1mnma
|
125
|
+
solvent accessibility
|
126
|
+
TTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTFTTTTTTTTTTTTTTTT
|
127
|
+
TTTTTTTTTT*
|
128
|
+
>P1;1egwa
|
129
|
+
solvent accessibility
|
130
|
+
--TTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTFTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTT--
|
131
|
+
----------*
|
132
|
+
...
|
133
|
+
</pre>
|
134
|
+
|
135
|
+
h4. 3. When you have two or more alignment files, you should make a separate file containing all the paths for the alignment files.
|
136
|
+
|
137
|
+
<pre>
|
138
|
+
~user $ ls -1 *.tem > TEMLIST
|
139
|
+
~user $ cat TEMLIST
|
140
|
+
sample1.tem
|
141
|
+
sample2.tem
|
142
|
+
...
|
143
|
+
</pre>
|
144
|
+
|
145
|
+
h4. 4. To produce substitution count matrices, type
|
146
|
+
|
147
|
+
<pre>
|
148
|
+
~user $ egor -l TEMLIST --output 0 -o substcount.mat
|
149
|
+
</pre>
|
150
|
+
|
151
|
+
h4. 5. To produce substitution probability matrices, type
|
152
|
+
|
153
|
+
<pre>
|
154
|
+
~user $ egor -l TEMLIST --output 1 -o substprob.mat
|
155
|
+
</pre>
|
156
|
+
|
157
|
+
h4. 6. To produce log odds ratio matrices, type
|
158
|
+
|
159
|
+
<pre>
|
160
|
+
~user $ egor -l TEMLIST --output 2 -o substlogo.mat
|
161
|
+
</pre>
|
162
|
+
|
163
|
+
h4. 7. To produce substitution data only from the sequence pairs within a given PID range, type (if you don't provide any name for output, 'allmat.dat' will be used.)
|
164
|
+
|
165
|
+
<pre>
|
166
|
+
~user $ egor -l TEMLIST --pidmin 60 --pidmax 80 --output 1
|
167
|
+
</pre>
|
168
|
+
|
169
|
+
h4. 8. To change the clustering level (default 60), type
|
170
|
+
|
171
|
+
<pre>
|
172
|
+
~user $ egor -l TEMLIST --weight 80 --output 2
|
173
|
+
</pre>
|
174
|
+
|
175
|
+
h4. 9. Any positions masked with the character 'X' in any environmental feature will be excluded from the calculation of substitution counts.
|
30
176
|
|
31
177
|
|
32
178
|
h2. Repository
|
@@ -39,14 +185,33 @@ or, You can fetch the source from
|
|
39
185
|
|
40
186
|
* github: "http://github.com/semin/egor/tree/master":http://github.com/semin/egor/tree/master
|
41
187
|
|
42
|
-
|
188
|
+
|
189
|
+
h2. Contact
|
190
|
+
|
191
|
+
Comments are welcome, please send an email to me (seminlee at gmail dot com).
|
43
192
|
|
44
193
|
|
45
194
|
h2. License
|
46
195
|
|
47
|
-
|
196
|
+
(The MIT License)
|
48
197
|
|
198
|
+
Copyright (c) 2008 Semin Lee
|
49
199
|
|
50
|
-
|
200
|
+
Permission is hereby granted, free of charge, to any person obtaining
|
201
|
+
a copy of this software and associated documentation files (the
|
202
|
+
'Software'), to deal in the Software without restriction, including
|
203
|
+
without limitation the rights to use, copy, modify, merge, publish,
|
204
|
+
distribute, sublicense, and/or sell copies of the Software, and to
|
205
|
+
permit persons to whom the Software is furnished to do so, subject to
|
206
|
+
the following conditions:
|
51
207
|
|
52
|
-
|
208
|
+
The above copyright notice and this permission notice shall be
|
209
|
+
included in all copies or substantial portions of the Software.
|
210
|
+
|
211
|
+
THE SOFTWARE IS PROVIDED 'AS IS', WITHOUT WARRANTY OF ANY KIND,
|
212
|
+
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
213
|
+
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
|
214
|
+
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
|
215
|
+
CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
|
216
|
+
TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
|
217
|
+
SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|