egor 0.0.5 → 0.9.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/History.txt +28 -26
- data/Manifest.txt +12 -8
- data/README.rdoc +206 -54
- data/Rakefile +9 -9
- data/egor.gemspec +13 -15
- data/lib/egor.rb +1 -1
- data/lib/egor/cli.rb +535 -168
- data/lib/egor/environment.rb +34 -0
- data/lib/egor/environment_class_hash.rb +20 -0
- data/lib/egor/environment_feature.rb +26 -0
- data/lib/egor/environment_feature_array.rb +12 -0
- data/lib/egor/heatmap_array.rb +111 -0
- data/lib/narray_extensions.rb +3 -2
- data/lib/nmatrix_extensions.rb +227 -6
- data/lib/string_extensions.rb +17 -0
- data/test/egor/test_cli.rb +9 -0
- data/test/egor/test_environment_class_hash.rb +25 -0
- data/test/egor/test_environment_feature.rb +29 -0
- data/test/test_math_extensions.rb +11 -0
- data/test/test_narray_extensions.rb +14 -0
- data/test/test_string_extensions.rb +11 -0
- data/website/index.html +5 -76
- data/website/index.txt +183 -18
- data/website/stylesheets/screen.css +0 -1
- metadata +27 -20
- data/lib/enumerable_extensions.rb +0 -11
- data/lib/environment.rb +0 -58
- data/lib/environment_class_hash.rb +0 -18
- data/lib/environment_feature.rb +0 -14
- data/lib/environment_feature_array.rb +0 -10
- data/test/test_egor_cli.rb +0 -8
- data/test/test_enumerable_extensions.rb +0 -16
- data/test/test_environment_feature.rb +0 -11
@@ -0,0 +1,17 @@
|
|
1
|
+
# Convenience helpers that are mixed into String by the +include+ call at
# the bottom of this file.
module StringExtensions

  # Returns a copy of the string with every run of whitespace removed
  # (spaces, tabs, newlines and carriage returns), wherever it occurs.
  #
  # The earlier pattern /[\n|\r|\s]+/ listed '|' inside a character class,
  # where it is a literal pipe rather than alternation, so pipe characters
  # were deleted as well. \s already matches \n and \r on its own.
  def remove_internal_spaces
    gsub(/\s+/, '')
  end

  # Converts an HTML-style colour code such as '#FF00FF' into its integer
  # value (0xFF00FF, i.e. 16711935).
  #
  # Raises RuntimeError when the string is not a '#' followed by exactly six
  # hexadecimal digits. Checking only the length (as before) let strings such
  # as '1234567' through as plain decimal numbers and let '#GGGGGG' escape as
  # an ArgumentError from Integer().
  def rgb_to_integer
    unless self =~ /\A#[0-9A-Fa-f]{6}\z/
      raise "#{self} doesn't seem to be a proper RGB code."
    end

    # Drop the leading '#' and read the remaining six digits as hexadecimal.
    self[1..-1].hex
  end
end

String.send :include, StringExtensions
|
17
|
+
|
@@ -0,0 +1,25 @@
|
|
1
|
+
$:.unshift File.join(File.dirname(__FILE__), '..', '..', 'lib', 'egor')
|
2
|
+
|
3
|
+
require 'test/unit'
|
4
|
+
require 'environment_class_hash'
|
5
|
+
|
6
|
+
# Unit tests for EnvironmentClassHash (looked up through the included Egor
# module).
#
# NOTE(review): every test below is still a placeholder that only asserts
# +true+; none of them exercises @env_cls yet.
class TestEnvironmentClassHash < Test::Unit::TestCase

  include Egor

  # Creates a fresh EnvironmentClassHash instance before each test.
  def setup
    @env_cls = EnvironmentClassHash.new
  end

  def test_group_by_non_residue_labels
    assert(true)
  end

  def test_groups_sorted_by_residue_labels
    assert(true)
  end

  # Renamed from +group_size+: Test::Unit only collects methods whose names
  # start with "test", so the old method was never executed.
  def test_group_size
    assert(true)
  end
end
|
@@ -0,0 +1,29 @@
|
|
1
|
+
$:.unshift File.join(File.dirname(__FILE__), '..', '..', 'lib', 'egor')
|
2
|
+
|
3
|
+
require 'test/unit'
|
4
|
+
require 'environment_feature'
|
5
|
+
|
6
|
+
# Unit tests for EnvironmentFeature (looked up through the included Egor
# module).
class TestEnvironmentFeature < Test::Unit::TestCase

  include Egor

  # Builds the feature under test. The five arguments appear to mirror one
  # line of a classdef file (name; values; class labels; constrained flag;
  # silent flag) -- TODO confirm against EnvironmentFeature#initialize.
  def setup
    @env_ftr = EnvironmentFeature.new('Secondary Structure',
                                      'HEPC'.split(''),
                                      'HEPC'.split(''),
                                      'T',
                                      'F')
  end

  # The string form is expected to join the five fields with ';'.
  def test_to_s
    assert_equal('Secondary Structure;HEPC;HEPC;T;F', @env_ftr.to_s)
  end

  def test_constrained?
    assert(@env_ftr.constrained?)
  end

  # Renamed from +silent?+: Test::Unit only collects methods whose names
  # start with "test", so this assertion was never executed before.
  def test_silent?
    assert(!@env_ftr.silent?)
  end
end
|
@@ -0,0 +1,14 @@
|
|
1
|
+
$:.unshift File.join(File.dirname(__FILE__), "..", "lib")
|
2
|
+
|
3
|
+
require "test/unit"
|
4
|
+
require "narray_extensions"
|
5
|
+
|
6
|
+
# Exercises the #pretty_string rendering provided by narray_extensions.
class TestArrayExtensions < Test::Unit::TestCase

  # Renders a three-element float NArray with a column header and a row
  # header, and compares it with the expected two-line text.
  def test_pretty_string(opts={})
    vector   = NArray.float(3).indgen
    expected = ["# A B C", "VAL 0.00 1.00 2.00"].join("\n")
    rendered = vector.pretty_string(:col_header => %w[A B C], :row_header => 'VAL')
    assert_equal(expected, rendered)
  end
end
|
@@ -0,0 +1,11 @@
|
|
1
|
+
$:.unshift File.join(File.dirname(__FILE__), "..", "lib")
|
2
|
+
|
3
|
+
require "test/unit"
|
4
|
+
require "string_extensions"
|
5
|
+
|
6
|
+
# Exercises the helpers that string_extensions mixes into String.
class TestStringExtensions < Test::Unit::TestCase

  # Spaces, carriage returns and newlines anywhere in the string must all be
  # stripped out, leaving only the visible characters.
  def test_remove_internal_spaces
    messy = "he ll o\r\n sem in\r \n"
    assert_equal("hellosemin", messy.remove_internal_spaces)
  end
end
|
data/website/index.html
CHANGED
@@ -2,86 +2,15 @@
|
|
2
2
|
"http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">
|
3
3
|
<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en">
|
4
4
|
<head>
|
5
|
-
<
|
6
|
-
<meta http-equiv="Content-Type" content="text/html; charset=utf-8" />
|
5
|
+
<meta http-equiv="Refresh" content="5; URL=http://github.com/semin/egor" />
|
7
6
|
<title>
|
8
|
-
egor:
|
7
|
+
egor: Esst GeneratOR, a program for calculating environment-specific substitution tables
|
9
8
|
</title>
|
10
|
-
<script src="javascripts/rounded_corners_lite.inc.js" type="text/javascript"></script>
|
11
|
-
<style>
|
12
|
-
|
13
|
-
</style>
|
14
|
-
<script type="text/javascript">
|
15
|
-
window.onload = function() {
|
16
|
-
settings = {
|
17
|
-
tl: { radius: 10 },
|
18
|
-
tr: { radius: 10 },
|
19
|
-
bl: { radius: 10 },
|
20
|
-
br: { radius: 10 },
|
21
|
-
antiAlias: true,
|
22
|
-
autoPad: true,
|
23
|
-
validTags: ["div"]
|
24
|
-
}
|
25
|
-
var versionBox = new curvyCorners(settings, document.getElementById("version"));
|
26
|
-
versionBox.applyCornersToAll();
|
27
|
-
}
|
28
|
-
</script>
|
29
9
|
</head>
|
30
10
|
<body>
|
31
|
-
<
|
32
|
-
|
33
|
-
<h1>egor: Environment-specific substitution table GeneratOR</h1>
|
34
|
-
<div class="sidebar">
|
35
|
-
<div id="version" class="clickable" onclick='document.location = "http://rubyforge.org/projects/egor"; return false'>
|
36
|
-
<p>Get Version</p>
|
37
|
-
<a href="http://rubyforge.org/projects/egor" class="numbers">0.0.5</a>
|
38
|
-
</div>
|
39
|
-
</div>
|
40
|
-
<h2>What</h2>
|
41
|
-
<p>‘egor’ is a program for calculating environment-specific substitution tables</p>
|
42
|
-
<h2>Features</h2>
|
43
|
-
<ul>
|
44
|
-
<li>No more segmentation fault</li>
|
45
|
-
<li>Fast enough not to leave your place</li>
|
46
|
-
<li>Slow enough to check your emails or have some chats with your colleagues next you</li>
|
47
|
-
<li>Full smoothing supported</li>
|
48
|
-
<li>In theory, infinite number of environment features can be handled</li>
|
49
|
-
</ul>
|
50
|
-
<h2>Installation</h2>
|
51
|
-
<p><pre class='syntax'><span class="global">$ </span><span class="ident">sudo</span> <span class="ident">gem</span> <span class="ident">install</span> <span class="ident">egor</span></pre></p>
|
52
|
-
<h2>Demonstration of usage</h2>
|
53
|
-
<p>It’s pretty much the same as Kenji’s subst, so in most cases, you can swap ‘subst’ with ‘egor’.</p>
|
54
|
-
<pre>$ egor -l TEMLIST-file -c classdef.dat</pre>
|
55
|
-
or
|
56
|
-
<pre>$ egor -l TEM-file -c classdef.dat</pre>
|
57
|
-
<h2>Repository</h2>
|
58
|
-
<p>You can download a pre-built RubyGems package from</p>
|
59
|
-
<ul>
|
60
|
-
<li>rubyforge: <a href="http://rubyforge.org/projects/egor">http://rubyforge.org/projects/egor</a></li>
|
61
|
-
</ul>
|
62
|
-
<p>or, You can fetch the source from</p>
|
63
|
-
<ul>
|
64
|
-
<li>github: <a href="http://github.com/semin/egor/tree/master">http://github.com/semin/egor/tree/master</a></li>
|
65
|
-
</ul>
|
66
|
-
<pre>$ git clone git://github.com/semin/egor.git</pre>
|
67
|
-
<h2>License</h2>
|
68
|
-
<p>This code is free to use under the terms of the <span class="caps">MIT</span> license.</p>
|
69
|
-
<h2>Contact</h2>
|
70
|
-
<p>Comments are welcome, please send an email to me (seminlee at gmail dot com).</p>
|
71
|
-
<p class="coda">
|
72
|
-
Semin Lee, 8th December 2008<br>
|
73
|
-
Theme extended from <a href="http://rb2js.rubyforge.org/">Paul Battley</a>
|
11
|
+
<p>
|
12
|
+
An official web site for Egor will be available when our new web server is ready. Until then, this page will be redirected to its Git repository, <a href="http://github.com/semin/egor">http://github.com/semin/egor</a> in 5 seconds.
|
74
13
|
</p>
|
75
|
-
|
76
|
-
|
77
|
-
<!-- insert site tracking codes here, like Google Urchin -->
|
78
|
-
<script type="text/javascript">
|
79
|
-
var gaJsHost = (("https:" == document.location.protocol) ? "https://ssl." : "http://www.");
|
80
|
-
document.write(unescape("%3Cscript src='" + gaJsHost + "google-analytics.com/ga.js' type='text/javascript'%3E%3C/script%3E"));
|
81
|
-
</script>
|
82
|
-
<script type="text/javascript">
|
83
|
-
var pageTracker = _gat._getTracker("UA-6291956-1");
|
84
|
-
pageTracker._trackPageview();
|
85
|
-
</script>
|
14
|
+
- Semin
|
86
15
|
</body>
|
87
16
|
</html>
|
data/website/index.txt
CHANGED
@@ -1,32 +1,178 @@
|
|
1
|
-
h1. egor:
|
1
|
+
h1. egor: Esst GeneratOR, a program for calculating environment-specific substitution tables
|
2
2
|
|
3
|
+
h2. Description
|
3
4
|
|
4
|
-
|
5
|
-
|
6
|
-
'egor' is a program for calculating environment-specific substitution tables
|
5
|
+
'egor' is a program for calculating environment-specific substitution tables from user-provided environmental class definitions and sequence alignments annotated with the environment classes.
|
7
6
|
|
8
7
|
|
9
8
|
h2. Features
|
10
9
|
|
11
|
-
*
|
12
|
-
*
|
13
|
-
*
|
14
|
-
*
|
15
|
-
* In theory, infinite number of environment features can be handled
|
10
|
+
* Environment-specific substitution table generation based on user-provided environmental class definitions
|
11
|
+
* Entropy-based smoothing procedures to cope with the sparse data problem
|
12
|
+
* BLOSUM-like weighting procedures using PID threshold
|
13
|
+
* Both unidirectional and bidirectional substitution matrices can be generated
|
16
14
|
|
17
15
|
|
18
16
|
h2. Installation
|
19
17
|
|
20
|
-
<pre
|
18
|
+
<pre>
|
19
|
+
~user $ sudo gem install egor
|
20
|
+
</pre>
|
21
|
+
|
22
|
+
|
23
|
+
h2. Requirements
|
24
|
+
|
25
|
+
* ruby 1.8.7 or above (http://www.ruby-lang.org)
|
26
|
+
* rubygems 1.2.0 or above (http://rubyforge.org/projects/rubygems/)
|
21
27
|
|
28
|
+
The following RubyGems will be installed automatically if you have rubygems installed on your machine
|
22
29
|
|
23
|
-
|
30
|
+
* narray (http://narray.rubyforge.org/)
|
31
|
+
* facets (http://facets.rubyforge.org/)
|
32
|
+
* bio (http://bioruby.open-bio.org/)
|
33
|
+
* simple_memoize (http://github.com/JackDanger/simple_memoize/tree/master)
|
24
34
|
|
25
|
-
It's pretty much the same as Kenji's subst, so in most cases, you can swap 'subst' with 'egor'.
|
26
35
|
|
27
|
-
|
36
|
+
h2. Basic Usage
|
37
|
+
|
38
|
+
It's pretty much the same as Kenji's subst (http://www-cryst.bioc.cam.ac.uk/~kenji/subst/), so in most cases, you can swap 'subst' with 'egor'.
|
39
|
+
|
40
|
+
<pre>
|
41
|
+
~user $ egor -l TEMLIST-file -c classdef.dat
|
28
42
|
or
|
29
|
-
|
43
|
+
~user $ egor -l TEM-file -c classdef.dat
|
44
|
+
</pre>
|
45
|
+
|
46
|
+
|
47
|
+
h2. Options
|
48
|
+
|
49
|
+
<pre>
|
50
|
+
--tem-file (-f) FILE: a tem file
|
51
|
+
--tem-list (-l) FILE: a list for tem files
|
52
|
+
--classdef (-c) FILE: a file for the definition of environments (default: 'classdef.dat')
|
53
|
+
--outfile (-o) FILE: output filename (default 'allmat.dat')
|
54
|
+
--weight (-w) INTEGER: clustering level (PID) for the BLOSUM-like weighting (default: 60)
|
55
|
+
--noweight: calculate substitution count with no weights
|
56
|
+
--smooth (-s) INTEGER:
|
57
|
+
0 for partial smoothing (default)
|
58
|
+
1 for full smoothing
|
59
|
+
--p1smooth: perform smoothing for p1 probability calculation when partial smoothing
|
60
|
+
--nosmooth: perform no smoothing operation
|
61
|
+
--cys (-y) INTEGER:
|
62
|
+
0 for using C and J only for structure (default)
|
63
|
+
1 for both structure and sequence
|
64
|
+
2 for using only C for both (must be set when you have no 'disulphide' or 'disulfide' annotation in templates)
|
65
|
+
--output INTEGER:
|
66
|
+
0 for raw count (no smoothing performed)
|
67
|
+
1 for probabilities
|
68
|
+
2 for log odds ratios (default)
|
69
|
+
--noroundoff: do not round off log odds ratio
|
70
|
+
--scale INTEGER: log odds ratio matrices in 1/n bit units (default 3)
|
71
|
+
--sigma DOUBLE: change the sigma value for smoothing (default 5.0)
|
72
|
+
--autosigma: automatically adjust the sigma value for smoothing
|
73
|
+
--add DOUBLE: add this value to raw count when deriving log odds ratios without smoothing (default 1/#classes)
|
74
|
+
--penv: use environment-dependent frequencies for log odds ratio calculation (default false) (NOT implemented yet!!!)
|
75
|
+
--pidmin DOUBLE: count substitutions only for pairs with PID equal to or greater than this value (default none)
|
76
|
+
--pidmax DOUBLE: count substitutions only for pairs with PID smaller than this value (default none)
|
77
|
+
--verbose (-v) INTEGER
|
78
|
+
0 for ERROR level
|
79
|
+
1 for WARN or above level (default)
|
80
|
+
2 for INFO or above level
|
81
|
+
3 for DEBUG or above level
|
82
|
+
--version: print version
|
83
|
+
--help (-h): show help
|
84
|
+
</pre>
|
85
|
+
|
86
|
+
|
87
|
+
h2. Usage
|
88
|
+
|
89
|
+
h4. 1. Prepare an environmental class definition file.
|
90
|
+
|
91
|
+
<pre>
|
92
|
+
~user $ cat classdef.dat
|
93
|
+
#
|
94
|
+
# name of feature (string); values adopted in .tem file (string); class labels assigned for each value (string);\
|
95
|
+
# constrained or not (T or F); silent (used as masks)? (T or F)
|
96
|
+
#
|
97
|
+
secondary structure and phi angle;HEPC;HEPC;T;F
|
98
|
+
solvent accessibility;TF;Aa;F;F
|
99
|
+
hydrogen bond to other sidechain/heterogen;TF;Ss;F;F
|
100
|
+
hydrogen bond to mainchain CO;TF;Oo;F;F
|
101
|
+
hydrogen bond to mainchain NH;TF;Nn;F;F
|
102
|
+
</pre>
|
103
|
+
|
104
|
+
h4. 2. Prepare structural alignments and their annotations of the above environmental classes in PIR format.
|
105
|
+
|
106
|
+
<pre>
|
107
|
+
~user $ cat sample1.tem
|
108
|
+
>P1;1mnma
|
109
|
+
sequence
|
110
|
+
QKERRKIEIKFIENKTRRHVTFSKRKHGIMKKAFELSVLTGTQVLLLVVSETGLVYTFSTPKFEPIVTQQEGRNL
|
111
|
+
IQACLNAPDD*
|
112
|
+
>P1;1egwa
|
113
|
+
sequence
|
114
|
+
--GRKKIQITRIMDERNRQVTFTKRKFGLMKKAYELSVLCDCEIALIIFNSSNKLFQYASTDMDKVLLKYTEY--
|
115
|
+
----------*
|
116
|
+
>P1;1mnma
|
117
|
+
secondary structure and phi angle
|
118
|
+
CPCCCCCCCCCCCCHHHHHHHHHHHHHHHHHHHHHHHHHHPCCCEEEEECCCPCEEEEECCCCCHHHHCHHHHHH
|
119
|
+
HHHHHCCCCP*
|
120
|
+
>P1;1egwa
|
121
|
+
secondary structure and phi angle
|
122
|
+
--CCCCCCCCCCCCHHHHHHHHHHHHHHHHHHHHHHHHHCPCCCEEEEECCCPCEEEEECCCHHHHHHHHHHC--
|
123
|
+
----------*
|
124
|
+
>P1;1mnma
|
125
|
+
solvent accessibility
|
126
|
+
TTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTFTTTTTTTTTTTTTTTT
|
127
|
+
TTTTTTTTTT*
|
128
|
+
>P1;1egwa
|
129
|
+
solvent accessibility
|
130
|
+
--TTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTFTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTT--
|
131
|
+
----------*
|
132
|
+
...
|
133
|
+
</pre>
|
134
|
+
|
135
|
+
h4. 3. When you have two or more alignment files, you should make a separate file containing all the paths for the alignment files.
|
136
|
+
|
137
|
+
<pre>
|
138
|
+
~user $ ls -1 *.tem > TEMLIST
|
139
|
+
~user $ cat TEMLIST
|
140
|
+
sample1.tem
|
141
|
+
sample2.tem
|
142
|
+
...
|
143
|
+
</pre>
|
144
|
+
|
145
|
+
h4. 4. To produce substitution count matrices, type
|
146
|
+
|
147
|
+
<pre>
|
148
|
+
~user $ egor -l TEMLIST --output 0 -o substcount.mat
|
149
|
+
</pre>
|
150
|
+
|
151
|
+
h4. 5. To produce substitution probability matrices, type
|
152
|
+
|
153
|
+
<pre>
|
154
|
+
~user $ egor -l TEMLIST --output 1 -o substprob.mat
|
155
|
+
</pre>
|
156
|
+
|
157
|
+
h4. 6. To produce log odds ratio matrices, type
|
158
|
+
|
159
|
+
<pre>
|
160
|
+
~user $ egor -l TEMLIST --output 2 -o substlogo.mat
|
161
|
+
</pre>
|
162
|
+
|
163
|
+
h4. 7. To produce substitution data only from the sequence pairs within a given PID range, type (if you don't provide any name for output, 'allmat.dat' will be used.)
|
164
|
+
|
165
|
+
<pre>
|
166
|
+
~user $ egor -l TEMLIST --pidmin 60 --pidmax 80 --output 1
|
167
|
+
</pre>
|
168
|
+
|
169
|
+
h4. 8. To change the clustering level (default 60), type
|
170
|
+
|
171
|
+
<pre>
|
172
|
+
~user $ egor -l TEMLIST --weight 80 --output 2
|
173
|
+
</pre>
|
174
|
+
|
175
|
+
h4. 9. Any positions masked with the character 'X' in any environmental feature will be excluded from the calculation of substitution counts.
|
30
176
|
|
31
177
|
|
32
178
|
h2. Repository
|
@@ -39,14 +185,33 @@ or, You can fetch the source from
|
|
39
185
|
|
40
186
|
* github: "http://github.com/semin/egor/tree/master":http://github.com/semin/egor/tree/master
|
41
187
|
|
42
|
-
|
188
|
+
|
189
|
+
h2. Contact
|
190
|
+
|
191
|
+
Comments are welcome, please send an email to me (seminlee at gmail dot com).
|
43
192
|
|
44
193
|
|
45
194
|
h2. License
|
46
195
|
|
47
|
-
|
196
|
+
(The MIT License)
|
48
197
|
|
198
|
+
Copyright (c) 2008 Semin Lee
|
49
199
|
|
50
|
-
|
200
|
+
Permission is hereby granted, free of charge, to any person obtaining
|
201
|
+
a copy of this software and associated documentation files (the
|
202
|
+
'Software'), to deal in the Software without restriction, including
|
203
|
+
without limitation the rights to use, copy, modify, merge, publish,
|
204
|
+
distribute, sublicense, and/or sell copies of the Software, and to
|
205
|
+
permit persons to whom the Software is furnished to do so, subject to
|
206
|
+
the following conditions:
|
51
207
|
|
52
|
-
|
208
|
+
The above copyright notice and this permission notice shall be
|
209
|
+
included in all copies or substantial portions of the Software.
|
210
|
+
|
211
|
+
THE SOFTWARE IS PROVIDED 'AS IS', WITHOUT WARRANTY OF ANY KIND,
|
212
|
+
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
213
|
+
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
|
214
|
+
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
|
215
|
+
CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
|
216
|
+
TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
|
217
|
+
SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|