egor 0.0.5 → 0.9.0

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,17 @@
1
# Helper methods intended to be mixed into String.
module StringExtensions

  # Returns a copy of the string with every whitespace character (spaces,
  # tabs, carriage returns, newlines, ...) removed.
  #
  # The previous pattern, /[\n|\r|\s]+/, used `|` inside a character class,
  # where it is a literal pipe rather than alternation, so pipes were being
  # stripped as well. `\s` already covers "\n" and "\r".
  def remove_internal_spaces
    gsub(/\s+/, '')
  end

  # Converts an HTML-style colour code such as '#FF00FF' into its integer
  # value (0xFF00FF).
  #
  # Raises RuntimeError unless the string is exactly a '#' followed by six
  # hexadecimal digits. Checking the length alone let strings like '1234567'
  # through (parsed as a decimal number) and surfaced malformed digits as an
  # unrelated ArgumentError from Integer().
  def rgb_to_integer
    unless self =~ /\A#[0-9A-Fa-f]{6}\z/
      raise "#{self} doesn't seem to be a proper RGB code."
    end

    # Drop the leading '#' and parse the remaining six digits as base 16.
    self[1..-1].to_i(16)
  end
end
15
+
16
+ String.send :include, StringExtensions
17
+
@@ -0,0 +1,9 @@
1
+ require File.join(File.dirname(__FILE__), "..", "test_helper.rb")
2
+ require 'egor/cli'
3
+
4
# Placeholder test case for the egor command-line interface.
class TestEgorCli < Test::Unit::TestCase
  # The actual CLI invocation is disabled (kept below as a comment), so this
  # test currently only asserts a constant truth.
  def test_execute
    #Egor::CLI.execute
    assert true
  end
end
@@ -0,0 +1,25 @@
1
+ $:.unshift File.join(File.dirname(__FILE__), '..', '..', 'lib', 'egor')
2
+
3
+ require 'test/unit'
4
+ require 'environment_class_hash'
5
+
6
# Test case for Egor::EnvironmentClassHash.
#
# NOTE(review): every test body below is still a placeholder that asserts
# `true`; none of them exercises @env_cls yet.
class TestEnvironmentClassHash < Test::Unit::TestCase

  include Egor

  # Creates a fresh EnvironmentClassHash before each test.
  def setup
    @env_cls = EnvironmentClassHash.new
  end

  def test_group_by_non_residue_labels
    assert(true)
  end

  def test_groups_sorted_by_residue_labels
    assert(true)
  end

  # Was named `group_size`; Test::Unit only collects methods whose names
  # start with `test`, so the method was never executed.
  def test_group_size
    assert(true)
  end
end
@@ -0,0 +1,29 @@
1
+ $:.unshift File.join(File.dirname(__FILE__), '..', '..', 'lib', 'egor')
2
+
3
+ require 'test/unit'
4
+ require 'environment_feature'
5
+
6
# Test case for Egor::EnvironmentFeature.
class TestEnvironmentFeature < Test::Unit::TestCase

  include Egor

  # Builds the feature under test from five positional arguments. Judging by
  # the expected to_s output below these are name, symbols, labels,
  # constrained flag and silent flag -- TODO confirm against
  # EnvironmentFeature#initialize.
  def setup
    @env_ftr = EnvironmentFeature.new('Secondary Structure',
                                      'HEPC'.split(''),
                                      'HEPC'.split(''),
                                      'T',
                                      'F')
  end

  # to_s is expected to join the fields with ';'.
  def test_to_s
    assert_equal('Secondary Structure;HEPC;HEPC;T;F', @env_ftr.to_s)
  end

  def test_constrained?
    assert(@env_ftr.constrained?)
  end

  # Was named `silent?`; without the `test_` prefix Test::Unit never ran it,
  # so the assertion on #silent? was silently skipped.
  def test_silent?
    assert(!@env_ftr.silent?)
  end
end
@@ -0,0 +1,11 @@
1
+ $:.unshift File.join(File.dirname(__FILE__), "..", "lib")
2
+
3
+ require 'test/unit'
4
+ require 'math_extensions'
5
+
6
# Test case for the Math extensions loaded from 'math_extensions'.
class TestMathExtensions < Test::Unit::TestCase

  # The base-2 logarithm of 2 must equal 1.
  def test_log2
    expected = 1
    actual   = Math.log2(2)
    assert_equal(expected, actual)
  end
end
@@ -0,0 +1,14 @@
1
+ $:.unshift File.join(File.dirname(__FILE__), "..", "lib")
2
+
3
+ require "test/unit"
4
+ require "narray_extensions"
5
+
6
# Test case for the NArray extensions loaded from 'narray_extensions'.
class TestArrayExtensions < Test::Unit::TestCase

  # pretty_string should render a header line built from :col_header and a
  # data line prefixed with :row_header.
  #
  # The unused `opts={}` parameter was dropped: the test runner invokes test
  # methods without arguments, and the body never read it.
  #
  # NOTE(review): the expected string must match pretty_string's column
  # padding exactly -- verify the spacing against the real output.
  def test_pretty_string
    m = NArray.float(3).indgen
    result = "# A B C\n" +
             "VAL 0.00 1.00 2.00"
    assert_equal(result, m.pretty_string(:col_header => %w[A B C], :row_header => 'VAL'))
  end
end
@@ -0,0 +1,11 @@
1
+ $:.unshift File.join(File.dirname(__FILE__), "..", "lib")
2
+
3
+ require "test/unit"
4
+ require "string_extensions"
5
+
6
# Test case for the String extensions loaded from 'string_extensions'.
class TestStringExtensions < Test::Unit::TestCase

  # Spaces, carriage returns and newlines anywhere in the string are removed.
  def test_remove_internal_spaces
    input    = "he ll o\r\n sem in\r \n"
    expected = "hellosemin"
    assert_equal(expected, input.remove_internal_spaces)
  end
end
@@ -2,86 +2,15 @@
2
2
  "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">
3
3
  <html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en">
4
4
  <head>
5
- <link rel="stylesheet" href="stylesheets/screen.css" type="text/css" media="screen" />
6
- <meta http-equiv="Content-Type" content="text/html; charset=utf-8" />
5
+ <meta http-equiv="Refresh" content="5; URL=http://github.com/semin/egor" />
7
6
  <title>
8
- egor: Environment-specific substitution table GeneratOR
7
+ egor: Esst GeneratOR, a program for calculating environment-specific substitution tables
9
8
  </title>
10
- <script src="javascripts/rounded_corners_lite.inc.js" type="text/javascript"></script>
11
- <style>
12
-
13
- </style>
14
- <script type="text/javascript">
15
- window.onload = function() {
16
- settings = {
17
- tl: { radius: 10 },
18
- tr: { radius: 10 },
19
- bl: { radius: 10 },
20
- br: { radius: 10 },
21
- antiAlias: true,
22
- autoPad: true,
23
- validTags: ["div"]
24
- }
25
- var versionBox = new curvyCorners(settings, document.getElementById("version"));
26
- versionBox.applyCornersToAll();
27
- }
28
- </script>
29
9
  </head>
30
10
  <body>
31
- <div id="main">
32
-
33
- <h1>egor: Environment-specific substitution table GeneratOR</h1>
34
- <div class="sidebar">
35
- <div id="version" class="clickable" onclick='document.location = "http://rubyforge.org/projects/egor"; return false'>
36
- <p>Get Version</p>
37
- <a href="http://rubyforge.org/projects/egor" class="numbers">0.0.5</a>
38
- </div>
39
- </div>
40
- <h2>What</h2>
41
- <p>&#8216;egor&#8217; is a program for calculating environment-specific substitution tables</p>
42
- <h2>Features</h2>
43
- <ul>
44
- <li>No more segmentation fault</li>
45
- <li>Fast enough not to leave your place</li>
46
- <li>Slow enough to check your emails or have some chats with your colleagues next you</li>
47
- <li>Full smoothing supported</li>
48
- <li>In theory, infinite number of environment features can be handled</li>
49
- </ul>
50
- <h2>Installation</h2>
51
- <p><pre class='syntax'><span class="global">$ </span><span class="ident">sudo</span> <span class="ident">gem</span> <span class="ident">install</span> <span class="ident">egor</span></pre></p>
52
- <h2>Demonstration of usage</h2>
53
- <p>It&#8217;s pretty much the same as Kenji&#8217;s subst, so in most cases, you can swap &#8216;subst&#8217; with &#8216;egor&#8217;.</p>
54
- <pre>$ egor -l TEMLIST-file -c classdef.dat</pre>
55
- or
56
- <pre>$ egor -l TEM-file -c classdef.dat</pre>
57
- <h2>Repository</h2>
58
- <p>You can download a pre-built RubyGems package from</p>
59
- <ul>
60
- <li>rubyforge: <a href="http://rubyforge.org/projects/egor">http://rubyforge.org/projects/egor</a></li>
61
- </ul>
62
- <p>or, You can fetch the source from</p>
63
- <ul>
64
- <li>github: <a href="http://github.com/semin/egor/tree/master">http://github.com/semin/egor/tree/master</a></li>
65
- </ul>
66
- <pre>$ git clone git://github.com/semin/egor.git</pre>
67
- <h2>License</h2>
68
- <p>This code is free to use under the terms of the <span class="caps">MIT</span> license.</p>
69
- <h2>Contact</h2>
70
- <p>Comments are welcome, please send an email to me (seminlee at gmail dot com).</p>
71
- <p class="coda">
72
- Semin Lee, 8th December 2008<br>
73
- Theme extended from <a href="http://rb2js.rubyforge.org/">Paul Battley</a>
11
+ <p>
12
+ An official web site for Egor will be available when our new web server is ready. Until then, this page will redirect to its Git repository, <a href="http://github.com/semin/egor">http://github.com/semin/egor</a>, in 5 seconds.
74
13
  </p>
75
- </div>
76
-
77
- <!-- insert site tracking codes here, like Google Urchin -->
78
- <script type="text/javascript">
79
- var gaJsHost = (("https:" == document.location.protocol) ? "https://ssl." : "http://www.");
80
- document.write(unescape("%3Cscript src='" + gaJsHost + "google-analytics.com/ga.js' type='text/javascript'%3E%3C/script%3E"));
81
- </script>
82
- <script type="text/javascript">
83
- var pageTracker = _gat._getTracker("UA-6291956-1");
84
- pageTracker._trackPageview();
85
- </script>
14
+ - Semin
86
15
  </body>
87
16
  </html>
@@ -1,32 +1,178 @@
1
- h1. egor: Environment-specific substitution table GeneratOR
1
+ h1. egor: Esst GeneratOR, a program for calculating environment-specific substitution tables
2
2
 
3
+ h2. Description
3
4
 
4
- h2. What
5
-
6
- 'egor' is a program for calculating environment-specific substitution tables
5
+ 'egor' is a program for calculating environment-specific substitution tables from user-provided environmental class definitions and sequence alignments annotated with the environment classes.
7
6
 
8
7
 
9
8
  h2. Features
10
9
 
11
- * No more segmentation fault
12
- * Fast enough not to leave your place
13
- * Slow enough to check your emails or have some chats with your colleagues next you
14
- * Full smoothing supported
15
- * In theory, infinite number of environment features can be handled
10
+ * Environment-specific substitution table generation based on user-provided environmental class definitions
11
+ * Entropy-based smoothing procedures to cope with the sparse data problem
12
+ * BLOSUM-like weighting procedures using PID threshold
13
+ * Both unidirectional and bidirectional substitution matrices can be generated
16
14
 
17
15
 
18
16
  h2. Installation
19
17
 
20
- <pre syntax="ruby">$ sudo gem install egor</pre>
18
+ <pre>
19
+ ~user $ sudo gem install egor
20
+ </pre>
21
+
22
+
23
+ h2. Requirements
24
+
25
+ * ruby 1.8.7 or above (http://www.ruby-lang.org)
26
+ * rubygems 1.2.0 or above (http://rubyforge.org/projects/rubygems/)
21
27
 
28
+ The following RubyGems will be installed automatically if you have rubygems installed on your machine
22
29
 
23
- h2. Demonstration of usage
30
+ * narray (http://narray.rubyforge.org/)
31
+ * facets (http://facets.rubyforge.org/)
32
+ * bio (http://bioruby.open-bio.org/)
33
+ * simple_memoize (http://github.com/JackDanger/simple_memoize/tree/master)
24
34
 
25
- It's pretty much the same as Kenji's subst, so in most cases, you can swap 'subst' with 'egor'.
26
35
 
27
- <pre>$ egor -l TEMLIST-file -c classdef.dat</pre>
36
+ h2. Basic Usage
37
+
38
+ It's pretty much the same as Kenji's subst (http://www-cryst.bioc.cam.ac.uk/~kenji/subst/), so in most cases, you can swap 'subst' with 'egor'.
39
+
40
+ <pre>
41
+ ~user $ egor -l TEMLIST-file -c classdef.dat
28
42
  or
29
- <pre>$ egor -l TEM-file -c classdef.dat</pre>
43
+ ~user $ egor -l TEM-file -c classdef.dat
44
+ </pre>
45
+
46
+
47
+ h2. Options
48
+
49
+ <pre>
50
+ --tem-file (-f) FILE: a tem file
51
+ --tem-list (-l) FILE: a list for tem files
52
+ --classdef (-c) FILE: a file for the definition of environments (default: 'classdef.dat')
53
+ --outfile (-o) FILE: output filename (default 'allmat.dat')
54
+ --weight (-w) INTEGER: clustering level (PID) for the BLOSUM-like weighting (default: 60)
55
+ --noweight: calculate substitution count with no weights
56
+ --smooth (-s) INTEGER:
57
+ 0 for partial smoothing (default)
58
+ 1 for full smoothing
59
+ --p1smooth: perform smoothing for p1 probability calculation when partial smoothing
60
+ --nosmooth: perform no smoothing operation
61
+ --cys (-y) INTEGER:
62
+ 0 for using C and J only for structure (default)
63
+ 1 for both structure and sequence
64
+ 2 for using only C for both (must be set when you have no 'disulphide' or 'disulfide' annotation in templates)
65
+ --output INTEGER:
66
+ 0 for raw count (no smoothing performed)
67
+ 1 for probabilities
68
+ 2 for log odds ratios (default)
69
+ --noroundoff: do not round off log odds ratio
70
+ --scale INTEGER: log odds ratio matrices in 1/n bit units (default 3)
71
+ --sigma DOUBLE: change the sigma value for smoothing (default 5.0)
72
+ --autosigma: automatically adjust the sigma value for smoothing
73
+ --add DOUBLE: add this value to raw count when deriving log odds ratios without smoothing (default 1/#classes)
74
+ --penv: use environment-dependent frequencies for log odds ratio calculation (default false) (NOT implemented yet!!!)
75
+ --pidmin DOUBLE: count substitutions only for pairs with PID equal to or greater than this value (default none)
76
+ --pidmax DOUBLE: count substitutions only for pairs with PID smaller than this value (default none)
77
+ --verbose (-v) INTEGER
78
+ 0 for ERROR level
79
+ 1 for WARN or above level (default)
80
+ 2 for INFO or above level
81
+ 3 for DEBUG or above level
82
+ --version: print version
83
+ --help (-h): show help
84
+ </pre>
85
+
86
+
87
+ h2. Usage
88
+
89
+ h4. 1. Prepare an environmental class definition file.
90
+
91
+ <pre>
92
+ ~user $ cat classdef.dat
93
+ #
94
+ # name of feature (string); values adopted in .tem file (string); class labels assigned for each value (string);\
95
+ # constrained or not (T or F); silent (used as masks)? (T or F)
96
+ #
97
+ secondary structure and phi angle;HEPC;HEPC;T;F
98
+ solvent accessibility;TF;Aa;F;F
99
+ hydrogen bond to other sidechain/heterogen;TF;Ss;F;F
100
+ hydrogen bond to mainchain CO;TF;Oo;F;F
101
+ hydrogen bond to mainchain NH;TF;Nn;F;F
102
+ </pre>
103
+
104
+ h4. 2. Prepare structural alignments and their annotations for the above environmental classes in PIR format.
105
+
106
+ <pre>
107
+ ~user $ cat sample1.tem
108
+ >P1;1mnma
109
+ sequence
110
+ QKERRKIEIKFIENKTRRHVTFSKRKHGIMKKAFELSVLTGTQVLLLVVSETGLVYTFSTPKFEPIVTQQEGRNL
111
+ IQACLNAPDD*
112
+ >P1;1egwa
113
+ sequence
114
+ --GRKKIQITRIMDERNRQVTFTKRKFGLMKKAYELSVLCDCEIALIIFNSSNKLFQYASTDMDKVLLKYTEY--
115
+ ----------*
116
+ >P1;1mnma
117
+ secondary structure and phi angle
118
+ CPCCCCCCCCCCCCHHHHHHHHHHHHHHHHHHHHHHHHHHPCCCEEEEECCCPCEEEEECCCCCHHHHCHHHHHH
119
+ HHHHHCCCCP*
120
+ >P1;1egwa
121
+ secondary structure and phi angle
122
+ --CCCCCCCCCCCCHHHHHHHHHHHHHHHHHHHHHHHHHCPCCCEEEEECCCPCEEEEECCCHHHHHHHHHHC--
123
+ ----------*
124
+ >P1;1mnma
125
+ solvent accessibility
126
+ TTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTFTTTTTTTTTTTTTTTT
127
+ TTTTTTTTTT*
128
+ >P1;1egwa
129
+ solvent accessibility
130
+ --TTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTFTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTT--
131
+ ----------*
132
+ ...
133
+ </pre>
134
+
135
+ h4. 3. When you have two or more alignment files, you should make a separate file containing all the paths for the alignment files.
136
+
137
+ <pre>
138
+ ~user $ ls -1 *.tem > TEMLIST
139
+ ~user $ cat TEMLIST
140
+ sample1.tem
141
+ sample2.tem
142
+ ...
143
+ </pre>
144
+
145
+ h4. 4. To produce substitution count matrices, type
146
+
147
+ <pre>
148
+ ~user $ egor -l TEMLIST --output 0 -o substcount.mat
149
+ </pre>
150
+
151
+ h4. 5. To produce substitution probability matrices, type
152
+
153
+ <pre>
154
+ ~user $ egor -l TEMLIST --output 1 -o substprob.mat
155
+ </pre>
156
+
157
+ h4. 6. To produce log odds ratio matrices, type
158
+
159
+ <pre>
160
+ ~user $ egor -l TEMLIST --output 2 -o substlogo.mat
161
+ </pre>
162
+
163
+ h4. 7. To produce substitution data only from the sequence pairs within a given PID range, type (if you don't provide any name for output, 'allmat.dat' will be used.)
164
+
165
+ <pre>
166
+ ~user $ egor -l TEMLIST --pidmin 60 --pidmax 80 --output 1
167
+ </pre>
168
+
169
+ h4. 8. To change the clustering level (default 60), type
170
+
171
+ <pre>
172
+ ~user $ egor -l TEMLIST --weight 80 --output 2
173
+ </pre>
174
+
175
+ h4. 9. Any positions masked with the character 'X' in any environmental feature will be excluded from the calculation of substitution counts.
30
176
 
31
177
 
32
178
  h2. Repository
@@ -39,14 +185,33 @@ or, You can fetch the source from
39
185
 
40
186
  * github: "http://github.com/semin/egor/tree/master":http://github.com/semin/egor/tree/master
41
187
 
42
- <pre>$ git clone git://github.com/semin/egor.git</pre>
188
+
189
+ h2. Contact
190
+
191
+ Comments are welcome, please send an email to me (seminlee at gmail dot com).
43
192
 
44
193
 
45
194
  h2. License
46
195
 
47
- This code is free to use under the terms of the MIT license.
196
+ (The MIT License)
48
197
 
198
+ Copyright (c) 2008 Semin Lee
49
199
 
50
- h2. Contact
200
+ Permission is hereby granted, free of charge, to any person obtaining
201
+ a copy of this software and associated documentation files (the
202
+ 'Software'), to deal in the Software without restriction, including
203
+ without limitation the rights to use, copy, modify, merge, publish,
204
+ distribute, sublicense, and/or sell copies of the Software, and to
205
+ permit persons to whom the Software is furnished to do so, subject to
206
+ the following conditions:
51
207
 
52
- Comments are welcome, please send an email to me (seminlee at gmail dot com).
208
+ The above copyright notice and this permission notice shall be
209
+ included in all copies or substantial portions of the Software.
210
+
211
+ THE SOFTWARE IS PROVIDED 'AS IS', WITHOUT WARRANTY OF ANY KIND,
212
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
213
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
214
+ IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
215
+ CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
216
+ TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
217
+ SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.