semin-ulla 0.9.8 → 0.9.9
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/History.txt +5 -0
- data/README.rdoc +11 -9
- data/lib/ulla/cli.rb +47 -33
- data/lib/ulla.rb +1 -1
- data/ulla.gemspec +6 -6
- metadata +5 -4
data/History.txt
CHANGED
@@ -1,3 +1,8 @@
|
|
1
|
+
== 0.9.9 09/08/2009
|
2
|
+
|
3
|
+
* Changed default value for --classdef option from 'classdef.dat' to nil
|
4
|
+
When no definition file is provided, the --cys (-y) 2 and --nosmooth options are applied automatically
|
5
|
+
|
1
6
|
== 0.9.8 21/07/2009
|
2
7
|
|
3
8
|
* Fixed a bug in generating amino acids symbols when --cys 2 is set
|
data/README.rdoc
CHANGED
@@ -46,7 +46,8 @@ It's pretty much the same as Kenji's subst (http://www-cryst.bioc.cam.ac.uk/~ken
|
|
46
46
|
== Options
|
47
47
|
--tem-file (-f) FILE: a tem file
|
48
48
|
--tem-list (-l) FILE: a list for tem files
|
49
|
-
--classdef (-c) FILE: a file for the defintion of
|
49
|
+
--classdef (-c) FILE: a file for the definition of environmental classes
|
50
|
+
if no definition file is provided, the --cys (-y) 2 and --nosmooth options are automatically applied
|
50
51
|
--outfile (-o) FILE: output filename (default 'allmat.dat')
|
51
52
|
--weight (-w) INTEGER: clustering level (PID) for the BLOSUM-like weighting (default: 60)
|
52
53
|
--noweight: calculate substitution counts with no weights
|
@@ -144,27 +145,27 @@ It's pretty much the same as Kenji's subst (http://www-cryst.bioc.cam.ac.uk/~ken
|
|
144
145
|
sample2.tem
|
145
146
|
...
|
146
147
|
|
147
|
-
4. To produce substitution count matrices,
|
148
|
+
4. To produce substitution count matrices,
|
148
149
|
|
149
150
|
~user $ ulla -l TEMLIST --output 0 -o substcount.mat
|
150
151
|
|
151
|
-
5. To produce substitution probability matrices,
|
152
|
+
5. To produce substitution probability matrices,
|
152
153
|
|
153
154
|
~user $ ulla -l TEMLIST --output 1 -o substprob.mat
|
154
155
|
|
155
|
-
6. To produce log odds ratio matrices,
|
156
|
+
6. To produce log odds ratio matrices,
|
156
157
|
|
157
158
|
~user $ ulla -l TEMLIST --output 2 -o substlogo.mat
|
158
159
|
|
159
|
-
7. To produce substitution probability matrices
|
160
|
+
7. To produce substitution probability matrices from the sequence pairs within a certain PID range (if you don't provide any name for output, 'allmat.dat' will be used),
|
160
161
|
|
161
162
|
~user $ ulla -l TEMLIST --pidmin 60 --pidmax 80 --output 1
|
162
163
|
|
163
|
-
8. To change the clustering level (default 60) to PID 80,
|
164
|
+
8. To change the clustering level (default 60) to PID 80,
|
164
165
|
|
165
166
|
~user $ ulla -l TEMLIST --weight 80 --output 1
|
166
167
|
|
167
|
-
9. In case positions are masked with the character 'X' in any environmental features, all mutations from/to the position will be excluded from
|
168
|
+
9. In case positions are masked with the character 'X' in any environmental features, all mutations from/to the position will be excluded from substitution counts.
|
168
169
|
|
169
170
|
10. Then, it will produce a file containing all the matrices, which will look like the one below. For more details, please check these notes (http://www-cryst.bioc.cam.ac.uk/~kenji/subst/NOTES).
|
170
171
|
|
@@ -235,8 +236,6 @@ It's pretty much the same as Kenji's subst (http://www-cryst.bioc.cam.ac.uk/~ken
|
|
235
236
|
|
236
237
|
http://www-cryst.bioc.cam.ac.uk/~semin/images/myheatmaps.gif
|
237
238
|
|
238
|
-
== TODO
|
239
|
-
|
240
239
|
== Repository
|
241
240
|
|
242
241
|
You can download a pre-built RubyGems package from
|
@@ -247,6 +246,9 @@ or, You can fetch the source from
|
|
247
246
|
|
248
247
|
* github: http://github.com/semin/ulla/tree/master
|
249
248
|
|
249
|
+
== Reference
|
250
|
+
|
251
|
+
* {Lee S., Blundell T.L. (2009) Ulla: a program for calculating environment-specific amino acid substitution tables. Bioinformatics. 25(15):1976-1977; doi:10.1093/bioinformatics/btp300}[http://bioinformatics.oxfordjournals.org/cgi/content/full/25/15/1976]
|
250
252
|
|
251
253
|
== Contact
|
252
254
|
|
data/lib/ulla/cli.rb
CHANGED
@@ -23,8 +23,8 @@ module Ulla
|
|
23
23
|
# :call-seq:
|
24
24
|
# Ulla::CLI::print_usage
|
25
25
|
#
|
26
|
-
def print_usage
|
27
|
-
|
26
|
+
def print_usage(verbose=false)
|
27
|
+
usage = <<-USAGE
|
28
28
|
ulla: a program to calculate environment-specific amino acid substitution tables.
|
29
29
|
|
30
30
|
Usage:
|
@@ -32,10 +32,14 @@ Usage:
|
|
32
32
|
or
|
33
33
|
ulla [ options ] -f TEM-file -c CLASSDEF-file
|
34
34
|
|
35
|
+
USAGE
|
36
|
+
|
37
|
+
options = <<-OPTIONS
|
35
38
|
Options:
|
36
39
|
--tem-file (-f) FILE: a tem file
|
37
40
|
--tem-list (-l) FILE: a list for tem files
|
38
|
-
--classdef (-c) FILE: a file for the defintion of
|
41
|
+
--classdef (-c) FILE: a file for the defintion of environmental class
|
42
|
+
if no definition file provided, --cys (-y) 2 and --nosmooth options automatcially applied
|
39
43
|
--outfile (-o) FILE: output filename (default 'allmat.dat')
|
40
44
|
--weight (-w) INTEGER: clustering level (PID) for the BLOSUM-like weighting (default: 60)
|
41
45
|
--noweight: calculate substitution counts with no weights
|
@@ -83,7 +87,9 @@ Options:
|
|
83
87
|
--version: print version
|
84
88
|
--help (-h): show help
|
85
89
|
|
86
|
-
|
90
|
+
OPTIONS
|
91
|
+
|
92
|
+
puts (verbose ? usage + options : usage)
|
87
93
|
end
|
88
94
|
|
89
95
|
# Calculate PID between two sequences
|
@@ -157,7 +163,7 @@ Options:
|
|
157
163
|
$tem_file = nil
|
158
164
|
$environment = 0
|
159
165
|
$col_size = nil
|
160
|
-
$classdef =
|
166
|
+
$classdef = nil
|
161
167
|
$outfile = 'allmat.dat'
|
162
168
|
$outfh = nil # file hanfle for outfile
|
163
169
|
$output = 2 # default: log odds matrix
|
@@ -247,7 +253,7 @@ Options:
|
|
247
253
|
opts.each do |opt, arg|
|
248
254
|
case opt
|
249
255
|
when '--help'
|
250
|
-
print_usage
|
256
|
+
print_usage(true)
|
251
257
|
exit 0
|
252
258
|
when '--tem-list'
|
253
259
|
$tem_list = arg
|
@@ -382,9 +388,15 @@ Options:
|
|
382
388
|
# Reading Environment Class Definition File
|
383
389
|
#
|
384
390
|
|
385
|
-
# if
|
391
|
+
# if no class definition provided, set --cys (-y) option 2 and --nosmooth option true
|
392
|
+
if $classdef.nil?
|
393
|
+
$cys = 2
|
394
|
+
$nosmooth = true
|
395
|
+
end
|
396
|
+
|
397
|
+
# if --cys option is 2, we don't care about 'J' (for both Cysteine and Cystine)
|
386
398
|
if $cys == 2
|
387
|
-
$amino_acids
|
399
|
+
$amino_acids.delete('J')
|
388
400
|
end
|
389
401
|
|
390
402
|
# create an EnvironmentFeatureArray object for storing all environment
|
@@ -404,33 +416,35 @@ Options:
|
|
404
416
|
|
405
417
|
# read environment class definition file and store its entries into
|
406
418
|
# the hash prepared above
|
407
|
-
|
419
|
+
if !!$classdef
|
420
|
+
env_index = 1
|
408
421
|
|
409
|
-
|
410
|
-
|
411
|
-
|
412
|
-
next
|
413
|
-
elsif (env_ftr = line.split(/;/)).length == 5
|
414
|
-
$logger.info "An environment feature, #{line} detected."
|
415
|
-
if env_ftr[-1] == 'T'
|
416
|
-
# skip silenced environment feature
|
417
|
-
$logger.warn "The environment feature, #{line} silent."
|
422
|
+
IO.foreach($classdef) do |line|
|
423
|
+
line.chomp!
|
424
|
+
if line.start_with?('#') || line.blank?
|
418
425
|
next
|
426
|
+
elsif (env_ftr = line.split(/;/)).length == 5
|
427
|
+
$logger.info "An environment feature, #{line} detected."
|
428
|
+
if env_ftr[-1] == 'T'
|
429
|
+
# skip silenced environment feature
|
430
|
+
$logger.warn "The environment feature, #{line} silent."
|
431
|
+
next
|
432
|
+
end
|
433
|
+
if env_ftr[-2] == 'T'
|
434
|
+
$cst_features << env_index
|
435
|
+
$logger.warn "The environment feature, #{line} constrained."
|
436
|
+
end
|
437
|
+
$env_features << EnvironmentFeature.new(env_ftr[0],
|
438
|
+
env_ftr[1].split(''),
|
439
|
+
env_ftr[2].split(''),
|
440
|
+
env_ftr[3],
|
441
|
+
env_ftr[4])
|
442
|
+
env_index += 1
|
443
|
+
else
|
444
|
+
$logger.error "\"#{line}\" doesn't seem to be a proper format for " +
|
445
|
+
"an environment class definition."
|
446
|
+
exit 1
|
419
447
|
end
|
420
|
-
if env_ftr[-2] == 'T'
|
421
|
-
$cst_features << env_index
|
422
|
-
$logger.warn "The environment feature, #{line} constrained."
|
423
|
-
end
|
424
|
-
$env_features << EnvironmentFeature.new(env_ftr[0],
|
425
|
-
env_ftr[1].split(''),
|
426
|
-
env_ftr[2].split(''),
|
427
|
-
env_ftr[3],
|
428
|
-
env_ftr[4])
|
429
|
-
env_index += 1
|
430
|
-
else
|
431
|
-
$logger.error "\"#{line}\" doesn't seem to be a proper format for " +
|
432
|
-
"an environment class definition."
|
433
|
-
exit 1
|
434
448
|
end
|
435
449
|
end
|
436
450
|
|
@@ -447,7 +461,7 @@ Options:
|
|
447
461
|
# every environment class into the hash prepared above with the label
|
448
462
|
# as a key
|
449
463
|
$env_features.label_combinations.each_with_index do |ef1, i|
|
450
|
-
key1 = ef1.flatten.join
|
464
|
+
key1 = ef1.respond_to?(:flatten) ? ef1.flatten.join : ef1
|
451
465
|
$ext_amino_acids << key1
|
452
466
|
|
453
467
|
if $environment == 0
|
data/lib/ulla.rb
CHANGED
data/ulla.gemspec
CHANGED
@@ -2,11 +2,11 @@
|
|
2
2
|
|
3
3
|
Gem::Specification.new do |s|
|
4
4
|
s.name = %q{ulla}
|
5
|
-
s.version = "0.9.
|
5
|
+
s.version = "0.9.9"
|
6
6
|
|
7
7
|
s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
|
8
8
|
s.authors = ["Semin Lee"]
|
9
|
-
s.date = %q{2009-
|
9
|
+
s.date = %q{2009-08-09}
|
10
10
|
s.default_executable = %q{ulla}
|
11
11
|
s.description = %q{'ulla' is a program for calculating environment-specific substitution tables from user providing environmental class definitions and sequence alignments with the annotations of the environment classes.}
|
12
12
|
s.email = ["seminlee@gmail.com"]
|
@@ -18,7 +18,7 @@ Gem::Specification.new do |s|
|
|
18
18
|
s.rdoc_options = ["--main", "README.rdoc"]
|
19
19
|
s.require_paths = ["lib"]
|
20
20
|
s.rubyforge_project = %q{ulla}
|
21
|
-
s.rubygems_version = %q{1.3.
|
21
|
+
s.rubygems_version = %q{1.3.5}
|
22
22
|
s.summary = %q{'ulla' is a program for calculating environment-specific substitution tables from user providing environmental class definitions and sequence alignments with the annotations of the environment classes.}
|
23
23
|
s.test_files = ["test/test_math_extensions.rb", "test/test_narray_extensions.rb", "test/test_nmatrix_extensions.rb", "test/test_string_extensions.rb", "test/ulla/test_cli.rb", "test/ulla/test_environment_class_hash.rb", "test/ulla/test_environment_feature.rb", "test/test_helper.rb", "test/test_ulla.rb"]
|
24
24
|
|
@@ -31,19 +31,19 @@ Gem::Specification.new do |s|
|
|
31
31
|
s.add_runtime_dependency(%q<bio>, [">= 1.2.1"])
|
32
32
|
s.add_runtime_dependency(%q<facets>, [">= 2.4.5"])
|
33
33
|
s.add_runtime_dependency(%q<rmagick>, [">= 2.9.1"])
|
34
|
-
s.add_development_dependency(%q<hoe>, [">= 2.3.
|
34
|
+
s.add_development_dependency(%q<hoe>, [">= 2.3.3"])
|
35
35
|
else
|
36
36
|
s.add_dependency(%q<narray>, [">= 0.5.9.5"])
|
37
37
|
s.add_dependency(%q<bio>, [">= 1.2.1"])
|
38
38
|
s.add_dependency(%q<facets>, [">= 2.4.5"])
|
39
39
|
s.add_dependency(%q<rmagick>, [">= 2.9.1"])
|
40
|
-
s.add_dependency(%q<hoe>, [">= 2.3.
|
40
|
+
s.add_dependency(%q<hoe>, [">= 2.3.3"])
|
41
41
|
end
|
42
42
|
else
|
43
43
|
s.add_dependency(%q<narray>, [">= 0.5.9.5"])
|
44
44
|
s.add_dependency(%q<bio>, [">= 1.2.1"])
|
45
45
|
s.add_dependency(%q<facets>, [">= 2.4.5"])
|
46
46
|
s.add_dependency(%q<rmagick>, [">= 2.9.1"])
|
47
|
-
s.add_dependency(%q<hoe>, [">= 2.3.
|
47
|
+
s.add_dependency(%q<hoe>, [">= 2.3.3"])
|
48
48
|
end
|
49
49
|
end
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: semin-ulla
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.9.
|
4
|
+
version: 0.9.9
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Semin Lee
|
@@ -9,7 +9,7 @@ autorequire:
|
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
11
|
|
12
|
-
date: 2009-
|
12
|
+
date: 2009-08-09 00:00:00 -07:00
|
13
13
|
default_executable: ulla
|
14
14
|
dependencies:
|
15
15
|
- !ruby/object:Gem::Dependency
|
@@ -60,7 +60,7 @@ dependencies:
|
|
60
60
|
requirements:
|
61
61
|
- - ">="
|
62
62
|
- !ruby/object:Gem::Version
|
63
|
-
version: 2.3.
|
63
|
+
version: 2.3.3
|
64
64
|
version:
|
65
65
|
description: "'ulla' is a program for calculating environment-specific substitution tables from user providing environmental class definitions and sequence alignments with the annotations of the environment classes."
|
66
66
|
email:
|
@@ -115,6 +115,7 @@ files:
|
|
115
115
|
- website/template.html.erb
|
116
116
|
has_rdoc: false
|
117
117
|
homepage: http://www-cryst.bioc.cam.ac.uk/ulla
|
118
|
+
licenses:
|
118
119
|
post_install_message: PostInstall.txt
|
119
120
|
rdoc_options:
|
120
121
|
- --main
|
@@ -136,7 +137,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
136
137
|
requirements: []
|
137
138
|
|
138
139
|
rubyforge_project: ulla
|
139
|
-
rubygems_version: 1.
|
140
|
+
rubygems_version: 1.3.5
|
140
141
|
signing_key:
|
141
142
|
specification_version: 3
|
142
143
|
summary: "'ulla' is a program for calculating environment-specific substitution tables from user providing environmental class definitions and sequence alignments with the annotations of the environment classes."
|