macroape 3.3.4 → 3.3.5

Sign up to get free protection for your applications and to get access to all the features.
data/README.md CHANGED
@@ -20,7 +20,7 @@ Or install it yourself as:
20
20
  For more information, read the manual at https://docs.google.com/document/pub?id=1_jsxhMNzMzy4d2d_byAd3n6Szg5gEcqG_Sf7w9tEqWw (not the latest version, but a comprehensive description of the approach)
21
21
 
22
22
  ## Basic usage as a command-line tool
23
- MacroAPE have 6 command line tools:
23
+ MacroAPE has 7 command line tools:
24
24
 
25
25
  ### Tools for calculating thresholds and pvalues:
26
26
  * find_threshold \<PWM file\> [-p \<pvalue\> (by default: 0.0005)]
@@ -34,8 +34,12 @@ Or install it yourself as:
34
34
  * preprocess_collection \<folder with motif files\> [-o \<collection output file\>]
35
35
  * scan_collection \<query PWM file\> \<collection file\>
36
36
 
37
+ ### Tool for finding the mutual alignment of several motifs relative to the first (leader) motif. It is designed to be used with sequence_logo to draw logos of clusters
38
+ * align_motifs \<pwm_leader\> \<pwm_2\> \<pwm_3\> ...
39
+
37
40
  Also you can use -h option to print help for a tool in console.
38
41
  There are lots of different command line options. The most useful option is -d <discretization=1|10|100|1000>. You can vary the precision/speed rate by specifying a discretization. For more information, look through the manual.
42
+ Some of the tools can also process PCMs in addition to PWMs.
39
43
 
40
44
  ## Basic usage in your code
41
45
  require 'macroape'
@@ -51,7 +55,7 @@ Or install it yourself as:
51
55
  similarity_info = cmp.jaccard(first_threshold, second_threshold)
52
56
  puts "Jaccard similarity: #{similarity_info[:similarity]}"
53
57
 
54
- For more details look a source code of utilities in lib/exec/ folder
58
+ For more details, look at the source code of the utilities in the lib/macroape/cli/ folder
55
59
 
56
60
  ## Contributing
57
61
 
data/TODO.txt CHANGED
@@ -1,6 +1,3 @@
1
- Absolutely necessary:
2
- (already work in preprocess_colleсtion and align_motifs) Make it available to load PCM files with (it should be first preprocessed to PWMs in a standardized way) -- may be it's better to use pipeline
3
-
4
1
  Specs and tests:
5
2
  create spec on use of MaxHashSize, MaxHashSizeDouble
6
3
  create spec for testing case when {real_pvalue == 0, threshold == best_score + 1}
@@ -17,5 +14,5 @@ Usability issues:
17
14
 
18
15
  remove .stdin placeholder. Use tty? method instead
19
16
 
20
- use OptionParser (??? can OptionParser get stub ARGV ???)
17
+ use OptionParser or docopt
21
18
  make options more uniform so that some of them are reusable (and the question: can I apply two option parsers consecutively?)
@@ -44,7 +44,8 @@ module Macroape
44
44
  second_background = [1,1,1,1]
45
45
  max_hash_size = 1000000
46
46
  max_pair_hash_size = 1000
47
-
47
+
48
+ data_model = argv.delete('--pcm') ? Bioinform::PCM : Bioinform::PWM
48
49
 
49
50
  first_file = argv.shift
50
51
  second_file = argv.shift
@@ -93,18 +94,20 @@ module Macroape
93
94
  parser = Bioinform::StringParser.new($stdin.read) if first_file == '.stdin' || second_file == '.stdin'
94
95
 
95
96
  if first_file == '.stdin'
96
- pwm_first = Bioinform::PWM.new( parser.parse )
97
+ input_first = parser.parse
97
98
  else
98
99
  raise "Error! File #{first_file} don't exist" unless File.exist?(first_file)
99
- pwm_first = Bioinform::PWM.new(File.read(first_file))
100
+ input_first = File.read(first_file)
100
101
  end
102
+ pwm_first = data_model.new(input_first).to_pwm
101
103
 
102
104
  if second_file == '.stdin'
103
- pwm_second = Bioinform::PWM.new( parser.parse )
105
+ input_second = parser.parse
104
106
  else
105
107
  raise "Error! File #{second_file} don't exist" unless File.exist?(second_file)
106
- pwm_second = Bioinform::PWM.new(File.read(second_file))
108
+ input_second = File.read(second_file)
107
109
  end
110
+ pwm_second = data_model.new(input_second).to_pwm
108
111
 
109
112
  pwm_first.background!(first_background).max_hash_size!(max_hash_size).discrete!(discretization)
110
113
  pwm_second.background!(second_background).max_hash_size!(max_hash_size).discrete!(discretization)
@@ -47,7 +47,7 @@ module Macroape
47
47
  max_hash_size = 1000000
48
48
  max_pair_hash_size = 1000
49
49
 
50
-
50
+ data_model = argv.delete('--pcm') ? Bioinform::PCM : Bioinform::PWM
51
51
  first_file = argv.shift
52
52
  second_file = argv.shift
53
53
  raise "You'd specify two input sources (each is filename or .stdin)" unless first_file and second_file
@@ -76,18 +76,20 @@ module Macroape
76
76
  parser = Bioinform::StringParser.new($stdin.read) if first_file == '.stdin' || second_file == '.stdin'
77
77
 
78
78
  if first_file == '.stdin'
79
- pwm_first = Bioinform::PWM.new( parser.parse )
79
+ input_first = parser.parse
80
80
  else
81
81
  raise "Error! File #{first_file} don't exist" unless File.exist?(first_file)
82
- pwm_first = Bioinform::PWM.new(File.read(first_file))
82
+ input_first = File.read(first_file)
83
83
  end
84
+ pwm_first = data_model.new(input_first).to_pwm
84
85
 
85
86
  if second_file == '.stdin'
86
- pwm_second = Bioinform::PWM.new( parser.parse )
87
+ input_second = parser.parse
87
88
  else
88
89
  raise "Error! File #{second_file} don't exist" unless File.exist?(second_file)
89
- pwm_second = Bioinform::PWM.new(File.read(second_file))
90
+ input_second = File.read(second_file)
90
91
  end
92
+ pwm_second = data_model.new(input_second).to_pwm
91
93
 
92
94
  pwm_first.background!(first_background).max_hash_size!(max_hash_size).discrete!(discretization)
93
95
  pwm_second.background!(second_background).max_hash_size!(max_hash_size).discrete!(discretization)
@@ -41,6 +41,7 @@ module Macroape
41
41
  thresholds = []
42
42
  max_hash_size = 1000000
43
43
 
44
+ data_model = argv.delete('--pcm') ? Bioinform::PCM : Bioinform::PWM
44
45
  filename = argv.shift
45
46
 
46
47
  loop do
@@ -68,11 +69,12 @@ module Macroape
68
69
 
69
70
 
70
71
  if filename == '.stdin'
71
- pwm = Bioinform::PWM.new( $stdin.read )
72
+ input = $stdin.read
72
73
  else
73
74
  raise "Error! File #{filename} doesn't exist" unless File.exist?(filename)
74
- pwm = Bioinform::PWM.new( File.read(filename) )
75
+ input = File.read(filename)
75
76
  end
77
+ pwm = data_model.new(input).to_pwm
76
78
  pwm.background!(background).max_hash_size!(max_hash_size).discrete!(discretization)
77
79
 
78
80
  counts = pwm.counts_by_thresholds(* thresholds.map{|count| count * discretization})
@@ -36,6 +36,7 @@ module Macroape
36
36
  default_pvalues = [0.0005]
37
37
  discretization = 10000
38
38
  max_hash_size = 1000000
39
+ data_model = argv.delete('--pcm') ? Bioinform::PCM : Bioinform::PWM
39
40
 
40
41
  filename = argv.shift
41
42
  raise "No input. You'd specify input source: filename or .stdin" unless filename
@@ -63,12 +64,12 @@ module Macroape
63
64
  pvalues = default_pvalues if pvalues.empty?
64
65
 
65
66
  if filename == '.stdin'
66
- pwm = Bioinform::PWM.new( $stdin.read )
67
+ input = $stdin.read
67
68
  else
68
69
  raise "Error! File #{filename} doesn't exist" unless File.exist?(filename)
69
- pwm = Bioinform::PWM.new( File.read(filename) )
70
+ input = File.read(filename)
70
71
  end
71
-
72
+ pwm = data_model.new(input).to_pwm
72
73
  pwm.background!(background).max_hash_size!(max_hash_size).discrete!(discretization)
73
74
 
74
75
  pwm.thresholds(*pvalues) do |pvalue, threshold, real_pvalue|
@@ -74,7 +74,7 @@ module Macroape
74
74
  collection = Macroape::Collection.new(rough_discretization, precise_discretization, background, pvalues)
75
75
 
76
76
  if File.directory?(data_source)
77
- motifs = Dir.glob(File.join(data_source,'*')).map do |filename|
77
+ motifs = Dir.glob(File.join(data_source,'*')).map do |filename|
78
78
  pwm = data_model.new(File.read(filename))
79
79
  pwm.name ||= File.basename(filename, File.extname(filename))
80
80
  pwm
@@ -36,6 +36,7 @@ module Macroape
36
36
  exit
37
37
  end
38
38
 
39
+ data_model = argv.delete('--pcm') ? Bioinform::PCM : Bioinform::PWM
39
40
  filename = argv.shift
40
41
  collection_file = argv.shift
41
42
  raise "No input. You'd specify input source for pat: filename or .stdin" unless filename
@@ -82,12 +83,13 @@ module Macroape
82
83
  raise "Thresholds for pvalue #{pvalue} aren't presented in collection (#{collection.pvalues.join(', ')}). Use one of listed pvalues or recalculate the collection with needed pvalue" unless collection.pvalues.include? pvalue
83
84
 
84
85
  if filename == '.stdin'
85
- query_pwm = Bioinform::PWM.new( $stdin.read )
86
+ query_input = $stdin.read
86
87
  else
87
88
  raise "Error! File #{filename} doesn't exist" unless File.exist?(filename)
88
- query_pwm = Bioinform::PWM.new(File.read(filename))
89
+ query_input = File.read(filename)
89
90
  end
90
91
 
92
+ query_pwm = data_model.new(query_input).to_pwm
91
93
  query_pwm.background(background_query).max_hash_size(max_hash_size)
92
94
 
93
95
  query_pwm_rough = query_pwm.discrete(collection.rough_discretization)
@@ -1,3 +1,3 @@
1
1
  module Macroape
2
- VERSION = "3.3.4"
2
+ VERSION = "3.3.5"
3
3
  end
@@ -1,6 +1,10 @@
1
1
  require 'test_helper'
2
2
 
3
3
  class TestEvalAlignment < Test::Unit::TestCase
4
+ def test_process_pcm_files
5
+ assert_equal "0.2420758234928527\n779.0\t11\n.>>>>>>>>>>\n>>>>>>>>>>>\n-1\tdirect\n", Helpers.eval_alignment_output('test/data/KLF4_f2.pcm test/data/SP1_f1.pcm -1 direct --pcm')
6
+ end
7
+
4
8
  def test_process_at_optimal_alignment
5
9
  assert_equal "0.2420758234928527\n779.0\t11\n.>>>>>>>>>>\n>>>>>>>>>>>\n-1\tdirect\n", Helpers.eval_alignment_output('test/data/KLF4_f2.pat test/data/SP1_f1.pat -1 direct')
6
10
  end
@@ -1,6 +1,9 @@
1
1
  require 'test_helper'
2
2
 
3
3
  class TestEvalSimilarity < Test::Unit::TestCase
4
+ def test_process_pair_of_pcms
5
+ assert_equal "0.2420758234928527\n779.0\t11\n.>>>>>>>>>>\n>>>>>>>>>>>\n-1\tdirect\n", Helpers.eval_similarity_output('test/data/KLF4_f2.pcm test/data/SP1_f1.pcm --pcm')
6
+ end
4
7
  def test_process_pair_of_pwms
5
8
  assert_equal "0.2420758234928527\n779.0\t11\n.>>>>>>>>>>\n>>>>>>>>>>>\n-1\tdirect\n", Helpers.eval_similarity_output('test/data/KLF4_f2.pat test/data/SP1_f1.pat')
6
9
  end
@@ -1,6 +1,9 @@
1
1
  require 'test_helper'
2
2
 
3
3
  class FindPvalueTest < Test::Unit::TestCase
4
+ def test_process_pcm
5
+ assert_equal "4.1719\t1048.0\t0.00099945068359375\n", Helpers.find_pvalue_output('test/data/KLF4_f2.pcm 4.1719 --pcm')
6
+ end
4
7
  def test_process_one_threshold
5
8
  assert_equal "4.1719\t1048.0\t0.00099945068359375\n", Helpers.find_pvalue_output('test/data/KLF4_f2.pat 4.1719')
6
9
  end
@@ -10,6 +10,11 @@ class FindThresholdTest < Test::Unit::TestCase
10
10
  }
11
11
  assert_equal pvalues, ['0.0005', '0.001']
12
12
  end
13
+ def test_process_pcm
14
+ pvalue, threshold, real_pvalue = Helpers.find_threshold_output('test/data/KLF4_f2.pcm -p 0.001 --pcm').strip.split("\t")
15
+ assert_equal '0.001', pvalue
16
+ assert_equal Helpers.obtain_pvalue_by_threshold("test/data/KLF4_f2.pat #{threshold}"), real_pvalue
17
+ end
13
18
  def test_process_one_pvalue
14
19
  pvalue, threshold, real_pvalue = Helpers.find_threshold_output('test/data/KLF4_f2.pat -p 0.001').strip.split("\t")
15
20
  assert_equal '0.001', pvalue
@@ -1,6 +1,10 @@
1
1
  require 'test_helper'
2
2
 
3
3
  class TestScanCollection < Test::Unit::TestCase
4
+ def test_scan_pcm
5
+ assert_equal File.read('test/data/KLF4_f2_scan_results_default_cutoff.txt').gsub("\r\n", "\n"),
6
+ Helpers.scan_collection_output('test/data/KLF4_f2.pcm test/data/test_collection.yaml --silent --pcm').gsub("\r\n","\n")
7
+ end
4
8
  def test_scan_default_cutoff
5
9
  assert_equal File.read('test/data/KLF4_f2_scan_results_default_cutoff.txt').gsub("\r\n", "\n"),
6
10
  Helpers.scan_collection_output('test/data/KLF4_f2.pat test/data/test_collection.yaml --silent').gsub("\r\n","\n")
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: macroape
3
3
  version: !ruby/object:Gem::Version
4
- version: 3.3.4
4
+ version: 3.3.5
5
5
  prerelease:
6
6
  platform: ruby
7
7
  authors:
@@ -9,7 +9,7 @@ authors:
9
9
  autorequire:
10
10
  bindir: bin
11
11
  cert_chain: []
12
- date: 2012-09-01 00:00:00.000000000 Z
12
+ date: 2012-09-04 00:00:00.000000000 Z
13
13
  dependencies:
14
14
  - !ruby/object:Gem::Dependency
15
15
  name: bioinform