macroape 3.3.1 → 3.3.2

Sign up to get free protection for your applications and to get access to all the features.
@@ -114,25 +114,12 @@ begin
114
114
 
115
115
  pwm_second.reverse_complement! if reverse
116
116
 
117
- first_pwm_alignment = '.' * [-shift, 0].max + '>' * pwm_first.length
118
- second_pwm_alignment = '.' * [shift, 0].max + (orientation == :direct ? '>' : '<') * pwm_second.length
119
- overlap = [pwm_first.length + [-shift,0].max, pwm_second.length + [shift,0].max].min - shift.abs
120
- alignment_length = [first_pwm_alignment.length, second_pwm_alignment.length].max
121
- (first_pwm_alignment.length...alignment_length).each{|i| first_pwm_alignment[i] = '.'}
122
- (second_pwm_alignment.length...alignment_length).each{|i| second_pwm_alignment[i] = '.'}
123
-
124
- cmp = Macroape::PWMCompareAligned.new(pwm_first.left_augment([-shift,0].max),
125
- pwm_second.left_augment([shift,0].max))
126
-
117
+ cmp = Macroape::PWMCompareAligned.new(pwm_first, pwm_second, shift, orientation)
118
+
127
119
  first_threshold = pwm_first.threshold(pvalue)
128
120
  second_threshold = pwm_second.threshold(pvalue)
129
121
 
130
- info = cmp.jaccard(first_threshold, second_threshold).merge(
131
- text: "#{first_pwm_alignment}\n#{second_pwm_alignment}",
132
- shift: shift,
133
- orientation: orientation,
134
- overlap: overlap,
135
- alignment_length: alignment_length)
122
+ info = cmp.alignment_infos.merge( cmp.jaccard(first_threshold, second_threshold) )
136
123
 
137
124
  puts "#{info[:similarity]}\n#{info[:recognized_by_both]}\t#{info[:alignment_length]}\n#{info[:text]}\n#{info[:shift]}\t#{info[:orientation]}"
138
125
 
@@ -74,18 +74,23 @@ begin
74
74
  Dir.glob(File.join(folder,'*')) do |filename|
75
75
  STDERR.puts filename unless silent
76
76
  pwm = Bioinform::PWM.new(File.read(filename))
77
+ pwm.name ||= File.basename(filename, File.extname(filename))
78
+
79
+ # When support for one-file collections is introduced, a check that the motif has a name should be added here.
80
+ # If the name is missing, the motif should be skipped and the user should be told about it.
81
+ # Two command-line options should also be added: one to fail instead of skipping, and one to skip silently.
82
+
77
83
  info = {rough: {}, precise: {}}
78
- output = `ruby "#{File.join current_dir,'find_threshold.rb'}" #{filename} -p #{pvalues.join(' ')} -b #{background.join(' ')} -d #{rough_discretization}`.split("\n")
79
- output.each do |line|
80
- pvalue, threshold, real_pvalue = line.split.map(&:to_f)
81
- info[:rough][pvalue] = threshold
82
- end
84
+ pwm.background(background)
83
85
 
84
- output = `ruby "#{File.join current_dir,'find_threshold.rb'}" #{filename} -p #{pvalues.join(' ')} -b #{background.join(' ')} -d #{precise_discretization}`.split("\n")
85
- output.each do |line|
86
- pvalue, threshold, real_pvalue = line.split.map(&:to_f)
87
- info[:precise][pvalue] = threshold
86
+ pwm.discrete(rough_discretization).thresholds(*pvalues) do |pvalue, threshold, real_pvalue|
87
+ info[:rough][pvalue] = threshold / rough_discretization
88
88
  end
89
+
90
+ pwm.discrete(precise_discretization).thresholds(*pvalues) do |pvalue, threshold, real_pvalue|
91
+ info[:precise][pvalue] = threshold / precise_discretization
92
+ end
93
+
89
94
  collection.add_pwm(pwm, info)
90
95
  end
91
96
  File.open(output_file,'w') do |f|
@@ -94,7 +94,6 @@ begin
94
94
 
95
95
  similarities = {}
96
96
  precision_file_mode = {}
97
- unnamed_index = 0
98
97
 
99
98
  collection.pwms.each_key do |name|
100
99
  pwm = collection.pwms[name]
@@ -102,7 +101,6 @@ begin
102
101
  STDERR.puts pwm.name unless silent
103
102
  cmp = Macroape::PWMCompare.new(query_pwm_rough, pwm.background(collection.background).discrete(collection.rough_discretization))
104
103
  info = cmp.jaccard(threshold, pwm_info[:rough][pvalue] * collection.rough_discretization)
105
- name = pwm.name || "Unnamed #{unnamed_index += 1}"
106
104
  precision_file_mode[name] = :rough
107
105
 
108
106
  if precision_mode == :precise and info[:similarity] >= minimal_similarity
@@ -7,35 +7,20 @@ module Macroape
7
7
  end
8
8
 
9
9
  def jaccard(threshold_first, threshold_second)
10
- self.map_each_align do |align, alignment_info|
11
- align.jaccard(threshold_first, threshold_second).merge(alignment_info)
12
- end.max_by {|alignment_info| alignment_info[:similarity]}
10
+ self.map_each_alignment do |alignment|
11
+ alignment.alignment_infos.merge( alignment.jaccard(threshold_first, threshold_second) )
12
+ end.max_by {|alignment_infos| alignment_infos[:similarity] }
13
13
  end
14
-
15
-
16
- def each
14
+
15
+ def each_alignment
17
16
  second_rc = second.reverse_complement
18
17
  (-second.length..first.length).to_a.product([:direct,:revcomp]) do |shift, orientation|
19
- first_pwm_alignment = '.' * [-shift, 0].max + '>' * first.length
20
- second_pwm_alignment = '.' * [shift, 0].max + (orientation == :direct ? '>' : '<') * second.length
21
- overlap = [first.length + [-shift,0].max, second.length + [shift,0].max].min - shift.abs
22
- alignment_length = [first_pwm_alignment.length, second_pwm_alignment.length].max
23
- (first_pwm_alignment.length...alignment_length).each{|i| first_pwm_alignment[i] = '.'}
24
- (second_pwm_alignment.length...alignment_length).each{|i| second_pwm_alignment[i] = '.'}
25
-
26
- yield(PWMCompareAligned.new(first.left_augment([-shift,0].max),
27
- (orientation == :direct ? second : second_rc).left_augment([shift,0].max)),
28
- text: "#{first_pwm_alignment}\n#{second_pwm_alignment}",
29
- shift: shift,
30
- orientation: orientation,
31
- overlap: overlap,
32
- alignment_length: alignment_length
33
- )
18
+ yield PWMCompareAligned.new(first, (orientation == :direct ? second : second_rc), shift, orientation)
34
19
  end
35
20
  end
36
- include Enumerable
37
- alias :each_align :each
38
- alias :map_each_align :map
39
21
 
22
+ include Enumerable
23
+ alias_method :each, :each_alignment
24
+ alias_method :map_each_alignment, :map
40
25
  end
41
26
  end
@@ -2,16 +2,89 @@ require 'macroape/aligned_pair_intersection'
2
2
 
3
3
  module Macroape
4
4
  class PWMCompareAligned
5
- attr_reader :first, :second, :length
6
- def initialize(first, second)
5
+ attr_reader :first, :second, :length, :shift, :orientation, :unaligned_first, :unaligned_second
6
+ def initialize(first, second, shift, orientation)
7
+ @unaligned_first, @unaligned_second = first, second
8
+ @shift, @orientation = shift, orientation
9
+ if shift > 0
10
+ first, second = first, second.left_augment(shift)
11
+ else
12
+ first, second = first.left_augment(-shift), second
13
+ end
7
14
  @length = [first.length, second.length].max
8
15
  @first = first.right_augment(@length - first.length)
9
16
  @second = second.right_augment(@length - second.length)
10
17
  end
11
18
 
12
- #def discrete(rate)
13
- # PWMCompareAligned.new(first.discrete(rate), second.discrete(rate))
14
- #end
19
+ def direct?
20
+ orientation == :direct
21
+ end
22
+ def revcomp?
23
+ orientation == :revcomp
24
+ end
25
+
26
+ def overlap
27
+ length.times.count{|pos| first_overlaps?(pos) && second_overlaps?(pos) }
28
+ end
29
+
30
+ def first_pwm_alignment
31
+ length.times.map do |pos|
32
+ if first_overlaps?(pos)
33
+ '>'
34
+ else
35
+ '.'
36
+ end
37
+ end.join
38
+ end
39
+
40
+ def second_pwm_alignment
41
+ length.times.map do |pos|
42
+ if second_overlaps?(pos)
43
+ direct? ? '>' : '<'
44
+ else
45
+ '.'
46
+ end
47
+ end.join
48
+ end
49
+
50
+ def alignment_infos
51
+ {shift: shift,
52
+ orientation: orientation,
53
+ text: "#{first_pwm_alignment}\n#{second_pwm_alignment}",
54
+ overlap: overlap,
55
+ alignment_length: length}
56
+ end
57
+
58
+ def first_length
59
+ unaligned_first.length
60
+ end
61
+ def second_length
62
+ unaligned_second.length
63
+ end
64
+
65
+ # whether the first matrix overlaps the specified position
66
+ def first_overlaps?(pos)
67
+ return false unless pos >= 0 && pos < length
68
+ if shift > 0
69
+ pos < first_length
70
+ else
71
+ pos >= -shift && pos < -shift + first_length
72
+ end
73
+ end
74
+
75
+ def second_overlaps?(pos)
76
+ return false unless pos >= 0 && pos < length
77
+ if shift > 0
78
+ pos >= shift && pos < shift + second_length
79
+ else
80
+ pos < second_length
81
+ end
82
+ end
83
+
84
+ =begin
85
+ def discrete(rate)
86
+ PWMCompareAligned.new(first.discrete(rate), second.discrete(rate))
87
+ end
15
88
 
16
89
  def sort_pair_of_matrices_by(&block)
17
90
  mat = first.pwm.zip(second.pwm).sort_by(&block).transpose
@@ -26,6 +99,7 @@ module Macroape
26
99
  def permute_columns(permutation_index)
27
100
  PWMCompareAligned.new(first.permute(permutation_index), second.permute(permutation_index))
28
101
  end
102
+ =end
29
103
 
30
104
  def jaccard(first_threshold, second_threshold)
31
105
  f = first.counts_by_thresholds(first_threshold).first
@@ -56,6 +56,10 @@ module Bioinform
56
56
  scores
57
57
  end
58
58
 
59
+ def count_distribution
60
+ count_distribution_after_threshold(worst_score)
61
+ end
62
+
59
63
  def recalc_score_hash(scores, column, least_sufficient)
60
64
  new_scores = Hash.new(0)
61
65
  scores.each do |score, count|
@@ -1,3 +1,3 @@
1
1
  module Macroape
2
- VERSION = "3.3.1"
2
+ VERSION = "3.3.2"
3
3
  end
data/macroape.gemspec CHANGED
@@ -15,5 +15,5 @@ Gem::Specification.new do |gem|
15
15
  gem.require_paths = ["lib"]
16
16
  gem.version = Macroape::VERSION
17
17
 
18
- gem.add_dependency('bioinform', '>= 0.1.1')
18
+ gem.add_dependency('bioinform', '>= 0.1.2')
19
19
  end
@@ -1,4 +1,3 @@
1
- KLF4_f2
2
1
  0.30861857265872605 -2.254321000121579 0.13505703522674192 0.3285194224375633
3
2
  -1.227018967707036 -4.814127713368663 1.3059890687390967 -4.908681463544344
4
3
  -2.443469374521196 -4.648238485031404 1.3588686548279805 -4.441801801188402
@@ -5,7 +5,9 @@ require 'macroape'
5
5
  class TestPreprocessCollection < Test::Unit::TestCase
6
6
  def test_multipvalue_preproceessing
7
7
  system(Helpers.exec_cmd('preprocess_collection','test/data/test_collection -o test/data/test_collection.yaml.tmp -p 0.0005 0.0001 0.00005 --silent'))
8
- assert_equal YAML.load_file('test/data/test_collection.yaml'), YAML.load_file('test/data/test_collection.yaml.tmp')
8
+ # Use YAML.load(File.read()) rather than YAML.load_file(): in Ruby before 1.9.3-p194, YAML.load_file
9
+ # doesn't release the file descriptor immediately (if I understood the error correctly), so File.delete fails
10
+ assert_equal YAML.load(File.read('test/data/test_collection.yaml')), YAML.load(File.read('test/data/test_collection.yaml.tmp'))
9
11
  File.delete 'test/data/test_collection.yaml.tmp'
10
12
  end
11
13
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: macroape
3
3
  version: !ruby/object:Gem::Version
4
- version: 3.3.1
4
+ version: 3.3.2
5
5
  prerelease:
6
6
  platform: ruby
7
7
  authors:
@@ -9,7 +9,7 @@ authors:
9
9
  autorequire:
10
10
  bindir: bin
11
11
  cert_chain: []
12
- date: 2012-06-27 00:00:00.000000000 Z
12
+ date: 2012-07-07 00:00:00.000000000 Z
13
13
  dependencies:
14
14
  - !ruby/object:Gem::Dependency
15
15
  name: bioinform
@@ -18,7 +18,7 @@ dependencies:
18
18
  requirements:
19
19
  - - ! '>='
20
20
  - !ruby/object:Gem::Version
21
- version: 0.1.1
21
+ version: 0.1.2
22
22
  type: :runtime
23
23
  prerelease: false
24
24
  version_requirements: !ruby/object:Gem::Requirement
@@ -26,7 +26,7 @@ dependencies:
26
26
  requirements:
27
27
  - - ! '>='
28
28
  - !ruby/object:Gem::Version
29
- version: 0.1.1
29
+ version: 0.1.2
30
30
  description: Macroape is an abbreviation for MAtrix CompaRisOn by Approximate P-value
31
31
  Estimation. It's a bioinformatic tool for evaluating similarity measure and best
32
32
  alignment between a pair of Position Weight Matrices(PWM), finding thresholds by