macroape 3.3.1 → 3.3.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -114,25 +114,12 @@ begin
114
114
 
115
115
  pwm_second.reverse_complement! if reverse
116
116
 
117
- first_pwm_alignment = '.' * [-shift, 0].max + '>' * pwm_first.length
118
- second_pwm_alignment = '.' * [shift, 0].max + (orientation == :direct ? '>' : '<') * pwm_second.length
119
- overlap = [pwm_first.length + [-shift,0].max, pwm_second.length + [shift,0].max].min - shift.abs
120
- alignment_length = [first_pwm_alignment.length, second_pwm_alignment.length].max
121
- (first_pwm_alignment.length...alignment_length).each{|i| first_pwm_alignment[i] = '.'}
122
- (second_pwm_alignment.length...alignment_length).each{|i| second_pwm_alignment[i] = '.'}
123
-
124
- cmp = Macroape::PWMCompareAligned.new(pwm_first.left_augment([-shift,0].max),
125
- pwm_second.left_augment([shift,0].max))
126
-
117
+ cmp = Macroape::PWMCompareAligned.new(pwm_first, pwm_second, shift, orientation)
118
+
127
119
  first_threshold = pwm_first.threshold(pvalue)
128
120
  second_threshold = pwm_second.threshold(pvalue)
129
121
 
130
- info = cmp.jaccard(first_threshold, second_threshold).merge(
131
- text: "#{first_pwm_alignment}\n#{second_pwm_alignment}",
132
- shift: shift,
133
- orientation: orientation,
134
- overlap: overlap,
135
- alignment_length: alignment_length)
122
+ info = cmp.alignment_infos.merge( cmp.jaccard(first_threshold, second_threshold) )
136
123
 
137
124
  puts "#{info[:similarity]}\n#{info[:recognized_by_both]}\t#{info[:alignment_length]}\n#{info[:text]}\n#{info[:shift]}\t#{info[:orientation]}"
138
125
 
@@ -74,18 +74,23 @@ begin
74
74
  Dir.glob(File.join(folder,'*')) do |filename|
75
75
  STDERR.puts filename unless silent
76
76
  pwm = Bioinform::PWM.new(File.read(filename))
77
+ pwm.name ||= File.basename(filename, File.extname(filename))
78
+
79
+ # When support for single-file collections is introduced, this is the place to check whether the name exists.
80
+ # Otherwise the motif should be skipped and the user should be told about it.
81
+ # Two command-line options should also be added: one to fail on skipping and one to skip silently.
82
+
77
83
  info = {rough: {}, precise: {}}
78
- output = `ruby "#{File.join current_dir,'find_threshold.rb'}" #{filename} -p #{pvalues.join(' ')} -b #{background.join(' ')} -d #{rough_discretization}`.split("\n")
79
- output.each do |line|
80
- pvalue, threshold, real_pvalue = line.split.map(&:to_f)
81
- info[:rough][pvalue] = threshold
82
- end
84
+ pwm.background(background)
83
85
 
84
- output = `ruby "#{File.join current_dir,'find_threshold.rb'}" #{filename} -p #{pvalues.join(' ')} -b #{background.join(' ')} -d #{precise_discretization}`.split("\n")
85
- output.each do |line|
86
- pvalue, threshold, real_pvalue = line.split.map(&:to_f)
87
- info[:precise][pvalue] = threshold
86
+ pwm.discrete(rough_discretization).thresholds(*pvalues) do |pvalue, threshold, real_pvalue|
87
+ info[:rough][pvalue] = threshold / rough_discretization
88
88
  end
89
+
90
+ pwm.discrete(precise_discretization).thresholds(*pvalues) do |pvalue, threshold, real_pvalue|
91
+ info[:precise][pvalue] = threshold / precise_discretization
92
+ end
93
+
89
94
  collection.add_pwm(pwm, info)
90
95
  end
91
96
  File.open(output_file,'w') do |f|
@@ -94,7 +94,6 @@ begin
94
94
 
95
95
  similarities = {}
96
96
  precision_file_mode = {}
97
- unnamed_index = 0
98
97
 
99
98
  collection.pwms.each_key do |name|
100
99
  pwm = collection.pwms[name]
@@ -102,7 +101,6 @@ begin
102
101
  STDERR.puts pwm.name unless silent
103
102
  cmp = Macroape::PWMCompare.new(query_pwm_rough, pwm.background(collection.background).discrete(collection.rough_discretization))
104
103
  info = cmp.jaccard(threshold, pwm_info[:rough][pvalue] * collection.rough_discretization)
105
- name = pwm.name || "Unnamed #{unnamed_index += 1}"
106
104
  precision_file_mode[name] = :rough
107
105
 
108
106
  if precision_mode == :precise and info[:similarity] >= minimal_similarity
@@ -7,35 +7,20 @@ module Macroape
7
7
  end
8
8
 
9
9
  def jaccard(threshold_first, threshold_second)
10
- self.map_each_align do |align, alignment_info|
11
- align.jaccard(threshold_first, threshold_second).merge(alignment_info)
12
- end.max_by {|alignment_info| alignment_info[:similarity]}
10
+ self.map_each_alignment do |alignment|
11
+ alignment.alignment_infos.merge( alignment.jaccard(threshold_first, threshold_second) )
12
+ end.max_by {|alignment_infos| alignment_infos[:similarity] }
13
13
  end
14
-
15
-
16
- def each
14
+
15
+ def each_alignment
17
16
  second_rc = second.reverse_complement
18
17
  (-second.length..first.length).to_a.product([:direct,:revcomp]) do |shift, orientation|
19
- first_pwm_alignment = '.' * [-shift, 0].max + '>' * first.length
20
- second_pwm_alignment = '.' * [shift, 0].max + (orientation == :direct ? '>' : '<') * second.length
21
- overlap = [first.length + [-shift,0].max, second.length + [shift,0].max].min - shift.abs
22
- alignment_length = [first_pwm_alignment.length, second_pwm_alignment.length].max
23
- (first_pwm_alignment.length...alignment_length).each{|i| first_pwm_alignment[i] = '.'}
24
- (second_pwm_alignment.length...alignment_length).each{|i| second_pwm_alignment[i] = '.'}
25
-
26
- yield(PWMCompareAligned.new(first.left_augment([-shift,0].max),
27
- (orientation == :direct ? second : second_rc).left_augment([shift,0].max)),
28
- text: "#{first_pwm_alignment}\n#{second_pwm_alignment}",
29
- shift: shift,
30
- orientation: orientation,
31
- overlap: overlap,
32
- alignment_length: alignment_length
33
- )
18
+ yield PWMCompareAligned.new(first, (orientation == :direct ? second : second_rc), shift, orientation)
34
19
  end
35
20
  end
36
- include Enumerable
37
- alias :each_align :each
38
- alias :map_each_align :map
39
21
 
22
+ include Enumerable
23
+ alias_method :each, :each_alignment
24
+ alias_method :map_each_alignment, :map
40
25
  end
41
26
  end
@@ -2,16 +2,89 @@ require 'macroape/aligned_pair_intersection'
2
2
 
3
3
  module Macroape
4
4
  class PWMCompareAligned
5
- attr_reader :first, :second, :length
6
- def initialize(first, second)
5
+ attr_reader :first, :second, :length, :shift, :orientation, :unaligned_first, :unaligned_second
6
+ def initialize(first, second, shift, orientation)
7
+ @unaligned_first, @unaligned_second = first, second
8
+ @shift, @orientation = shift, orientation
9
+ if shift > 0
10
+ first, second = first, second.left_augment(shift)
11
+ else
12
+ first, second = first.left_augment(-shift), second
13
+ end
7
14
  @length = [first.length, second.length].max
8
15
  @first = first.right_augment(@length - first.length)
9
16
  @second = second.right_augment(@length - second.length)
10
17
  end
11
18
 
12
- #def discrete(rate)
13
- # PWMCompareAligned.new(first.discrete(rate), second.discrete(rate))
14
- #end
19
+ def direct?
20
+ orientation == :direct
21
+ end
22
+ def revcomp?
23
+ orientation == :revcomp
24
+ end
25
+
26
+ def overlap
27
+ length.times.count{|pos| first_overlaps?(pos) && second_overlaps?(pos) }
28
+ end
29
+
30
+ def first_pwm_alignment
31
+ length.times.map do |pos|
32
+ if first_overlaps?(pos)
33
+ '>'
34
+ else
35
+ '.'
36
+ end
37
+ end.join
38
+ end
39
+
40
+ def second_pwm_alignment
41
+ length.times.map do |pos|
42
+ if second_overlaps?(pos)
43
+ direct? ? '>' : '<'
44
+ else
45
+ '.'
46
+ end
47
+ end.join
48
+ end
49
+
50
+ def alignment_infos
51
+ {shift: shift,
52
+ orientation: orientation,
53
+ text: "#{first_pwm_alignment}\n#{second_pwm_alignment}",
54
+ overlap: overlap,
55
+ alignment_length: length}
56
+ end
57
+
58
+ def first_length
59
+ unaligned_first.length
60
+ end
61
+ def second_length
62
+ unaligned_second.length
63
+ end
64
+
65
+ # whether first matrix overlap specified position
66
+ def first_overlaps?(pos)
67
+ return false unless pos >= 0 && pos < length
68
+ if shift > 0
69
+ pos < first_length
70
+ else
71
+ pos >= -shift && pos < -shift + first_length
72
+ end
73
+ end
74
+
75
+ def second_overlaps?(pos)
76
+ return false unless pos >= 0 && pos < length
77
+ if shift > 0
78
+ pos >= shift && pos < shift + second_length
79
+ else
80
+ pos < second_length
81
+ end
82
+ end
83
+
84
+ =begin
85
+ def discrete(rate)
86
+ PWMCompareAligned.new(first.discrete(rate), second.discrete(rate))
87
+ end
15
88
 
16
89
  def sort_pair_of_matrices_by(&block)
17
90
  mat = first.pwm.zip(second.pwm).sort_by(&block).transpose
@@ -26,6 +99,7 @@ module Macroape
26
99
  def permute_columns(permutation_index)
27
100
  PWMCompareAligned.new(first.permute(permutation_index), second.permute(permutation_index))
28
101
  end
102
+ =end
29
103
 
30
104
  def jaccard(first_threshold, second_threshold)
31
105
  f = first.counts_by_thresholds(first_threshold).first
@@ -56,6 +56,10 @@ module Bioinform
56
56
  scores
57
57
  end
58
58
 
59
+ def count_distribution
60
+ count_distribution_after_threshold(worst_score)
61
+ end
62
+
59
63
  def recalc_score_hash(scores, column, least_sufficient)
60
64
  new_scores = Hash.new(0)
61
65
  scores.each do |score, count|
@@ -1,3 +1,3 @@
1
1
  module Macroape
2
- VERSION = "3.3.1"
2
+ VERSION = "3.3.2"
3
3
  end
data/macroape.gemspec CHANGED
@@ -15,5 +15,5 @@ Gem::Specification.new do |gem|
15
15
  gem.require_paths = ["lib"]
16
16
  gem.version = Macroape::VERSION
17
17
 
18
- gem.add_dependency('bioinform', '>= 0.1.1')
18
+ gem.add_dependency('bioinform', '>= 0.1.2')
19
19
  end
@@ -1,4 +1,3 @@
1
- KLF4_f2
2
1
  0.30861857265872605 -2.254321000121579 0.13505703522674192 0.3285194224375633
3
2
  -1.227018967707036 -4.814127713368663 1.3059890687390967 -4.908681463544344
4
3
  -2.443469374521196 -4.648238485031404 1.3588686548279805 -4.441801801188402
@@ -5,7 +5,9 @@ require 'macroape'
5
5
  class TestPreprocessCollection < Test::Unit::TestCase
6
6
  def test_multipvalue_preproceessing
7
7
  system(Helpers.exec_cmd('preprocess_collection','test/data/test_collection -o test/data/test_collection.yaml.tmp -p 0.0005 0.0001 0.00005 --silent'))
8
- assert_equal YAML.load_file('test/data/test_collection.yaml'), YAML.load_file('test/data/test_collection.yaml.tmp')
8
+ # Use YAML.load(File.read()) rather than YAML.load_file(): in Ruby before 1.9.3-p194
9
+ # load_file doesn't immediately release the file descriptor (if I understood the error correctly), so File.delete fails
10
+ assert_equal YAML.load(File.read('test/data/test_collection.yaml')), YAML.load(File.read('test/data/test_collection.yaml.tmp'))
9
11
  File.delete 'test/data/test_collection.yaml.tmp'
10
12
  end
11
13
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: macroape
3
3
  version: !ruby/object:Gem::Version
4
- version: 3.3.1
4
+ version: 3.3.2
5
5
  prerelease:
6
6
  platform: ruby
7
7
  authors:
@@ -9,7 +9,7 @@ authors:
9
9
  autorequire:
10
10
  bindir: bin
11
11
  cert_chain: []
12
- date: 2012-06-27 00:00:00.000000000 Z
12
+ date: 2012-07-07 00:00:00.000000000 Z
13
13
  dependencies:
14
14
  - !ruby/object:Gem::Dependency
15
15
  name: bioinform
@@ -18,7 +18,7 @@ dependencies:
18
18
  requirements:
19
19
  - - ! '>='
20
20
  - !ruby/object:Gem::Version
21
- version: 0.1.1
21
+ version: 0.1.2
22
22
  type: :runtime
23
23
  prerelease: false
24
24
  version_requirements: !ruby/object:Gem::Requirement
@@ -26,7 +26,7 @@ dependencies:
26
26
  requirements:
27
27
  - - ! '>='
28
28
  - !ruby/object:Gem::Version
29
- version: 0.1.1
29
+ version: 0.1.2
30
30
  description: Macroape is an abbreviation for MAtrix CompaRisOn by Approximate P-value
31
31
  Estimation. It's a bioinformatic tool for evaluating similarity measure and best
32
32
  alignment between a pair of Position Weight Matrices(PWM), finding thresholds by