macroape 3.3.2 → 3.3.3

Sign up to get free protection for your applications and to get access to all the features.
@@ -16,10 +16,10 @@ Output format:
16
16
  <aligned 1st matrix>
17
17
  <aligned 2nd matrix>
18
18
  <shift> <orientation>
19
-
20
- Examples:
19
+
20
+ Examples:
21
21
  ruby eval_alignment.rb motifs/KLF4_f2.pat motifs/SP1_f1.pat -1 direct -p 0.0005 -d 100 -b 0.4 0.3 0.2 0.1
22
- or on windows
22
+ or on windows
23
23
  type motifs/SP1.pat | ruby eval_alignment.rb motifs/KLF4.pat .stdin 0 revcomp -p 0.0005 -d 100 -b 0.4 0.3 0.2 0.1
24
24
  or in linux
25
25
  cat motifs/KLF4.pat motifs/SP1.pat | ruby eval_alignment.rb .stdin .stdin 3 direct -p 0.0005 -d 100 -b 0.4 0.3 0.2 0.1
@@ -38,20 +38,20 @@ discretization = 10
38
38
  first_background = [1,1,1,1]
39
39
  second_background = [1,1,1,1]
40
40
 
41
- begin
41
+ begin
42
42
  first_file = ARGV.shift
43
43
  second_file = ARGV.shift
44
-
44
+
45
45
  shift = ARGV.shift
46
46
  orientation = ARGV.shift
47
-
47
+
48
48
  raise "You'd specify two input sources (each is filename or .stdin)" unless first_file and second_file
49
49
  raise 'You\'d specify shift' unless shift
50
50
  raise 'You\'d specify orientation' unless orientation
51
-
51
+
52
52
  shift = shift.to_i
53
53
  orientation = orientation.to_sym
54
-
54
+
55
55
  case orientation
56
56
  when :direct
57
57
  reverse = false
@@ -60,7 +60,7 @@ begin
60
60
  else
61
61
  raise 'Unknown orientation(direct/revcomp)'
62
62
  end
63
-
63
+
64
64
 
65
65
  until ARGV.empty?
66
66
  case ARGV.shift
@@ -80,19 +80,19 @@ begin
80
80
  second_background = ARGV.shift(4).map(&:to_f)
81
81
  end
82
82
  end
83
- raise 'background should be symmetric' unless first_background == first_background.reverse
83
+ raise 'background should be symmetric: p(A)=p(T) and p(G) = p(C)' unless first_background == first_background.reverse
84
84
  raise 'background should be symmetric: p(A)=p(T) and p(G) = p(C)' unless second_background == second_background.reverse
85
85
 
86
-
86
+
87
87
  Macroape::MaxHashSizeSingle = 1000000 unless defined? Macroape::MaxHashSizeSingle
88
88
  Macroape::MaxHashSizeDouble = 1000 unless defined? Macroape::MaxHashSizeDouble
89
-
89
+
90
90
  # if first_file == '.stdin' || second_file == '.stdin'
91
91
  # r_stream, w_stream = IO.pipe
92
92
  # STDIN.readlines.each{|line| w_stream.write(line)}
93
93
  # w_stream.close
94
94
  # end
95
-
95
+
96
96
  if first_file == '.stdin'
97
97
  # r_stream, w_stream, extracted_pwm = extract_pwm(r_stream, w_stream)
98
98
  # pwm_first = Macroape::SingleMatrix.load_from_line_array(extracted_pwm).with_background(first_background).discrete(discretization)
@@ -100,7 +100,7 @@ begin
100
100
  raise "Error! File #{first_file} don't exist" unless File.exist?(first_file)
101
101
  pwm_first = Bioinform::PWM.new(File.read(first_file)).background(first_background).discrete(discretization)
102
102
  end
103
-
103
+
104
104
  if second_file == '.stdin'
105
105
  # r_stream, w_stream, extracted_pwm = extract_pwm(r_stream, w_stream)
106
106
  # pwm_second = Macroape::SingleMatrix.load_from_line_array(extracted_pwm).with_background(second_background).discrete(discretization)
@@ -108,21 +108,18 @@ begin
108
108
  raise "Error! File #{second_file} don't exist" unless File.exist?(second_file)
109
109
  pwm_second = Bioinform::PWM.new(File.read(second_file)).background(second_background).discrete(discretization)
110
110
  end
111
-
111
+
112
112
  # r_stream.close if first_file == '.stdin' || second_file == '.stdin'
113
-
114
-
115
- pwm_second.reverse_complement! if reverse
116
-
113
+
117
114
  cmp = Macroape::PWMCompareAligned.new(pwm_first, pwm_second, shift, orientation)
118
-
115
+
119
116
  first_threshold = pwm_first.threshold(pvalue)
120
117
  second_threshold = pwm_second.threshold(pvalue)
121
118
 
122
119
  info = cmp.alignment_infos.merge( cmp.jaccard(first_threshold, second_threshold) )
123
-
120
+
124
121
  puts "#{info[:similarity]}\n#{info[:recognized_by_both]}\t#{info[:alignment_length]}\n#{info[:text]}\n#{info[:shift]}\t#{info[:orientation]}"
125
-
122
+
126
123
  rescue => err
127
124
  STDERR.puts "\n#{err}\n#{err.backtrace.first(5).join("\n")}\n\nUse -help option for help\n"
128
125
  end
@@ -17,10 +17,10 @@ Output has format:
17
17
  <optimal alignment, the 1st matrix>
18
18
  <optimal alignment, the 2nd matrix>
19
19
  <shift> <orientation>
20
-
21
- Examples:
20
+
21
+ Examples:
22
22
  ruby eval_similarity.rb motifs/KLF4.pat motifs/SP1.pat -p 0.0005 -d 100 -b 0.4 0.3 0.2 0.1
23
- or on windows
23
+ or on windows
24
24
  type motifs/SP1.pat | ruby eval_similarity.rb motifs/KLF4.pat .stdin -p 0.0005 -d 100 -b 0.4 0.3 0.2 0.1
25
25
  or in linux
26
26
  cat motifs/KLF4.pat motifs/SP1.pat | ruby eval_similarity.rb .stdin .stdin -p 0.0005 -d 100 -b 0.4 0.3 0.2 0.1
@@ -40,7 +40,7 @@ discretization = 10
40
40
  first_background = [1,1,1,1]
41
41
  second_background = [1,1,1,1]
42
42
 
43
- begin
43
+ begin
44
44
  first_file = ARGV.shift
45
45
  second_file = ARGV.shift
46
46
  raise "You'd specify two input sources (each is filename or .stdin)" unless first_file and second_file
@@ -60,22 +60,22 @@ begin
60
60
  when '-b1'
61
61
  first_background = ARGV.shift(4).map(&:to_f)
62
62
  when '-b2'
63
- second_background = ARGV.shift(4).map(&:to_f)
63
+ second_background = ARGV.shift(4).map(&:to_f)
64
64
  end
65
65
  end
66
66
  raise 'background should be symmetric: p(A)=p(T) and p(G) = p(C)' unless first_background == first_background.reverse
67
67
  raise 'background should be symmetric: p(A)=p(T) and p(G) = p(C)' unless second_background == second_background.reverse
68
-
68
+
69
69
  Macroape::MaxHashSizeSingle = 1000000 unless defined? Macroape::MaxHashSizeSingle
70
70
  Macroape::MaxHashSizeDouble = 1000 unless defined? Macroape::MaxHashSizeDouble
71
-
72
-
71
+
72
+
73
73
  # if first_file == '.stdin' || second_file == '.stdin'
74
74
  # r_stream, w_stream = IO.pipe
75
75
  # STDIN.readlines.each{|line| w_stream.write(line)}
76
76
  # w_stream.close
77
77
  # end
78
-
78
+
79
79
  if first_file == '.stdin'
80
80
  # r_stream, w_stream, extracted_pwm = extract_pwm(r_stream, w_stream)
81
81
  # pwm_first = Macroape::SingleMatrix.load_from_line_array(extracted_pwm).with_background(first_background).discrete(discretization)
@@ -83,7 +83,7 @@ begin
83
83
  raise "Error! File #{first_file} don't exist" unless File.exist?(first_file)
84
84
  pwm_first = Bioinform::PWM.new(File.read(first_file)).background(first_background).discrete(discretization)
85
85
  end
86
-
86
+
87
87
  if second_file == '.stdin'
88
88
  # r_stream, w_stream, extracted_pwm = extract_pwm(r_stream, w_stream)
89
89
  # pwm_second = Macroape::SingleMatrix.load_from_line_array(extracted_pwm).with_background(second_background).discrete(discretization)
@@ -91,7 +91,7 @@ begin
91
91
  raise "Error! File #{second_file} don't exist" unless File.exist?(second_file)
92
92
  pwm_second = Bioinform::PWM.new(File.read(second_file)).background(second_background).discrete(discretization)
93
93
  end
94
-
94
+
95
95
  r_stream.close if first_file == '.stdin' || second_file == '.stdin'
96
96
 
97
97
  cmp = Macroape::PWMCompare.new(pwm_first, pwm_second)
@@ -100,9 +100,9 @@ begin
100
100
  second_threshold = pwm_second.threshold(pvalue)
101
101
 
102
102
  info = cmp.jaccard(first_threshold, second_threshold)
103
-
103
+
104
104
  puts "#{info[:similarity]}\n#{info[:recognized_by_both]}\t#{info[:alignment_length]}\n#{info[:text]}\n#{info[:shift]}\t#{info[:orientation]}"
105
-
105
+
106
106
  rescue => err
107
107
  STDERR.puts "\n#{err}\n#{err.backtrace.first(5).join("\n")}\n\nUse -help option for help\n"
108
108
  end
@@ -1,7 +1,7 @@
1
1
  help_string = %q{
2
2
  Command-line format:
3
3
  ruby find_pvalue.rb <pat-file> <threshold list> [options]
4
- or in linux
4
+ or in linux
5
5
  cat <pat-file> | ruby find_pvalue.rb .stdin <threshold> [options]
6
6
  or on windows
7
7
  type <pat-file> | ruby find_pvalue.rb .stdin <threshold> [options]
@@ -16,7 +16,7 @@ Output format:
16
16
  threshold_3 count_3 pvalue_3
17
17
  The results are printed out in the same order as in the given threshold list.
18
18
 
19
- Examples:
19
+ Examples:
20
20
  ruby find_pvalue.rb motifs/KLF4.pat 7.32 -d 1000 -b 0.2 0.3 0.2 0.3
21
21
  or on windows
22
22
  type motifs/KLF4.pat | ruby find_pvalue.rb .stdin 7.32 4.31 5.42
@@ -35,7 +35,7 @@ end
35
35
  discretization = 10000
36
36
  background = [1,1,1,1]
37
37
  thresholds = []
38
- begin
38
+ begin
39
39
  filename = ARGV.shift
40
40
 
41
41
  loop do
@@ -46,10 +46,10 @@ begin
46
46
  raise StopIteration
47
47
  end
48
48
  end
49
-
49
+
50
50
  raise "No input. You'd specify input source: filename or .stdin" unless filename
51
51
  raise 'You should specify at least one threshold' if thresholds.empty?
52
-
52
+
53
53
  until ARGV.empty?
54
54
  case ARGV.shift
55
55
  when '-b'
@@ -61,8 +61,8 @@ begin
61
61
  end
62
62
  end
63
63
  Macroape::MaxHashSizeSingle = 1000000 unless defined? Macroape::MaxHashSizeSingle
64
-
65
-
64
+
65
+
66
66
  if filename == '.stdin'
67
67
  # TODO
68
68
  else
@@ -14,9 +14,9 @@ Options:
14
14
  Output format:
15
15
  requested_pvalue_1 threshold_1 achieved_pvalue_1
16
16
  requested_pvalue_2 threshold_2 achieved_pvalue_2
17
-
18
-
19
- Example:
17
+
18
+
19
+ Example:
20
20
  ruby find_threshold.rb motifs/KLF4.pat -p 0.001 0.0001 0.0005 -d 1000 -b 0.4 0.3 0.2 0.1
21
21
  }
22
22
 
@@ -35,7 +35,7 @@ discretization = 10000
35
35
  begin
36
36
  filename = ARGV.shift
37
37
  raise "No input. You'd specify input source: filename or .stdin" unless filename
38
-
38
+
39
39
  pvalues = []
40
40
  until ARGV.empty?
41
41
  case ARGV.shift
@@ -57,18 +57,18 @@ begin
57
57
  end
58
58
  end
59
59
  pvalues = default_pvalues if pvalues.empty?
60
-
60
+
61
61
  Macroape::MaxHashSizeSingle = 1000000 unless defined? Macroape::MaxHashSizeSingle
62
-
62
+
63
63
  if filename == '.stdin'
64
64
  ## TODO
65
65
  else
66
66
  raise "Error! File #{filename} doesn't exist" unless File.exist?(filename)
67
67
  pwm = Bioinform::PWM.new( File.read(filename) )
68
68
  end
69
-
69
+
70
70
  pwm.background(background)
71
-
71
+
72
72
  pwm.discrete(discretization).thresholds(*pvalues) do |pvalue, threshold, real_pvalue|
73
73
  puts "#{pvalue}\t#{threshold / discretization}\t#{real_pvalue}"
74
74
  end
@@ -10,7 +10,7 @@ Options:
10
10
  [--silent] - don't show current progress information during scan (by default this information's written into stderr)
11
11
 
12
12
  The tool stores preprocessed Macroape collection to the specified YAML-file.
13
-
13
+
14
14
  Example:
15
15
  ruby preprocess_collection.rb ./motifs -p 0.001 0.0005 0.0001 -d 1 10 -b 0.2 0.3 0.2 0.3 -o collection.yaml
16
16
  }
@@ -34,7 +34,7 @@ begin
34
34
  folder = ARGV.shift
35
35
  raise "No input. You'd specify folder with pat-files" unless folder
36
36
  raise "Error! Folder #{folder} doesn't exist" unless Dir.exist?(folder)
37
-
37
+
38
38
  pvalues = []
39
39
  silent = false
40
40
  until ARGV.empty?
@@ -64,7 +64,7 @@ begin
64
64
  end
65
65
  end
66
66
  pvalues = default_pvalues if pvalues.empty?
67
-
67
+
68
68
  Macroape::MaxHashSizeSingle = 1000000 unless defined? Macroape::MaxHashSizeSingle
69
69
  Macroape::MaxHashSizeDouble = 1000 unless defined? Macroape::MaxHashSizeDouble
70
70
 
@@ -75,14 +75,14 @@ begin
75
75
  STDERR.puts filename unless silent
76
76
  pwm = Bioinform::PWM.new(File.read(filename))
77
77
  pwm.name ||= File.basename(filename, File.extname(filename))
78
-
79
- # When support of onefile collections is introduced - then here should be check if name exists.
78
+
79
+ # When support of onefile collections is introduced - then here should be check if name exists.
80
80
  # Otherwise it should skip motif and tell you about this
81
81
  # Also two command line options to fail on skipping or to skip silently should be included
82
-
82
+
83
83
  info = {rough: {}, precise: {}}
84
84
  pwm.background(background)
85
-
85
+
86
86
  pwm.discrete(rough_discretization).thresholds(*pvalues) do |pvalue, threshold, real_pvalue|
87
87
  info[:rough][pvalue] = threshold / rough_discretization
88
88
  end
@@ -90,7 +90,7 @@ begin
90
90
  pwm.discrete(precise_discretization).thresholds(*pvalues) do |pvalue, threshold, real_pvalue|
91
91
  info[:precise][pvalue] = threshold / precise_discretization
92
92
  end
93
-
93
+
94
94
  collection.add_pwm(pwm, info)
95
95
  end
96
96
  File.open(output_file,'w') do |f|
@@ -11,13 +11,13 @@ Options:
11
11
  [-c <similarity cutoff (minimal similarity to be included in output)> ] or [--all], '-c 0.05' by default
12
12
  [--precise [<level, minimal similarity to check on a more precise discretization level on the second pass>]], off by default, '--precise 0.01' if level is not set
13
13
  [--silent] - don't show current progress information during scan (by default this information's written into stderr)
14
-
14
+
15
15
  Output format:
16
16
  <name> <similarity jaccard index> <shift> <overlap> <orientation> * [in case that result calculated on the second pass(in precise mode)]
17
- Attention! Name can contain whitespace characters.
17
+ Attention! Name can contain whitespace characters.
18
18
  Attention! The shift and orientation are reported for the collection matrix relative to the query matrix.
19
-
20
- Example:
19
+
20
+ Example:
21
21
  ruby scan_collection.rb motifs/KLF4.pat collection.yaml -p 0.005
22
22
  or in linux
23
23
  cat motifs/KLF4.pat | ruby scan_collection.rb .stdin collection.yaml -p 0.005 --precise 0.03
@@ -38,7 +38,7 @@ begin
38
38
  raise "No input. You'd specify input source for pat: filename or .stdin" unless filename
39
39
  raise "No input. You'd specify input file with collection" unless collection_file
40
40
  raise "Collection file #{collection_file} doesn't exist" unless File.exist?(collection_file)
41
-
41
+
42
42
  pvalue = 0.0005
43
43
  cutoff = 0.05 # minimal similarity to output
44
44
  collection = YAML.load_file(collection_file)
@@ -65,7 +65,7 @@ begin
65
65
  silent = true
66
66
  when '--precise'
67
67
  precision_mode = :precise
68
- begin
68
+ begin
69
69
  Float(ARGV.first)
70
70
  minimal_similarity = ARGV.shift.to_f
71
71
  rescue
@@ -75,26 +75,26 @@ begin
75
75
  end
76
76
  Macroape::MaxHashSizeSingle = 1000000 unless defined? Macroape::MaxHashSizeSingle
77
77
  Macroape::MaxHashSizeDouble = 1000 unless defined? Macroape::MaxHashSizeDouble
78
-
78
+
79
79
  raise "Thresholds for pvalue #{pvalue} aren't presented in collection (#{collection.pvalues.join(', ')}). Use one of listed pvalues or recalculate the collection with needed pvalue" unless collection.pvalues.include? pvalue
80
-
80
+
81
81
  if filename == '.stdin'
82
82
  # query_pwm = Macroape::SingleMatrix.load_from_stdin(STDIN)
83
83
  else
84
84
  raise "Error! File #{filename} doesn't exist" unless File.exist?(filename)
85
85
  query_pwm = Bioinform::PWM.new(File.read(filename))
86
86
  end
87
-
88
-
87
+
88
+
89
89
  query_pwm_rough = query_pwm.background(background_query).discrete(collection.rough_discretization)
90
90
  query_pwm_precise = query_pwm.background(background_query).discrete(collection.precise_discretization)
91
-
91
+
92
92
  threshold = query_pwm_rough.threshold(pvalue)
93
93
  threshold_precise = query_pwm_precise.threshold(pvalue)
94
-
94
+
95
95
  similarities = {}
96
96
  precision_file_mode = {}
97
-
97
+
98
98
  collection.pwms.each_key do |name|
99
99
  pwm = collection.pwms[name]
100
100
  pwm_info = collection.infos[name]
@@ -102,7 +102,7 @@ begin
102
102
  cmp = Macroape::PWMCompare.new(query_pwm_rough, pwm.background(collection.background).discrete(collection.rough_discretization))
103
103
  info = cmp.jaccard(threshold, pwm_info[:rough][pvalue] * collection.rough_discretization)
104
104
  precision_file_mode[name] = :rough
105
-
105
+
106
106
  if precision_mode == :precise and info[:similarity] >= minimal_similarity
107
107
  cmp = Macroape::PWMCompare.new(query_pwm_precise, pwm.background(collection.background).discrete(collection.precise_discretization))
108
108
  info = cmp.jaccard(threshold_precise, pwm_info[:precise][pvalue] * collection.precise_discretization)
@@ -110,7 +110,7 @@ begin
110
110
  end
111
111
  similarities[name] = info
112
112
  end
113
-
113
+
114
114
  puts "#pwm\tsimilarity\tshift\toverlap\torientation"
115
115
  similarities.sort_by do |name, info|
116
116
  info[:similarity]
@@ -118,7 +118,7 @@ begin
118
118
  precision_text = (precision_file_mode[name] == :precise) ? "\t*" : ""
119
119
  puts "#{name}\t#{info[:similarity]}\t#{info[:shift]}\t#{info[:overlap]}\t#{info[:orientation]}#{precision_text}" if info[:similarity] >= cutoff
120
120
  end
121
-
121
+
122
122
  rescue => err
123
123
  STDERR.puts "\n#{err}\n#{err.backtrace.first(5).join("\n")}\n\nUse -help option for help\n"
124
124
  end
@@ -13,12 +13,11 @@ module Macroape
13
13
  end
14
14
 
15
15
  def each_alignment
16
- second_rc = second.reverse_complement
17
16
  (-second.length..first.length).to_a.product([:direct,:revcomp]) do |shift, orientation|
18
- yield PWMCompareAligned.new(first, (orientation == :direct ? second : second_rc), shift, orientation)
17
+ yield PWMCompareAligned.new(first, second, shift, orientation)
19
18
  end
20
19
  end
21
-
20
+
22
21
  include Enumerable
23
22
  alias_method :each, :each_alignment
24
23
  alias_method :map_each_alignment, :map
@@ -2,31 +2,37 @@ require 'macroape/aligned_pair_intersection'
2
2
 
3
3
  module Macroape
4
4
  class PWMCompareAligned
5
- attr_reader :first, :second, :length, :shift, :orientation, :unaligned_first, :unaligned_second
6
- def initialize(first, second, shift, orientation)
7
- @unaligned_first, @unaligned_second = first, second
5
+ attr_reader :first, :second, :length, :shift, :orientation, :first_length, :second_length
6
+ def initialize(first_unaligned, second_unaligned, shift, orientation)
8
7
  @shift, @orientation = shift, orientation
8
+
9
+ @first_length, @second_length = first_unaligned.length, second_unaligned.length
10
+ @length = self.class.calculate_alignment_length(@first_length, @second_length, @shift)
11
+
12
+ first, second = first_unaligned, second_unaligned
13
+ second = second.reverse_complement if revcomp?
14
+
9
15
  if shift > 0
10
- first, second = first, second.left_augment(shift)
16
+ second = second.left_augment(shift)
11
17
  else
12
- first, second = first.left_augment(-shift), second
18
+ first = first.left_augment(-shift)
13
19
  end
14
- @length = [first.length, second.length].max
20
+
15
21
  @first = first.right_augment(@length - first.length)
16
22
  @second = second.right_augment(@length - second.length)
17
23
  end
18
-
24
+
19
25
  def direct?
20
26
  orientation == :direct
21
27
  end
22
28
  def revcomp?
23
29
  orientation == :revcomp
24
30
  end
25
-
31
+
26
32
  def overlap
27
33
  length.times.count{|pos| first_overlaps?(pos) && second_overlaps?(pos) }
28
34
  end
29
-
35
+
30
36
  def first_pwm_alignment
31
37
  length.times.map do |pos|
32
38
  if first_overlaps?(pos)
@@ -36,7 +42,7 @@ module Macroape
36
42
  end
37
43
  end.join
38
44
  end
39
-
45
+
40
46
  def second_pwm_alignment
41
47
  length.times.map do |pos|
42
48
  if second_overlaps?(pos)
@@ -46,7 +52,7 @@ module Macroape
46
52
  end
47
53
  end.join
48
54
  end
49
-
55
+
50
56
  def alignment_infos
51
57
  {shift: shift,
52
58
  orientation: orientation,
@@ -54,15 +60,8 @@ module Macroape
54
60
  overlap: overlap,
55
61
  alignment_length: length}
56
62
  end
57
-
58
- def first_length
59
- unaligned_first.length
60
- end
61
- def second_length
62
- unaligned_second.length
63
- end
64
-
65
- # whether first matrix overlap specified position
63
+
64
+ # whether first matrix overlap specified position of alignment
66
65
  def first_overlaps?(pos)
67
66
  return false unless pos >= 0 && pos < length
68
67
  if shift > 0
@@ -71,7 +70,7 @@ module Macroape
71
70
  pos >= -shift && pos < -shift + first_length
72
71
  end
73
72
  end
74
-
73
+
75
74
  def second_overlaps?(pos)
76
75
  return false unless pos >= 0 && pos < length
77
76
  if shift > 0
@@ -80,8 +79,8 @@ module Macroape
80
79
  pos < second_length
81
80
  end
82
81
  end
83
-
84
- =begin
82
+
83
+ =begin
85
84
  def discrete(rate)
86
85
  PWMCompareAligned.new(first.discrete(rate), second.discrete(rate))
87
86
  end
@@ -104,13 +103,13 @@ module Macroape
104
103
  def jaccard(first_threshold, second_threshold)
105
104
  f = first.counts_by_thresholds(first_threshold).first
106
105
  s = second.counts_by_thresholds(second_threshold).first
107
- if f == 0 or s == 0
106
+ if f == 0 || s == 0
108
107
  return {similarity: -1, tanimoto: -1, recognized_by_both: 0,
109
108
  recognized_by_first: f,
110
109
  recognized_by_second: s,
111
110
  }
112
111
  end
113
-
112
+
114
113
  intersect = counts_for_two_matrices(first_threshold, second_threshold)
115
114
  intersect = Math.sqrt(intersect[0] * intersect[1])
116
115
  union = f + s - intersect
@@ -118,6 +117,15 @@ module Macroape
118
117
  { similarity: similarity, tanimoto: 1.0 - similarity, recognized_by_both: intersect,
119
118
  recognized_by_first: f, recognized_by_second: s }
120
119
  end
121
-
120
+
121
+ def self.calculate_alignment_length(first_len, second_len, shift)
122
+ if shift > 0
123
+ [first_len, second_len + shift].max
124
+ else
125
+ [first_len - shift, second_len].max
126
+ end
127
+ end
128
+
122
129
  end
130
+
123
131
  end