macroape 3.3.2 → 3.3.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -16,10 +16,10 @@ Output format:
16
16
  <aligned 1st matrix>
17
17
  <aligned 2nd matrix>
18
18
  <shift> <orientation>
19
-
20
- Examples:
19
+
20
+ Examples:
21
21
  ruby eval_alignment.rb motifs/KLF4_f2.pat motifs/SP1_f1.pat -1 direct -p 0.0005 -d 100 -b 0.4 0.3 0.2 0.1
22
- or on windows
22
+ or on windows
23
23
  type motifs/SP1.pat | ruby eval_alignment.rb motifs/KLF4.pat .stdin 0 revcomp -p 0.0005 -d 100 -b 0.4 0.3 0.2 0.1
24
24
  or in linux
25
25
  cat motifs/KLF4.pat motifs/SP1.pat | ruby eval_alignment.rb .stdin .stdin 3 direct -p 0.0005 -d 100 -b 0.4 0.3 0.2 0.1
@@ -38,20 +38,20 @@ discretization = 10
38
38
  first_background = [1,1,1,1]
39
39
  second_background = [1,1,1,1]
40
40
 
41
- begin
41
+ begin
42
42
  first_file = ARGV.shift
43
43
  second_file = ARGV.shift
44
-
44
+
45
45
  shift = ARGV.shift
46
46
  orientation = ARGV.shift
47
-
47
+
48
48
  raise "You'd specify two input sources (each is filename or .stdin)" unless first_file and second_file
49
49
  raise 'You\'d specify shift' unless shift
50
50
  raise 'You\'d specify orientation' unless orientation
51
-
51
+
52
52
  shift = shift.to_i
53
53
  orientation = orientation.to_sym
54
-
54
+
55
55
  case orientation
56
56
  when :direct
57
57
  reverse = false
@@ -60,7 +60,7 @@ begin
60
60
  else
61
61
  raise 'Unknown orientation(direct/revcomp)'
62
62
  end
63
-
63
+
64
64
 
65
65
  until ARGV.empty?
66
66
  case ARGV.shift
@@ -80,19 +80,19 @@ begin
80
80
  second_background = ARGV.shift(4).map(&:to_f)
81
81
  end
82
82
  end
83
- raise 'background should be symmetric' unless first_background == first_background.reverse
83
+ raise 'background should be symmetric: p(A)=p(T) and p(G) = p(C)' unless first_background == first_background.reverse
84
84
  raise 'background should be symmetric: p(A)=p(T) and p(G) = p(C)' unless second_background == second_background.reverse
85
85
 
86
-
86
+
87
87
  Macroape::MaxHashSizeSingle = 1000000 unless defined? Macroape::MaxHashSizeSingle
88
88
  Macroape::MaxHashSizeDouble = 1000 unless defined? Macroape::MaxHashSizeDouble
89
-
89
+
90
90
  # if first_file == '.stdin' || second_file == '.stdin'
91
91
  # r_stream, w_stream = IO.pipe
92
92
  # STDIN.readlines.each{|line| w_stream.write(line)}
93
93
  # w_stream.close
94
94
  # end
95
-
95
+
96
96
  if first_file == '.stdin'
97
97
  # r_stream, w_stream, extracted_pwm = extract_pwm(r_stream, w_stream)
98
98
  # pwm_first = Macroape::SingleMatrix.load_from_line_array(extracted_pwm).with_background(first_background).discrete(discretization)
@@ -100,7 +100,7 @@ begin
100
100
  raise "Error! File #{first_file} don't exist" unless File.exist?(first_file)
101
101
  pwm_first = Bioinform::PWM.new(File.read(first_file)).background(first_background).discrete(discretization)
102
102
  end
103
-
103
+
104
104
  if second_file == '.stdin'
105
105
  # r_stream, w_stream, extracted_pwm = extract_pwm(r_stream, w_stream)
106
106
  # pwm_second = Macroape::SingleMatrix.load_from_line_array(extracted_pwm).with_background(second_background).discrete(discretization)
@@ -108,21 +108,18 @@ begin
108
108
  raise "Error! File #{second_file} don't exist" unless File.exist?(second_file)
109
109
  pwm_second = Bioinform::PWM.new(File.read(second_file)).background(second_background).discrete(discretization)
110
110
  end
111
-
111
+
112
112
  # r_stream.close if first_file == '.stdin' || second_file == '.stdin'
113
-
114
-
115
- pwm_second.reverse_complement! if reverse
116
-
113
+
117
114
  cmp = Macroape::PWMCompareAligned.new(pwm_first, pwm_second, shift, orientation)
118
-
115
+
119
116
  first_threshold = pwm_first.threshold(pvalue)
120
117
  second_threshold = pwm_second.threshold(pvalue)
121
118
 
122
119
  info = cmp.alignment_infos.merge( cmp.jaccard(first_threshold, second_threshold) )
123
-
120
+
124
121
  puts "#{info[:similarity]}\n#{info[:recognized_by_both]}\t#{info[:alignment_length]}\n#{info[:text]}\n#{info[:shift]}\t#{info[:orientation]}"
125
-
122
+
126
123
  rescue => err
127
124
  STDERR.puts "\n#{err}\n#{err.backtrace.first(5).join("\n")}\n\nUse -help option for help\n"
128
125
  end
@@ -17,10 +17,10 @@ Output has format:
17
17
  <optimal alignment, the 1st matrix>
18
18
  <optimal alignment, the 2nd matrix>
19
19
  <shift> <orientation>
20
-
21
- Examples:
20
+
21
+ Examples:
22
22
  ruby eval_similarity.rb motifs/KLF4.pat motifs/SP1.pat -p 0.0005 -d 100 -b 0.4 0.3 0.2 0.1
23
- or on windows
23
+ or on windows
24
24
  type motifs/SP1.pat | ruby eval_similarity.rb motifs/KLF4.pat .stdin -p 0.0005 -d 100 -b 0.4 0.3 0.2 0.1
25
25
  or in linux
26
26
  cat motifs/KLF4.pat motifs/SP1.pat | ruby eval_similarity.rb .stdin .stdin -p 0.0005 -d 100 -b 0.4 0.3 0.2 0.1
@@ -40,7 +40,7 @@ discretization = 10
40
40
  first_background = [1,1,1,1]
41
41
  second_background = [1,1,1,1]
42
42
 
43
- begin
43
+ begin
44
44
  first_file = ARGV.shift
45
45
  second_file = ARGV.shift
46
46
  raise "You'd specify two input sources (each is filename or .stdin)" unless first_file and second_file
@@ -60,22 +60,22 @@ begin
60
60
  when '-b1'
61
61
  first_background = ARGV.shift(4).map(&:to_f)
62
62
  when '-b2'
63
- second_background = ARGV.shift(4).map(&:to_f)
63
+ second_background = ARGV.shift(4).map(&:to_f)
64
64
  end
65
65
  end
66
66
  raise 'background should be symmetric: p(A)=p(T) and p(G) = p(C)' unless first_background == first_background.reverse
67
67
  raise 'background should be symmetric: p(A)=p(T) and p(G) = p(C)' unless second_background == second_background.reverse
68
-
68
+
69
69
  Macroape::MaxHashSizeSingle = 1000000 unless defined? Macroape::MaxHashSizeSingle
70
70
  Macroape::MaxHashSizeDouble = 1000 unless defined? Macroape::MaxHashSizeDouble
71
-
72
-
71
+
72
+
73
73
  # if first_file == '.stdin' || second_file == '.stdin'
74
74
  # r_stream, w_stream = IO.pipe
75
75
  # STDIN.readlines.each{|line| w_stream.write(line)}
76
76
  # w_stream.close
77
77
  # end
78
-
78
+
79
79
  if first_file == '.stdin'
80
80
  # r_stream, w_stream, extracted_pwm = extract_pwm(r_stream, w_stream)
81
81
  # pwm_first = Macroape::SingleMatrix.load_from_line_array(extracted_pwm).with_background(first_background).discrete(discretization)
@@ -83,7 +83,7 @@ begin
83
83
  raise "Error! File #{first_file} don't exist" unless File.exist?(first_file)
84
84
  pwm_first = Bioinform::PWM.new(File.read(first_file)).background(first_background).discrete(discretization)
85
85
  end
86
-
86
+
87
87
  if second_file == '.stdin'
88
88
  # r_stream, w_stream, extracted_pwm = extract_pwm(r_stream, w_stream)
89
89
  # pwm_second = Macroape::SingleMatrix.load_from_line_array(extracted_pwm).with_background(second_background).discrete(discretization)
@@ -91,7 +91,7 @@ begin
91
91
  raise "Error! File #{second_file} don't exist" unless File.exist?(second_file)
92
92
  pwm_second = Bioinform::PWM.new(File.read(second_file)).background(second_background).discrete(discretization)
93
93
  end
94
-
94
+
95
95
  r_stream.close if first_file == '.stdin' || second_file == '.stdin'
96
96
 
97
97
  cmp = Macroape::PWMCompare.new(pwm_first, pwm_second)
@@ -100,9 +100,9 @@ begin
100
100
  second_threshold = pwm_second.threshold(pvalue)
101
101
 
102
102
  info = cmp.jaccard(first_threshold, second_threshold)
103
-
103
+
104
104
  puts "#{info[:similarity]}\n#{info[:recognized_by_both]}\t#{info[:alignment_length]}\n#{info[:text]}\n#{info[:shift]}\t#{info[:orientation]}"
105
-
105
+
106
106
  rescue => err
107
107
  STDERR.puts "\n#{err}\n#{err.backtrace.first(5).join("\n")}\n\nUse -help option for help\n"
108
108
  end
@@ -1,7 +1,7 @@
1
1
  help_string = %q{
2
2
  Command-line format:
3
3
  ruby find_pvalue.rb <pat-file> <threshold list> [options]
4
- or in linux
4
+ or in linux
5
5
  cat <pat-file> | ruby find_pvalue.rb .stdin <threshold> [options]
6
6
  or on windows
7
7
  type <pat-file> | ruby find_pvalue.rb .stdin <threshold> [options]
@@ -16,7 +16,7 @@ Output format:
16
16
  threshold_3 count_3 pvalue_3
17
17
  The results are printed out in the same order as in the given threshold list.
18
18
 
19
- Examples:
19
+ Examples:
20
20
  ruby find_pvalue.rb motifs/KLF4.pat 7.32 -d 1000 -b 0.2 0.3 0.2 0.3
21
21
  or on windows
22
22
  type motifs/KLF4.pat | ruby find_pvalue.rb .stdin 7.32 4.31 5.42
@@ -35,7 +35,7 @@ end
35
35
  discretization = 10000
36
36
  background = [1,1,1,1]
37
37
  thresholds = []
38
- begin
38
+ begin
39
39
  filename = ARGV.shift
40
40
 
41
41
  loop do
@@ -46,10 +46,10 @@ begin
46
46
  raise StopIteration
47
47
  end
48
48
  end
49
-
49
+
50
50
  raise "No input. You'd specify input source: filename or .stdin" unless filename
51
51
  raise 'You should specify at least one threshold' if thresholds.empty?
52
-
52
+
53
53
  until ARGV.empty?
54
54
  case ARGV.shift
55
55
  when '-b'
@@ -61,8 +61,8 @@ begin
61
61
  end
62
62
  end
63
63
  Macroape::MaxHashSizeSingle = 1000000 unless defined? Macroape::MaxHashSizeSingle
64
-
65
-
64
+
65
+
66
66
  if filename == '.stdin'
67
67
  # TODO
68
68
  else
@@ -14,9 +14,9 @@ Options:
14
14
  Output format:
15
15
  requested_pvalue_1 threshold_1 achieved_pvalue_1
16
16
  requested_pvalue_2 threshold_2 achieved_pvalue_2
17
-
18
-
19
- Example:
17
+
18
+
19
+ Example:
20
20
  ruby find_threshold.rb motifs/KLF4.pat -p 0.001 0.0001 0.0005 -d 1000 -b 0.4 0.3 0.2 0.1
21
21
  }
22
22
 
@@ -35,7 +35,7 @@ discretization = 10000
35
35
  begin
36
36
  filename = ARGV.shift
37
37
  raise "No input. You'd specify input source: filename or .stdin" unless filename
38
-
38
+
39
39
  pvalues = []
40
40
  until ARGV.empty?
41
41
  case ARGV.shift
@@ -57,18 +57,18 @@ begin
57
57
  end
58
58
  end
59
59
  pvalues = default_pvalues if pvalues.empty?
60
-
60
+
61
61
  Macroape::MaxHashSizeSingle = 1000000 unless defined? Macroape::MaxHashSizeSingle
62
-
62
+
63
63
  if filename == '.stdin'
64
64
  ## TODO
65
65
  else
66
66
  raise "Error! File #{filename} doesn't exist" unless File.exist?(filename)
67
67
  pwm = Bioinform::PWM.new( File.read(filename) )
68
68
  end
69
-
69
+
70
70
  pwm.background(background)
71
-
71
+
72
72
  pwm.discrete(discretization).thresholds(*pvalues) do |pvalue, threshold, real_pvalue|
73
73
  puts "#{pvalue}\t#{threshold / discretization}\t#{real_pvalue}"
74
74
  end
@@ -10,7 +10,7 @@ Options:
10
10
  [--silent] - don't show current progress information during scan (by default this information's written into stderr)
11
11
 
12
12
  The tool stores preprocessed Macroape collection to the specified YAML-file.
13
-
13
+
14
14
  Example:
15
15
  ruby preprocess_collection.rb ./motifs -p 0.001 0.0005 0.0001 -d 1 10 -b 0.2 0.3 0.2 0.3 -o collection.yaml
16
16
  }
@@ -34,7 +34,7 @@ begin
34
34
  folder = ARGV.shift
35
35
  raise "No input. You'd specify folder with pat-files" unless folder
36
36
  raise "Error! Folder #{folder} doesn't exist" unless Dir.exist?(folder)
37
-
37
+
38
38
  pvalues = []
39
39
  silent = false
40
40
  until ARGV.empty?
@@ -64,7 +64,7 @@ begin
64
64
  end
65
65
  end
66
66
  pvalues = default_pvalues if pvalues.empty?
67
-
67
+
68
68
  Macroape::MaxHashSizeSingle = 1000000 unless defined? Macroape::MaxHashSizeSingle
69
69
  Macroape::MaxHashSizeDouble = 1000 unless defined? Macroape::MaxHashSizeDouble
70
70
 
@@ -75,14 +75,14 @@ begin
75
75
  STDERR.puts filename unless silent
76
76
  pwm = Bioinform::PWM.new(File.read(filename))
77
77
  pwm.name ||= File.basename(filename, File.extname(filename))
78
-
79
- # When support of onefile collections is introduced - then here should be check if name exists.
78
+
79
+ # When support of onefile collections is introduced - then here should be check if name exists.
80
80
  # Otherwise it should skip motif and tell you about this
81
81
  # Also two command line options to fail on skipping or to skip silently should be included
82
-
82
+
83
83
  info = {rough: {}, precise: {}}
84
84
  pwm.background(background)
85
-
85
+
86
86
  pwm.discrete(rough_discretization).thresholds(*pvalues) do |pvalue, threshold, real_pvalue|
87
87
  info[:rough][pvalue] = threshold / rough_discretization
88
88
  end
@@ -90,7 +90,7 @@ begin
90
90
  pwm.discrete(precise_discretization).thresholds(*pvalues) do |pvalue, threshold, real_pvalue|
91
91
  info[:precise][pvalue] = threshold / precise_discretization
92
92
  end
93
-
93
+
94
94
  collection.add_pwm(pwm, info)
95
95
  end
96
96
  File.open(output_file,'w') do |f|
@@ -11,13 +11,13 @@ Options:
11
11
  [-c <similarity cutoff (minimal similarity to be included in output)> ] or [--all], '-c 0.05' by default
12
12
  [--precise [<level, minimal similarity to check on a more precise discretization level on the second pass>]], off by default, '--precise 0.01' if level is not set
13
13
  [--silent] - don't show current progress information during scan (by default this information's written into stderr)
14
-
14
+
15
15
  Output format:
16
16
  <name> <similarity jaccard index> <shift> <overlap> <orientation> * [in case that result calculated on the second pass(in precise mode)]
17
- Attention! Name can contain whitespace characters.
17
+ Attention! Name can contain whitespace characters.
18
18
  Attention! The shift and orientation are reported for the collection matrix relative to the query matrix.
19
-
20
- Example:
19
+
20
+ Example:
21
21
  ruby scan_collection.rb motifs/KLF4.pat collection.yaml -p 0.005
22
22
  or in linux
23
23
  cat motifs/KLF4.pat | ruby scan_collection.rb .stdin collection.yaml -p 0.005 --precise 0.03
@@ -38,7 +38,7 @@ begin
38
38
  raise "No input. You'd specify input source for pat: filename or .stdin" unless filename
39
39
  raise "No input. You'd specify input file with collection" unless collection_file
40
40
  raise "Collection file #{collection_file} doesn't exist" unless File.exist?(collection_file)
41
-
41
+
42
42
  pvalue = 0.0005
43
43
  cutoff = 0.05 # minimal similarity to output
44
44
  collection = YAML.load_file(collection_file)
@@ -65,7 +65,7 @@ begin
65
65
  silent = true
66
66
  when '--precise'
67
67
  precision_mode = :precise
68
- begin
68
+ begin
69
69
  Float(ARGV.first)
70
70
  minimal_similarity = ARGV.shift.to_f
71
71
  rescue
@@ -75,26 +75,26 @@ begin
75
75
  end
76
76
  Macroape::MaxHashSizeSingle = 1000000 unless defined? Macroape::MaxHashSizeSingle
77
77
  Macroape::MaxHashSizeDouble = 1000 unless defined? Macroape::MaxHashSizeDouble
78
-
78
+
79
79
  raise "Thresholds for pvalue #{pvalue} aren't presented in collection (#{collection.pvalues.join(', ')}). Use one of listed pvalues or recalculate the collection with needed pvalue" unless collection.pvalues.include? pvalue
80
-
80
+
81
81
  if filename == '.stdin'
82
82
  # query_pwm = Macroape::SingleMatrix.load_from_stdin(STDIN)
83
83
  else
84
84
  raise "Error! File #{filename} doesn't exist" unless File.exist?(filename)
85
85
  query_pwm = Bioinform::PWM.new(File.read(filename))
86
86
  end
87
-
88
-
87
+
88
+
89
89
  query_pwm_rough = query_pwm.background(background_query).discrete(collection.rough_discretization)
90
90
  query_pwm_precise = query_pwm.background(background_query).discrete(collection.precise_discretization)
91
-
91
+
92
92
  threshold = query_pwm_rough.threshold(pvalue)
93
93
  threshold_precise = query_pwm_precise.threshold(pvalue)
94
-
94
+
95
95
  similarities = {}
96
96
  precision_file_mode = {}
97
-
97
+
98
98
  collection.pwms.each_key do |name|
99
99
  pwm = collection.pwms[name]
100
100
  pwm_info = collection.infos[name]
@@ -102,7 +102,7 @@ begin
102
102
  cmp = Macroape::PWMCompare.new(query_pwm_rough, pwm.background(collection.background).discrete(collection.rough_discretization))
103
103
  info = cmp.jaccard(threshold, pwm_info[:rough][pvalue] * collection.rough_discretization)
104
104
  precision_file_mode[name] = :rough
105
-
105
+
106
106
  if precision_mode == :precise and info[:similarity] >= minimal_similarity
107
107
  cmp = Macroape::PWMCompare.new(query_pwm_precise, pwm.background(collection.background).discrete(collection.precise_discretization))
108
108
  info = cmp.jaccard(threshold_precise, pwm_info[:precise][pvalue] * collection.precise_discretization)
@@ -110,7 +110,7 @@ begin
110
110
  end
111
111
  similarities[name] = info
112
112
  end
113
-
113
+
114
114
  puts "#pwm\tsimilarity\tshift\toverlap\torientation"
115
115
  similarities.sort_by do |name, info|
116
116
  info[:similarity]
@@ -118,7 +118,7 @@ begin
118
118
  precision_text = (precision_file_mode[name] == :precise) ? "\t*" : ""
119
119
  puts "#{name}\t#{info[:similarity]}\t#{info[:shift]}\t#{info[:overlap]}\t#{info[:orientation]}#{precision_text}" if info[:similarity] >= cutoff
120
120
  end
121
-
121
+
122
122
  rescue => err
123
123
  STDERR.puts "\n#{err}\n#{err.backtrace.first(5).join("\n")}\n\nUse -help option for help\n"
124
124
  end
@@ -13,12 +13,11 @@ module Macroape
13
13
  end
14
14
 
15
15
  def each_alignment
16
- second_rc = second.reverse_complement
17
16
  (-second.length..first.length).to_a.product([:direct,:revcomp]) do |shift, orientation|
18
- yield PWMCompareAligned.new(first, (orientation == :direct ? second : second_rc), shift, orientation)
17
+ yield PWMCompareAligned.new(first, second, shift, orientation)
19
18
  end
20
19
  end
21
-
20
+
22
21
  include Enumerable
23
22
  alias_method :each, :each_alignment
24
23
  alias_method :map_each_alignment, :map
@@ -2,31 +2,37 @@ require 'macroape/aligned_pair_intersection'
2
2
 
3
3
  module Macroape
4
4
  class PWMCompareAligned
5
- attr_reader :first, :second, :length, :shift, :orientation, :unaligned_first, :unaligned_second
6
- def initialize(first, second, shift, orientation)
7
- @unaligned_first, @unaligned_second = first, second
5
+ attr_reader :first, :second, :length, :shift, :orientation, :first_length, :second_length
6
+ def initialize(first_unaligned, second_unaligned, shift, orientation)
8
7
  @shift, @orientation = shift, orientation
8
+
9
+ @first_length, @second_length = first_unaligned.length, second_unaligned.length
10
+ @length = self.class.calculate_alignment_length(@first_length, @second_length, @shift)
11
+
12
+ first, second = first_unaligned, second_unaligned
13
+ second = second.reverse_complement if revcomp?
14
+
9
15
  if shift > 0
10
- first, second = first, second.left_augment(shift)
16
+ second = second.left_augment(shift)
11
17
  else
12
- first, second = first.left_augment(-shift), second
18
+ first = first.left_augment(-shift)
13
19
  end
14
- @length = [first.length, second.length].max
20
+
15
21
  @first = first.right_augment(@length - first.length)
16
22
  @second = second.right_augment(@length - second.length)
17
23
  end
18
-
24
+
19
25
  def direct?
20
26
  orientation == :direct
21
27
  end
22
28
  def revcomp?
23
29
  orientation == :revcomp
24
30
  end
25
-
31
+
26
32
  def overlap
27
33
  length.times.count{|pos| first_overlaps?(pos) && second_overlaps?(pos) }
28
34
  end
29
-
35
+
30
36
  def first_pwm_alignment
31
37
  length.times.map do |pos|
32
38
  if first_overlaps?(pos)
@@ -36,7 +42,7 @@ module Macroape
36
42
  end
37
43
  end.join
38
44
  end
39
-
45
+
40
46
  def second_pwm_alignment
41
47
  length.times.map do |pos|
42
48
  if second_overlaps?(pos)
@@ -46,7 +52,7 @@ module Macroape
46
52
  end
47
53
  end.join
48
54
  end
49
-
55
+
50
56
  def alignment_infos
51
57
  {shift: shift,
52
58
  orientation: orientation,
@@ -54,15 +60,8 @@ module Macroape
54
60
  overlap: overlap,
55
61
  alignment_length: length}
56
62
  end
57
-
58
- def first_length
59
- unaligned_first.length
60
- end
61
- def second_length
62
- unaligned_second.length
63
- end
64
-
65
- # whether first matrix overlap specified position
63
+
64
+ # whether first matrix overlap specified position of alignment
66
65
  def first_overlaps?(pos)
67
66
  return false unless pos >= 0 && pos < length
68
67
  if shift > 0
@@ -71,7 +70,7 @@ module Macroape
71
70
  pos >= -shift && pos < -shift + first_length
72
71
  end
73
72
  end
74
-
73
+
75
74
  def second_overlaps?(pos)
76
75
  return false unless pos >= 0 && pos < length
77
76
  if shift > 0
@@ -80,8 +79,8 @@ module Macroape
80
79
  pos < second_length
81
80
  end
82
81
  end
83
-
84
- =begin
82
+
83
+ =begin
85
84
  def discrete(rate)
86
85
  PWMCompareAligned.new(first.discrete(rate), second.discrete(rate))
87
86
  end
@@ -104,13 +103,13 @@ module Macroape
104
103
  def jaccard(first_threshold, second_threshold)
105
104
  f = first.counts_by_thresholds(first_threshold).first
106
105
  s = second.counts_by_thresholds(second_threshold).first
107
- if f == 0 or s == 0
106
+ if f == 0 || s == 0
108
107
  return {similarity: -1, tanimoto: -1, recognized_by_both: 0,
109
108
  recognized_by_first: f,
110
109
  recognized_by_second: s,
111
110
  }
112
111
  end
113
-
112
+
114
113
  intersect = counts_for_two_matrices(first_threshold, second_threshold)
115
114
  intersect = Math.sqrt(intersect[0] * intersect[1])
116
115
  union = f + s - intersect
@@ -118,6 +117,15 @@ module Macroape
118
117
  { similarity: similarity, tanimoto: 1.0 - similarity, recognized_by_both: intersect,
119
118
  recognized_by_first: f, recognized_by_second: s }
120
119
  end
121
-
120
+
121
+ def self.calculate_alignment_length(first_len, second_len, shift)
122
+ if shift > 0
123
+ [first_len, second_len + shift].max
124
+ else
125
+ [first_len - shift, second_len].max
126
+ end
127
+ end
128
+
122
129
  end
130
+
123
131
  end