pdist 0.0.3 → 0.0.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (3) hide show
  1. checksums.yaml +4 -4
  2. data/lib/pdist.rb +32 -29
  3. metadata +2 -2
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: d7ebb3f205b20de1c6d95cd2b23258859ce4f2a1
4
- data.tar.gz: f58d113cdeecbc627cca8cdc91ce2714bc11d51e
3
+ metadata.gz: 8f6118cbd2a20a7657698ccb221e3eca96f0b06c
4
+ data.tar.gz: 595fa6bc50c46f3ea10c70236b3a6e3a47e39012
5
5
  SHA512:
6
- metadata.gz: 55212a0b26ece1011645ae268e21ede6d83c66a6b6b6e14364cbed7d1129f81161fab03f21114ced284116d027926bb8e33696890f71466c33d95998702da7a1
7
- data.tar.gz: af1e3ce555ee46312068c7ee0e3c9dfbe121260e75ecb5ef2ce4c72ddac5b04c113d9fc236773edc7dd476cab0bef10a464d1571e1d9928dc7702e8d2b249748
6
+ metadata.gz: d0f6b88bdd1b539087fa21eefffd16eb382e3b5879dbf3573fcfc08e07488a0296693824d8e3c6a6f9be14aedcbcf4879002c25ec4b9726f8173d1324e9b68c4
7
+ data.tar.gz: de356c84860ba351714b360bc88dc833632feb4802bf186b797b639971fbff39929b6bf3a8de5854328c18d98f2f66ff3f5eb0d71edc8e01608f2bef552c9f8c
@@ -3,38 +3,41 @@ class PDist
3
3
 
4
4
  require 'diff/lcs'
5
5
 
6
+ # Returns array of values for each object's deviation in permutation, from original
6
7
  def self.distances(original, permutation)
7
- both, difference_abs = [], []
8
- both << original.map{|x| permutation.index(x)} # works out the index of original values in permutation
9
- both << Array(0..(original.length - 1)) # index values that original originally at
10
- difference = both.transpose.map {|x| x.reduce(:-)} # taking away old position from new position, to find the distance that the frag has moved when re-ordered
11
- difference.each {|i| difference_abs << i.abs }
12
- return difference_abs
8
+ indices = []
9
+ indices << original.map{|x| permutation.index(x)} # indices of original values in permutation
10
+ indices << Array(0..(original.length - 1)) # indices of original values in original
11
+ difference = indices.transpose.map {|x| x.reduce(:-)} # subtracting each object's index in original from its index in permutation, to find the deviation
12
+ return difference.map! {|i| i.abs } # the deviations, as absolute values (direction does not matter)
13
13
  end
14
14
 
15
+ # Returns float of the deviation distance between original and permutation, normalized between 0.0 and 1.0
15
16
  def self.deviation(original, permutation)
16
- s = distances(original, permutation).inject(:+)
17
+ s = distances(original, permutation).inject(:+) # sum of each object's deviation, is the deviation distance between original and permutation
17
18
  n = permutation.length
18
- if n % 2 == 0
19
- score = (2.0 / (n ** 2).to_f) * s
19
+ if n % 2 == 0
20
+ distance = (2.0 / (n ** 2).to_f) * s
20
21
  else
21
- score = (2.0 / ((n ** 2) - 1).to_f) * s
22
+ distance = (2.0 / ((n ** 2) - 1).to_f) * s
22
23
  end
23
- return score
24
+ return distance # if/else block normalizes the deviation distance
24
25
  end
25
26
 
27
+ # Returns float of the squared deviation distance between original and permutation, normalized between 0.0 and 1.0
26
28
  def self.square(original, permutation)
27
29
  sq_dists = []
28
- distances(original, permutation).each{|d| sq_dists << d**2}
29
- s = sq_dists.inject(:+)
30
+ distances(original, permutation).each{|d| sq_dists << d**2} # each object's deviation is squared
31
+ s = sq_dists.inject(:+) # sum of each object's squared deviation, is the squared deviation distance between original and permutation
30
32
  n = permutation.length
31
- return (3.0 / (n**3 - n).to_f) * s
33
+ return (3.0 / (n**3 - n).to_f) * s # normalizes the squared deviation distance
32
34
  end
33
35
 
34
- def self.ham_dist(original, permutation)
36
+ # Returns float of the generalized hamming distance between original and permutation, normalized between 0.0 and 1.0
37
+ def self.hamming(original, permutation)
35
38
  x = 0
36
39
  hds = [] # hamming distances
37
- permutation.each do |frag_id|
40
+ permutation.each do |frag_id| # hamming distances are 0 when objects have the same index in original and permutation, and 1 when not
38
41
  if frag_id == original[x]
39
42
  hds << 0
40
43
  else
@@ -42,18 +45,16 @@ class PDist
42
45
  end
43
46
  x+=1
44
47
  end
45
- return hds.inject(:+)
48
+ total_hd = hds.inject(:+).to_f # total hamming distance is the sum of the hamming distances
49
+ return total_hd / permutation.length.to_f # normalizes by dividing the total hamming distance by the maximum possible total, which == number of objects
46
50
  end
47
51
 
48
- def self.hamming(original, permutation) # generalized hamming distance
49
- ham_dist(original, permutation).to_f / permutation.length.to_f # normalize by dividing by the max score, which == number of objects
50
- end
51
-
52
- def self.rdist(original, permutation) # reverse R distance (since higher scores = bad)
52
+ # Returns float of the complement R distance between original and permutation, normalized between 0.0 and 1.0
53
+ def self.rdist(original, permutation)
53
54
  x = 0
54
- r = []
55
+ r = [] # complement of R distance == number of times two objects consecutive in original, are consecutive in permutation
55
56
  n = permutation.length
56
- (n - 1).times do
57
+ (n - 1).times do
57
58
  y = permutation.index(original[x])
58
59
  if original[x+1] == permutation[y+1]
59
60
  r << 0
@@ -62,14 +63,16 @@ class PDist
62
63
  end
63
64
  x+=1
64
65
  end
65
- return r.inject(:+).to_f / (n - 1).to_f
66
+ return r.inject(:+).to_f / (n - 1).to_f # normalized: dividing by maximum number of consecutive occurrences
66
67
  end
67
68
 
69
+ # Returns float of the longest common sub-sequence between original and permutation, normalized between 0.0 and 1.0
68
70
  def self.lcs(original, permutation)
69
- lcs = Diff::LCS.LCS(original, permutation)
70
- return (permutation.length - lcs.length).to_f / (permutation.length - 1).to_f
71
+ lcs = Diff::LCS.LCS(original, permutation) # diff-lcs gem used to calculate longest common sub-sequence
72
+ return (permutation.length - lcs.length).to_f / (permutation.length - 1).to_f # normalized: dividing by longest possible common sub-sequence
71
73
  end
72
74
 
75
+ # Returns float of the kendall's tau distance between original and permutation, normalized between 0.0 and 1.0
73
76
  def self.kendalls_tau(original, permutation)
74
77
  n = permutation.length
75
78
  x = 0
@@ -86,7 +89,7 @@ class PDist
86
89
  end
87
90
  x+=1
88
91
  end
89
- s = kt.inject(:+)
90
- return 2 * (s.to_f / (n**2 - n).to_f)
92
+ s = kt.inject(:+) # the number of pairwise adjacent permutations required to transform original into permutation
93
+ return 2 * (s.to_f / (n**2 - n).to_f) # normalized kendall's tau distance
91
94
  end
92
95
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: pdist
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.3
4
+ version: 0.0.4
5
5
  platform: ruby
6
6
  authors:
7
7
  - Edward Chalstrey
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2014-04-16 00:00:00.000000000 Z
11
+ date: 2014-04-28 00:00:00.000000000 Z
12
12
  dependencies: []
13
13
  description: 'Ruby gem to compare permutations using distance measures/metrics. github:
14
14
  https://github.com/edwardchalstrey1/pdist'