gene-matcher 0.1.1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/lib/alignment.rb +62 -0
- data/lib/gene-matcher.rb +20 -0
- data/lib/smith-waterman.rb +119 -0
- metadata +45 -0
checksums.yaml
ADDED
@@ -0,0 +1,7 @@
|
|
1
|
+
---
|
2
|
+
SHA256:
|
3
|
+
metadata.gz: 829cbe42a19bf6eb809358fe514714377cc7a8a856b1d2a21070760962c60d02
|
4
|
+
data.tar.gz: 04545fb06b196d235531d125575e246e268c7e526f9684db5fb37079b66b9844
|
5
|
+
SHA512:
|
6
|
+
metadata.gz: '0854fc5cac30e888a57dda0fbda709145fe206be6acea8a29493e793075e14633a33d5ee4fa57b6b84ea4e3baa660f7a869fc14a1d51a22fc930f16f377ed2ac'
|
7
|
+
data.tar.gz: 23e94a7632e246589b347ad9c87445780046b3d2bf1e44ae94691ea065fe22141a7d3cb12f3b1d05b855c9f2c76682577aeeef92bad2e6ccabad6b135ef00285
|
data/lib/alignment.rb
ADDED
@@ -0,0 +1,62 @@
|
|
1
|
+
# Result of aligning two sequences: the aligned strings, where they sit in
# the original sequences, and a similarity score.
class Alignment
  attr_accessor :score, :alignmentI, :alignmentJ, :startI, :startJ, :endI, :endJ, :reversed, :aside, :source

  # Character that marks a gap inside an aligned sequence.
  BLANK = "-".freeze

  def initialize
    # Similarity score.
    @score = 0
    # Aligned part of the search target (the sequence held in the database).
    @alignmentI = ""
    # Aligned part of the query (the sequence that was entered).
    @alignmentJ = ""
    # start*: position in the original sequence where the alignment begins.
    # end*:   position in the original sequence where the alignment ends.
    @startI = 0
    @startJ = 0
    # These two used to be assigned to local variables, which left the
    # attributes nil until a caller set them.
    @endI = 0
    @endJ = 0
    # Front/back reversal.
    @reversed = false
    # Opposite strand.
    @aside = false
    # Where the aligned sequence was fetched from. For egtc this is the clone
    # table or the accession table.
    @source = ""
  end

  # Builds a string marking, position by position, where the two aligned
  # sequences agree and where they differ. Only the first
  # min(alignmentI.length, alignmentJ.length) positions are compared.
  #
  # ex. AGTCAAAAAAAAA- :...:::::::::. AT-TAAAAAAAAAG
  #
  # @return [String] ":" for each matching position, "." for each mismatch
  def alignment
    shared = [@alignmentI.length, @alignmentJ.length].min
    (0...shared).map { |k| @alignmentI[k] == @alignmentJ[k] ? ":" : "." }.join
  end

  # @return [Integer] number of positions at which the two aligned sequences match
  def alignment_count
    alignment.count(":")
  end

  # @return [Integer] total number of gap characters (BLANK) found in the two
  #   aligned sequences
  def number_of_blank
    @alignmentI.count(BLANK) + @alignmentJ.count(BLANK)
  end

  # @return [String] the score followed by both aligned sequences
  def to_s
    "score=#{@score} I=#{@alignmentI} J=#{@alignmentJ}"
  end
end
|
data/lib/gene-matcher.rb
ADDED
@@ -0,0 +1,20 @@
|
|
1
|
+
|
2
|
+
require_relative 'smith-waterman'
|
3
|
+
|
4
|
+
# Collects the alignments of one query sequence against any number of target
# sequences, keeping only those whose score reaches a threshold.
class Matcher
  attr_accessor :input_sequence, :limit
  attr_reader :alignments

  # @param input_sequence [String] the query sequence every target is compared with
  # @param limit [Numeric] minimum alignment score for a result to be kept
  def initialize(input_sequence, limit = 0.6)
    @limit = limit
    @input_sequence = input_sequence
    @alignments = []
  end

  # Aligns target_sequence against the query and records the result when its
  # score is at least +limit+.
  #
  # @param target_sequence [String] sequence to search in
  # @return [Array, nil] the accumulated alignments when the result was kept,
  #   nil when it was rejected
  def scan(target_sequence)
    candidate = SmithWaterman.instance.alignment(target_sequence, @input_sequence)
    # Append in place: the list is not re-allocated on every hit, and callers
    # already holding +alignments+ keep seeing new results.
    @alignments << candidate if candidate.score >= @limit
  end
end
|
20
|
+
|
data/lib/smith-waterman.rb
ADDED
@@ -0,0 +1,119 @@
|
|
1
|
+
# Calculate the similarity of two sequences.
|
2
|
+
# Originally created in Java language on 2004/08/04
|
3
|
+
# Migrated to Ruby on 2023/07/10
|
4
|
+
require_relative 'alignment'
|
5
|
+
require 'singleton'
|
6
|
+
|
7
|
+
# Smith-Waterman style local alignment of two sequences.
#
# Scoring: +1 for a matching pair, 0 for a mismatch, -1 for a gap; a cell
# never drops below 0.
class SmithWaterman
  include Singleton

  # Alignments whose aligned length is at most this many characters get score 0.
  SHORT_ALIGNMENT_LENGTH = 20

  # Aligns +input+ against +target+ twice, once as given and once as its
  # reverse complement, and returns whichever result scores higher (the
  # as-given result wins a tie).
  #
  # @param target [String] sequence to search in
  # @param input [String] query sequence
  # @return [Alignment] best of the two alignments; the reverse-complement
  #   result has +aside+ set to true
  # @raise [RuntimeError] if either sequence is nil or empty
  def alignment(target, input)
    forward = alignment_local(target, input)
    complement = alignment_local(target, aside_sequence(input))
    complement.aside = true

    # TODO: set wholeLengthI / wholeLengthJ, then map the positions of the
    # reverse-complement result back onto the original input:
    # a[1].startJ = a[1].wholeLengthJ - a[1].startJ - 1
    # a[1].endJ = a[1].wholeLengthJ - a[1].endJ - 1
    max([forward, complement])
  end

  # Computes one local alignment of +input+ against +target+.
  #
  # The score is 0 when the aligned length is SHORT_ALIGNMENT_LENGTH or less,
  # otherwise (number of matching positions)^2 / aligned length.
  #
  # @param target [String] sequence to search in
  # @param input [String] query sequence
  # @return [Alignment]
  # @raise [RuntimeError] if either sequence is nil or empty
  def alignment_local(target, input)
    raise "input is nil or empty" if input.nil? || input.empty?
    raise "target is nil or empty" if target.nil? || target.empty?

    matrix, max_i, max_j = fill_matrix(target, input)
    start_i, start_j, aligned_i, aligned_j = trace_back(matrix, target, input, max_i, max_j)

    a = Alignment.new
    a.endI = max_i
    a.endJ = max_j
    a.alignmentI = aligned_i
    a.alignmentJ = aligned_j
    a.startI = start_i
    a.startJ = start_j

    aligned_length = a.alignmentI.length
    a.score =
      if aligned_length <= SHORT_ALIGNMENT_LENGTH
        0
      else
        a.alignment_count**2 / aligned_length.to_f
      end
    a
  end

  private

  # Fills the score matrix (rows follow +target+, columns follow +input+) and
  # remembers the cell with the highest score. Later cells win ties, so the
  # last maximum in row-major order is returned.
  #
  # Prints each row and the final maximum when ENV["DEBUG"] is set.
  #
  # @return [Array] [matrix, row of the maximum, column of the maximum]
  def fill_matrix(target, input)
    # Initialise the matrix.
    matrix = Array.new(target.length) { Array.new(input.length, 0) }
    max_score = 0
    max_i = 0
    max_j = 0

    target.length.times do |i|
      ci = target[i] # character of the search target
      input.length.times do |j|
        cj = input[j] # character of the query
        diagonal = s(ci, cj)
        diagonal += matrix[i - 1][j - 1] if i > 0 && j > 0
        # Moves that would leave the matrix contribute 0, like the floor.
        from_above = i > 0 ? matrix[i - 1][j] - 1 : 0
        from_left = j > 0 ? matrix[i][j - 1] - 1 : 0

        cell = [0, diagonal, from_above, from_left].max
        matrix[i][j] = cell

        # Remember where the highest score is.
        next unless cell >= max_score

        max_score = cell
        max_i = i
        max_j = j
      end
      puts "#{ci} #{matrix[i].join(' ')}" if ENV["DEBUG"]
    end
    puts "maxScore=#{max_score} maxI=#{max_i} maxJ=#{max_j}" if ENV["DEBUG"]

    [matrix, max_i, max_j]
  end

  # Walks back from cell (i, j), always stepping to the highest-valued
  # neighbour (diagonal first, then up, then left on ties), until it reaches
  # the first row or column or every neighbour is 0.
  #
  # @return [Array] [start row, start column, aligned target, aligned input]
  def trace_back(matrix, target, input, i, j)
    # Both buffers are built back to front and reversed at the end.
    buf_i = target[i].dup
    buf_j = input[j].dup

    while i > 0 && j > 0
      neighbours = [matrix[i - 1][j - 1], matrix[i - 1][j], matrix[i][j - 1]]
      best = neighbours.max
      break if best == 0 # nowhere left to go

      case neighbours.index(best)
      when 0 # diagonal: consume one character of each sequence
        i -= 1
        j -= 1
        buf_i << target[i]
        buf_j << input[j]
      when 1 # up: gap in the query
        i -= 1
        buf_i << target[i]
        buf_j << Alignment::BLANK
      when 2 # left: gap in the target
        j -= 1
        buf_i << Alignment::BLANK
        buf_j << input[j]
      end
    end

    [i, j, buf_i.reverse, buf_j.reverse]
  end

  # Returns the highest-scoring alignment of the given list; the earliest one
  # wins a tie. Returns nil for an empty list.
  def max(alignments)
    alignments.reduce { |best, a| a.score > best.score ? a : best }
  end

  # Scoring function: 1 when the two characters are equal, otherwise 0.
  def s(a, b)
    a == b ? 1 : 0
  end

  # Reverse complement of +seq+ (A<->T, G<->C). Lower-case bases are
  # complemented too, keeping their case; any other character is left as is.
  def aside_sequence(seq)
    seq.reverse.tr("AGTCagtc", "TCAGtcag")
  end
end
|
metadata
ADDED
@@ -0,0 +1,45 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: gene-matcher
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 0.1.1
|
5
|
+
platform: ruby
|
6
|
+
authors:
|
7
|
+
- ITO Yosei
|
8
|
+
autorequire:
|
9
|
+
bindir: bin
|
10
|
+
cert_chain: []
|
11
|
+
date: 2023-08-02 00:00:00.000000000 Z
|
12
|
+
dependencies: []
|
13
|
+
description: Algorithm for determining similar regions between nucleic acid sequences.
|
14
|
+
email: y-itou@lumber-mill.co.jp
|
15
|
+
executables: []
|
16
|
+
extensions: []
|
17
|
+
extra_rdoc_files: []
|
18
|
+
files:
|
19
|
+
- lib/alignment.rb
|
20
|
+
- lib/gene-matcher.rb
|
21
|
+
- lib/smith-waterman.rb
|
22
|
+
homepage: https://github.com/lumbermill/gene-matcher
|
23
|
+
licenses:
|
24
|
+
- MIT
|
25
|
+
metadata: {}
|
26
|
+
post_install_message:
|
27
|
+
rdoc_options: []
|
28
|
+
require_paths:
|
29
|
+
- lib
|
30
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
31
|
+
requirements:
|
32
|
+
- - ">="
|
33
|
+
- !ruby/object:Gem::Version
|
34
|
+
version: '0'
|
35
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
36
|
+
requirements:
|
37
|
+
- - ">="
|
38
|
+
- !ruby/object:Gem::Version
|
39
|
+
version: '0'
|
40
|
+
requirements: []
|
41
|
+
rubygems_version: 3.3.26
|
42
|
+
signing_key:
|
43
|
+
specification_version: 4
|
44
|
+
summary: Algorithm for determining similar regions between nucleic acid sequences.
|
45
|
+
test_files: []
|