gene-matcher 0.1.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/lib/alignment.rb +62 -0
- data/lib/gene-matcher.rb +20 -0
- data/lib/smith-waterman.rb +119 -0
- metadata +45 -0
checksums.yaml
ADDED
@@ -0,0 +1,7 @@
|
|
1
|
+
---
|
2
|
+
SHA256:
|
3
|
+
metadata.gz: 829cbe42a19bf6eb809358fe514714377cc7a8a856b1d2a21070760962c60d02
|
4
|
+
data.tar.gz: 04545fb06b196d235531d125575e246e268c7e526f9684db5fb37079b66b9844
|
5
|
+
SHA512:
|
6
|
+
metadata.gz: '0854fc5cac30e888a57dda0fbda709145fe206be6acea8a29493e793075e14633a33d5ee4fa57b6b84ea4e3baa660f7a869fc14a1d51a22fc930f16f377ed2ac'
|
7
|
+
data.tar.gz: 23e94a7632e246589b347ad9c87445780046b3d2bf1e44ae94691ea065fe22141a7d3cb12f3b1d05b855c9f2c76682577aeeef92bad2e6ccabad6b135ef00285
|
data/lib/alignment.rb
ADDED
@@ -0,0 +1,62 @@
|
|
1
|
+
# Holds the outcome of aligning two sequences: the two aligned strings,
# their positions in the original sequences, and a score.
class Alignment
  attr_accessor :score, :alignmentI, :alignmentJ, :startI, :startJ, :endI, :endJ, :reversed, :aside, :source

  # Character used to mark a gap inside an aligned sequence.
  BLANK = "-"

  # Builds an empty alignment: zero score, empty strings, all positions 0,
  # and both flags false.
  def initialize
    # Score of the alignment.
    @score = 0
    # Aligned form of the searched (database) sequence.
    @alignmentI = ""
    # Aligned form of the query (user-supplied) sequence.
    @alignmentJ = ""
    # start*: position in the original sequence where the alignment begins.
    # end*:   position in the original sequence where the alignment ends.
    @startI = 0
    @startJ = 0
    # These must be instance variables; plain `endI = 0` would only create a
    # local and leave the endI/endJ readers returning nil.
    @endI = 0
    @endJ = 0
    # Whether the sequence was reversed front-to-back.
    @reversed = false
    # Whether the alignment was made against the opposite strand.
    @aside = false
    # Where the aligned sequence was obtained from (for egtc: the clone table
    # or the accession table).
    @source = ""
  end

  # Returns a string marking, position by position, where the two aligned
  # sequences agree (":") and where they differ (".").
  #
  # Only the common prefix length of the two strings is compared.
  #
  # Example:
  #   I = AGTCAAAAAAAAA-
  #       :...:::::::::.
  #   J = AT-TAAAAAAAAAG
  #
  # Returns a String made of ":" and "." characters.
  def alignment
    len = [@alignmentI.length, @alignmentJ.length].min
    (0...len).map { |i| @alignmentI[i] == @alignmentJ[i] ? ":" : "." }.join
  end

  # Returns the number of positions at which the two aligned sequences agree.
  def alignment_count
    alignment.count(":")
  end

  # Not implemented: the body is empty, so this currently returns nil.
  def number_of_blank
  end

  # Returns a short human-readable summary of the score and both aligned strings.
  def to_s
    "score=#{@score} I=#{@alignmentI} J=#{@alignmentJ}"
  end
end
|
data/lib/gene-matcher.rb
ADDED
@@ -0,0 +1,20 @@
|
|
1
|
+
|
2
|
+
require_relative 'smith-waterman'
|
3
|
+
|
4
|
+
# Collects the alignments of one query sequence against any number of
# target sequences, keeping only those whose score reaches a threshold.
class Matcher
  attr_accessor :input_sequence, :limit
  attr_reader :alignments

  # input_sequence - the query sequence every target is compared with
  # limit          - minimum score an alignment needs to be kept (default 0.6)
  def initialize(input_sequence, limit = 0.6)
    @input_sequence = input_sequence
    @limit = limit
    @alignments = []
  end

  # Aligns the query against target_sequence and records the result when its
  # score is at least the limit.
  #
  # Returns the updated list of kept alignments, or nil when the result was
  # below the limit. The list is rebuilt as a new array on every hit, so
  # arrays handed out earlier through #alignments are not modified.
  def scan(target_sequence)
    candidate = SmithWaterman.instance.alignment(target_sequence, @input_sequence)
    return unless candidate.score >= @limit

    @alignments = @alignments + [candidate]
  end
end
|
20
|
+
|
data/lib/smith-waterman.rb
ADDED
@@ -0,0 +1,119 @@
|
|
1
|
+
# Calculate the similarity of two sequences.
|
2
|
+
# Originally created in Java language on 2004/08/04
|
3
|
+
# Migrated to Ruby on 2023/07/10
|
4
|
+
require_relative 'alignment'
|
5
|
+
require 'singleton'
|
6
|
+
|
7
|
+
# Singleton that measures how similar two sequences are using a
# Smith-Waterman-style local alignment (match +1, mismatch 0, gap -1).
class SmithWaterman
  include Singleton

  # Aligns input against target twice - once as given and once after passing
  # it through aside_sequence - and returns the higher-scoring result.
  #
  # The second result is flagged with aside = true. When both scores are
  # equal the as-given alignment is returned.
  #
  # target - sequence being searched
  # input  - query sequence
  #
  # Returns an Alignment. Raises RuntimeError (from alignment_local) when
  # either argument is nil or empty.
  def alignment(target, input)
    as_given = alignment_local(target, input)
    other_strand = alignment_local(target, aside_sequence(input))
    other_strand.aside = true

    # TODO: set wholeLengthI / wholeLengthJ so the other-strand positions can
    # be mapped back onto the original query:
    #   a[1].startJ = a[1].wholeLengthJ - a[1].startJ - 1
    #   a[1].endJ   = a[1].wholeLengthJ - a[1].endJ - 1
    max([as_given, other_strand])
  end

  # Computes one local alignment of input against target.
  #
  # Fills a target.length x input.length score matrix, remembers the last
  # cell holding the highest value, then walks back from it. The returned
  # Alignment carries the aligned strings, start/end positions, and a score
  # of matches squared divided by aligned length (0 when the aligned length
  # is 20 or less).
  #
  # Raises RuntimeError when input or target is nil or empty.
  # Prints the matrix rows and the best cell when ENV["DEBUG"] is set.
  def alignment_local(target, input)
    raise "input is nil or empty" if input.nil? || input.empty?
    raise "target is nil or empty" if target.nil? || target.empty?

    # Score matrix, all zeros to begin with.
    rows = target.length
    cols = input.length
    matrix = Array.new(rows) { Array.new(cols, 0) }
    best_score = 0
    best_row = 0
    best_col = 0

    (0...rows).each do |row|
      target_char = target[row] # character from the searched sequence
      (0...cols).each do |col|
        match = s(target_char, input[col]) # input[col] is the query character
        # On the first row/column there is no diagonal predecessor, so the
        # match value stands alone; missing up/left predecessors count as 0.
        diagonal = row > 0 && col > 0 ? matrix[row - 1][col - 1] + match : match
        from_above = row > 0 ? matrix[row - 1][col] - 1 : 0
        from_left = col > 0 ? matrix[row][col - 1] - 1 : 0
        cell = [0, diagonal, from_above, from_left].max
        matrix[row][col] = cell

        # Remember the best cell; ties move it to the later cell.
        next if cell < best_score

        best_score = cell
        best_row = row
        best_col = col
      end
      puts target_char+" "+matrix[row].join(" ") if ENV["DEBUG"]
    end
    puts "maxScore=#{best_score} maxI=#{best_row} maxJ=#{best_col}" if ENV["DEBUG"]

    result = Alignment.new
    result.endI = best_row
    result.endJ = best_col

    # Trace back from the best cell, collecting characters end-first.
    row = best_row
    col = best_col
    target_parts = [target[row]]
    input_parts = [input[col]]
    while row > 0 && col > 0
      # Neighbours in preference order: diagonal, up, left.
      neighbours = [matrix[row - 1][col - 1], matrix[row - 1][col], matrix[row][col - 1]]
      strongest = neighbours.max
      break if strongest == 0 # nowhere left to go

      case neighbours.index(strongest)
      when 0
        row -= 1
        col -= 1
        target_parts << target[row]
        input_parts << input[col]
      when 1
        row -= 1
        target_parts << target[row]
        input_parts << Alignment::BLANK
      when 2
        col -= 1
        target_parts << Alignment::BLANK
        input_parts << input[col]
      end
    end
    result.alignmentI = target_parts.join.reverse
    result.alignmentJ = input_parts.join.reverse
    result.startI = row
    result.startJ = col

    aligned_length = result.alignmentI.length
    result.score =
      if aligned_length <= 20
        0
      else
        result.alignment_count ** 2 / aligned_length.to_f
      end
    result
  end

  private

  # Returns the alignment with the highest score; the earliest one wins ties.
  def max(alignments)
    alignments.reduce { |best, candidate| candidate.score > best.score ? candidate : best }
  end

  # Scoring function: 1 when the two characters are equal, otherwise 0.
  def s(a, b)
    if a == b
      1
    else
      0
    end
  end

  # Returns seq reversed with A<->T and G<->C swapped (uppercase letters only).
  def aside_sequence(seq)
    seq.tr("AGTC", "TCAG").reverse
  end
end
|
metadata
ADDED
@@ -0,0 +1,45 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: gene-matcher
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 0.1.1
|
5
|
+
platform: ruby
|
6
|
+
authors:
|
7
|
+
- ITO Yosei
|
8
|
+
autorequire:
|
9
|
+
bindir: bin
|
10
|
+
cert_chain: []
|
11
|
+
date: 2023-08-02 00:00:00.000000000 Z
|
12
|
+
dependencies: []
|
13
|
+
description: Algorithm for determining similar regions between nucleic acid sequences.
|
14
|
+
email: y-itou@lumber-mill.co.jp
|
15
|
+
executables: []
|
16
|
+
extensions: []
|
17
|
+
extra_rdoc_files: []
|
18
|
+
files:
|
19
|
+
- lib/alignment.rb
|
20
|
+
- lib/gene-matcher.rb
|
21
|
+
- lib/smith-waterman.rb
|
22
|
+
homepage: https://github.com/lumbermill/gene-matcher
|
23
|
+
licenses:
|
24
|
+
- MIT
|
25
|
+
metadata: {}
|
26
|
+
post_install_message:
|
27
|
+
rdoc_options: []
|
28
|
+
require_paths:
|
29
|
+
- lib
|
30
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
31
|
+
requirements:
|
32
|
+
- - ">="
|
33
|
+
- !ruby/object:Gem::Version
|
34
|
+
version: '0'
|
35
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
36
|
+
requirements:
|
37
|
+
- - ">="
|
38
|
+
- !ruby/object:Gem::Version
|
39
|
+
version: '0'
|
40
|
+
requirements: []
|
41
|
+
rubygems_version: 3.3.26
|
42
|
+
signing_key:
|
43
|
+
specification_version: 4
|
44
|
+
summary: Algorithm for determining similar regions between nucleic acid sequences.
|
45
|
+
test_files: []
|