gene-matcher 0.1.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml ADDED
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA256:
3
+ metadata.gz: 829cbe42a19bf6eb809358fe514714377cc7a8a856b1d2a21070760962c60d02
4
+ data.tar.gz: 04545fb06b196d235531d125575e246e268c7e526f9684db5fb37079b66b9844
5
+ SHA512:
6
+ metadata.gz: '0854fc5cac30e888a57dda0fbda709145fe206be6acea8a29493e793075e14633a33d5ee4fa57b6b84ea4e3baa660f7a869fc14a1d51a22fc930f16f377ed2ac'
7
+ data.tar.gz: 23e94a7632e246589b347ad9c87445780046b3d2bf1e44ae94691ea065fe22141a7d3cb12f3b1d05b855c9f2c76682577aeeef92bad2e6ccabad6b135ef00285
data/lib/alignment.rb ADDED
@@ -0,0 +1,62 @@
1
# Result of aligning two sequences: the aligned strings, their positions in
# the original sequences, a score and a few descriptive flags.
class Alignment
  attr_accessor :score, :alignmentI, :alignmentJ, :startI, :startJ, :endI, :endJ, :reversed, :aside, :source

  # Character used to represent a gap inside an aligned sequence.
  BLANK = "-".freeze

  # Builds an empty alignment with every attribute set to a neutral default.
  def initialize
    # Alignment score.
    @score = 0
    # Aligned part of the searched sequence (the one stored in the database).
    @alignmentI = ""
    # Aligned part of the query sequence (the one given as input).
    @alignmentJ = ""
    # start*: position in the original sequence where the alignment begins.
    # end*:   position in the original sequence where the alignment ends.
    @startI = 0
    @startJ = 0
    # These must be instance variables; plain locals would leave the
    # endI/endJ accessors returning nil on a fresh object.
    @endI = 0
    @endJ = 0
    # Front/back reversal flag.
    @reversed = false
    # True when the alignment refers to the opposite strand.
    @aside = false
    # Where the aligned sequence was obtained from. For egtc this is the
    # clone table or the accession table.
    @source = ""
  end

  # Returns a string marking matching and non-matching positions of the two
  # aligned sequences, compared up to the length of the shorter one.
  # ex. AGTCAAAAAAAAA- :...:::::::::. AT-TAAAAAAAAAG
  #
  # Returns a String made only of ":" (same character) and "." (different).
  def alignment
    len = [@alignmentI.length, @alignmentJ.length].min
    (0...len).map { |i| @alignmentI[i] == @alignmentJ[i] ? ":" : "." }.join
  end

  # Returns the number of positions at which the two aligned sequences match.
  def alignment_count
    alignment.count(":")
  end

  # Placeholder with no implementation yet; always returns nil.
  def number_of_blank
  end

  # Returns a short human-readable summary of the score and both sequences.
  def to_s
    "score=#{@score} I=#{@alignmentI} J=#{@alignmentJ}"
  end
end
data/lib/gene-matcher.rb ADDED
@@ -0,0 +1,20 @@
1
+
2
+ require_relative 'smith-waterman'
3
+
4
# Compares target sequences against one fixed input sequence and keeps the
# alignments whose score reaches the configured limit.
class Matcher
  attr_reader :alignments
  attr_accessor :input_sequence, :limit

  # input_sequence:: the query sequence each target is aligned with
  # limit:: lowest score an alignment needs in order to be kept (default 0.6)
  def initialize(input_sequence, limit = 0.6)
    @input_sequence = input_sequence
    @alignments = []
    @limit = limit
  end

  # Aligns target_sequence with the input sequence through the shared
  # SmithWaterman instance and records the result when its score is at
  # least the limit.
  #
  # Returns the updated list of alignments, or nil when the result was not
  # kept.
  def scan(target_sequence)
    candidate = SmithWaterman.instance.alignment(target_sequence, @input_sequence)
    return unless candidate.score >= @limit

    # A new array is assigned instead of appending in place, so arrays
    # handed out earlier through #alignments are left untouched.
    @alignments = [*@alignments, candidate]
  end
end
20
+
data/lib/smith-waterman.rb ADDED
@@ -0,0 +1,119 @@
1
+ # Calculate the similarity of two sequences.
2
+ # Originally created in Java language on 2004/08/04
3
+ # Migrated to Ruby on 2023/07/10
4
+ require_relative 'alignment'
5
+ require 'singleton'
6
+
7
# Finds the best local alignment between a target sequence and an input
# sequence using a Smith-Waterman style score matrix (match = 1,
# mismatch = 0, gap = -1). Accessed as a singleton: SmithWaterman.instance.
class SmithWaterman
  include Singleton

  # Alignments whose aligned length is at or below this value score 0.
  MIN_SCORED_LENGTH = 20
  # Amount subtracted from the score when a gap is introduced.
  GAP_PENALTY = 1

  # Aligns input against target twice, once as given and once as its
  # reverse complement, and returns whichever Alignment scores higher
  # (the forward one on a tie). The reverse-complement result has
  # aside == true.
  #
  # Raises RuntimeError when either sequence is nil or empty.
  def alignment(target, input)
    forward = alignment_local(target, input)
    opposite = alignment_local(target, aside_sequence(input))
    opposite.aside = true

    # TODO: set wholeLengthI / wholeLengthJ, then convert the aside
    # coordinates back to the original input:
    # a[1].startJ = a[1].wholeLengthJ - a[1].startJ - 1
    # a[1].endJ = a[1].wholeLengthJ - a[1].endJ - 1
    max([forward, opposite])
  end

  # Computes one local alignment of input against target.
  #
  # Returns an Alignment with alignmentI/alignmentJ, startI/startJ,
  # endI/endJ (0-based indexes into target/input) and score filled in.
  # The score is matches**2 / aligned_length, or 0 when the aligned length
  # does not exceed MIN_SCORED_LENGTH.
  #
  # Raises RuntimeError when either sequence is nil or empty.
  def alignment_local(target, input)
    raise "input is nil or empty" if input.nil? || input.empty?
    raise "target is nil or empty" if target.nil? || target.empty?

    matrix, max_i, max_j = score_matrix(target, input)

    a = Alignment.new
    a.endI = max_i
    a.endJ = max_j
    a.alignmentI, a.alignmentJ, a.startI, a.startJ = traceback(matrix, target, input, max_i, max_j)

    length = a.alignmentI.length
    a.score = length <= MIN_SCORED_LENGTH ? 0 : a.alignment_count**2 / length.to_f
    a
  end

  private

  # Fills the score matrix (rows follow target, columns follow input) and
  # returns [matrix, max_i, max_j], where max_i/max_j locate the highest
  # score; on ties the cell visited last wins.
  # Prints each row and the maximum when ENV["DEBUG"] is set.
  def score_matrix(target, input)
    matrix = Array.new(target.length) { Array.new(input.length, 0) }
    max_score = 0
    max_i = 0
    max_j = 0

    target.length.times do |i|
      ci = target[i] # character of the searched sequence
      row = matrix[i]
      above = i > 0 ? matrix[i - 1] : nil

      input.length.times do |j|
        diagonal = s(ci, input[j])
        diagonal += above[j - 1] if above && j > 0
        up = above ? above[j] - GAP_PENALTY : 0
        left = j > 0 ? row[j - 1] - GAP_PENALTY : 0

        # The leading 0 keeps scores from going negative.
        cell = [0, diagonal, up, left].max
        row[j] = cell

        # Remember where the highest score is.
        if cell >= max_score
          max_score = cell
          max_i = i
          max_j = j
        end
      end
      puts "#{ci} #{row.join(' ')}" if ENV["DEBUG"]
    end
    puts "maxScore=#{max_score} maxI=#{max_i} maxJ=#{max_j}" if ENV["DEBUG"]

    [matrix, max_i, max_j]
  end

  # Walks back from cell (i, j), always stepping to the highest-scoring of
  # the diagonal, upper and left neighbours (in that order on ties), until
  # every neighbour scores 0 or an edge of the matrix is reached.
  #
  # Returns [aligned_target, aligned_input, start_i, start_j].
  def traceback(matrix, target, input, i, j)
    # Both strings are collected back to front and reversed at the end.
    buf_i = target[i].dup
    buf_j = input[j].dup

    while i > 0 && j > 0
      diagonal = matrix[i - 1][j - 1]
      up = matrix[i - 1][j]
      left = matrix[i][j - 1]
      best = [diagonal, up, left].max
      break if best == 0 # nowhere left to go

      if best == diagonal
        i -= 1
        j -= 1
        buf_i << target[i]
        buf_j << input[j]
      elsif best == up
        i -= 1
        buf_i << target[i]
        buf_j << Alignment::BLANK
      else
        j -= 1
        buf_i << Alignment::BLANK
        buf_j << input[j]
      end
    end

    [buf_i.reverse, buf_j.reverse, i, j]
  end

  # Returns the alignment with the highest score (the earliest one on a
  # tie), or nil for an empty list.
  def max(alignments)
    alignments.reduce { |best, a| a.score > best.score ? a : best }
  end

  # Scoring function: 1 when the two characters are equal, otherwise 0.
  def s(a, b)
    a == b ? 1 : 0
  end

  # Returns the reverse complement of seq. Upper- and lower-case A/G/T/C
  # are complemented; every other character is kept as is.
  def aside_sequence(seq)
    seq.reverse.tr("AGTCagtc", "TCAGtcag")
  end
end
metadata ADDED
@@ -0,0 +1,45 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: gene-matcher
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.1.1
5
+ platform: ruby
6
+ authors:
7
+ - ITO Yosei
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+ date: 2023-08-02 00:00:00.000000000 Z
12
+ dependencies: []
13
+ description: Algorithm for determining similar regions between nucleic acid sequences.
14
+ email: y-itou@lumber-mill.co.jp
15
+ executables: []
16
+ extensions: []
17
+ extra_rdoc_files: []
18
+ files:
19
+ - lib/alignment.rb
20
+ - lib/gene-matcher.rb
21
+ - lib/smith-waterman.rb
22
+ homepage: https://github.com/lumbermill/gene-matcher
23
+ licenses:
24
+ - MIT
25
+ metadata: {}
26
+ post_install_message:
27
+ rdoc_options: []
28
+ require_paths:
29
+ - lib
30
+ required_ruby_version: !ruby/object:Gem::Requirement
31
+ requirements:
32
+ - - ">="
33
+ - !ruby/object:Gem::Version
34
+ version: '0'
35
+ required_rubygems_version: !ruby/object:Gem::Requirement
36
+ requirements:
37
+ - - ">="
38
+ - !ruby/object:Gem::Version
39
+ version: '0'
40
+ requirements: []
41
+ rubygems_version: 3.3.26
42
+ signing_key:
43
+ specification_version: 4
44
+ summary: Algorithm for determining similar regions between nucleic acid sequences.
45
+ test_files: []