rbbt-util 5.44.0 → 5.44.1

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 1c39baa8ee0c2633bea3878720d29aa0f49a057db7ec6700187685c7a07f7eae
4
- data.tar.gz: ad0f8a09cb34faa3d0aa3388c26733d4849d307a02df02989656c0aced5ea72a
3
+ metadata.gz: a9269fd6f3ed8e0b03f575c59f50d42c45a11bc07e138f4377e5069f3050fb29
4
+ data.tar.gz: 488a57247f23d41f8c94aa64cb38cf666c99ee43c4fa14399ed77b8bb1854519
5
5
  SHA512:
6
- metadata.gz: dcfcf7f6ea2b45dd4ca5e1257fb22cec0f06280be2b6c9aac3b118e1e649a8f5948db0162f62735b9c113ee9a878cb89e8f98a99fd7026a7f58f061531b93f89
7
- data.tar.gz: 59e94fecbb50bf9d7635545aab508ee6b986e44f81acac59aac7e888f6c9c5748fb2d0324a0e83d784c78b249e67a50baaaba1d40b58612f194de111fd4c6579
6
+ metadata.gz: 3a288fcd2e11209621bc61f911097c104e75c22c759ca7ac6ca0a1f908bf1ea1711587bf776b262f082a60c0296a9004cfc8fad85de738dabd6bad0bc45f6fed
7
+ data.tar.gz: 91e775b843358c29df1d87e1d9d5db8a92c37d842fd05757cc810a5f4c77a38cfe154a0a368429abb921cf43014333361426546a43aac09eabe81f07ca4315a5
data/etc/app.d/base.rb CHANGED
@@ -19,7 +19,7 @@ end
19
19
  use Rack::Session::Cookie, :key => 'rack.session',
20
20
  :path => '/',
21
21
  :expire_after => 2592000,
22
- :secret => "#{self.to_s} secret!!"
22
+ :secret => Misc.digest("#{self.to_s} secret!!") * 4
23
23
 
24
24
  #{{{ DIRECTORIES
25
25
  global_var = Rbbt.var.sinatra
@@ -214,7 +214,7 @@ module Persist
214
214
  def [](key, clean=false)
215
215
  database = database(key)
216
216
  return nil if database.nil?
217
- v = database.send(:[], key)
217
+ database.send(:[], key)
218
218
  end
219
219
 
220
220
  def <<(p)
@@ -256,8 +256,9 @@ module Misc
256
256
  ref = m[1]
257
257
  num = m[2]
258
258
  alt = m[3]
259
+ alt = "*" if alt == "Ter"
259
260
  ref = THREE_TO_ONE_AA_CODE[ref.downcase]
260
- alt = THREE_TO_ONE_AA_CODE[alt.downcase]
261
+ alt = THREE_TO_ONE_AA_CODE[alt.downcase] unless alt == "*"
261
262
  mutation = [ref, num, alt] * ""
262
263
  end
263
264
  one_aa_code = THREE_TO_ONE_AA_CODE.values
@@ -0,0 +1,298 @@
1
+ require 'rbbt'
2
+
3
+ require 'inline'
4
+
5
+ # From: https://github.com/mengyao/Complete-Striped-Smith-Waterman-Library
6
+ # Citation: SSW Library: An SIMD Smith-Waterman C/C++ Library for Use in Genomic Applications
7
+ # Mengyao Zhao, Wan-Ping Lee, Gabor T. Marth
8
+ # http://arxiv.org/abs/1208.6350
9
+ module SmithWaterman
10
+
11
+ inline(:C) do |builder|
12
+ prefix =<<-EOF
13
+ #include <stdlib.h>
14
+ #include <stdio.h>
15
+ #include <stdint.h>
16
+ #include #{'"' + Rbbt.share.software.opt.ssw["ssw.h"].find + '"'}
17
+ #include #{'"' + Rbbt.share.software.opt.ssw["ssw.c"].find + '"'}
18
+ EOF
19
+
20
+ prefix +=<<-'EOF'
21
+ void ssw_write (s_align* a,
22
+ char* ref_seq,
23
+ char* read_seq,
24
+ int8_t* table,
25
+ int fd) {
26
+
27
+ int max_length = 1000000;
28
+ dprintf(fd, "optimal_alignment_score: %d\tsub-optimal_alignment_score: %d\t", a->score1, a->score2);
29
+ if (a->ref_begin1 + 1) dprintf(fd, "target_begin: %d\t", a->ref_begin1 + 1);
30
+ dprintf(fd, "target_end: %d\t", a->ref_end1 + 1);
31
+ if (a->read_begin1 + 1) dprintf(fd, "query_begin: %d\t", a->read_begin1 + 1);
32
+ dprintf(fd, "query_end: %d\n\n", a->read_end1 + 1);
33
+ if (a->cigar) {
34
+ int32_t i, c = 0, left = 0, e = 0, qb = a->ref_begin1, pb = a->read_begin1;
35
+ while (e < a->cigarLen || left > 0) {
36
+ int32_t count = 0;
37
+ int32_t q = qb;
38
+ int32_t p = pb;
39
+ dprintf(fd, "Target: %8d ", q + 1);
40
+ for (c = e; c < a->cigarLen; ++c) {
41
+ int32_t letter = 0xf&*(a->cigar + c);
42
+ int32_t length = (0xfffffff0&*(a->cigar + c))>>4;
43
+ int32_t l = (count == 0 && left > 0) ? left: length;
44
+ for (i = 0; i < l; ++i) {
45
+ if (letter == 1) dprintf(fd, "-");
46
+ else {
47
+ dprintf(fd, "%c", *(ref_seq + q));
48
+ ++ q;
49
+ }
50
+ ++ count;
51
+ if (count == max_length) goto step2;
52
+ }
53
+ }
54
+ step2:
55
+ dprintf(fd, " %d\n ", q);
56
+ q = qb;
57
+ count = 0;
58
+ for (c = e; c < a->cigarLen; ++c) {
59
+ int32_t letter = 0xf&*(a->cigar + c);
60
+ int32_t length = (0xfffffff0&*(a->cigar + c))>>4;
61
+ int32_t l = (count == 0 && left > 0) ? left: length;
62
+ for (i = 0; i < l; ++i){
63
+ if (letter == 0) {
64
+ if (table[(int)*(ref_seq + q)] == table[(int)*(read_seq + p)])dprintf(fd, "|");
65
+ else dprintf(fd, "*");
66
+ ++q;
67
+ ++p;
68
+ } else {
69
+ dprintf(fd, "*");
70
+ if (letter == 1) ++p;
71
+ else ++q;
72
+ }
73
+ ++ count;
74
+ if (count == max_length) {
75
+ qb = q;
76
+ goto step3;
77
+ }
78
+ }
79
+ }
80
+ step3:
81
+ p = pb;
82
+ dprintf(fd, "\nQuery: %8d ", p + 1);
83
+ count = 0;
84
+ for (c = e; c < a->cigarLen; ++c) {
85
+ int32_t letter = 0xf&*(a->cigar + c);
86
+ int32_t length = (0xfffffff0&*(a->cigar + c))>>4;
87
+ int32_t l = (count == 0 && left > 0) ? left: length;
88
+ for (i = 0; i < l; ++i) {
89
+ if (letter == 2) dprintf(fd, "-");
90
+ else {
91
+ dprintf(fd, "%c", *(read_seq + p));
92
+ ++p;
93
+ }
94
+ ++ count;
95
+ if (count == max_length) {
96
+ pb = p;
97
+ left = l - i - 1;
98
+ e = (left == 0) ? (c + 1) : c;
99
+ goto end;
100
+ }
101
+ }
102
+ }
103
+ e = c;
104
+ left = 0;
105
+ end:
106
+ dprintf(fd, " %d\n\n", p);
107
+ }
108
+ }
109
+ }
110
+
111
+ EOF
112
+
113
+ builder.prefix prefix
114
+
115
+
116
+ script = <<-EOF
117
+ int ssw_nt(char * read_seq, char * ref_seq){
118
+ int32_t l, m, k, match = 2, mismatch = 2, gap_open = 3, gap_extension = 1; // default parameters for genome sequence alignment
119
+ // reference sequence
120
+ //char ref_seq[40] = {'C', 'A', 'G', 'C', 'C', 'T', 'T', 'T', 'C', 'T', 'G', 'A', 'C', 'C', 'C', 'G', 'G', 'A', 'A', 'A', 'T',
121
+ // 'C', 'A', 'A', 'A', 'A', 'T', 'A', 'G', 'G', 'C', 'A', 'C', 'A', 'A', 'C', 'A', 'A', 'A', '\0'};
122
+ //char read_seq[16] = {'C', 'T', 'G', 'A', 'G', 'C', 'C', 'G', 'G', 'T', 'A', 'A', 'A', 'T', 'C', '\0'}; // read sequence
123
+
124
+ s_profile* profile;
125
+ int8_t* num = (int8_t*)malloc(16); // the read sequence represented in numbers
126
+ int8_t* ref_num = (int8_t*)malloc(64); // the reference sequence represented in numbers
127
+ s_align* result;
128
+
129
+ /* This table is used to transform nucleotide letters into numbers. */
130
+ int8_t nt_table[128] = {
131
+ 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
132
+ 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
133
+ 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
134
+ 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
135
+ 4, 0, 4, 1, 4, 4, 4, 2, 4, 4, 4, 4, 4, 4, 4, 4,
136
+ 4, 4, 4, 4, 3, 0, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
137
+ 4, 0, 4, 1, 4, 4, 4, 2, 4, 4, 4, 4, 4, 4, 4, 4,
138
+ 4, 4, 4, 4, 3, 0, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4
139
+ };
140
+
141
+ // initialize scoring matrix for genome sequences
142
+ // A C G T N (or other ambiguous code)
143
+ // 2 -2 -2 -2 0 A
144
+ // -2 2 -2 -2 0 C
145
+ // -2 -2 2 -2 0 G
146
+ // -2 -2 -2 2 0 T
147
+ // 0 0 0 0 0 N (or other ambiguous code)
148
+ int8_t* mat = (int8_t*)calloc(25, sizeof(int8_t));
149
+ for (l = k = 0; l < 4; ++l) {
150
+ for (m = 0; m < 4; ++m) mat[k++] = l == m ? match : - mismatch; /* weight_match : -weight_mismatch */
151
+ mat[k++] = 0; // ambiguous base: no penalty
152
+ }
153
+ for (m = 0; m < 5; ++m) mat[k++] = 0;
154
+
155
+ for (m = 0; m < 15; ++m) num[m] = nt_table[(int)read_seq[m]];
156
+ profile = ssw_init(num, 15, mat, 5, 2);
157
+ for (m = 0; m < 39; ++m) ref_num[m] = nt_table[(int)ref_seq[m]];
158
+
159
+ // Only the 8th bit of the flag is set, so ssw_align will always return the best alignment's beginning position and CIGAR.
160
+ result = ssw_align (profile, ref_num, 39, gap_open, gap_extension, 1, 0, 0, 15);
161
+ //ssw_write(result, ref_seq, read_seq, nt_tablte);
162
+
163
+ free(mat);
164
+ free(ref_num);
165
+ free(num);
166
+ return(0);
167
+ }
168
+
169
+ EOF
170
+ builder.c_singleton script
171
+
172
+ script = <<-EOF
173
+ int ssw_aa(char * read_seq, char * ref_seq, int read_seq_len, int ref_seq_len, int fd){
174
+
175
+ int32_t l, m, k, match = 2, mismatch = 2, gap_open = 3, gap_extension = 1; // default parameters for genome sequence alignment
176
+
177
+ s_profile* profile;
178
+ int8_t* num = (int8_t*)malloc(read_seq_len); // the read sequence represented in numbers
179
+ int8_t* ref_num = (int8_t*)malloc(ref_seq_len); // the reference sequence represented in numbers
180
+ s_align* result;
181
+
182
+ /* This table is used to transform amino acid letters into numbers. */
183
+ int8_t aa_table[128] = {
184
+ 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23,
185
+ 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23,
186
+ 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23,
187
+ 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23,
188
+ 23, 0, 20, 4, 3, 6, 13, 7, 8, 9, 23, 11, 10, 12, 2, 23,
189
+ 14, 5, 1, 15, 16, 23, 19, 17, 22, 18, 21, 23, 23, 23, 23, 23,
190
+ 23, 0, 20, 4, 3, 6, 13, 7, 8, 9, 23, 11, 10, 12, 2, 23,
191
+ 14, 5, 1, 15, 16, 23, 19, 17, 22, 18, 21, 23, 23, 23, 23, 23
192
+ };
193
+
194
+ int8_t mat[] = {
195
+ // A R N D C Q E G H I L K M F P S T W Y V B Z X *
196
+ 5, -2, -1, -2, -1, -1, -1, 0, -2, -1, -2, -1, -1, -3, -1, 1, 0, -3, -2, 0, -2, -1, -1, -5, // A
197
+ -2, 7, -1, -2, -4, 1, 0, -3, 0, -4, -3, 3, -2, -3, -3, -1, -1, -3, -1, -3, -1, 0, -1, -5, // R
198
+ -1, -1, 7, 2, -2, 0, 0, 0, 1, -3, -4, 0, -2, -4, -2, 1, 0, -4, -2, -3, 5, 0, -1, -5, // N
199
+ -2, -2, 2, 8, -4, 0, 2, -1, -1, -4, -4, -1, -4, -5, -1, 0, -1, -5, -3, -4, 6, 1, -1, -5, // D
200
+ -1, -4, -2, -4, 13, -3, -3, -3, -3, -2, -2, -3, -2, -2, -4, -1, -1, -5, -3, -1, -3, -3, -1, -5, // C
201
+ -1, 1, 0, 0, -3, 7, 2, -2, 1, -3, -2, 2, 0, -4, -1, 0, -1, -1, -1, -3, 0, 4, -1, -5, // Q
202
+ -1, 0, 0, 2, -3, 2, 6, -3, 0, -4, -3, 1, -2, -3, -1, -1, -1, -3, -2, -3, 1, 5, -1, -5, // E
203
+ 0, -3, 0, -1, -3, -2, -3, 8, -2, -4, -4, -2, -3, -4, -2, 0, -2, -3, -3, -4, -1, -2, -1, -5, // G
204
+ -2, 0, 1, -1, -3, 1, 0, -2, 10, -4, -3, 0, -1, -1, -2, -1, -2, -3, 2, -4, 0, 0, -1, -5, // H
205
+ -1, -4, -3, -4, -2, -3, -4, -4, -4, 5, 2, -3, 2, 0, -3, -3, -1, -3, -1, 4, -4, -3, -1, -5, // I
206
+ -2, -3, -4, -4, -2, -2, -3, -4, -3, 2, 5, -3, 3, 1, -4, -3, -1, -2, -1, 1, -4, -3, -1, -5, // L
207
+ -1, 3, 0, -1, -3, 2, 1, -2, 0, -3, -3, 6, -2, -4, -1, 0, -1, -3, -2, -3, 0, 1, -1, -5, // K
208
+ -1, -2, -2, -4, -2, 0, -2, -3, -1, 2, 3, -2, 7, 0, -3, -2, -1, -1, 0, 1, -3, -1, -1, -5, // M
209
+ -3, -3, -4, -5, -2, -4, -3, -4, -1, 0, 1, -4, 0, 8, -4, -3, -2, 1, 4, -1, -4, -4, -1, -5, // F
210
+ -1, -3, -2, -1, -4, -1, -1, -2, -2, -3, -4, -1, -3, -4, 10, -1, -1, -4, -3, -3, -2, -1, -1, -5, // P
211
+ 1, -1, 1, 0, -1, 0, -1, 0, -1, -3, -3, 0, -2, -3, -1, 5, 2, -4, -2, -2, 0, 0, -1, -5, // S
212
+ 0, -1, 0, -1, -1, -1, -1, -2, -2, -1, -1, -1, -1, -2, -1, 2, 5, -3, -2, 0, 0, -1, -1, -5, // T
213
+ -3, -3, -4, -5, -5, -1, -3, -3, -3, -3, -2, -3, -1, 1, -4, -4, -3, 15, 2, -3, -5, -2, -1, -5, // W
214
+ -2, -1, -2, -3, -3, -1, -2, -3, 2, -1, -1, -2, 0, 4, -3, -2, -2, 2, 8, -1, -3, -2, -1, -5, // Y
215
+ 0, -3, -3, -4, -1, -3, -3, -4, -4, 4, 1, -3, 1, -1, -3, -2, 0, -3, -1, 5, -3, -3, -1, -5, // V
216
+ -2, -1, 5, 6, -3, 0, 1, -1, 0, -4, -4, 0, -3, -4, -2, 0, 0, -5, -3, -3, 6, 1, -1, -5, // B
217
+ -1, 0, 0, 1, -3, 4, 5, -2, 0, -3, -3, 1, -1, -4, -1, 0, -1, -2, -2, -3, 1, 5, -1, -5, // Z
218
+ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -5, // X
219
+ -5, -5, -5, -5, -5, -5, -5, -5, -5, -5, -5, -5, -5, -5, -5, -5, -5, -5, -5, -5, -5, -5, -5, 1 // *
220
+ };
221
+
222
+
223
+ for (m = 0; m < read_seq_len; ++m) num[m] = aa_table[(int)read_seq[m]];
224
+ profile = ssw_init(num, read_seq_len, mat, 24, 2);
225
+ for (m = 0; m < ref_seq_len; ++m) ref_num[m] = aa_table[(int)ref_seq[m]];
226
+
227
+ // Only the 8th bit of the flag is set, so ssw_align will always return the best alignment's beginning position and CIGAR.
228
+ result = ssw_align(profile, ref_num, ref_seq_len, gap_open, gap_extension, 1, 0, 0, read_seq_len );
229
+ ssw_write(result, ref_seq, read_seq, aa_table, fd);
230
+
231
+ free(num);
232
+ free(ref_num);
233
+ return(0);
234
+ }
235
+
236
+ EOF
237
+ builder.c_singleton script
238
+
239
+ end
240
+
241
+ def self.align(query_sequence, target_sequence)
242
+ Log.low { "Aligning #{ Misc.fingerprint query_sequence } to #{ Misc.fingerprint target_sequence }" }
243
+
244
+ begin
245
+ raise "No query sequence" if query_sequence.nil?
246
+ raise "No target sequence" if target_sequence.nil?
247
+
248
+ s_out = Misc.open_pipe do |s_in|
249
+ SmithWaterman.ssw_aa(query_sequence, target_sequence, query_sequence.length, target_sequence.length, s_in.fileno)
250
+ end
251
+
252
+ txt = s_out.read
253
+ s_out.close
254
+ s_out.join
255
+ txt
256
+
257
+ target_start, target, target_end = txt.match(/Target:\s+(\d+)\s+([A-Z\-?*]+)\s+(\d+)/).values_at 1, 2, 3
258
+
259
+ query_start, query, query_end = txt.match(/Query:\s+(\d+)\s+([A-Z\-?*]+)\s+(\d+)/).values_at 1, 2, 3
260
+
261
+ txt.replace ""
262
+ [("_" * (query_start.to_i - 1)) + query, ("_" * (target_start.to_i - 1)) + target]
263
+ rescue
264
+ Log.warn("Error in aligmnent: #{$!.message}")
265
+ return ["-", "-"]
266
+ end
267
+ end
268
+
269
+ def self.alignment_map(source, target)
270
+ alignment_source, alignment_target = SmithWaterman.align(source, target)
271
+ map = {}
272
+
273
+ offset_source, alignment_source = alignment_source.match(/^(_*)(.*)/).values_at( 1, 2)
274
+ offset_target, alignment_target = alignment_target.match(/^(_*)(.*)/).values_at( 1, 2)
275
+
276
+ gaps_source = 0
277
+ gaps_target = 0
278
+ miss_match = 0
279
+ alignment_source.chars.zip(alignment_target.chars).each_with_index do |p,i|
280
+ char_source, char_target = p
281
+ gaps_source += 1 if char_source == '-'
282
+ gaps_target += 1 if char_target == '-'
283
+ source_pos = i + 1 + offset_source.length - gaps_source
284
+ target_pos = i + 1 + offset_target.length - gaps_target
285
+ if char_source != char_target or char_source == "-"
286
+ miss_match += 1
287
+ else
288
+ map[source_pos] = target_pos
289
+ end
290
+ end
291
+
292
+ if miss_match + gaps_source > alignment_source.length.to_f / 2
293
+ {}
294
+ else
295
+ map
296
+ end
297
+ end
298
+ end
@@ -35,8 +35,8 @@ class Step
35
35
  rescue Exception
36
36
  Log.debug{"Error loading info file: " + info_file}
37
37
  Log.exception $!
38
- Open.rm info_file
39
- Misc.sensiblewrite(info_file, Step.serialize_info({:status => :error, :messages => ["Info file lost"]}))
38
+ #Open.rm info_file
39
+ #Misc.sensiblewrite(info_file, Step.serialize_info({:status => :error, :messages => ["Info file lost"]}))
40
40
  raise $!
41
41
  end
42
42
  end
@@ -134,7 +134,12 @@ TmpFile.with_file do |app_dir|
134
134
  options.each do |k,v| fixed_options[k.to_sym] = v end
135
135
  options = fixed_options
136
136
 
137
- Rack::Server.start(options)
137
+ begin
138
+ Rack::Server.start(options)
139
+ rescue LoadError
140
+ require 'rackup'
141
+ Rackup::Server.start(options)
142
+ end
138
143
  end
139
144
  end
140
145
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: rbbt-util
3
3
  version: !ruby/object:Gem::Version
4
- version: 5.44.0
4
+ version: 5.44.1
5
5
  platform: ruby
6
6
  authors:
7
7
  - Miguel Vazquez
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2024-05-28 00:00:00.000000000 Z
11
+ date: 2024-12-10 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: rake
@@ -283,6 +283,7 @@ files:
283
283
  - lib/rbbt/util/misc/options.rb
284
284
  - lib/rbbt/util/misc/pipes.rb
285
285
  - lib/rbbt/util/misc/serialize.rb
286
+ - lib/rbbt/util/misc/ssw.rb
286
287
  - lib/rbbt/util/misc/system.rb
287
288
  - lib/rbbt/util/named_array.rb
288
289
  - lib/rbbt/util/open.rb
@@ -476,7 +477,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
476
477
  - !ruby/object:Gem::Version
477
478
  version: '0'
478
479
  requirements: []
479
- rubygems_version: 3.5.10
480
+ rubygems_version: 3.5.23
480
481
  signing_key:
481
482
  specification_version: 4
482
483
  summary: Utilities for the Ruby Bioinformatics Toolkit (rbbt)