rbbt-util 5.44.0 → 5.44.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 1c39baa8ee0c2633bea3878720d29aa0f49a057db7ec6700187685c7a07f7eae
4
- data.tar.gz: ad0f8a09cb34faa3d0aa3388c26733d4849d307a02df02989656c0aced5ea72a
3
+ metadata.gz: a9269fd6f3ed8e0b03f575c59f50d42c45a11bc07e138f4377e5069f3050fb29
4
+ data.tar.gz: 488a57247f23d41f8c94aa64cb38cf666c99ee43c4fa14399ed77b8bb1854519
5
5
  SHA512:
6
- metadata.gz: dcfcf7f6ea2b45dd4ca5e1257fb22cec0f06280be2b6c9aac3b118e1e649a8f5948db0162f62735b9c113ee9a878cb89e8f98a99fd7026a7f58f061531b93f89
7
- data.tar.gz: 59e94fecbb50bf9d7635545aab508ee6b986e44f81acac59aac7e888f6c9c5748fb2d0324a0e83d784c78b249e67a50baaaba1d40b58612f194de111fd4c6579
6
+ metadata.gz: 3a288fcd2e11209621bc61f911097c104e75c22c759ca7ac6ca0a1f908bf1ea1711587bf776b262f082a60c0296a9004cfc8fad85de738dabd6bad0bc45f6fed
7
+ data.tar.gz: 91e775b843358c29df1d87e1d9d5db8a92c37d842fd05757cc810a5f4c77a38cfe154a0a368429abb921cf43014333361426546a43aac09eabe81f07ca4315a5
data/etc/app.d/base.rb CHANGED
@@ -19,7 +19,7 @@ end
19
19
  use Rack::Session::Cookie, :key => 'rack.session',
20
20
  :path => '/',
21
21
  :expire_after => 2592000,
22
- :secret => "#{self.to_s} secret!!"
22
+ :secret => Misc.digest("#{self.to_s} secret!!") * 4
23
23
 
24
24
  #{{{ DIRECTORIES
25
25
  global_var = Rbbt.var.sinatra
@@ -214,7 +214,7 @@ module Persist
214
214
  def [](key, clean=false)
215
215
  database = database(key)
216
216
  return nil if database.nil?
217
- v = database.send(:[], key)
217
+ database.send(:[], key)
218
218
  end
219
219
 
220
220
  def <<(p)
@@ -256,8 +256,9 @@ module Misc
256
256
  ref = m[1]
257
257
  num = m[2]
258
258
  alt = m[3]
259
+ alt = "*" if alt == "Ter"
259
260
  ref = THREE_TO_ONE_AA_CODE[ref.downcase]
260
- alt = THREE_TO_ONE_AA_CODE[alt.downcase]
261
+ alt = THREE_TO_ONE_AA_CODE[alt.downcase] unless alt == "*"
261
262
  mutation = [ref, num, alt] * ""
262
263
  end
263
264
  one_aa_code = THREE_TO_ONE_AA_CODE.values
@@ -0,0 +1,298 @@
1
+ require 'rbbt'
2
+
3
+ require 'inline'
4
+
5
+ # From: https://github.com/mengyao/Complete-Striped-Smith-Waterman-Library
6
+ # Citation: SSW Library: An SIMD Smith-Waterman C/C++ Library for Use in Genomic Applications
7
+ # Mengyao Zhao, Wan-Ping Lee, Gabor T. Marth
8
+ # http://arxiv.org/abs/1208.6350
9
+ module SmithWaterman
10
+
11
+ inline(:C) do |builder|
12
+ prefix =<<-EOF
13
+ #include <stdlib.h>
14
+ #include <stdio.h>
15
+ #include <stdint.h>
16
+ #include #{'"' + Rbbt.share.software.opt.ssw["ssw.h"].find + '"'}
17
+ #include #{'"' + Rbbt.share.software.opt.ssw["ssw.c"].find + '"'}
18
+ EOF
19
+
20
+ prefix +=<<-'EOF'
21
+ void ssw_write (s_align* a,
22
+ char* ref_seq,
23
+ char* read_seq,
24
+ int8_t* table,
25
+ int fd) {
26
+
27
+ int max_length = 1000000;
28
+ dprintf(fd, "optimal_alignment_score: %d\tsub-optimal_alignment_score: %d\t", a->score1, a->score2);
29
+ if (a->ref_begin1 + 1) dprintf(fd, "target_begin: %d\t", a->ref_begin1 + 1);
30
+ dprintf(fd, "target_end: %d\t", a->ref_end1 + 1);
31
+ if (a->read_begin1 + 1) dprintf(fd, "query_begin: %d\t", a->read_begin1 + 1);
32
+ dprintf(fd, "query_end: %d\n\n", a->read_end1 + 1);
33
+ if (a->cigar) {
34
+ int32_t i, c = 0, left = 0, e = 0, qb = a->ref_begin1, pb = a->read_begin1;
35
+ while (e < a->cigarLen || left > 0) {
36
+ int32_t count = 0;
37
+ int32_t q = qb;
38
+ int32_t p = pb;
39
+ dprintf(fd, "Target: %8d ", q + 1);
40
+ for (c = e; c < a->cigarLen; ++c) {
41
+ int32_t letter = 0xf&*(a->cigar + c);
42
+ int32_t length = (0xfffffff0&*(a->cigar + c))>>4;
43
+ int32_t l = (count == 0 && left > 0) ? left: length;
44
+ for (i = 0; i < l; ++i) {
45
+ if (letter == 1) dprintf(fd, "-");
46
+ else {
47
+ dprintf(fd, "%c", *(ref_seq + q));
48
+ ++ q;
49
+ }
50
+ ++ count;
51
+ if (count == max_length) goto step2;
52
+ }
53
+ }
54
+ step2:
55
+ dprintf(fd, " %d\n ", q);
56
+ q = qb;
57
+ count = 0;
58
+ for (c = e; c < a->cigarLen; ++c) {
59
+ int32_t letter = 0xf&*(a->cigar + c);
60
+ int32_t length = (0xfffffff0&*(a->cigar + c))>>4;
61
+ int32_t l = (count == 0 && left > 0) ? left: length;
62
+ for (i = 0; i < l; ++i){
63
+ if (letter == 0) {
64
+ if (table[(int)*(ref_seq + q)] == table[(int)*(read_seq + p)])dprintf(fd, "|");
65
+ else dprintf(fd, "*");
66
+ ++q;
67
+ ++p;
68
+ } else {
69
+ dprintf(fd, "*");
70
+ if (letter == 1) ++p;
71
+ else ++q;
72
+ }
73
+ ++ count;
74
+ if (count == max_length) {
75
+ qb = q;
76
+ goto step3;
77
+ }
78
+ }
79
+ }
80
+ step3:
81
+ p = pb;
82
+ dprintf(fd, "\nQuery: %8d ", p + 1);
83
+ count = 0;
84
+ for (c = e; c < a->cigarLen; ++c) {
85
+ int32_t letter = 0xf&*(a->cigar + c);
86
+ int32_t length = (0xfffffff0&*(a->cigar + c))>>4;
87
+ int32_t l = (count == 0 && left > 0) ? left: length;
88
+ for (i = 0; i < l; ++i) {
89
+ if (letter == 2) dprintf(fd, "-");
90
+ else {
91
+ dprintf(fd, "%c", *(read_seq + p));
92
+ ++p;
93
+ }
94
+ ++ count;
95
+ if (count == max_length) {
96
+ pb = p;
97
+ left = l - i - 1;
98
+ e = (left == 0) ? (c + 1) : c;
99
+ goto end;
100
+ }
101
+ }
102
+ }
103
+ e = c;
104
+ left = 0;
105
+ end:
106
+ dprintf(fd, " %d\n\n", p);
107
+ }
108
+ }
109
+ }
110
+
111
+ EOF
112
+
113
+ builder.prefix prefix
114
+
115
+
116
+ script = <<-EOF
117
+ int ssw_nt(char * read_seq, char * ref_seq){
118
+ int32_t l, m, k, match = 2, mismatch = 2, gap_open = 3, gap_extension = 1; // default parameters for genome sequence alignment
119
+ // reference sequence
120
+ //char ref_seq[40] = {'C', 'A', 'G', 'C', 'C', 'T', 'T', 'T', 'C', 'T', 'G', 'A', 'C', 'C', 'C', 'G', 'G', 'A', 'A', 'A', 'T',
121
+ // 'C', 'A', 'A', 'A', 'A', 'T', 'A', 'G', 'G', 'C', 'A', 'C', 'A', 'A', 'C', 'A', 'A', 'A', '\0'};
122
+ //char read_seq[16] = {'C', 'T', 'G', 'A', 'G', 'C', 'C', 'G', 'G', 'T', 'A', 'A', 'A', 'T', 'C', '\0'}; // read sequence
123
+
124
+ s_profile* profile;
125
+ int8_t* num = (int8_t*)malloc(16); // the read sequence represented in numbers
126
+ int8_t* ref_num = (int8_t*)malloc(64); // the read sequence represented in numbers
127
+ s_align* result;
128
+
129
+ /* This table is used to transform nucleotide letters into numbers. */
130
+ int8_t nt_table[128] = {
131
+ 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
132
+ 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
133
+ 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
134
+ 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
135
+ 4, 0, 4, 1, 4, 4, 4, 2, 4, 4, 4, 4, 4, 4, 4, 4,
136
+ 4, 4, 4, 4, 3, 0, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
137
+ 4, 0, 4, 1, 4, 4, 4, 2, 4, 4, 4, 4, 4, 4, 4, 4,
138
+ 4, 4, 4, 4, 3, 0, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4
139
+ };
140
+
141
+ // initialize scoring matrix for genome sequences
142
+ // A C G T N (or other ambiguous code)
143
+ // 2 -2 -2 -2 0 A
144
+ // -2 2 -2 -2 0 C
145
+ // -2 -2 2 -2 0 G
146
+ // -2 -2 -2 2 0 T
147
+ // 0 0 0 0 0 N (or other ambiguous code)
148
+ int8_t* mat = (int8_t*)calloc(25, sizeof(int8_t));
149
+ for (l = k = 0; l < 4; ++l) {
150
+ for (m = 0; m < 4; ++m) mat[k++] = l == m ? match : - mismatch; /* weight_match : -weight_mismatch */
151
+ mat[k++] = 0; // ambiguous base: no penalty
152
+ }
153
+ for (m = 0; m < 5; ++m) mat[k++] = 0;
154
+
155
+ for (m = 0; m < 15; ++m) num[m] = nt_table[(int)read_seq[m]];
156
+ profile = ssw_init(num, 15, mat, 5, 2);
157
+ for (m = 0; m < 39; ++m) ref_num[m] = nt_table[(int)ref_seq[m]];
158
+
159
+ // Only the 8 bit of the flag is setted. ssw_align will always return the best alignment beginning position and cigar.
160
+ result = ssw_align (profile, ref_num, 39, gap_open, gap_extension, 1, 0, 0, 15);
161
+ //ssw_write(result, ref_seq, read_seq, nt_tablte);
162
+
163
+ free(mat);
164
+ free(ref_num);
165
+ free(num);
166
+ return(0);
167
+ }
168
+
169
+ EOF
170
+ builder.c_singleton script
171
+
172
+ script = <<-EOF
173
+ int ssw_aa(char * read_seq, char * ref_seq, int read_seq_len, int ref_seq_len, int fd){
174
+
175
+ int32_t l, m, k, match = 2, mismatch = 2, gap_open = 3, gap_extension = 1; // default parameters for genome sequence alignment
176
+
177
+ s_profile* profile;
178
+ int8_t* num = (int8_t*)malloc(read_seq_len); // the read sequence represented in numbers
179
+ int8_t* ref_num = (int8_t*)malloc(ref_seq_len); // the reference sequence represented in numbers
180
+ s_align* result;
181
+
182
+ /* This table is used to transform amino acid letters into numbers. */
183
+ int8_t aa_table[128] = {
184
+ 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23,
185
+ 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23,
186
+ 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23,
187
+ 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23,
188
+ 23, 0, 20, 4, 3, 6, 13, 7, 8, 9, 23, 11, 10, 12, 2, 23,
189
+ 14, 5, 1, 15, 16, 23, 19, 17, 22, 18, 21, 23, 23, 23, 23, 23,
190
+ 23, 0, 20, 4, 3, 6, 13, 7, 8, 9, 23, 11, 10, 12, 2, 23,
191
+ 14, 5, 1, 15, 16, 23, 19, 17, 22, 18, 21, 23, 23, 23, 23, 23
192
+ };
193
+
194
+ int8_t mat[] = {
195
+ // A R N D C Q E G H I L K M F P S T W Y V B Z X *
196
+ 5, -2, -1, -2, -1, -1, -1, 0, -2, -1, -2, -1, -1, -3, -1, 1, 0, -3, -2, 0, -2, -1, -1, -5, // A
197
+ -2, 7, -1, -2, -4, 1, 0, -3, 0, -4, -3, 3, -2, -3, -3, -1, -1, -3, -1, -3, -1, 0, -1, -5, // R
198
+ -1, -1, 7, 2, -2, 0, 0, 0, 1, -3, -4, 0, -2, -4, -2, 1, 0, -4, -2, -3, 5, 0, -1, -5, // N
199
+ -2, -2, 2, 8, -4, 0, 2, -1, -1, -4, -4, -1, -4, -5, -1, 0, -1, -5, -3, -4, 6, 1, -1, -5, // D
200
+ -1, -4, -2, -4, 13, -3, -3, -3, -3, -2, -2, -3, -2, -2, -4, -1, -1, -5, -3, -1, -3, -3, -1, -5, // C
201
+ -1, 1, 0, 0, -3, 7, 2, -2, 1, -3, -2, 2, 0, -4, -1, 0, -1, -1, -1, -3, 0, 4, -1, -5, // Q
202
+ -1, 0, 0, 2, -3, 2, 6, -3, 0, -4, -3, 1, -2, -3, -1, -1, -1, -3, -2, -3, 1, 5, -1, -5, // E
203
+ 0, -3, 0, -1, -3, -2, -3, 8, -2, -4, -4, -2, -3, -4, -2, 0, -2, -3, -3, -4, -1, -2, -1, -5, // G
204
+ -2, 0, 1, -1, -3, 1, 0, -2, 10, -4, -3, 0, -1, -1, -2, -1, -2, -3, 2, -4, 0, 0, -1, -5, // H
205
+ -1, -4, -3, -4, -2, -3, -4, -4, -4, 5, 2, -3, 2, 0, -3, -3, -1, -3, -1, 4, -4, -3, -1, -5, // I
206
+ -2, -3, -4, -4, -2, -2, -3, -4, -3, 2, 5, -3, 3, 1, -4, -3, -1, -2, -1, 1, -4, -3, -1, -5, // L
207
+ -1, 3, 0, -1, -3, 2, 1, -2, 0, -3, -3, 6, -2, -4, -1, 0, -1, -3, -2, -3, 0, 1, -1, -5, // K
208
+ -1, -2, -2, -4, -2, 0, -2, -3, -1, 2, 3, -2, 7, 0, -3, -2, -1, -1, 0, 1, -3, -1, -1, -5, // M
209
+ -3, -3, -4, -5, -2, -4, -3, -4, -1, 0, 1, -4, 0, 8, -4, -3, -2, 1, 4, -1, -4, -4, -1, -5, // F
210
+ -1, -3, -2, -1, -4, -1, -1, -2, -2, -3, -4, -1, -3, -4, 10, -1, -1, -4, -3, -3, -2, -1, -1, -5, // P
211
+ 1, -1, 1, 0, -1, 0, -1, 0, -1, -3, -3, 0, -2, -3, -1, 5, 2, -4, -2, -2, 0, 0, -1, -5, // S
212
+ 0, -1, 0, -1, -1, -1, -1, -2, -2, -1, -1, -1, -1, -2, -1, 2, 5, -3, -2, 0, 0, -1, -1, -5, // T
213
+ -3, -3, -4, -5, -5, -1, -3, -3, -3, -3, -2, -3, -1, 1, -4, -4, -3, 15, 2, -3, -5, -2, -1, -5, // W
214
+ -2, -1, -2, -3, -3, -1, -2, -3, 2, -1, -1, -2, 0, 4, -3, -2, -2, 2, 8, -1, -3, -2, -1, -5, // Y
215
+ 0, -3, -3, -4, -1, -3, -3, -4, -4, 4, 1, -3, 1, -1, -3, -2, 0, -3, -1, 5, -3, -3, -1, -5, // V
216
+ -2, -1, 5, 6, -3, 0, 1, -1, 0, -4, -4, 0, -3, -4, -2, 0, 0, -5, -3, -3, 6, 1, -1, -5, // B
217
+ -1, 0, 0, 1, -3, 4, 5, -2, 0, -3, -3, 1, -1, -4, -1, 0, -1, -2, -2, -3, 1, 5, -1, -5, // Z
218
+ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -5, // X
219
+ -5, -5, -5, -5, -5, -5, -5, -5, -5, -5, -5, -5, -5, -5, -5, -5, -5, -5, -5, -5, -5, -5, -5, 1 // *
220
+ };
221
+
222
+
223
+ for (m = 0; m < read_seq_len; ++m) num[m] = aa_table[(int)read_seq[m]];
224
+ profile = ssw_init(num, read_seq_len, mat, 24, 2);
225
+ for (m = 0; m < ref_seq_len; ++m) ref_num[m] = aa_table[(int)ref_seq[m]];
226
+
227
+ // Only the 8 bit of the flag is setted. ssw_align will always return the best alignment beginning position and cigar.
228
+ result = ssw_align(profile, ref_num, ref_seq_len, gap_open, gap_extension, 1, 0, 0, read_seq_len );
229
+ ssw_write(result, ref_seq, read_seq, aa_table, fd);
230
+
231
+ free(num);
232
+ free(ref_num);
233
+ return(0);
234
+ }
235
+
236
+ EOF
237
+ builder.c_singleton script
238
+
239
+ end
240
+
241
+ def self.align(query_sequence, target_sequence)
242
+ Log.low { "Aligning #{ Misc.fingerprint query_sequence } to #{ Misc.fingerprint target_sequence }" }
243
+
244
+ begin
245
+ raise "No query sequence" if query_sequence.nil?
246
+ raise "No target sequence" if target_sequence.nil?
247
+
248
+ s_out = Misc.open_pipe do |s_in|
249
+ SmithWaterman.ssw_aa(query_sequence, target_sequence, query_sequence.length, target_sequence.length, s_in.fileno)
250
+ end
251
+
252
+ txt = s_out.read
253
+ s_out.close
254
+ s_out.join
255
+ txt
256
+
257
+ target_start, target, target_end = txt.match(/Target:\s+(\d+)\s+([A-Z\-?*]+)\s+(\d+)/).values_at 1, 2, 3
258
+
259
+ query_start, query, query_end = txt.match(/Query:\s+(\d+)\s+([A-Z\-?*]+)\s+(\d+)/).values_at 1, 2, 3
260
+
261
+ txt.replace ""
262
+ [("_" * (query_start.to_i - 1)) + query, ("_" * (target_start.to_i - 1)) + target]
263
+ rescue
264
+ Log.warn("Error in aligmnent: #{$!.message}")
265
+ return ["-", "-"]
266
+ end
267
+ end
268
+
269
+ def self.alignment_map(source, target)
270
+ alignment_source, alignment_target = SmithWaterman.align(source, target)
271
+ map = {}
272
+
273
+ offset_source, alignment_source = alignment_source.match(/^(_*)(.*)/).values_at( 1, 2)
274
+ offset_target, alignment_target = alignment_target.match(/^(_*)(.*)/).values_at( 1, 2)
275
+
276
+ gaps_source = 0
277
+ gaps_target = 0
278
+ miss_match = 0
279
+ alignment_source.chars.zip(alignment_target.chars).each_with_index do |p,i|
280
+ char_source, char_target = p
281
+ gaps_source += 1 if char_source == '-'
282
+ gaps_target += 1 if char_target == '-'
283
+ source_pos = i + 1 + offset_source.length - gaps_source
284
+ target_pos = i + 1 + offset_target.length - gaps_target
285
+ if char_source != char_target or char_source == "-"
286
+ miss_match += 1
287
+ else
288
+ map[source_pos] = target_pos
289
+ end
290
+ end
291
+
292
+ if miss_match + gaps_source > alignment_source.length.to_f / 2
293
+ {}
294
+ else
295
+ map
296
+ end
297
+ end
298
+ end
@@ -35,8 +35,8 @@ class Step
35
35
  rescue Exception
36
36
  Log.debug{"Error loading info file: " + info_file}
37
37
  Log.exception $!
38
- Open.rm info_file
39
- Misc.sensiblewrite(info_file, Step.serialize_info({:status => :error, :messages => ["Info file lost"]}))
38
+ #Open.rm info_file
39
+ #Misc.sensiblewrite(info_file, Step.serialize_info({:status => :error, :messages => ["Info file lost"]}))
40
40
  raise $!
41
41
  end
42
42
  end
@@ -134,7 +134,12 @@ TmpFile.with_file do |app_dir|
134
134
  options.each do |k,v| fixed_options[k.to_sym] = v end
135
135
  options = fixed_options
136
136
 
137
- Rack::Server.start(options)
137
+ begin
138
+ Rack::Server.start(options)
139
+ rescue LoadError
140
+ require 'rackup'
141
+ Rackup::Server.start(options)
142
+ end
138
143
  end
139
144
  end
140
145
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: rbbt-util
3
3
  version: !ruby/object:Gem::Version
4
- version: 5.44.0
4
+ version: 5.44.1
5
5
  platform: ruby
6
6
  authors:
7
7
  - Miguel Vazquez
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2024-05-28 00:00:00.000000000 Z
11
+ date: 2024-12-10 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: rake
@@ -283,6 +283,7 @@ files:
283
283
  - lib/rbbt/util/misc/options.rb
284
284
  - lib/rbbt/util/misc/pipes.rb
285
285
  - lib/rbbt/util/misc/serialize.rb
286
+ - lib/rbbt/util/misc/ssw.rb
286
287
  - lib/rbbt/util/misc/system.rb
287
288
  - lib/rbbt/util/named_array.rb
288
289
  - lib/rbbt/util/open.rb
@@ -476,7 +477,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
476
477
  - !ruby/object:Gem::Version
477
478
  version: '0'
478
479
  requirements: []
479
- rubygems_version: 3.5.10
480
+ rubygems_version: 3.5.23
480
481
  signing_key:
481
482
  specification_version: 4
482
483
  summary: Utilities for the Ruby Bioinformatics Toolkit (rbbt)