@datagrok/bio 2.4.51 → 2.4.53
This diff shows the content of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
- package/detectors.js +19 -10
- package/dist/package-test.js +1 -1
- package/dist/package-test.js.map +1 -1
- package/dist/package.js +1 -1
- package/dist/package.js.map +1 -1
- package/package.json +2 -2
- package/src/tests/detectors-benchmark-tests.ts +9 -11
- package/src/utils/cell-renderer.ts +1 -1
- package/src/widgets/representations.ts +2 -2
package/detectors.js
CHANGED
|
@@ -9,7 +9,7 @@
|
|
|
9
9
|
*/
|
|
10
10
|
|
|
11
11
|
const SEQ_SAMPLE_LIMIT = 100;
|
|
12
|
-
const SEQ_SAMPLE_LENGTH_LIMIT =
|
|
12
|
+
const SEQ_SAMPLE_LENGTH_LIMIT = 100;
|
|
13
13
|
|
|
14
14
|
/** enum type to simplify setting "user-friendly" notation if necessary */
|
|
15
15
|
const NOTATION = {
|
|
@@ -85,6 +85,8 @@ class BioPackageDetectors extends DG.Package {
|
|
|
85
85
|
//input: column col
|
|
86
86
|
//output: string semType
|
|
87
87
|
detectMacromolecule(col) {
|
|
88
|
+
const tableName = col.dataFrame ? col.dataFrame.name : null;
|
|
89
|
+
console.debug(`Bio: detectMacromolecule( table: ${tableName}.${col.name} ), start`);
|
|
88
90
|
const t1 = Date.now();
|
|
89
91
|
try {
|
|
90
92
|
const colName = col.name;
|
|
@@ -95,8 +97,10 @@ class BioPackageDetectors extends DG.Package {
|
|
|
95
97
|
// Fail early
|
|
96
98
|
if (col.type !== DG.TYPE.STRING) return null;
|
|
97
99
|
|
|
98
|
-
const categoriesSample = col.
|
|
99
|
-
|
|
100
|
+
const categoriesSample = [...new Set((col.length < SEQ_SAMPLE_LIMIT ?
|
|
101
|
+
wu.count(0).take(Math.min(SEQ_SAMPLE_LIMIT, col.length)).map((rowI) => col.get(rowI)) :
|
|
102
|
+
this.sample(col, SEQ_SAMPLE_LIMIT)
|
|
103
|
+
).map((seq) => !!seq ? seq.substring(0, SEQ_SAMPLE_LENGTH_LIMIT * 5) : ''))];
|
|
100
104
|
|
|
101
105
|
// To collect alphabet freq three strategies can be used:
|
|
102
106
|
// as chars, as fasta (single or within square brackets), as with the separator.
|
|
@@ -209,9 +213,15 @@ class BioPackageDetectors extends DG.Package {
|
|
|
209
213
|
}
|
|
210
214
|
return DG.SEMTYPE.MACROMOLECULE;
|
|
211
215
|
}
|
|
216
|
+
} catch (err) {
|
|
217
|
+
let errMsg = err instanceof Error ? err.message : err.toString();
|
|
218
|
+
const colTops = wu.count(0).take(Math.max(col.length, 4)).map((rowI) => col.get(rowI))
|
|
219
|
+
.reduce((a, b) => a === undefined ? b : a + '\n' + b, undefined);
|
|
220
|
+
errMsg += `\n${colTops}`;
|
|
221
|
+
console.error(`Bio: detectMacromolecule( table: ${tableName}.${col.name} ), error:\n${errMsg}`);
|
|
212
222
|
} finally {
|
|
213
223
|
const t2 = Date.now();
|
|
214
|
-
console.debug(
|
|
224
|
+
console.debug(`Bio: detectMacromolecule( table: ${tableName}.${col.name} ), ` + `ET = ${t2 - t1} ms.`);
|
|
215
225
|
}
|
|
216
226
|
}
|
|
217
227
|
|
|
@@ -283,7 +293,7 @@ class BioPackageDetectors extends DG.Package {
|
|
|
283
293
|
let firstLength = null;
|
|
284
294
|
|
|
285
295
|
for (const seq of values) {
|
|
286
|
-
const mSeq = splitter(seq);
|
|
296
|
+
const mSeq = !!seq ? splitter(seq) : [];
|
|
287
297
|
|
|
288
298
|
if (firstLength === null) {
|
|
289
299
|
//
|
|
@@ -442,17 +452,16 @@ class BioPackageDetectors extends DG.Package {
|
|
|
442
452
|
}.bind(this);
|
|
443
453
|
}
|
|
444
454
|
|
|
445
|
-
sample(
|
|
446
|
-
if (
|
|
455
|
+
sample(col, n) {
|
|
456
|
+
if (col.length < n)
|
|
447
457
|
throw new Error('Sample source is less than n requested.');
|
|
448
|
-
}
|
|
449
458
|
|
|
450
459
|
const idxSet = new Set();
|
|
451
460
|
while (idxSet.size < n) {
|
|
452
|
-
const idx = Math.floor(Math.random() *
|
|
461
|
+
const idx = Math.floor(Math.random() * col.length);
|
|
453
462
|
if (!idxSet.has(idx)) idxSet.add(idx);
|
|
454
463
|
}
|
|
455
464
|
|
|
456
|
-
return
|
|
465
|
+
return wu(idxSet).map((idx) => col.get(idx));
|
|
457
466
|
}
|
|
458
467
|
}
|