@datagrok/bio 2.4.51 → 2.4.52

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/detectors.js CHANGED
@@ -9,7 +9,7 @@
9
9
  */
10
10
 
11
11
  const SEQ_SAMPLE_LIMIT = 100;
12
- const SEQ_SAMPLE_LENGTH_LIMIT = 500;
12
+ const SEQ_SAMPLE_LENGTH_LIMIT = 100;
13
13
 
14
14
  /** enum type to simplify setting "user-friendly" notation if necessary */
15
15
  const NOTATION = {
@@ -85,6 +85,8 @@ class BioPackageDetectors extends DG.Package {
85
85
  //input: column col
86
86
  //output: string semType
87
87
  detectMacromolecule(col) {
88
+ const tableName = col.dataFrame ? col.dataFrame.name : null;
89
+ console.debug(`Bio: detectMacromolecule( table: ${tableName}.${col.name} ), start`);
88
90
  const t1 = Date.now();
89
91
  try {
90
92
  const colName = col.name;
@@ -95,8 +97,10 @@ class BioPackageDetectors extends DG.Package {
95
97
  // Fail early
96
98
  if (col.type !== DG.TYPE.STRING) return null;
97
99
 
98
- const categoriesSample = col.categories.length < SEQ_SAMPLE_LIMIT ? col.categories :
99
- this.sample(col.categories, SEQ_SAMPLE_LIMIT);
100
+ const categoriesSample = [...new Set((col.length < SEQ_SAMPLE_LIMIT ?
101
+ wu.count(0).take(Math.min(SEQ_SAMPLE_LIMIT, col.length)).map((rowI) => col.get(rowI)) :
102
+ this.sample(col, SEQ_SAMPLE_LIMIT)
103
+ ).map((seq) => !!seq ? seq.substring(0, SEQ_SAMPLE_LENGTH_LIMIT * 5) : ''))];
100
104
 
101
105
  // To collect alphabet freq three strategies can be used:
102
106
  // as chars, as fasta (single or within square brackets), as with the separator.
@@ -209,9 +213,15 @@ class BioPackageDetectors extends DG.Package {
209
213
  }
210
214
  return DG.SEMTYPE.MACROMOLECULE;
211
215
  }
216
+ } catch (err) {
217
+ let errMsg = err instanceof Error ? err.message : err.toString();
218
+ const colTops = wu.count(0).take(Math.max(col.length, 4)).map((rowI) => col.get(rowI))
219
+ .reduce((a, b) => a === undefined ? b : a + '\n' + b, undefined);
220
+ errMsg += `\n${colTops}`;
221
+ console.error(`Bio: detectMacromolecule( table: ${tableName}.${col.name} ), error:\n${errMsg}`);
212
222
  } finally {
213
223
  const t2 = Date.now();
214
- console.debug('Bio: detectMacromolecule() ' + `ET = ${t2 - t1} ms.`);
224
+ console.debug(`Bio: detectMacromolecule( table: ${tableName}.${col.name} ), ` + `ET = ${t2 - t1} ms.`);
215
225
  }
216
226
  }
217
227
 
@@ -283,7 +293,7 @@ class BioPackageDetectors extends DG.Package {
283
293
  let firstLength = null;
284
294
 
285
295
  for (const seq of values) {
286
- const mSeq = splitter(seq);
296
+ const mSeq = !!seq ? splitter(seq) : [];
287
297
 
288
298
  if (firstLength === null) {
289
299
  //
@@ -442,17 +452,16 @@ class BioPackageDetectors extends DG.Package {
442
452
  }.bind(this);
443
453
  }
444
454
 
445
- sample(src, n) {
446
- if (src.length < n) {
455
+ sample(col, n) {
456
+ if (col.length < n)
447
457
  throw new Error('Sample source is less than n requested.');
448
- }
449
458
 
450
459
  const idxSet = new Set();
451
460
  while (idxSet.size < n) {
452
- const idx = Math.floor(Math.random() * src.length);
461
+ const idx = Math.floor(Math.random() * col.length);
453
462
  if (!idxSet.has(idx)) idxSet.add(idx);
454
463
  }
455
464
 
456
- return [...idxSet].map((idx) => src[idx]);
465
+ return wu(idxSet).map((idx) => col.get(idx));
457
466
  }
458
467
  }