@datagrok/bio 2.22.0 → 2.22.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/CHANGELOG.md CHANGED
@@ -1,5 +1,10 @@
1
1
  # Bio changelog
2
2
 
3
+ ## 2.22.2 (2025-06-11)
4
+
5
+ * Harmonized options for Similarity/Diversity viewers
6
+ * Force detection of macromolecules for arbitrary columns
7
+
3
8
  ## 2.22.0 (2025-06-03)
4
9
 
5
10
  * MSA header tracks (Conservation and WebLogo)
package/detectors.js CHANGED
@@ -1,3 +1,4 @@
1
+ /* eslint-disable max-len */
1
2
  /* eslint-disable max-lines-per-function */
2
3
  /* eslint-disable max-lines */
3
4
  'use strict';
@@ -172,7 +173,7 @@ class BioPackageDetectors extends DG.Package {
172
173
  this.sample(col, SEQ_SAMPLE_LIMIT))
173
174
  .map((seq) => !!seq ? seq.substring(0, SEQ_SAMPLE_LENGTH_LIMIT * 5) : '')
174
175
  .filter((seq) => seq.length !== 0/* skip empty values for detector */),
175
- )];
176
+ )].map((s) => s?.trim());
176
177
  last.categoriesSample = categoriesSample;
177
178
 
178
179
  // To collect alphabet freq three strategies can be used:
@@ -624,6 +625,60 @@ class BioPackageDetectors extends DG.Package {
624
625
  event.preventDefault();
625
626
  return true;
626
627
  }
628
+
629
+ if (event.args.item && event.args.item instanceof DG.GridColumn && event.args.item.column &&
630
+ event.args.item.column.type === DG.TYPE.STRING && !event.args.item.column.semType) {
631
+ const contextMenu = event.args.menu;
632
+ const column = event.args.item.column;
633
+ try {
634
+ this.addForceDetectionDialog(column, contextMenu);
635
+ } catch (err) {
636
+ console.error(err);
637
+ }
638
+ event.preventDefault();
639
+ return true;
640
+ }
641
+ });
642
+ }
643
+
644
+ addForceDetectionDialog(column, menu) {
645
+ const menuGroup = menu.group('Bio');
646
+ const notations = ['separator', 'fasta'];
647
+ const separators = ['-', '/', '.'];
648
+
649
+ menuGroup.item('Set As Macromolecule', () => {
650
+ const sampleCategories = column.categories.slice(0, 40).filter((c) => !!c);
651
+ // detect if the column is potentially a custom notation
652
+ //const isCustom = sampleCategories.filter((c) => /\(\d\)/.test(c)).length > sampleCategories.length / 2;
653
+
654
+ const detectedSeparator = separators
655
+ .map((sep) => ({sep, minCount: sampleCategories.map((c) => c.split(sep).length).reduce((a, b) => Math.min(a, b), Infinity)}))
656
+ .reduce((a, b) => (((b.minCount > a.minCount && b.minCount != Infinity) || a.minCount == Infinity) ? b : a), {sep: '', minCount: 0});
657
+ const defaultAlphabet = 'UN'; // no way that canonical alphabet is not detected, this will only be used for times when MM is not detected
658
+
659
+ const defaultSeparator = ((detectedSeparator.sep && detectedSeparator.minCount !== Infinity && detectedSeparator.minCount > 2) ? detectedSeparator.sep : undefined);
660
+ const defaultNotation = defaultSeparator ? 'separator' : 'fasta';
661
+ const notationInput = ui.input.choice('Notation', {value: defaultNotation, items: notations, nullable: false});
662
+ const separatorInput = ui.input.choice('Separator', {value: defaultSeparator, items: separators, nullable: true});
663
+ ui.dialog('Set Column As Macromolecule')
664
+ .add(notationInput)
665
+ .add(separatorInput)
666
+ .show()
667
+ .onOK(() => {
668
+ const splitSamples = sampleCategories.map((s) => s.split(separatorInput.value ?? ''));
669
+ const splitLengths = splitSamples.map((s) => s.length).filter((l) => l > 0);
670
+ const isMultichar = splitSamples.some((s) => s.some((ss) => ss.length > 1)); // if any of the split samples has a length more than 1, then it is multichar
671
+ //const medianLength = splitSamples[Math.floor(splitSamples.length / 2)];
672
+ const averageLength = splitLengths.reduce((a, b) => a + b, 0) / splitSamples.length;
673
+ const std = Math.sqrt(splitLengths.map((x) => Math.pow(x - averageLength, 2)).reduce((a, b) => a + b, 0) / splitSamples.length);
674
+ const isPotentiallyMSA = averageLength > 1 && std < 2; // if the average length is more than 1 and the std is less than 2, then it is potentially MSA
675
+ column.setTag('units', notationInput.value);
676
+ separatorInput.value && column.setTag('separator', separatorInput.value);
677
+ column.setTag('aligned', isPotentiallyMSA ? 'SEQ.MSA' : 'SEQ');
678
+ column.setTag('alphabet', defaultAlphabet);
679
+ isMultichar && column.setTag('.alphabetIsMultichar', 'true');
680
+ column.semType = 'Macromolecule';
681
+ });
627
682
  });
628
683
  }
629
684
  }