@datagrok/bio 2.15.13 → 2.16.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +25 -0
- package/detectors.js +16 -11
- package/dist/455.js.map +1 -1
- package/dist/980.js +1 -1
- package/dist/980.js.map +1 -1
- package/dist/package-test.js +6 -6
- package/dist/package-test.js.map +1 -1
- package/dist/package.js +3 -3
- package/dist/package.js.map +1 -1
- package/package.json +14 -14
- package/src/analysis/sequence-activity-cliffs.ts +9 -8
- package/src/analysis/sequence-diversity-viewer.ts +6 -4
- package/src/analysis/sequence-similarity-viewer.ts +9 -6
- package/src/analysis/sequence-space.ts +3 -2
- package/src/calculations/monomerLevelMols.ts +4 -5
- package/src/demo/bio01-similarity-diversity.ts +4 -1
- package/src/package-test.ts +1 -1
- package/src/package-types.ts +34 -2
- package/src/package.ts +60 -76
- package/src/substructure-search/substructure-search.ts +15 -9
- package/src/tests/WebLogo-layout-tests.ts +1 -1
- package/src/tests/WebLogo-positions-test.ts +11 -5
- package/src/tests/WebLogo-project-tests.ts +1 -1
- package/src/tests/activity-cliffs-utils.ts +11 -14
- package/src/tests/bio-tests.ts +85 -79
- package/src/tests/checkInputColumn-tests.ts +15 -10
- package/src/tests/converters-test.ts +12 -5
- package/src/tests/detectors-benchmark-tests.ts +5 -2
- package/src/tests/detectors-tests.ts +51 -44
- package/src/tests/detectors-weak-and-likely-tests.ts +12 -5
- package/src/tests/fasta-export-tests.ts +13 -5
- package/src/tests/helm-tests.ts +85 -0
- package/src/tests/mm-distance-tests.ts +14 -7
- package/src/tests/monomer-libraries-tests.ts +1 -1
- package/src/tests/msa-tests.ts +33 -24
- package/src/tests/renderers-monomer-placer-tests.ts +2 -5
- package/src/tests/renderers-test.ts +15 -9
- package/src/tests/scoring.ts +9 -6
- package/src/tests/seq-handler-get-helm-tests.ts +7 -5
- package/src/tests/seq-handler-get-region-tests.ts +9 -3
- package/src/tests/seq-handler-splitted-tests.ts +11 -5
- package/src/tests/seq-handler-tests.ts +17 -10
- package/src/tests/sequence-space-utils.ts +9 -4
- package/src/tests/splitters-test.ts +5 -4
- package/src/tests/substructure-filters-tests.ts +22 -23
- package/src/tests/to-atomic-level-tests.ts +5 -3
- package/src/tests/to-atomic-level-ui-tests.ts +4 -1
- package/src/tests/utils/detectors-utils.ts +4 -4
- package/src/utils/calculate-scores.ts +11 -9
- package/src/utils/cell-renderer-custom.ts +27 -17
- package/src/utils/cell-renderer.ts +14 -8
- package/src/utils/check-input-column.ts +13 -9
- package/src/utils/context-menu.ts +4 -4
- package/src/utils/convert.ts +21 -14
- package/src/utils/get-region-func-editor.ts +8 -5
- package/src/utils/get-region.ts +4 -5
- package/src/utils/helm-to-molfile/converter/helm.ts +4 -4
- package/src/utils/helm-to-molfile/utils.ts +5 -6
- package/src/utils/macromolecule-column-widget.ts +6 -7
- package/src/utils/monomer-cell-renderer-base.ts +8 -1
- package/src/utils/monomer-lib/lib-manager.ts +3 -2
- package/src/utils/monomer-lib/monomer-colors.ts +10 -10
- package/src/utils/monomer-lib/monomer-lib-base.ts +6 -1
- package/src/utils/monomer-lib/monomer-lib.ts +15 -9
- package/src/utils/multiple-sequence-alignment-ui.ts +30 -30
- package/src/utils/save-as-fasta.ts +19 -12
- package/src/utils/seq-helper/seq-handler.ts +836 -0
- package/src/utils/seq-helper/seq-helper.ts +43 -19
- package/src/utils/sequence-to-mol.ts +7 -8
- package/src/utils/split-to-monomers.ts +7 -2
- package/src/utils/types.ts +8 -7
- package/src/utils/ui-utils.ts +2 -2
- package/src/viewers/web-logo-viewer.ts +18 -16
- package/src/widgets/bio-substructure-filter-helm.ts +5 -2
- package/src/widgets/bio-substructure-filter.ts +14 -24
- package/src/widgets/composition-analysis-widget.ts +6 -6
- package/src/widgets/representations.ts +7 -4
- package/src/tests/detectors-custom-notation-tests.ts +0 -37
- package/src/utils/cyclized.ts +0 -89
- package/src/utils/dimerized.ts +0 -10
package/src/package.ts
CHANGED
|
@@ -9,8 +9,7 @@ import {getActivityCliffs} from '@datagrok-libraries/ml/src/viewers/activity-cli
|
|
|
9
9
|
import {MmDistanceFunctionsNames} from '@datagrok-libraries/ml/src/macromolecule-distance-functions';
|
|
10
10
|
import {BitArrayMetrics, KnownMetrics} from '@datagrok-libraries/ml/src/typed-metrics';
|
|
11
11
|
import {NOTATION, TAGS as bioTAGS} from '@datagrok-libraries/bio/src/utils/macromolecule';
|
|
12
|
-
import {
|
|
13
|
-
import {IMonomerLib, IMonomerSet} from '@datagrok-libraries/bio/src/types';
|
|
12
|
+
import {IMonomerLib, IMonomerLibBase, IMonomerSet} from '@datagrok-libraries/bio/src/types';
|
|
14
13
|
import {SeqPalette} from '@datagrok-libraries/bio/src/seq-palettes';
|
|
15
14
|
import {FastaFileHandler} from '@datagrok-libraries/bio/src/utils/fasta-handler';
|
|
16
15
|
import {SCORE} from '@datagrok-libraries/bio/src/utils/macromolecule/scoring';
|
|
@@ -66,12 +65,11 @@ import {GetRegionApp} from './apps/get-region-app';
|
|
|
66
65
|
import {GetRegionFuncEditor} from './utils/get-region-func-editor';
|
|
67
66
|
import {sequenceToMolfile} from './utils/sequence-to-mol';
|
|
68
67
|
import {detectMacromoleculeProbeDo} from './utils/detect-macromolecule-probe';
|
|
69
|
-
import {CyclizedNotationProvider} from './utils/cyclized';
|
|
70
|
-
import {DimerizedNotationProvider} from './utils/dimerized';
|
|
71
68
|
import {getMolColumnFromHelm} from './utils/helm-to-molfile/utils';
|
|
72
69
|
import {MonomerManager} from './utils/monomer-lib/monomer-manager/monomer-manager';
|
|
73
70
|
import {calculateScoresWithEmptyValues} from './utils/calculate-scores';
|
|
74
71
|
import {SeqHelper} from './utils/seq-helper/seq-helper';
|
|
72
|
+
import {ISeqHandler} from '@datagrok-libraries/bio/src/utils/macromolecule/seq-handler';
|
|
75
73
|
|
|
76
74
|
export const _package = new BioPackage(/*{debug: true}/**/);
|
|
77
75
|
|
|
@@ -99,9 +97,6 @@ export class SeqPaletteCustom implements SeqPalette {
|
|
|
99
97
|
}
|
|
100
98
|
}
|
|
101
99
|
|
|
102
|
-
let monomerLib: IMonomerLib | null = null;
|
|
103
|
-
let monomerSets: IMonomerSet | null = null;
|
|
104
|
-
|
|
105
100
|
let initBioPromise: Promise<void> | null = null;
|
|
106
101
|
|
|
107
102
|
//tags: init
|
|
@@ -115,21 +110,26 @@ export async function initBio(): Promise<void> {
|
|
|
115
110
|
async function initBioInt() {
|
|
116
111
|
const logPrefix = 'Bio: _package.initBio()';
|
|
117
112
|
_package.logger.debug(`${logPrefix}, start`);
|
|
113
|
+
let monomerLib!: IMonomerLib;
|
|
114
|
+
let monomerSets!: IMonomerSet;
|
|
118
115
|
let rdKitModule!: RDModule;
|
|
119
|
-
let
|
|
116
|
+
let libHelper!: MonomerLibManager;
|
|
120
117
|
const t1: number = window.performance.now();
|
|
121
118
|
await Promise.all([
|
|
122
119
|
(async () => {
|
|
123
|
-
|
|
120
|
+
libHelper = await MonomerLibManager.getInstance();
|
|
124
121
|
// Fix user lib settings for explicit stuck from a terminated test
|
|
125
122
|
const libSettings = await getUserLibSettings();
|
|
126
123
|
if (libSettings.explicit) {
|
|
127
124
|
libSettings.explicit = [];
|
|
128
125
|
await setUserLibSettings(libSettings);
|
|
129
126
|
}
|
|
130
|
-
|
|
131
|
-
|
|
132
|
-
|
|
127
|
+
libHelper.awaitLoaded(Infinity).then(() => {
|
|
128
|
+
// Do not wait for monomers and sets loaded
|
|
129
|
+
return Promise.all([libHelper.loadMonomerLib(), libHelper.loadMonomerSets()]);
|
|
130
|
+
});
|
|
131
|
+
monomerLib = libHelper.getMonomerLib();
|
|
132
|
+
monomerSets = libHelper.getMonomerSets();
|
|
133
133
|
})(),
|
|
134
134
|
(async () => {
|
|
135
135
|
const pkgProps = await _package.getProperties();
|
|
@@ -141,8 +141,8 @@ async function initBioInt() {
|
|
|
141
141
|
const t2: number = window.performance.now();
|
|
142
142
|
_package.logger.debug(`${logPrefix}, loading ET: ${t2 - t1} ms`);
|
|
143
143
|
});
|
|
144
|
-
|
|
145
|
-
_package.completeInit(rdKitModule);
|
|
144
|
+
const seqHelper = new SeqHelper(libHelper, rdKitModule);
|
|
145
|
+
_package.completeInit(seqHelper, monomerLib, monomerSets, rdKitModule);
|
|
146
146
|
|
|
147
147
|
const monomers: string[] = [];
|
|
148
148
|
const logPs: number[] = [];
|
|
@@ -174,7 +174,7 @@ async function initBioInt() {
|
|
|
174
174
|
//input: column col {semType: Macromolecule}
|
|
175
175
|
//output: widget result
|
|
176
176
|
export function sequenceTooltip(col: DG.Column): DG.Widget<any> {
|
|
177
|
-
const resWidget = new MacromoleculeColumnWidget(col);
|
|
177
|
+
const resWidget = new MacromoleculeColumnWidget(col, _package.seqHelper);
|
|
178
178
|
const _resPromise = resWidget.init().then(() => { })
|
|
179
179
|
.catch((err: any) => {
|
|
180
180
|
const errMsg = err instanceof Error ? err.message : err.toString();
|
|
@@ -187,19 +187,14 @@ export function sequenceTooltip(col: DG.Column): DG.Widget<any> {
|
|
|
187
187
|
//name: getBioLib
|
|
188
188
|
//output: object monomerLib
|
|
189
189
|
export function getBioLib(): IMonomerLib {
|
|
190
|
-
return monomerLib
|
|
191
|
-
}
|
|
192
|
-
|
|
193
|
-
// For sync internal use, on initialized package
|
|
194
|
-
export function getMonomerLib(): IMonomerLib {
|
|
195
|
-
return monomerLib!;
|
|
190
|
+
return _package.monomerLib;
|
|
196
191
|
}
|
|
197
192
|
|
|
198
193
|
//name: getSeqHandler
|
|
199
194
|
//input: column sequence { semType: Macromolecule }
|
|
200
195
|
//output: object result
|
|
201
|
-
export function getSeqHandler(sequence: DG.Column<string>):
|
|
202
|
-
return
|
|
196
|
+
export function getSeqHandler(sequence: DG.Column<string>): ISeqHandler {
|
|
197
|
+
return _package.seqHelper.getSeqHandler(sequence);
|
|
203
198
|
}
|
|
204
199
|
|
|
205
200
|
// -- Panels --
|
|
@@ -215,7 +210,7 @@ export function getRegionPanel(seqCol: DG.Column<string>): DG.Widget {
|
|
|
215
210
|
if (funcList.length !== 1) throw new Error(`Package '${_package.name}' func '${funcName}' not found`);
|
|
216
211
|
const func = funcList[0];
|
|
217
212
|
const funcCall = func.prepare({table: seqCol.dataFrame, sequence: seqCol});
|
|
218
|
-
const funcEditor = new GetRegionFuncEditor(funcCall);
|
|
213
|
+
const funcEditor = new GetRegionFuncEditor(funcCall, _package.seqHelper);
|
|
219
214
|
return funcEditor.widget();
|
|
220
215
|
}
|
|
221
216
|
|
|
@@ -236,7 +231,7 @@ export async function libraryPanel(_seqColumn: DG.Column): Promise<DG.Widget> {
|
|
|
236
231
|
//input: funccall call
|
|
237
232
|
export function GetRegionEditor(call: DG.FuncCall): void {
|
|
238
233
|
try {
|
|
239
|
-
const funcEditor = new GetRegionFuncEditor(call);
|
|
234
|
+
const funcEditor = new GetRegionFuncEditor(call, _package.seqHelper);
|
|
240
235
|
funcEditor.dialog();
|
|
241
236
|
} catch (err: any) {
|
|
242
237
|
const errMsg = err instanceof Error ? err.message : err.toString();
|
|
@@ -364,7 +359,7 @@ export function macroMolColumnPropertyPanel(molColumn: DG.Column): DG.Widget {
|
|
|
364
359
|
//input: semantic_value sequence { semType: Macromolecule }
|
|
365
360
|
//output: widget result
|
|
366
361
|
export function compositionAnalysisWidget(sequence: DG.SemanticValue): DG.Widget {
|
|
367
|
-
return getCompositionAnalysisWidget(sequence, monomerLib
|
|
362
|
+
return getCompositionAnalysisWidget(sequence, _package.monomerLib, _package.seqHelper);
|
|
368
363
|
}
|
|
369
364
|
|
|
370
365
|
//name: MacromoleculeDifferenceCellRenderer
|
|
@@ -567,7 +562,7 @@ export async function macromoleculePreprocessingFunction(
|
|
|
567
562
|
export async function helmPreprocessingFunction(
|
|
568
563
|
col: DG.Column<string>, _metric: BitArrayMetrics): Promise<PreprocessFunctionReturnType> {
|
|
569
564
|
if (col.version !== col.temp[MONOMERIC_COL_TAGS.LAST_INVALIDATED_VERSION])
|
|
570
|
-
await invalidateMols(col, false);
|
|
565
|
+
await invalidateMols(col, _package.seqHelper, false);
|
|
571
566
|
const molCol = col.temp[MONOMERIC_COL_TAGS.MONOMERIC_MOLS];
|
|
572
567
|
const fingerPrints: DG.Column<DG.BitSet | null> =
|
|
573
568
|
await grok.functions.call('Chem:getMorganFingerprints', {molColumn: molCol});
|
|
@@ -635,8 +630,10 @@ export async function toAtomicLevel(
|
|
|
635
630
|
const pi = DG.TaskBarProgressIndicator.create('Converting to atomic level ...');
|
|
636
631
|
try {
|
|
637
632
|
await initBioPromise;
|
|
638
|
-
const monomerLib =
|
|
639
|
-
|
|
633
|
+
const monomerLib = _package.monomerLib;
|
|
634
|
+
const seqHelper = _package.seqHelper;
|
|
635
|
+
const rdKitModule = _package.rdKitModule;
|
|
636
|
+
await sequenceToMolfile(table, seqCol, nonlinear, highlight, monomerLib, seqHelper, rdKitModule);
|
|
640
637
|
} finally {
|
|
641
638
|
pi.close();
|
|
642
639
|
}
|
|
@@ -647,7 +644,7 @@ export async function toAtomicLevel(
|
|
|
647
644
|
//description: Performs multiple sequence alignment
|
|
648
645
|
//tags: bio, panel
|
|
649
646
|
export function multipleSequenceAlignmentDialog(): void {
|
|
650
|
-
multipleSequenceAlignmentUI()
|
|
647
|
+
multipleSequenceAlignmentUI({}, _package.seqHelper)
|
|
651
648
|
.catch((err: any) => {
|
|
652
649
|
const [errMsg, errStack] = errInfo(err);
|
|
653
650
|
if (err instanceof MsaWarning) {
|
|
@@ -666,10 +663,12 @@ export function multipleSequenceAlignmentDialog(): void {
|
|
|
666
663
|
//tags: bio
|
|
667
664
|
//input: column sequenceCol {semType: Macromolecule}
|
|
668
665
|
//input: column clustersCol
|
|
666
|
+
//input: object options = undefined { optional: true }
|
|
669
667
|
//output: column result
|
|
670
|
-
export async function alignSequences(
|
|
671
|
-
|
|
672
|
-
|
|
668
|
+
export async function alignSequences(
|
|
669
|
+
sequenceCol: DG.Column<string> | null = null, clustersCol: DG.Column | null = null, options?: any
|
|
670
|
+
): Promise<DG.Column<string>> {
|
|
671
|
+
return multipleSequenceAlignmentUI({col: sequenceCol, clustersCol: clustersCol, ...options}, _package.seqHelper);
|
|
673
672
|
}
|
|
674
673
|
|
|
675
674
|
//top-menu: Bio | Analyze | Composition
|
|
@@ -686,7 +685,7 @@ export async function compositionAnalysis(): Promise<void> {
|
|
|
686
685
|
if (col.semType != DG.SEMTYPE.MACROMOLECULE)
|
|
687
686
|
return false;
|
|
688
687
|
|
|
689
|
-
const _colSh =
|
|
688
|
+
const _colSh = _package.seqHelper.getSeqHandler(col);
|
|
690
689
|
// TODO: prevent for cyclic, branched or multiple chains in Helm
|
|
691
690
|
return true;
|
|
692
691
|
});
|
|
@@ -705,7 +704,7 @@ export async function compositionAnalysis(): Promise<void> {
|
|
|
705
704
|
return;
|
|
706
705
|
} else if (colList.length > 1) {
|
|
707
706
|
const colListNames: string [] = colList.map((col) => col.name);
|
|
708
|
-
const selectedCol = colList.find((c) => { return
|
|
707
|
+
const selectedCol = colList.find((c) => { return _package.seqHelper.getSeqHandler(c).isMsa(); });
|
|
709
708
|
const colInput: DG.InputBase = ui.input.choice(
|
|
710
709
|
'Column', {value: selectedCol ? selectedCol.name : colListNames[0], items: colListNames});
|
|
711
710
|
ui.dialog({
|
|
@@ -756,8 +755,8 @@ export function importBam(fileContent: string): DG.DataFrame [] {
|
|
|
756
755
|
//top-menu: Bio | Convert | Notation...
|
|
757
756
|
//name: convertDialog
|
|
758
757
|
export function convertDialog() {
|
|
759
|
-
const col = getMacromoleculeColumns()[0];
|
|
760
|
-
convert(col);
|
|
758
|
+
const col: DG.Column<string> | undefined = getMacromoleculeColumns()[0];
|
|
759
|
+
convert(col, _package.seqHelper);
|
|
761
760
|
}
|
|
762
761
|
|
|
763
762
|
//top-menu: Bio | Convert | TestConvert
|
|
@@ -774,7 +773,7 @@ export async function convertSeqNotation(sequence: string, targetNotation: NOTAT
|
|
|
774
773
|
const semType = await grok.functions.call('Bio:detectMacromolecule', {col: col});
|
|
775
774
|
if (semType)
|
|
776
775
|
col.semType = semType;
|
|
777
|
-
const converterSh =
|
|
776
|
+
const converterSh = _package.seqHelper.getSeqHandler(col);
|
|
778
777
|
const newColumn = converterSh.convert(targetNotation, separator);
|
|
779
778
|
return newColumn.get(0);
|
|
780
779
|
} catch (err: any) {
|
|
@@ -851,16 +850,15 @@ export async function testDetectMacromolecule(path: string): Promise<DG.DataFram
|
|
|
851
850
|
//input: column sequence { semType: Macromolecule }
|
|
852
851
|
//output: dataframe result
|
|
853
852
|
//editor: Bio:SplitToMonomersEditor
|
|
854
|
-
export async function splitToMonomersTopMenu(table: DG.DataFrame, sequence: DG.Column): Promise<
|
|
855
|
-
await splitToMonomersUI(table, sequence);
|
|
853
|
+
export async function splitToMonomersTopMenu(table: DG.DataFrame, sequence: DG.Column): Promise<DG.DataFrame> {
|
|
854
|
+
return await splitToMonomersUI(table, sequence);
|
|
856
855
|
}
|
|
857
856
|
|
|
858
857
|
//name: Bio: getHelmMonomers
|
|
859
858
|
//input: column sequence {semType: Macromolecule}
|
|
859
|
+
//output: object result
|
|
860
860
|
export function getHelmMonomers(sequence: DG.Column<string>): string[] {
|
|
861
|
-
|
|
862
|
-
const stats = sh.stats;
|
|
863
|
-
return Object.keys(stats.freq);
|
|
861
|
+
return _package.seqHelper.getSeqMonomers(sequence);
|
|
864
862
|
}
|
|
865
863
|
|
|
866
864
|
|
|
@@ -869,7 +867,7 @@ export function getHelmMonomers(sequence: DG.Column<string>): string[] {
|
|
|
869
867
|
//meta.icon: files/icons/sequence-similarity-viewer.svg
|
|
870
868
|
//output: viewer result
|
|
871
869
|
export function similaritySearchViewer(): SequenceSimilarityViewer {
|
|
872
|
-
return new SequenceSimilarityViewer();
|
|
870
|
+
return new SequenceSimilarityViewer(_package.seqHelper);
|
|
873
871
|
}
|
|
874
872
|
|
|
875
873
|
//top-menu: Bio | Search | Similarity Search
|
|
@@ -887,7 +885,7 @@ export function similaritySearchTopMenu(): void {
|
|
|
887
885
|
//meta.icon: files/icons/sequence-diversity-viewer.svg
|
|
888
886
|
//output: viewer result
|
|
889
887
|
export function diversitySearchViewer(): SequenceDiversityViewer {
|
|
890
|
-
return new SequenceDiversityViewer();
|
|
888
|
+
return new SequenceDiversityViewer(_package.seqHelper);
|
|
891
889
|
}
|
|
892
890
|
|
|
893
891
|
//top-menu: Bio | Search | Diversity Search
|
|
@@ -908,7 +906,7 @@ export function searchSubsequenceEditor(call: DG.FuncCall) {
|
|
|
908
906
|
if (columns.length === 1)
|
|
909
907
|
call.func.prepare({macromolecules: columns[0]}).call(true);
|
|
910
908
|
else
|
|
911
|
-
new SubstructureSearchDialog(columns);
|
|
909
|
+
new SubstructureSearchDialog(columns, _package.seqHelper);
|
|
912
910
|
}
|
|
913
911
|
|
|
914
912
|
//top-menu: Bio | Search | Subsequence Search ...
|
|
@@ -928,13 +926,14 @@ export function SubsequenceSearchTopMenu(macromolecules: DG.Column): void {
|
|
|
928
926
|
//name: Identity Scoring
|
|
929
927
|
//description: Adds a column with fraction of matching monomers
|
|
930
928
|
//input: dataframe table [Table containing Macromolecule column]
|
|
931
|
-
//input: column
|
|
929
|
+
//input: column macromolecule {semType: Macromolecule} [Sequences to score]
|
|
932
930
|
//input: string reference [Sequence, matching column format]
|
|
933
931
|
//output: column scores
|
|
934
932
|
export async function sequenceIdentityScoring(
|
|
935
933
|
table: DG.DataFrame, macromolecule: DG.Column, reference: string
|
|
936
934
|
): Promise<DG.Column<number>> {
|
|
937
|
-
const
|
|
935
|
+
const seqHelper = _package.seqHelper;
|
|
936
|
+
const scores = calculateScoresWithEmptyValues(table, macromolecule, reference, SCORE.IDENTITY, seqHelper);
|
|
938
937
|
return scores;
|
|
939
938
|
}
|
|
940
939
|
|
|
@@ -942,13 +941,14 @@ export async function sequenceIdentityScoring(
|
|
|
942
941
|
//name: Similarity Scoring
|
|
943
942
|
//description: Adds a column with similarity scores, calculated as sum of monomer fingerprint similarities
|
|
944
943
|
//input: dataframe table [Table containing Macromolecule column]
|
|
945
|
-
//input: column
|
|
944
|
+
//input: column macromolecule {semType: Macromolecule} [Sequences to score]
|
|
946
945
|
//input: string reference [Sequence, matching column format]
|
|
947
946
|
//output: column scores
|
|
948
947
|
export async function sequenceSimilarityScoring(
|
|
949
948
|
table: DG.DataFrame, macromolecule: DG.Column, reference: string
|
|
950
949
|
): Promise<DG.Column<number>> {
|
|
951
|
-
const
|
|
950
|
+
const seqHelper = _package.seqHelper;
|
|
951
|
+
const scores = calculateScoresWithEmptyValues(table, macromolecule, reference, SCORE.SIMILARITY, seqHelper);
|
|
952
952
|
return scores;
|
|
953
953
|
}
|
|
954
954
|
|
|
@@ -978,7 +978,7 @@ export function saveAsFasta() {
|
|
|
978
978
|
//output: filter result
|
|
979
979
|
//meta.semType: Macromolecule
|
|
980
980
|
export function bioSubstructureFilter(): BioSubstructureFilter {
|
|
981
|
-
return new BioSubstructureFilter();
|
|
981
|
+
return new BioSubstructureFilter(_package.seqHelper, _package.logger);
|
|
982
982
|
}
|
|
983
983
|
|
|
984
984
|
// -- Test apps --
|
|
@@ -1046,13 +1046,13 @@ export function longSeqTableSeparator(): void {
|
|
|
1046
1046
|
|
|
1047
1047
|
//name: longSeqTableFasta
|
|
1048
1048
|
export function longSeqTableFasta(): void {
|
|
1049
|
-
const df = DG.DataFrame.fromColumns([generateLongSequence2(NOTATION.FASTA)]);
|
|
1049
|
+
const df = DG.DataFrame.fromColumns([generateLongSequence2(_package.seqHelper, NOTATION.FASTA)]);
|
|
1050
1050
|
grok.shell.addTableView(df);
|
|
1051
1051
|
}
|
|
1052
1052
|
|
|
1053
1053
|
//name: longSeqTableHelm
|
|
1054
1054
|
export function longSeqTableHelm(): void {
|
|
1055
|
-
const df = DG.DataFrame.fromColumns([generateLongSequence2(NOTATION.HELM)]);
|
|
1055
|
+
const df = DG.DataFrame.fromColumns([generateLongSequence2(_package.seqHelper, NOTATION.HELM)]);
|
|
1056
1056
|
grok.shell.addTableView(df);
|
|
1057
1057
|
}
|
|
1058
1058
|
|
|
@@ -1062,7 +1062,7 @@ export function longSeqTableHelm(): void {
|
|
|
1062
1062
|
//input: object cell
|
|
1063
1063
|
//input: object menu
|
|
1064
1064
|
export function addCopyMenu(cell: DG.Cell, menu: DG.Menu): void {
|
|
1065
|
-
addCopyMenuUI(cell, menu);
|
|
1065
|
+
addCopyMenuUI(cell, menu, _package.seqHelper);
|
|
1066
1066
|
}
|
|
1067
1067
|
|
|
1068
1068
|
// -- Demo --
|
|
@@ -1148,7 +1148,9 @@ export async function seq2atomic(seq: string, nonlinear: boolean): Promise<strin
|
|
|
1148
1148
|
if (semType) seqCol.semType = semType;
|
|
1149
1149
|
|
|
1150
1150
|
const monomerLib = (await getMonomerLibHelper()).getMonomerLib();
|
|
1151
|
-
const
|
|
1151
|
+
const seqHelper = _package.seqHelper;
|
|
1152
|
+
const rdKitModule = await getRdKitModule();
|
|
1153
|
+
const res = (await sequenceToMolfile(df, seqCol, nonlinear, false, monomerLib, seqHelper, rdKitModule))?.molCol?.get(0);
|
|
1152
1154
|
return res ?? undefined;
|
|
1153
1155
|
} catch (err: any) {
|
|
1154
1156
|
const [errMsg, errStack] = errInfo(err);
|
|
@@ -1192,7 +1194,7 @@ export async function seqIdentity(seq: string, ref: string): Promise<number | nu
|
|
|
1192
1194
|
const semType = await grok.functions.call('Bio:detectMacromolecule', {col: seqCol});
|
|
1193
1195
|
if (!semType) throw new Error('Macromolecule required');
|
|
1194
1196
|
|
|
1195
|
-
const resCol = await calculateScoresWithEmptyValues(df, seqCol, ref, SCORE.IDENTITY);
|
|
1197
|
+
const resCol = await calculateScoresWithEmptyValues(df, seqCol, ref, SCORE.IDENTITY, _package.seqHelper);
|
|
1196
1198
|
return resCol.get(0);
|
|
1197
1199
|
} catch (err: any) {
|
|
1198
1200
|
const [errMsg, errStack] = errInfo(err);
|
|
@@ -1214,7 +1216,7 @@ export async function detectMacromoleculeProbe(file: DG.FileInfo, colName: strin
|
|
|
1214
1216
|
//name: getSeqHelper
|
|
1215
1217
|
//output: object result
|
|
1216
1218
|
export async function getSeqHelper(): Promise<ISeqHelper> {
|
|
1217
|
-
return
|
|
1219
|
+
return _package.seqHelper;
|
|
1218
1220
|
}
|
|
1219
1221
|
|
|
1220
1222
|
//name: getMolFromHelm
|
|
@@ -1225,25 +1227,7 @@ export async function getSeqHelper(): Promise<ISeqHelper> {
|
|
|
1225
1227
|
export function getMolFromHelm(
|
|
1226
1228
|
df: DG.DataFrame, helmCol: DG.Column<string>, chiralityEngine: boolean
|
|
1227
1229
|
): Promise<DG.Column<string>> {
|
|
1228
|
-
return getMolColumnFromHelm(df, helmCol, chiralityEngine,
|
|
1229
|
-
}
|
|
1230
|
-
|
|
1231
|
-
// -- Custom notation providers --
|
|
1232
|
-
|
|
1233
|
-
//name: applyNotationProviderForCyclized
|
|
1234
|
-
//input: column col
|
|
1235
|
-
//input: string separator
|
|
1236
|
-
export function applyNotationProviderForCyclized(col: DG.Column<string>, separator: string) {
|
|
1237
|
-
col.meta.units = NOTATION.CUSTOM;
|
|
1238
|
-
col.temp[SeqTemps.notationProvider] = new CyclizedNotationProvider(separator);
|
|
1239
|
-
}
|
|
1240
|
-
|
|
1241
|
-
//name: applyNotationProviderForDimerized
|
|
1242
|
-
//input: column col
|
|
1243
|
-
//input: string separator
|
|
1244
|
-
export function applyNotationProviderForDimerized(col: DG.Column<string>, separator: string) {
|
|
1245
|
-
col.meta.units = NOTATION.CUSTOM;
|
|
1246
|
-
col.temp[SeqTemps.notationProvider] = new DimerizedNotationProvider(separator);
|
|
1230
|
+
return getMolColumnFromHelm(df, helmCol, chiralityEngine, _package.monomerLib);
|
|
1247
1231
|
}
|
|
1248
1232
|
|
|
1249
1233
|
//name: test1
|
|
@@ -2,10 +2,12 @@ import * as grok from 'datagrok-api/grok';
|
|
|
2
2
|
import * as ui from 'datagrok-api/ui';
|
|
3
3
|
import * as DG from 'datagrok-api/dg';
|
|
4
4
|
|
|
5
|
-
import {getMonomericMols} from '../calculations/monomerLevelMols';
|
|
6
|
-
import {updateDivInnerHTML} from '../utils/ui-utils';
|
|
7
5
|
import {delay} from '@datagrok-libraries/utils/src/test';
|
|
8
6
|
import {TAGS as bioTAGS, NOTATION} from '@datagrok-libraries/bio/src/utils/macromolecule';
|
|
7
|
+
import {ISeqHelper} from '@datagrok-libraries/bio/src/utils/seq-helper';
|
|
8
|
+
|
|
9
|
+
import {getMonomericMols} from '../calculations/monomerLevelMols';
|
|
10
|
+
import {updateDivInnerHTML} from '../utils/ui-utils';
|
|
9
11
|
|
|
10
12
|
export const MONOMER_MOLS_COL = 'monomeric-mols';
|
|
11
13
|
|
|
@@ -29,7 +31,9 @@ export class SubstructureSearchDialog {
|
|
|
29
31
|
col: DG.Column;
|
|
30
32
|
dialog: DG.Dialog;
|
|
31
33
|
|
|
32
|
-
constructor(columns: DG.Column<string>[]
|
|
34
|
+
constructor(columns: DG.Column<string>[],
|
|
35
|
+
private readonly seqHelper: ISeqHelper
|
|
36
|
+
) {
|
|
33
37
|
this.col = columns[0];
|
|
34
38
|
this.createUI();
|
|
35
39
|
}
|
|
@@ -96,7 +100,7 @@ export class SubstructureSearchDialog {
|
|
|
96
100
|
substructure = substructure.replaceAll(this.separatorInput.value, this.separator);
|
|
97
101
|
let matches: DG.BitSet;
|
|
98
102
|
if (this.units === NOTATION.HELM)
|
|
99
|
-
matches = await helmSubstructureSearch(substructure, this.col);
|
|
103
|
+
matches = await helmSubstructureSearch(substructure, this.col, this.seqHelper);
|
|
100
104
|
else
|
|
101
105
|
matches = linearSubstructureSearch(substructure, this.col);
|
|
102
106
|
this.col.dataFrame.filter.and(matches);
|
|
@@ -129,14 +133,16 @@ function prepareSubstructureRegex(substructure: string, separator: string) {
|
|
|
129
133
|
return re;
|
|
130
134
|
}
|
|
131
135
|
|
|
132
|
-
export async function helmSubstructureSearch(
|
|
136
|
+
export async function helmSubstructureSearch(
|
|
137
|
+
substructure: string, col: DG.Column<string>, seqHelper: ISeqHelper
|
|
138
|
+
): Promise<DG.BitSet> {
|
|
133
139
|
if (col.version !== col.temp[MONOMERIC_COL_TAGS.LAST_INVALIDATED_VERSION])
|
|
134
|
-
await invalidateMols(col, true);
|
|
140
|
+
await invalidateMols(col, seqHelper, true);
|
|
135
141
|
const substructureCol: DG.Column<string> = DG.Column.string('helm', 1).init((_i) => substructure);
|
|
136
142
|
substructureCol.semType = DG.SEMTYPE.MACROMOLECULE;
|
|
137
143
|
substructureCol.meta.units = NOTATION.HELM;
|
|
138
144
|
const substructureMolsCol =
|
|
139
|
-
await getMonomericMols(substructureCol, true, col.temp[MONOMERIC_COL_TAGS.MONOMERS_DICT]);
|
|
145
|
+
await getMonomericMols(substructureCol, seqHelper, true, col.temp[MONOMERIC_COL_TAGS.MONOMERS_DICT]);
|
|
140
146
|
const matchesCol = await grok.functions.call('Chem:searchSubstructure', {
|
|
141
147
|
molStringsColumn: col.temp[MONOMERIC_COL_TAGS.MONOMERIC_MOLS],
|
|
142
148
|
molString: substructureMolsCol.get(0),
|
|
@@ -145,12 +151,12 @@ export async function helmSubstructureSearch(substructure: string, col: DG.Colum
|
|
|
145
151
|
return matchesCol.get(0);
|
|
146
152
|
}
|
|
147
153
|
|
|
148
|
-
export async function invalidateMols(col: DG.Column<string>, pattern: boolean) {
|
|
154
|
+
export async function invalidateMols(col: DG.Column<string>, seqHelper: ISeqHelper, pattern: boolean): Promise<void> {
|
|
149
155
|
const progressBar = DG.TaskBarProgressIndicator.create(`Invalidating molfiles for ${col.name}`);
|
|
150
156
|
try {
|
|
151
157
|
await delay(10);
|
|
152
158
|
const monomersDict = new Map();
|
|
153
|
-
const monomericMolsCol = await getMonomericMols(col, pattern, monomersDict);
|
|
159
|
+
const monomericMolsCol = await getMonomericMols(col, seqHelper, pattern, monomersDict);
|
|
154
160
|
col.temp[MONOMERIC_COL_TAGS.MONOMERIC_MOLS] = monomericMolsCol;
|
|
155
161
|
col.temp[MONOMERIC_COL_TAGS.MONOMERS_DICT] = monomersDict;
|
|
156
162
|
col.temp[MONOMERIC_COL_TAGS.LAST_INVALIDATED_VERSION] = col.version;
|
|
@@ -11,7 +11,7 @@ import {Debounces, WebLogoViewer} from '../viewers/web-logo-viewer';
|
|
|
11
11
|
|
|
12
12
|
import {_package} from '../package-test';
|
|
13
13
|
|
|
14
|
-
category('WebLogo
|
|
14
|
+
category('WebLogo.layout', () => {
|
|
15
15
|
test('fasta', async () => {
|
|
16
16
|
const df = await _package.files.readCsv('tests/filter_FASTA.csv');
|
|
17
17
|
const col = df.getCol('fasta');
|
|
@@ -1,9 +1,10 @@
|
|
|
1
1
|
import * as grok from 'datagrok-api/grok';
|
|
2
2
|
import * as DG from 'datagrok-api/dg';
|
|
3
3
|
|
|
4
|
-
import {category, expect, expectArray, test, testEvent} from '@datagrok-libraries/utils/src/test';
|
|
4
|
+
import {before, category, expect, expectArray, test, testEvent} from '@datagrok-libraries/utils/src/test';
|
|
5
5
|
import {ALPHABET, NOTATION, TAGS as bioTAGS} from '@datagrok-libraries/bio/src/utils/macromolecule';
|
|
6
|
-
import {
|
|
6
|
+
import {getSeqHelper, ISeqHelper} from '@datagrok-libraries/bio/src/utils/seq-helper';
|
|
7
|
+
import {GAP_SYMBOL} from '@datagrok-libraries/bio/src/utils/macromolecule/consts';
|
|
7
8
|
|
|
8
9
|
import {
|
|
9
10
|
countForMonomerAtPosition,
|
|
@@ -11,11 +12,16 @@ import {
|
|
|
11
12
|
PositionMonomerInfo as PMI,
|
|
12
13
|
WebLogoViewer,
|
|
13
14
|
} from '../viewers/web-logo-viewer';
|
|
14
|
-
import {GAP_SYMBOL} from '@datagrok-libraries/bio/src/utils/macromolecule/consts';
|
|
15
15
|
|
|
16
16
|
const g: string = GAP_SYMBOL;
|
|
17
17
|
|
|
18
|
-
category('WebLogo
|
|
18
|
+
category('WebLogo.positions', () => {
|
|
19
|
+
let seqHelper: ISeqHelper;
|
|
20
|
+
|
|
21
|
+
before(async () => {
|
|
22
|
+
seqHelper = await getSeqHelper();
|
|
23
|
+
});
|
|
24
|
+
|
|
19
25
|
const csvDf1 = `seq
|
|
20
26
|
ATC-G-TTGC--
|
|
21
27
|
ATC-G-TTGC--
|
|
@@ -182,7 +188,7 @@ ATC-G-TTGC--
|
|
|
182
188
|
}
|
|
183
189
|
|
|
184
190
|
const atPI1: PI = resPosList[1];
|
|
185
|
-
const sh =
|
|
191
|
+
const sh = seqHelper.getSeqHandler(seqCol);
|
|
186
192
|
const countAt1 = countForMonomerAtPosition(df, sh, df.filter, 'G', atPI1);
|
|
187
193
|
expect(countAt1, 5);
|
|
188
194
|
await wlViewer.awaitRendered();
|
|
@@ -13,7 +13,7 @@ import {_package} from '../package-test';
|
|
|
13
13
|
|
|
14
14
|
const PROJECT_PREFIX: string = 'Tests.Bio.WebLogo-project';
|
|
15
15
|
|
|
16
|
-
category('WebLogo
|
|
16
|
+
category('WebLogo.project', () => {
|
|
17
17
|
test('fasta', async () => {
|
|
18
18
|
const prjName = `${PROJECT_PREFIX}.fasta`;
|
|
19
19
|
const df = await _package.files.readCsv('tests/filter_FASTA.csv');
|
|
@@ -2,7 +2,6 @@ import * as DG from 'datagrok-api/dg';
|
|
|
2
2
|
import * as grok from 'datagrok-api/grok';
|
|
3
3
|
|
|
4
4
|
import {expect} from '@datagrok-libraries/utils/src/test';
|
|
5
|
-
import {activityCliffs} from '../package';
|
|
6
5
|
import {MmDistanceFunctionsNames} from '@datagrok-libraries/ml/src/macromolecule-distance-functions';
|
|
7
6
|
import {BitArrayMetrics} from '@datagrok-libraries/ml/src/typed-metrics';
|
|
8
7
|
import {BYPASS_LARGE_DATA_WARNING} from '@datagrok-libraries/ml/src/functionEditors/consts';
|
|
@@ -13,19 +12,17 @@ export async function _testActivityCliffsOpen(df: DG.DataFrame, drMethod: DimRed
|
|
|
13
12
|
similarityMetric: MmDistanceFunctionsNames | BitArrayMetrics, preprocessingFunction: DG.Func,
|
|
14
13
|
): Promise<void> {
|
|
15
14
|
await grok.data.detectSemanticTypes(df);
|
|
16
|
-
const scatterPlot = await activityCliffs
|
|
17
|
-
|
|
18
|
-
|
|
19
|
-
|
|
20
|
-
|
|
21
|
-
|
|
22
|
-
|
|
23
|
-
|
|
24
|
-
|
|
25
|
-
|
|
26
|
-
|
|
27
|
-
// const k = 11;
|
|
28
|
-
|
|
15
|
+
const scatterPlot = (await grok.functions.call('Bio:activityCliffs', {
|
|
16
|
+
table: df,
|
|
17
|
+
molecules: df.getCol(seqColName),
|
|
18
|
+
activities: df.getCol(activityColName),
|
|
19
|
+
similarity: similarityThr,
|
|
20
|
+
methodName: drMethod,
|
|
21
|
+
similarityMetric: similarityMetric,
|
|
22
|
+
preprocessingFunction: preprocessingFunction,
|
|
23
|
+
options: {[`${BYPASS_LARGE_DATA_WARNING}`]: true},
|
|
24
|
+
demo: false,
|
|
25
|
+
})) as DG.Viewer | undefined;
|
|
29
26
|
expect(scatterPlot != null, true);
|
|
30
27
|
|
|
31
28
|
const cliffsLink = Array.from(scatterPlot!.root.children).find((el) => {
|