@datagrok/bio 2.15.2 → 2.15.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (54)
  1. package/CHANGELOG.md +32 -0
  2. package/detectors.js +3 -1
  3. package/dist/284.js +1 -1
  4. package/dist/284.js.map +1 -1
  5. package/dist/455.js +2 -0
  6. package/dist/455.js.map +1 -0
  7. package/dist/980.js +1 -1
  8. package/dist/980.js.map +1 -1
  9. package/dist/package-test.js +3 -3
  10. package/dist/package-test.js.map +1 -1
  11. package/dist/package.js +2 -2
  12. package/dist/package.js.map +1 -1
  13. package/files/tests/to-atomic-level-msa-fasta-output.csv +683 -683
  14. package/files/tests/to-atomic-level-msa-separator-output.csv +104 -104
  15. package/package.json +9 -9
  16. package/src/analysis/sequence-activity-cliffs.ts +3 -1
  17. package/src/calculations/monomerLevelMols.ts +2 -1
  18. package/src/demo/bio03-atomic-level.ts +1 -1
  19. package/src/package-test.ts +6 -1
  20. package/src/package.ts +151 -38
  21. package/src/tests/WebLogo-positions-test.ts +1 -1
  22. package/src/tests/bio-tests.ts +1 -1
  23. package/src/tests/detectors-tests.ts +10 -10
  24. package/src/tests/monomer-libraries-tests.ts +1 -1
  25. package/src/tests/seq-handler-splitted-tests.ts +6 -2
  26. package/src/tests/splitters-test.ts +6 -6
  27. package/src/tests/to-atomic-level-tests.ts +21 -14
  28. package/src/tests/to-atomic-level-ui-tests.ts +75 -35
  29. package/src/tests/utils.ts +2 -2
  30. package/src/utils/cell-renderer-custom.ts +62 -0
  31. package/src/utils/cell-renderer.ts +58 -126
  32. package/src/utils/cyclized.ts +28 -14
  33. package/src/utils/dimerized.ts +0 -2
  34. package/src/utils/helm-to-molfile/converter/converter.ts +75 -54
  35. package/src/utils/helm-to-molfile/converter/monomer-wrapper.ts +2 -2
  36. package/src/utils/helm-to-molfile/converter/polymer.ts +23 -16
  37. package/src/utils/helm-to-molfile/converter/types.ts +0 -10
  38. package/src/utils/helm-to-molfile/utils.ts +10 -7
  39. package/src/utils/monomer-cell-renderer.ts +8 -4
  40. package/src/utils/monomer-lib/lib-manager.ts +2 -2
  41. package/src/utils/monomer-lib/monomer-colors.ts +68 -0
  42. package/src/utils/monomer-lib/monomer-lib-base.ts +165 -0
  43. package/src/utils/monomer-lib/monomer-lib.ts +19 -68
  44. package/src/utils/monomer-lib/web-editor-monomer-dummy.ts +121 -0
  45. package/src/utils/monomer-lib/web-editor-monomer-of-library.ts +102 -0
  46. package/src/utils/save-as-fasta.ts +1 -1
  47. package/src/utils/seq-helper/seq-helper.ts +20 -49
  48. package/src/utils/sequence-to-mol.ts +24 -28
  49. package/src/viewers/web-logo-viewer.ts +2 -1
  50. package/src/widgets/composition-analysis-widget.ts +4 -3
  51. package/src/widgets/representations.ts +8 -10
  52. package/dist/248.js +0 -2
  53. package/dist/248.js.map +0 -1
  54. package/src/utils/cell-renderer-consts.ts +0 -31
package/src/package.ts CHANGED
@@ -4,8 +4,7 @@ import * as ui from 'datagrok-api/ui';
4
4
  import * as DG from 'datagrok-api/dg';
5
5
 
6
6
  import {Options} from '@datagrok-libraries/utils/src/type-declarations';
7
- import {DimReductionBaseEditor, PreprocessFunctionReturnType}
8
- from '@datagrok-libraries/ml/src/functionEditors/dimensionality-reduction-editor';
7
+ import {DimReductionBaseEditor, PreprocessFunctionReturnType} from '@datagrok-libraries/ml/src/functionEditors/dimensionality-reduction-editor';
9
8
  import {getActivityCliffs} from '@datagrok-libraries/ml/src/viewers/activity-cliffs';
10
9
  import {MmDistanceFunctionsNames} from '@datagrok-libraries/ml/src/macromolecule-distance-functions';
11
10
  import {BitArrayMetrics, KnownMetrics} from '@datagrok-libraries/ml/src/typed-metrics';
@@ -14,50 +13,38 @@ import {SeqHandler, SeqTemps} from '@datagrok-libraries/bio/src/utils/seq-handle
14
13
  import {IMonomerLib, IMonomerSet} from '@datagrok-libraries/bio/src/types';
15
14
  import {SeqPalette} from '@datagrok-libraries/bio/src/seq-palettes';
16
15
  import {FastaFileHandler} from '@datagrok-libraries/bio/src/utils/fasta-handler';
17
- import {_toAtomicLevel} from '@datagrok-libraries/bio/src/monomer-works/to-atomic-level';
18
- import {SCORE, calculateScores} from '@datagrok-libraries/bio/src/utils/macromolecule/scoring';
19
- import {
20
- createJsonMonomerLibFromSdf, IMonomerLibHelper
21
- } from '@datagrok-libraries/bio/src/monomer-works/monomer-utils';
16
+ import {SCORE} from '@datagrok-libraries/bio/src/utils/macromolecule/scoring';
17
+ import {createJsonMonomerLibFromSdf, IMonomerLibHelper} from '@datagrok-libraries/bio/src/monomer-works/monomer-utils';
22
18
  import {errInfo} from '@datagrok-libraries/bio/src/utils/err-info';
23
19
  import {ActivityCliffsEditor} from '@datagrok-libraries/ml/src/functionEditors/activity-cliffs-function-editor';
24
20
  import BitArray from '@datagrok-libraries/utils/src/bit-array';
25
21
  import {BYPASS_LARGE_DATA_WARNING} from '@datagrok-libraries/ml/src/functionEditors/consts';
26
- import {
27
- getEmbeddingColsNames, multiColReduceDimensionality
28
- } from '@datagrok-libraries/ml/src/multi-column-dimensionality-reduction/reduce-dimensionality';
22
+ import {getEmbeddingColsNames, multiColReduceDimensionality} from '@datagrok-libraries/ml/src/multi-column-dimensionality-reduction/reduce-dimensionality';
29
23
  import {DimReductionMethods} from '@datagrok-libraries/ml/src/multi-column-dimensionality-reduction/types';
30
- import {
31
- ITSNEOptions, IUMAPOptions
32
- } from '@datagrok-libraries/ml/src/multi-column-dimensionality-reduction/multi-column-dim-reducer';
24
+ import {ITSNEOptions, IUMAPOptions} from '@datagrok-libraries/ml/src/multi-column-dimensionality-reduction/multi-column-dim-reducer';
33
25
  import {generateLongSequence, generateLongSequence2} from '@datagrok-libraries/bio/src/utils/generator';
34
26
  import {getUserLibSettings, setUserLibSettings} from '@datagrok-libraries/bio/src/monomer-works/lib-settings';
35
27
  import {ISeqHelper} from '@datagrok-libraries/bio/src/utils/seq-helper';
36
28
  import {RDModule} from '@datagrok-libraries/chem-meta/src/rdkit-api';
37
29
  import {getRdKitModule} from '@datagrok-libraries/bio/src/chem/rdkit-module';
38
- import {getHelmHelper, IHelmHelper} from '@datagrok-libraries/bio/src/helm/helm-helper';
39
30
 
40
31
  import {getMacromoleculeColumns} from './utils/ui-utils';
41
- import {
42
- MacromoleculeDifferenceCellRenderer, MacromoleculeSequenceCellRenderer,
43
- } from './utils/cell-renderer';
32
+ import {MacromoleculeDifferenceCellRenderer, MacromoleculeSequenceCellRenderer,} from './utils/cell-renderer';
33
+ import {MacromoleculeCustomCellRenderer} from './utils/cell-renderer-custom';
44
34
  import {VdRegionsViewer} from './viewers/vd-regions-viewer';
45
35
  import {SequenceAlignment} from './seq_align';
46
36
  import {getEncodedSeqSpaceCol} from './analysis/sequence-space';
47
- import {
48
- createLinesGrid, createPropPanelElement, createTooltipElement,
49
- } from './analysis/sequence-activity-cliffs';
37
+ import {createLinesGrid, createPropPanelElement, createTooltipElement,} from './analysis/sequence-activity-cliffs';
50
38
  import {SequenceSimilarityViewer} from './analysis/sequence-similarity-viewer';
51
39
  import {SequenceDiversityViewer} from './analysis/sequence-diversity-viewer';
52
- import {MONOMERIC_COL_TAGS, SubstructureSearchDialog, invalidateMols} from './substructure-search/substructure-search';
40
+ import {invalidateMols, MONOMERIC_COL_TAGS, SubstructureSearchDialog} from './substructure-search/substructure-search';
53
41
  import {convert} from './utils/convert';
54
42
  import {getMacromoleculeColumnPropertyPanel} from './widgets/representations';
55
43
  import {saveAsFastaUI} from './utils/save-as-fasta';
56
44
  import {BioSubstructureFilter} from './widgets/bio-substructure-filter';
57
45
  import {WebLogoViewer} from './viewers/web-logo-viewer';
58
46
  import {MonomerLibManager} from './utils/monomer-lib/lib-manager';
59
- import {getMonomerLibraryManagerLink, showManageLibrariesDialog,
60
- showManageLibrariesView} from './utils/monomer-lib/library-file-manager/ui';
47
+ import {getMonomerLibraryManagerLink, showManageLibrariesDialog, showManageLibrariesView} from './utils/monomer-lib/library-file-manager/ui';
61
48
  import {demoBio01UI} from './demo/bio01-similarity-diversity';
62
49
  import {demoBio01aUI} from './demo/bio01a-hierarchical-clustering-and-sequence-space';
63
50
  import {demoBio01bUI} from './demo/bio01b-hierarchical-clustering-and-activity-cliffs';
@@ -115,8 +102,17 @@ export class SeqPaletteCustom implements SeqPalette {
115
102
  let monomerLib: IMonomerLib | null = null;
116
103
  let monomerSets: IMonomerSet | null = null;
117
104
 
105
+ let initBioPromise: Promise<void> | null = null;
106
+
118
107
  //tags: init
119
- export async function initBio() {
108
+ export async function initBio(): Promise<void> {
109
+ if (initBioPromise === null) {
110
+ initBioPromise = initBioInt();
111
+ }
112
+ await initBioPromise;
113
+ }
114
+
115
+ async function initBioInt() {
120
116
  const logPrefix = 'Bio: _package.initBio()';
121
117
  _package.logger.debug(`${logPrefix}, start`);
122
118
  let rdKitModule!: RDModule;
@@ -195,7 +191,7 @@ export function getBioLib(): IMonomerLib {
195
191
  }
196
192
 
197
193
  // For sync internal use, on initialized package
198
- export function getMonomerLib(): IMonomerLib | null {
194
+ export function getMonomerLib(): IMonomerLib {
199
195
  return monomerLib!;
200
196
  }
201
197
 
@@ -326,6 +322,15 @@ export function SeqActivityCliffsEditor(call: DG.FuncCall) {
326
322
 
327
323
  // -- Cell renderers --
328
324
 
325
+ //name: customSequenceCellRenderer
326
+ //tags: cellRenderer
327
+ //meta.cellType: sequence
328
+ //meta.columnTags: quality=Macromolecule, units=custom
329
+ //output: grid_cell_renderer result
330
+ export function customSequenceCellRenderer(): DG.GridCellRenderer {
331
+ return new MacromoleculeCustomCellRenderer();
332
+ }
333
+
329
334
  //name: fastaSequenceCellRenderer
330
335
  //tags: cellRenderer
331
336
  //meta.cellType: sequence
@@ -335,6 +340,15 @@ export function fastaSequenceCellRenderer(): MacromoleculeSequenceCellRenderer {
335
340
  return new MacromoleculeSequenceCellRenderer();
336
341
  }
337
342
 
343
+ //name: separatorSequenceCellRenderer
344
+ //tags: cellRenderer
345
+ //meta.cellType: sequence
346
+ //meta.columnTags: quality=Macromolecule, units=separator
347
+ //output: grid_cell_renderer result
348
+ export function separatorSequenceCellRenderer(): MacromoleculeSequenceCellRenderer {
349
+ return new MacromoleculeSequenceCellRenderer();
350
+ }
351
+
338
352
  // -- Property panels --
339
353
 
340
354
  //name: Bioinformatics | Sequence Renderer
@@ -353,15 +367,6 @@ export function compositionAnalysisWidget(sequence: DG.SemanticValue): DG.Widget
353
367
  return getCompositionAnalysisWidget(sequence);
354
368
  }
355
369
 
356
- //name: separatorSequenceCellRenderer
357
- //tags: cellRenderer
358
- //meta.cellType: sequence
359
- //meta.columnTags: quality=Macromolecule, units=separator
360
- //output: grid_cell_renderer result
361
- export function separatorSequenceCellRenderer(): MacromoleculeSequenceCellRenderer {
362
- return new MacromoleculeSequenceCellRenderer();
363
- }
364
-
365
370
  //name: MacromoleculeDifferenceCellRenderer
366
371
  //tags: cellRenderer
367
372
  //meta.cellType: MacromoleculeDifference
@@ -621,13 +626,17 @@ export async function sequenceSpaceTopMenu(table: DG.DataFrame, molecules: DG.Co
621
626
  //description: Converts sequences to molblocks
622
627
  //input: dataframe table [Input data table]
623
628
  //input: column seqCol {caption: Sequence; semType: Macromolecule}
624
- //input: bool nonlinear =false {caption: Non linear; description: Slower mode for cycling/branching HELM structures}
629
+ //input: bool nonlinear =false {caption: Non-linear; description: Slower mode for cycling/branching HELM structures}
630
+ //input: bool highlight =false {caption: Highlight monomers; description: Highlight monomers' substructures of the molecule }
625
631
  //output:
626
- export async function toAtomicLevel(table: DG.DataFrame, seqCol: DG.Column, nonlinear: boolean): Promise<void> {
632
+ export async function toAtomicLevel(
633
+ table: DG.DataFrame, seqCol: DG.Column, nonlinear: boolean, highlight: boolean = false
634
+ ): Promise<void> {
627
635
  const pi = DG.TaskBarProgressIndicator.create('Converting to atomic level ...');
628
636
  try {
637
+ await initBioPromise;
629
638
  const monomerLib = (await getMonomerLibHelper()).getMonomerLib();
630
- await sequenceToMolfile(table, seqCol, nonlinear, monomerLib);
639
+ await sequenceToMolfile(table, seqCol, nonlinear, highlight, monomerLib, _package.rdKitModule);
631
640
  } finally {
632
641
  pi.close();
633
642
  }
@@ -751,6 +760,31 @@ export function convertDialog() {
751
760
  convert(col);
752
761
  }
753
762
 
763
+ //top-menu: Bio | Convert | TestConvert
764
+ //name: convertSeqNotation
765
+ //description: RDKit-based conversion for SMILES, SMARTS, InChi, Molfile V2000 and Molfile V3000
766
+ //input: string sequence {semType: Macromolecule}
767
+ //input: string targetNotation
768
+ //input: string separator
769
+ //output: string result
770
+ export async function convertSeqNotation(sequence: string, targetNotation: NOTATION, separator?: string): Promise<string | undefined | null> {
771
+ try {
772
+ const col = DG.Column.fromStrings('sequence', [sequence]);
773
+ const df = DG.DataFrame.fromColumns([col]);
774
+ const semType = await grok.functions.call('Bio:detectMacromolecule', {col: col});
775
+ if (semType)
776
+ col.semType = semType;
777
+ const converterSh = SeqHandler.forColumn(col);
778
+ const newColumn = converterSh.convert(targetNotation, separator);
779
+ return newColumn.get(0);
780
+ } catch (err: any) {
781
+ const [errMsg, errStack] = errInfo(err);
782
+ _package.logger.error(errMsg, undefined, errStack);
783
+ throw err;
784
+ }
785
+ }
786
+
787
+
754
788
  //name: monomerCellRenderer
755
789
  //tags: cellRenderer
756
790
  //meta.cellType: Monomer
@@ -1098,6 +1132,76 @@ export async function sdfToJsonLib(table: DG.DataFrame) {
1098
1132
 
1099
1133
  // -- Utils --
1100
1134
 
1135
+ //name: seq2atomic
1136
+ //friendlyName: seq2atomic
1137
+ //description: Converts a `Macromolecule` sequence to its atomic level `Molecule` representation
1138
+ //input: string seq { semType: Macromolecule }
1139
+ //input: bool nonlinear
1140
+ //output: string molfile { semType: Molecule }
1141
+ //meta.role: converter
1142
+ export async function seq2atomic(seq: string, nonlinear: boolean): Promise<string | undefined> {
1143
+ if (!(seq.trim())) return '';
1144
+ try {
1145
+ const seqCol = DG.Column.fromList(DG.COLUMN_TYPE.STRING, `helm`, [seq]);
1146
+ const df = DG.DataFrame.fromColumns([seqCol]);
1147
+ const semType = await grok.functions.call('Bio:detectMacromolecule', {col: seqCol});
1148
+ if (semType) seqCol.semType = semType;
1149
+
1150
+ const monomerLib = (await getMonomerLibHelper()).getMonomerLib();
1151
+ const res = (await sequenceToMolfile(df, seqCol, nonlinear, false, monomerLib, _package.rdKitModule))?.molCol?.get(0);
1152
+ return res ?? undefined;
1153
+ } catch (err: any) {
1154
+ const [errMsg, errStack] = errInfo(err);
1155
+ _package.logger.error(errMsg, undefined, errStack);
1156
+ throw err;
1157
+ }
1158
+ }
1159
+
1160
+ // //description: Gets similarity to a reference sequence
1161
+ // //input: string seq { semType: Macromolecule }
1162
+ // //input: string ref { semType: Macromolecule }
1163
+ // //output: double result
1164
+ // export async function seqSimilarity(seq: string, ref: string): Promise<number> {
1165
+ // // if (!(seq.trim())) return null;
1166
+ // try {
1167
+ // const seqCol = DG.Column.fromList(DG.COLUMN_TYPE.STRING, `seq`, [seq]);
1168
+ // const df = DG.DataFrame.fromColumns([seqCol]);
1169
+ // const semType = await grok.functions.call('Bio:detectMacromolecule', {col: seqCol});
1170
+ // if (semType) seqCol.semType = semType;
1171
+ //
1172
+ // const resCol = await calculateScoresWithEmptyValues(df, seqCol, ref, SCORE.SIMILARITY);
1173
+ // return resCol.get(0)!;
1174
+ // } catch (err: any) {
1175
+ // const [errMsg, errStack] = errInfo(err);
1176
+ // _package.logger.error(errMsg, undefined, errStack);
1177
+ // throw err;
1178
+ // }
1179
+ // }
1180
+
1181
+ //name: seqIdentity
1182
+ //friendlyName: seqIdentity
1183
+ //description: Gets identity to a reference sequence
1184
+ //input: string seq { semType: Macromolecule }
1185
+ //input: string ref { semType: Macromolecule }
1186
+ //output: double result
1187
+ export async function seqIdentity(seq: string, ref: string): Promise<number | null> {
1188
+ if (!(seq.trim())) return null;
1189
+ try {
1190
+ const seqCol = DG.Column.fromList(DG.COLUMN_TYPE.STRING, `seq`, [seq]);
1191
+ const df = DG.DataFrame.fromColumns([seqCol]);
1192
+ const semType = await grok.functions.call('Bio:detectMacromolecule', {col: seqCol});
1193
+ if (!semType) throw new Error('Macromolecule required');
1194
+
1195
+ const resCol = await calculateScoresWithEmptyValues(df, seqCol, ref, SCORE.IDENTITY);
1196
+ return resCol.get(0);
1197
+ } catch (err: any) {
1198
+ const [errMsg, errStack] = errInfo(err);
1199
+ _package.logger.error(errMsg, undefined, errStack);
1200
+ throw err;
1201
+ }
1202
+ }
1203
+
1204
+
1101
1205
  //name: detectMacromoleculeProbe
1102
1206
  //input: file file
1103
1207
  //input: string colName = ''
@@ -1121,7 +1225,7 @@ export async function getSeqHelper(): Promise<ISeqHelper> {
1121
1225
  export function getMolFromHelm(
1122
1226
  df: DG.DataFrame, helmCol: DG.Column<string>, chiralityEngine: boolean
1123
1227
  ): Promise<DG.Column<string>> {
1124
- return getMolColumnFromHelm(df, helmCol, chiralityEngine);
1228
+ return getMolColumnFromHelm(df, helmCol, chiralityEngine, getMonomerLib());
1125
1229
  }
1126
1230
 
1127
1231
  // -- Custom notation providers --
@@ -1130,6 +1234,7 @@ export function getMolFromHelm(
1130
1234
  //input: column col
1131
1235
  //input: string separator
1132
1236
  export function applyNotationProviderForCyclized(col: DG.Column<string>, separator: string) {
1237
+ col.meta.units = NOTATION.CUSTOM;
1133
1238
  col.temp[SeqTemps.notationProvider] = new CyclizedNotationProvider(separator);
1134
1239
  }
1135
1240
 
@@ -1137,5 +1242,13 @@ export function applyNotationProviderForCyclized(col: DG.Column<string>, separat
1137
1242
  //input: column col
1138
1243
  //input: string separator
1139
1244
  export function applyNotationProviderForDimerized(col: DG.Column<string>, separator: string) {
1245
+ col.meta.units = NOTATION.CUSTOM;
1140
1246
  col.temp[SeqTemps.notationProvider] = new DimerizedNotationProvider(separator);
1141
1247
  }
1248
+
1249
+ //name: test1
1250
+ //output: object result
1251
+ export function test1(): any {
1252
+ _package.logger.debug('Bio:test1() function');
1253
+ return {value: 'value1'};
1254
+ }
@@ -4,7 +4,6 @@ import * as DG from 'datagrok-api/dg';
4
4
  import {category, expect, expectArray, test, testEvent} from '@datagrok-libraries/utils/src/test';
5
5
  import {ALPHABET, NOTATION, TAGS as bioTAGS} from '@datagrok-libraries/bio/src/utils/macromolecule';
6
6
  import {SeqHandler} from '@datagrok-libraries/bio/src/utils/seq-handler';
7
- import {GAP_SYMBOL} from '@datagrok-libraries/bio/src/utils/macromolecule/types';
8
7
 
9
8
  import {
10
9
  countForMonomerAtPosition,
@@ -12,6 +11,7 @@ import {
12
11
  PositionMonomerInfo as PMI,
13
12
  WebLogoViewer,
14
13
  } from '../viewers/web-logo-viewer';
14
+ import {GAP_SYMBOL} from '@datagrok-libraries/bio/src/utils/macromolecule/consts';
15
15
 
16
16
  const g: string = GAP_SYMBOL;
17
17
 
@@ -10,7 +10,7 @@ import {Nucleotides, NucleotidesPalettes} from '@datagrok-libraries/bio/src/nucl
10
10
  import {AminoacidsPalettes} from '@datagrok-libraries/bio/src/aminoacids';
11
11
  import {UnknownSeqPalette} from '@datagrok-libraries/bio/src/unknown';
12
12
  import {getStatsForCol} from '@datagrok-libraries/bio/src/utils/macromolecule/utils';
13
- import {GAP_SYMBOL} from '@datagrok-libraries/bio/src/utils/macromolecule/types';
13
+ import {GAP_SYMBOL} from '@datagrok-libraries/bio/src/utils/macromolecule/consts';
14
14
 
15
15
  /** GAP_SYMBOL */
16
16
  const g: string = GAP_SYMBOL;
@@ -147,16 +147,16 @@ YN[Re]VYNR[Ac]WYV
147
147
  },
148
148
  'sepSameLength': {
149
149
  csv: `seq
150
- Ac(1)-A-A-A-A-A-A-A-A-A-A-A-A-A-C(1)-G-NH2
151
- Ac(1)-A-A-A-A-A-A-A-A-A-A-A-A-A-C(1)-G-NH2
152
- Ac(1)-A-A-A-A-A-A-A-A-A-A-A-A-A-C(1)-G-NH2`,
150
+ Aca-A-A-A-A-A-A-A-A-A-A-A-A-A-C-G-NH2
151
+ Aca-A-A-A-A-A-A-A-A-A-A-A-A-A-C-G-NH2
152
+ Aca-A-A-A-A-A-A-A-A-A-A-A-A-A-C-G-NH2`,
153
153
  pos: {'seq': new PosCol(NOTATION.SEPARATOR, ALIGNMENT.SEQ_MSA, ALPHABET.UN, 5, true, '-')}
154
154
  },
155
155
  'sepMsaSameLength': {
156
156
  csv: `seq
157
- Ac(1)-A-A-A-A-A-A-A-A-A-A-A-A-A-C(1)-G-NH2
158
- Ac(1)-A-A(2)-A-A-A-C(2)-A-A-A-A-C(1)-G-NH2
159
- Ac(1)-A-A-A-A-A-A-A-A-A-A-A-A-A-C(1)-G-NH2`,
157
+ Aca-A-A-A-A-A-A-A-A-A-A-A-A-A-Aca-G-NH2
158
+ Aca-A-Aca-A-A-A-meI-A-A-A-A-A-Aca-G-NH2
159
+ Aca-A-A-A-A-A-A-A-A-A-A-A-A-A-Aca-G-NH2`,
160
160
  pos: {'seq': new PosCol(NOTATION.SEPARATOR, ALIGNMENT.SEQ, ALPHABET.UN, 5, true, '-')}
161
161
  },
162
162
  'helmSameLength': {
@@ -237,9 +237,9 @@ m1-mon2-m3-mon4-mon5-Num--MON8-N9-m1-mon2-m3-mon4-mon5-Num--MON8-N9
237
237
 
238
238
  mon1-M-mon3-mon4-mon5---MON8-N9-mon1-M-mon3-mon4-mon5---MON8-N9`;
239
239
  [csvTests.sepComplex]: string = `seq
240
- Ac(1)-F-K(AEEA-AEEA-R-Ac)-L-mF-V-Y-mNle-D-W-N-mF-C(1)-G-NH2
241
- Ac(1)-F-K(AEEA-ARRA-W-Ac)-L-mF-V-Y-mNle-D-W-N-mF-C(1)-G-NH2
242
- Ac(1)-F-K(AEEA-AEEA-Ac)-L-mF-V-Y-mNle-D-W-N-mF-C(1)-G-NH2`;
240
+ Aca-F-K(AEEA-AEEA-R-Ac)-L-mF-V-Y-mNle-D-W-N-mF-Aca-G-NH2
241
+ Aca-F-K(AEEA-ARRA-W-Ac)-L-mF-V-Y-mNle-D-W-N-mF-Aca-G-NH2
242
+ Aca-F-K(AEEA-AEEA-Ac)-L-mF-V-Y-mNle-D-W-N-mF-Aca-G-NH2`;
243
243
  [csvTests.fastaMsaDna1]: string = `seq
244
244
  AC-GT-CTAC-GT-CT
245
245
  CAC-T-GTCAC-T-GT
@@ -406,7 +406,7 @@ MWRSWY-CKHPMWRSWY-CKHP`;
406
406
 
407
407
  test('SepComplex', async () => {
408
408
  await _testPos(readCsv(csvTests.sepComplex), 'seq',
409
- NOTATION.SEPARATOR, ALIGNMENT.SEQ, ALPHABET.UN, 18, true);
409
+ NOTATION.SEPARATOR, ALIGNMENT.SEQ, ALPHABET.UN, 17, true);
410
410
  });
411
411
 
412
412
  test('samplesFastaCsv', async () => {
@@ -27,7 +27,7 @@ category('monomerLibraries', () => {
27
27
  test('default', async () => {
28
28
  // Clear settings to test default
29
29
  await setUserLibSettings({exclude: [], explicit: [], duplicateMonomerPreferences: {}});
30
- await monomerLibHelper.loadLibraries(true); // test defaultLib
30
+ await monomerLibHelper.loadMonomerLib(true); // test defaultLib
31
31
 
32
32
  // Currently default monomer lib set is of all files at LIB_PATH (at least HELMCoreLibrary.json)
33
33
  const currentMonomerLib = monomerLibHelper.getMonomerLib();
@@ -4,8 +4,9 @@ import * as DG from 'datagrok-api/dg';
4
4
  import wu from 'wu';
5
5
 
6
6
  import {category, expect, expectArray, test} from '@datagrok-libraries/utils/src/test';
7
- import {GapOriginals, SeqHandler} from '@datagrok-libraries/bio/src/utils/seq-handler';
7
+ import {SeqHandler} from '@datagrok-libraries/bio/src/utils/seq-handler';
8
8
  import {NOTATION} from '@datagrok-libraries/bio/src/utils/macromolecule';
9
+ import {GapOriginals} from '@datagrok-libraries/bio/src/utils/macromolecule/consts';
9
10
 
10
11
  enum Tests {
11
12
  fasta = 'fasta',
@@ -135,7 +136,10 @@ PEPTIDE1{meI.hHis.Aca.Cys_SEt.T.dK.Thr_PO3H2.T.dK.Thr_PO3H2}$$$$`
135
136
  expect(sh.separator === testData.tgt.separator, true);
136
137
 
137
138
  const resSplitted: string[][] = wu.count(0).take(sh.length)
138
- .map((rowIdx) => wu(sh.getSplitted(rowIdx).originals).toArray()).toArray();
139
+ .map((rowIdx) => {
140
+ const seqSS = sh.getSplitted(rowIdx);
141
+ return wu.count(0).take(seqSS.length).map((posIdx) => seqSS.getOriginal(posIdx)).toArray();
142
+ }).toArray();
139
143
  expectArray(resSplitted, testData.tgt.splitted);
140
144
  });
141
145
  }
@@ -144,13 +144,13 @@ PEPTIDE1{hHis.Aca.Cys_SEt}$$$,5.72388
144
144
  });
145
145
 
146
146
  export async function _testFastaSplitter(src: string, tgt: string[]) {
147
- const res: ISeqSplitted = splitterAsFasta(src);
148
- console.debug(`Bio: tests: splitters: src=${JSON.stringify(src)}, res=${JSON.stringify(res)} .`);
149
- expectArray(wu(res.originals).toArray(), tgt);
147
+ const resSS: ISeqSplitted = splitterAsFasta(src);
148
+ console.debug(`Bio: tests: splitters: src=${JSON.stringify(src)}, res=${JSON.stringify(resSS)} .`);
149
+ expectArray(wu.count(0).take(resSS.length).map((p) => resSS.getOriginal(p)).toArray(), tgt);
150
150
  }
151
151
 
152
152
  export async function _testHelmSplitter(src: string, tgt: string[]) {
153
- const res: ISeqSplitted = splitterAsHelm(src);
154
- console.debug(`Bio: tests: splitters: src=${JSON.stringify(src)}, res=${JSON.stringify(res)} .`);
155
- expectArray(wu(res.originals).toArray(), tgt);
153
+ const resSS: ISeqSplitted = splitterAsHelm(src);
154
+ console.debug(`Bio: tests: splitters: src=${JSON.stringify(src)}, res=${JSON.stringify(resSS)} .`);
155
+ expectArray(wu.count(0).take(resSS.length).map((p) => resSS.getOriginal(p)).toArray(), tgt);
156
156
  }
@@ -6,6 +6,7 @@ import * as DG from 'datagrok-api/dg';
6
6
  import wu from 'wu';
7
7
 
8
8
  import {before, after, category, test, expectArray, expect} from '@datagrok-libraries/utils/src/test';
9
+ import {RDModule} from '@datagrok-libraries/chem-meta/src/rdkit-api';
9
10
  import {_toAtomicLevel} from '@datagrok-libraries/bio/src/monomer-works/to-atomic-level';
10
11
  import {IMonomerLib} from '@datagrok-libraries/bio/src/types';
11
12
  import {ALPHABET, NOTATION, TAGS as bioTAGS} from '@datagrok-libraries/bio/src/utils/macromolecule';
@@ -15,10 +16,9 @@ import {
15
16
  } from '@datagrok-libraries/bio/src/monomer-works/lib-settings';
16
17
  import {UserLibSettings} from '@datagrok-libraries/bio/src/monomer-works/types';
17
18
  import {SeqHandler} from '@datagrok-libraries/bio/src/utils/seq-handler';
19
+ import {getRdKitModule} from '@datagrok-libraries/bio/src/chem/rdkit-module';
18
20
 
19
- import {toAtomicLevel} from '../package';
20
21
  import {_package} from '../package-test';
21
- import {getRdKitModule} from '@datagrok-libraries/bio/src/chem/rdkit-module';
22
22
 
23
23
  const appPath = 'System:AppData/Bio';
24
24
  const fileSource = new DG.FileSource(appPath);
@@ -60,13 +60,20 @@ category('toAtomicLevel', async () => {
60
60
  /** Backup actual user's monomer libraries settings */
61
61
  let userLibSettings: UserLibSettings;
62
62
 
63
+ let monomerLib: IMonomerLib;
64
+ let rdKitModule: RDModule;
65
+
63
66
  before(async () => {
67
+ rdKitModule = await getRdKitModule();
64
68
  monomerLibHelper = await getMonomerLibHelper();
65
69
  userLibSettings = await getUserLibSettings();
66
70
  // Clear settings to test default
67
71
  await setUserLibSettingsForTests();
72
+ await monomerLibHelper.awaitLoaded();
68
73
  await monomerLibHelper.loadMonomerLib(true);
69
74
 
75
+ monomerLib = monomerLibHelper.getMonomerLib();
76
+
70
77
  for (const [testName, testData] of Object.entries(TestsData)) {
71
78
  const inputPath = testData.inPath;
72
79
 
@@ -83,7 +90,8 @@ category('toAtomicLevel', async () => {
83
90
 
84
91
  async function getTestResult(source: DG.DataFrame, target: DG.DataFrame): Promise<void> {
85
92
  const inputCol = source.getCol(inputColName);
86
- await toAtomicLevel(source, inputCol, false);
93
+ // await toAtomicLevel(source, inputCol, false);
94
+ await grok.functions.call('Bio:toAtomicLevel', {table: source, seqCol: inputCol, nonlinear: false});
87
95
  const obtainedCol = source.getCol(outputColName);
88
96
  const expectedCol = target.getCol(outputColName);
89
97
  const obtainedArray: string[] = wu(obtainedCol.values()).map((mol) => polishMolfile(mol)).toArray();
@@ -212,19 +220,18 @@ PEPTIDE1{Lys_Boc.hHis.Aca.Cys_SEt.T.dK.Thr_PO3H2.Aca.Tyr_PO3H2.Thr_PO3H2.Aca.Tyr
212
220
  const resCol = (await _testToAtomicLevel(srcDf, 'seq', monomerLibHelper))!;
213
221
  expect(polishMolfile(resCol.get(0)), polishMolfile(tgtMol));
214
222
  });
223
+
224
+ async function _testToAtomicLevel(
225
+ df: DG.DataFrame, seqColName: string = 'seq', monomerLibHelper: IMonomerLibHelper
226
+ ): Promise<DG.Column | null> {
227
+ const seqCol: DG.Column<string> = df.getCol(seqColName);
228
+ const res = await _toAtomicLevel(df, seqCol, monomerLib, rdKitModule);
229
+ if (res.warnings.length > 0)
230
+ _package.logger.warning(`_toAtomicLevel() warnings ${res.warnings.join('\n')}`);
231
+ return res.molCol;
232
+ }
215
233
  });
216
234
 
217
- async function _testToAtomicLevel(
218
- df: DG.DataFrame, seqColName: string = 'seq', monomerLibHelper: IMonomerLibHelper
219
- ): Promise<DG.Column | null> {
220
- const rdKitModule = await getRdKitModule();
221
- const seqCol: DG.Column<string> = df.getCol(seqColName);
222
- const monomerLib: IMonomerLib = monomerLibHelper.getMonomerLib();
223
- const res = await _toAtomicLevel(df, seqCol, monomerLib);
224
- if (res.warnings.length > 0)
225
- _package.logger.warning(`_toAtomicLevel() warnings ${res.warnings.join('\n')}`);
226
- return res.col;
227
- }
228
235
 
229
236
  function polishMolfile(mol: string): string {
230
237
  return mol.replaceAll('\r\n', '\n')
@@ -4,28 +4,38 @@ import * as grok from 'datagrok-api/grok';
4
4
  import {after, before, category, expect, expectArray, test} from '@datagrok-libraries/utils/src/test';
5
5
  import {IMonomerLib} from '@datagrok-libraries/bio/src/types';
6
6
  import {sequenceToMolfile} from '../utils/sequence-to-mol';
7
- import {getHelmHelper, IHelmHelper} from '@datagrok-libraries/bio/src/helm/helm-helper';
8
7
  import {getMonomerLibHelper, IMonomerLibHelper} from '@datagrok-libraries/bio/src/monomer-works/monomer-utils';
9
8
  import {getUserLibSettings, setUserLibSettings, setUserLibSettingsForTests} from '@datagrok-libraries/bio/src/monomer-works/lib-settings';
10
9
  import {UserLibSettings} from '@datagrok-libraries/bio/src/monomer-works/types';
11
10
 
12
- import {ConverterFunc} from './types';
13
- import {_package} from '../package';
11
+ import {NOTATION} from '@datagrok-libraries/bio/src/utils/macromolecule';
12
+ import {getRdKitModule} from '@datagrok-libraries/bio/src/chem/rdkit-module';
13
+ import {RDModule} from '@datagrok-libraries/chem-meta/src/rdkit-api';
14
+
15
+ type TestDataTargetType = { atomCount: number, bondCount: number };
16
+ type TestDataType = {
17
+ src: { seq: string, units: NOTATION },
18
+ tgt: TestDataTargetType,
19
+ };
14
20
 
15
21
  category('toAtomicLevel-ui', () => {
16
22
 
17
23
  let monomerLibHelper: IMonomerLibHelper;
18
24
  let userLibSettings: UserLibSettings;
19
- let helmHelper: IHelmHelper;
25
+ let monomerLib: IMonomerLib;
26
+ let rdKitModule: RDModule;
20
27
 
21
28
  before(async () => {
22
- helmHelper = await getHelmHelper(); // init Helm package
29
+ rdKitModule = await getRdKitModule();
23
30
  monomerLibHelper = await getMonomerLibHelper();
24
31
  userLibSettings = await getUserLibSettings();
25
32
 
26
33
  // Test 'helm' requires default monomer library loaded
27
34
  await setUserLibSettingsForTests();
35
+ await monomerLibHelper.awaitLoaded();
28
36
  await monomerLibHelper.loadMonomerLib(true); // load default libraries
37
+
38
+ monomerLib = monomerLibHelper.getMonomerLib();
29
39
  });
30
40
 
31
41
  after(async () => {
@@ -33,42 +43,72 @@ category('toAtomicLevel-ui', () => {
33
43
  await setUserLibSettings(userLibSettings);
34
44
  await monomerLibHelper.loadMonomerLib(true); // load user settings libraries
35
45
  });
36
- const fastaCsv = `seq
37
- MDYKETLLMPKTDFPMRGGLPNKEPQIQEKW
38
- MIEVFLFGIVLGLIPITLAGLFVTAYLQYRRGDQLDL
39
- MMELVLKTIIGPIVVGVVLRIVDKWLNKDK
40
- `;
41
- const helmCsv = `seq
42
- PEPTIDE1{meI.hHis.Aca.N.T.dK.Thr_PO3H2.Aca.D-Tyr_Et.Aze.dV.E.N.dV.Phe_4Me}$$$$
43
- PEPTIDE1{meI.hHis.Aca.N.T.dK.Thr_PO3H2.Aca.meM.D-Chg.dV.E.N.D-Orn.D-aThr.Phe_4Me}$$$$
44
- PEPTIDE1{meI.Aca.N.T.dK.Thr_PO3H2.Aca.D-Tyr_Et.Tyr_ab-dehydroMe.dV.D-Cit.N.D-Orn.D-aThr.Phe_4Me}$$$$
45
- `;
46
46
 
47
- test('toAtomicLevel-fasta-linear', async () => {
48
- const df = DG.DataFrame.fromCsv(fastaCsv);
49
- await grok.data.detectSemanticTypes(df);
50
- const seqCol = df.getCol('seq');
51
- await _testToAtomicLevelFunc(df, seqCol, false);
52
- });
47
+ const tests: { [testName: string]: TestDataType } = {
48
+ 'fasta': {
49
+ src: {seq: 'MDYKETLLMPK', units: NOTATION.FASTA,},
50
+ tgt: {atomCount: 94, bondCount: 95,},
51
+ },
52
+ 'fasta-with-gap': {
53
+ src: {seq: 'MD-YKETLLMPK', units: NOTATION.FASTA,},
54
+ tgt: {atomCount: 94, bondCount: 95,},
55
+ },
56
+ 'helm': {
57
+ src: {seq: 'PEPTIDE1{meI.hHis.Aca.N.T.dK.Thr_PO3H2}$$$$', units: NOTATION.HELM,},
58
+ tgt: {atomCount: 68, bondCount: 68,},
59
+ },
60
+ 'helm-with-gap': {
61
+ src: {seq: 'PEPTIDE1{meI.hHis.*.Aca.N.T.dK.Thr_PO3H2}$$$$', units: NOTATION.HELM,},
62
+ tgt: {atomCount: 68, bondCount: 68,},
63
+ },
64
+ };
53
65
 
54
- test('toAtomicLevel-fasta-nonlinear', async () => {
55
- const df = DG.DataFrame.fromCsv(fastaCsv);
56
- await grok.data.detectSemanticTypes(df);
57
- const seqCol = df.getCol('seq');
58
- await _testToAtomicLevelFunc(df, seqCol, true);
59
- });
66
+ // const fastaCsv = `seq
67
+ // MDYKETLLMPKTDFPMRGGLPNKEPQIQEKW
68
+ // MIEVFLFGIVLGLIPITLAGLFVTAYLQYRRGDQLDL
69
+ // MMELVLKTIIGPIVVGVVLRIVDKWLNKDK
70
+ // `;
71
+ // const helmCsv = `seq
72
+ // PEPTIDE1{meI.hHis.Aca.N.T.dK.Thr_PO3H2.Aca.D-Tyr_Et.Aze.dV.E.N.dV.Phe_4Me}$$$$
73
+ // PEPTIDE1{meI.hHis.Aca.N.T.dK.Thr_PO3H2.Aca.meM.D-Chg.dV.E.N.D-Orn.D-aThr.Phe_4Me}$$$$
74
+ // PEPTIDE1{meI.Aca.N.T.dK.Thr_PO3H2.Aca.D-Tyr_Et.Tyr_ab-dehydroMe.dV.D-Cit.N.D-Orn.D-aThr.Phe_4Me}$$$$
75
+ // `;
60
76
 
61
- test('toAtomicLevel-helm', async () => {
62
- const df = DG.DataFrame.fromCsv(helmCsv);
77
+ const getSeqCol = async (testData: TestDataType): Promise<DG.Column<string>> => {
78
+ const seq = testData.src.seq;
79
+ const df = DG.DataFrame.fromColumns([DG.Column.fromList(DG.COLUMN_TYPE.STRING, 'seq', [seq])]);
63
80
  await grok.data.detectSemanticTypes(df);
64
- const seqCol = df.getCol('seq');
65
- await _testToAtomicLevelFunc(df, seqCol, true);
66
- });
81
+ return df.getCol('seq');
82
+ };
83
+
84
+ for (const [testName, testData] of Object.entries(tests)) {
85
+ test(`${testName}-linear`, async () => {
86
+ const seqCol = await getSeqCol(testData);
87
+ await _testToAtomicLevelFunc(seqCol, false, testData.tgt);
88
+ });
89
+ }
90
+ for (const [testName, testData] of Object.entries(tests)) {
91
+ test(`${testName}-nonlinear`, async () => {
92
+ const seqCol = await getSeqCol(testData);
93
+ await _testToAtomicLevelFunc(seqCol, true, testData.tgt);
94
+ }, {skipReason: 'To publish HelmHelper.removeGaps dependency'});
95
+ }
67
96
 
68
97
  async function _testToAtomicLevelFunc(
69
- table: DG.DataFrame, seqCol: DG.Column<string>, nonlinear: boolean
98
+ seqCol: DG.Column<string>, nonlinear: boolean, tgt: TestDataTargetType,
70
99
  ): Promise<void> {
71
- const molCol = await sequenceToMolfile(table, seqCol, nonlinear, monomerLibHelper.getMonomerLib());
72
- expect(molCol!.semType, DG.SEMTYPE.MOLECULE);
100
+ const res = (await sequenceToMolfile(seqCol.dataFrame, seqCol, nonlinear, false, monomerLib, rdKitModule))!;
101
+ expect(res.molCol!.semType, DG.SEMTYPE.MOLECULE);
102
+ const resMolStr = res.molCol!.get(0)!;
103
+ const resRdMol = rdKitModule.get_mol(resMolStr);
104
+ expect(resRdMol != null, true, 'No molecule generated');
105
+ try {
106
+ const resAtomCount = resRdMol.get_num_atoms();
107
+ const resBondCount = resRdMol.get_num_bonds();
108
+ expect(resAtomCount, tgt.atomCount);
109
+ expect(resBondCount, tgt.bondCount);
110
+ } finally {
111
+ resRdMol.delete();
112
+ }
73
113
  }
74
114
  });