@datagrok/bio 2.11.2 → 2.11.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/package.json CHANGED
@@ -5,7 +5,7 @@
5
5
  "name": "Leonid Stolbov",
6
6
  "email": "lstolbov@datagrok.ai"
7
7
  },
8
- "version": "2.11.2",
8
+ "version": "2.11.5",
9
9
  "description": "Bioinformatics support (import/export of sequences, conversion, visualization, analysis). [See more](https://github.com/datagrok-ai/public/blob/master/packages/Bio/README.md) for details.",
10
10
  "repository": {
11
11
  "type": "git",
@@ -34,9 +34,9 @@
34
34
  ],
35
35
  "dependencies": {
36
36
  "@biowasm/aioli": "^3.1.0",
37
- "@datagrok-libraries/bio": "^5.39.0",
37
+ "@datagrok-libraries/bio": "^5.39.1",
38
38
  "@datagrok-libraries/chem-meta": "^1.0.1",
39
- "@datagrok-libraries/ml": "^6.3.51",
39
+ "@datagrok-libraries/ml": "^6.3.53",
40
40
  "@datagrok-libraries/tutorials": "^1.3.6",
41
41
  "@datagrok-libraries/utils": "^4.0.17",
42
42
  "cash-dom": "^8.0.0",
@@ -55,21 +55,19 @@ export async function sequenceSpaceByFingerprints(spaceParams: ISequenceSpacePar
55
55
  return result;
56
56
  }
57
57
 
58
- export async function getSequenceSpace(spaceParams: ISequenceSpaceParams,
59
- progressFunc?: (epochNum: number, epochsLength: number, embedding: number[][]) => void
60
- ): Promise<ISequenceSpaceResult> {
61
- const ncUH = UnitsHandler.getOrCreate(spaceParams.seqCol);
62
-
63
- //const distanceFName = ncUH.isMsa() ? MmDistanceFunctionsNames.HAMMING : MmDistanceFunctionsNames.LEVENSHTEIN;
64
- const seqList = spaceParams.seqCol.toList();
65
-
58
+ export async function getEncodedSeqSpaceCol(
59
+ seqCol: DG.Column, similarityMetric: BitArrayMetrics | MmDistanceFunctionsNames
60
+ ): Promise<{seqList:string[], options: {[_:string]: any}}> {
61
+ // encodes sequences using utf characters to also support multichar and non fasta sequences
62
+ const ncUH = UnitsHandler.getOrCreate(seqCol);
63
+ const seqList = seqCol.toList();
66
64
  const splitter = ncUH.getSplitter();
67
65
  const seqColLength = seqList.length;
68
66
  let charCodeCounter = 36;
69
67
  const charCodeMap = new Map<string, string>();
70
68
  for (let i = 0; i < seqColLength; i++) {
71
69
  const seq = seqList[i];
72
- if (seqList[i] === null || spaceParams.seqCol.isNone(i)) {
70
+ if (seqList[i] === null || seqCol.isNone(i)) {
73
71
  seqList[i] = null;
74
72
  continue;
75
73
  }
@@ -84,8 +82,8 @@ export async function getSequenceSpace(spaceParams: ISequenceSpaceParams,
84
82
  seqList[i] += charCodeMap.get(char)!;
85
83
  }
86
84
  }
87
-
88
- if (spaceParams.similarityMetric === MmDistanceFunctionsNames.MONOMER_CHEMICAL_DISTANCE) {
85
+ let options = {};
86
+ if (similarityMetric === MmDistanceFunctionsNames.MONOMER_CHEMICAL_DISTANCE) {
89
87
  const monomers = Array.from(charCodeMap.keys());
90
88
  const monomerRes = await calculateMonomerSimilarity(monomers);
91
89
  // the substitution matrix contains similarity, but we need distances
@@ -98,10 +96,34 @@ export async function getSequenceSpace(spaceParams: ISequenceSpaceParams,
98
96
  Object.entries(monomerRes.alphabetIndexes).forEach(([key, value]) => {
99
97
  monomerHashToMatrixMap[charCodeMap.get(key)!] = value;
100
98
  });
101
- spaceParams.options.distanceFnArgs = {scoringMatrix: monomerRes.scoringMatrix,
99
+ // sets distance function args in place.
100
+ options = {scoringMatrix: monomerRes.scoringMatrix,
102
101
  alphabetIndexes: monomerHashToMatrixMap} satisfies mmDistanceFunctionArgs;
103
102
  }
103
+ // else if (similarityMetric === MmDistanceFunctionsNames.NEEDLEMANN_WUNSCH) {
104
+ // const alphabetIndexes: any = {};
105
+ // let i = 0;
106
+ // charCodeMap.forEach((value) => {
107
+ // alphabetIndexes[value] = i;
108
+ // i++;
109
+ // });
110
+ // options = {alphabetIndexes};
111
+ // }
112
+ return {seqList, options};
113
+ }
114
+
115
+ export async function getSequenceSpace(spaceParams: ISequenceSpaceParams,
116
+ progressFunc?: (epochNum: number, epochsLength: number, embedding: number[][]) => void
117
+ ): Promise<ISequenceSpaceResult> {
118
+ const ncUH = UnitsHandler.getOrCreate(spaceParams.seqCol);
119
+ if (ncUH.isHelm())
120
+ return await sequenceSpaceByFingerprints(spaceParams);
121
+
122
+
123
+ const {seqList, options} = await getEncodedSeqSpaceCol(spaceParams.seqCol, spaceParams.similarityMetric);
104
124
 
125
+ spaceParams.options = spaceParams.options ?? {};
126
+ spaceParams.options.distanceFnArgs = options;
105
127
  const sequenceSpaceResult = await reduceDimensinalityWithNormalization(
106
128
  seqList,
107
129
  spaceParams.methodName,
@@ -13,6 +13,7 @@ import {getDendrogramService, IDendrogramService} from '@datagrok-libraries/bio/
13
13
  import {handleError} from './utils';
14
14
  import {DemoScript} from '@datagrok-libraries/tutorials/src/demo-script';
15
15
  import {DimReductionMethods} from '@datagrok-libraries/ml/src/reduce-dimensionality';
16
+ import {MmDistanceFunctionsNames} from '@datagrok-libraries/ml/src/macromolecule-distance-functions';
16
17
 
17
18
  const dataFn: string = 'data/sample_FASTA_PT_activity.csv';
18
19
 
@@ -53,7 +54,7 @@ export async function demoBio01bUI() {
53
54
  .step('Find activity cliffs', async () => {
54
55
  activityCliffsViewer = (await activityCliffs(
55
56
  df, df.getCol('Sequence'), df.getCol('Activity'),
56
- 80, dimRedMethod)) as DG.ScatterPlotViewer;
57
+ 80, dimRedMethod, MmDistanceFunctionsNames.LEVENSHTEIN)) as DG.ScatterPlotViewer;
57
58
  view.dockManager.dock(activityCliffsViewer, DG.DOCK_TYPE.RIGHT, null, 'Activity Cliffs', 0.35);
58
59
 
59
60
  // Show grid viewer with the cliffs
package/src/package.ts CHANGED
@@ -7,15 +7,14 @@ import * as DG from 'datagrok-api/dg';
7
7
  import {delay} from '@datagrok-libraries/utils/src/test';
8
8
  import {removeEmptyStringRows} from '@datagrok-libraries/utils/src/dataframe-utils';
9
9
  import {Options} from '@datagrok-libraries/utils/src/type-declarations';
10
- import {RDMol} from '@datagrok-libraries/chem-meta/src/rdkit-api';
11
10
  import {DimReductionMethods, ITSNEOptions, IUMAPOptions} from '@datagrok-libraries/ml/src/reduce-dimensionality';
12
11
  import {SequenceSpaceFunctionEditor} from '@datagrok-libraries/ml/src/functionEditors/seq-space-editor';
13
12
  import {ActivityCliffsFunctionEditor} from '@datagrok-libraries/ml/src/functionEditors/activity-cliffs-editor';
14
13
  import {
15
- ISequenceSpaceParams, getActivityCliffs, SequenceSpaceFunc
14
+ ISequenceSpaceParams, getActivityCliffs, SequenceSpaceFunc, CLIFFS_COL_ENCODE_FN
16
15
  } from '@datagrok-libraries/ml/src/viewers/activity-cliffs';
17
16
  import {MmDistanceFunctionsNames} from '@datagrok-libraries/ml/src/macromolecule-distance-functions';
18
- import {BitArrayMetrics, BitArrayMetricsNames} from '@datagrok-libraries/ml/src/typed-metrics';
17
+ import {BitArrayMetrics} from '@datagrok-libraries/ml/src/typed-metrics';
19
18
  import {
20
19
  TAGS as bioTAGS, ALPHABET, NOTATION,
21
20
  } from '@datagrok-libraries/bio/src/utils/macromolecule';
@@ -36,14 +35,14 @@ import {
36
35
  import {VdRegionsViewer} from './viewers/vd-regions-viewer';
37
36
  import {SequenceAlignment} from './seq_align';
38
37
  import {
39
- ISequenceSpaceResult, getEmbeddingColsNames, getSequenceSpace, sequenceSpaceByFingerprints
38
+ ISequenceSpaceResult, getEmbeddingColsNames, getEncodedSeqSpaceCol, getSequenceSpace, sequenceSpaceByFingerprints
40
39
  } from './analysis/sequence-space';
41
40
  import {
42
41
  createLinesGrid, createPropPanelElement, createTooltipElement, getChemSimilaritiesMatrix,
43
42
  } from './analysis/sequence-activity-cliffs';
44
43
  import {SequenceSimilarityViewer} from './analysis/sequence-similarity-viewer';
45
44
  import {SequenceDiversityViewer} from './analysis/sequence-diversity-viewer';
46
- import {SubstructureSearchDialog} from './substructure-search/substructure-search';
45
+ import {MONOMERIC_COL_TAGS, SubstructureSearchDialog, invalidateMols} from './substructure-search/substructure-search';
47
46
  import {convert} from './utils/convert';
48
47
  import {getMacromoleculeColumnPropertyPanel} from './widgets/representations';
49
48
  import {saveAsFastaUI} from './utils/save-as-fasta';
@@ -51,9 +50,6 @@ import {BioSubstructureFilter} from './widgets/bio-substructure-filter';
51
50
  import {WebLogoViewer} from './viewers/web-logo-viewer';
52
51
  import {
53
52
  MonomerLibHelper,
54
- getUserLibSettings,
55
- setUserLibSetting,
56
- getLibFileNameList,
57
53
  getLibraryPanelUI
58
54
  } from './utils/monomer-lib';
59
55
  import {demoBio01UI} from './demo/bio01-similarity-diversity';
@@ -72,18 +68,18 @@ import {PackageSettingsEditorWidget} from './widgets/package-settings-editor-wid
72
68
  import {getCompositionAnalysisWidget} from './widgets/composition-analysis-widget';
73
69
  import {MacromoleculeColumnWidget} from './utils/macromolecule-column-widget';
74
70
  import {addCopyMenuUI} from './utils/context-menu';
75
- import {getPolyToolDialog} from './utils/poly-tool/enumerator-tools';
71
+ import {getPolyToolDialog} from './utils/poly-tool/ui';
76
72
  import {_setPeptideColumn} from './utils/poly-tool/utils';
77
73
  import {getRegionDo} from './utils/get-region';
78
74
  import {GetRegionApp} from './apps/get-region-app';
79
75
  import {GetRegionFuncEditor} from './utils/get-region-func-editor';
80
- import {HelmToMolfileConverter} from './utils/helm-to-molfile';
81
76
  import {sequenceToMolfile} from './utils/sequence-to-mol';
82
77
  import {errInfo} from './utils/err-info';
83
78
 
84
79
  import {SHOW_SCATTERPLOT_PROGRESS} from '@datagrok-libraries/ml/src/functionEditors/seq-space-base-editor';
85
80
  import {DIMENSIONALITY_REDUCER_TERMINATE_EVENT}
86
81
  from '@datagrok-libraries/ml/src/workers/dimensionality-reducing-worker-creator';
82
+ import BitArray from '@datagrok-libraries/utils/src/bit-array';
87
83
 
88
84
  export const _package = new BioPackage();
89
85
 
@@ -394,12 +390,14 @@ export async function getRegionTopMenu(
394
390
  //input: column activities
395
391
  //input: double similarity = 80 [Similarity cutoff]
396
392
  //input: string methodName { choices:["UMAP", "t-SNE"] }
393
+ //input: string similarityMetric { choices:["Hamming", "Levenshtein", "Monomer chemical distance"] }
397
394
  //input: object options {optional: true}
398
395
  //output: viewer result
399
396
  //editor: Bio:SeqActivityCliffsEditor
400
397
  export async function activityCliffs(df: DG.DataFrame, macroMolecule: DG.Column<string>, activities: DG.Column,
401
- similarity: number, methodName: DimReductionMethods, options?: (IUMAPOptions | ITSNEOptions) & Options,
402
- ): Promise<DG.Viewer | undefined> {
398
+ similarity: number, methodName: DimReductionMethods,
399
+ similarityMetric: MmDistanceFunctionsNames | BitArrayMetrics,
400
+ options?: (IUMAPOptions | ITSNEOptions) & Options): Promise<DG.Viewer | undefined> {
403
401
  if (!checkInputColumnUI(macroMolecule, 'Activity Cliffs'))
404
402
  return;
405
403
  const axesNames = getEmbeddingColsNames(df);
@@ -409,21 +407,27 @@ export async function activityCliffs(df: DG.DataFrame, macroMolecule: DG.Column<
409
407
  'separator': macroMolecule.getTag(bioTAGS.separator),
410
408
  'alphabet': macroMolecule.getTag(bioTAGS.alphabet),
411
409
  };
410
+ let cliffsEncodeFunction: (seqCol: DG.Column, similarityMetric: MmDistanceFunctionsNames | BitArrayMetrics) => any =
411
+ getEncodedSeqSpaceCol;
412
412
  const ncUH = UnitsHandler.getOrCreate(macroMolecule);
413
- let columnDistanceMetric: BitArrayMetrics | MmDistanceFunctionsNames = BitArrayMetricsNames.Tanimoto;
414
- let seqCol = macroMolecule;
415
- let sequenceSpaceFunc: SequenceSpaceFunc = sequenceSpaceByFingerprints;
416
- if (ncUH.isFasta() || (ncUH.isSeparator() && ncUH.alphabet && ncUH.alphabet !== ALPHABET.UN)) {
417
- if (ncUH.isFasta()) {
418
- columnDistanceMetric = ncUH.getDistanceFunctionName();
419
- } else {
420
- seqCol = ncUH.convert(NOTATION.FASTA);
421
- const uh = UnitsHandler.getOrCreate(seqCol);
422
- columnDistanceMetric = uh.getDistanceFunctionName();
423
- tags.units = NOTATION.FASTA;
424
- }
425
- sequenceSpaceFunc = getSequenceSpace;
413
+ const columnDistanceMetric: MmDistanceFunctionsNames | BitArrayMetrics = similarityMetric;
414
+ const seqCol = macroMolecule;
415
+
416
+ let sequenceSpaceFunc: SequenceSpaceFunc = getSequenceSpace;
417
+ if (ncUH.isHelm()) {
418
+ sequenceSpaceFunc = sequenceSpaceByFingerprints;
419
+ cliffsEncodeFunction = async (seqCol: DG.Column, similarityMetric: MmDistanceFunctionsNames | BitArrayMetrics) => {
420
+ await invalidateMols(seqCol, false);
421
+ const molecularCol = seqCol.temp[MONOMERIC_COL_TAGS.MONOMERIC_MOLS];
422
+ const fingerPrints: DG.Column =
423
+ await grok.functions.call('Chem:getMorganFingerprints', {molColumn: molecularCol});
424
+ const fingerPrintsBitArray = fingerPrints.toList().map((f: DG.BitSet) =>
425
+ BitArray.fromUint32Array(f.length, new Uint32Array(f.getBuffer().buffer)));
426
+ return {seqList: fingerPrintsBitArray, options: {}};
427
+ };
426
428
  }
429
+
430
+
427
431
  const runCliffs = async () => {
428
432
  const sp = await getActivityCliffs(
429
433
  df,
@@ -442,25 +446,26 @@ export async function activityCliffs(df: DG.DataFrame, macroMolecule: DG.Column<
442
446
  createTooltipElement,
443
447
  createPropPanelElement,
444
448
  createLinesGrid,
445
- options);
449
+ {...(options ?? {}), [CLIFFS_COL_ENCODE_FN]: cliffsEncodeFunction});
446
450
  return sp;
447
451
  };
448
452
 
449
- const allowedRowCount = 20000;
450
- const fastRowCount = methodName === DimReductionMethods.UMAP ? 5000 : 2000;
453
+ const allowedRowCount = methodName === DimReductionMethods.UMAP ? 200_000 : 20_000;
454
+ const fastRowCount = methodName === DimReductionMethods.UMAP ? 5_000 : 2_000;
451
455
  if (df.rowCount > allowedRowCount) {
452
456
  grok.shell.warning(`Too many rows, maximum for sequence activity cliffs is ${allowedRowCount}`);
453
457
  return;
454
458
  }
455
459
 
456
- return new Promise<DG.Viewer>((resolve, reject) => {
460
+ return new Promise<DG.Viewer | undefined>((resolve, reject) => {
457
461
  if (df.rowCount > fastRowCount && !options?.[BYPASS_LARGE_DATA_WARNING]) {
458
462
  ui.dialog().add(ui.divText(`Activity cliffs analysis might take several minutes.
459
463
  Do you want to continue?`))
460
464
  .onOK(async () => {
461
- const progressBar = DG.TaskBarProgressIndicator.create(`Running sequence activity cliffs ...`);
462
- runCliffs().then((res) => resolve(res)).catch((err) => reject(err)).finally(() => { progressBar.close();});
465
+ //const progressBar = DG.TaskBarProgressIndicator.create(`Running sequence activity cliffs ...`);
466
+ runCliffs().then((res) => resolve(res)).catch((err) => reject(err)).finally(() => {});
463
467
  })
468
+ .onCancel(() => { resolve(undefined); })
464
469
  .show();
465
470
  } else {
466
471
  runCliffs().then((res) => resolve(res)).catch((err) => reject(err));
@@ -530,7 +535,7 @@ export async function sequenceSpaceTopMenu(
530
535
  methodName: methodName,
531
536
  similarityMetric: similarityMetric,
532
537
  embedAxesNames: embedColsNames,
533
- options: {...options, sparseMatrixThreshold: sparseMatrixThreshold ?? 0.8,
538
+ options: {...options, sparseMatrixThreshold: sparseMatrixThreshold ?? 0.5,
534
539
  usingSparseMatrix: table.rowCount > 20000},
535
540
  };
536
541
 
@@ -1089,10 +1094,10 @@ export async function demoBioHelmMsaSequenceSpace(): Promise<void> {
1089
1094
  await demoBio05UI();
1090
1095
  }
1091
1096
 
1092
- //name: enumeratorColumnChoice
1097
+ //name: polyToolColumnChoice
1093
1098
  //input: dataframe df [Input data table]
1094
1099
  //input: column macroMolecule
1095
- export async function enumeratorColumnChoice(df: DG.DataFrame, macroMolecule: DG.Column): Promise<void> {
1100
+ export async function polyToolColumnChoice(df: DG.DataFrame, macroMolecule: DG.Column): Promise<void> {
1096
1101
  _setPeptideColumn(macroMolecule);
1097
1102
  await grok.data.detectSemanticTypes(df);
1098
1103
  }
@@ -8,6 +8,8 @@ import {_testActivityCliffsOpen} from './activity-cliffs-utils';
8
8
  import {DimReductionMethods} from '@datagrok-libraries/ml/src/reduce-dimensionality';
9
9
 
10
10
  import {_package} from '../package-test';
11
+ import {MmDistanceFunctionsNames} from '@datagrok-libraries/ml/src/macromolecule-distance-functions';
12
+ import {BitArrayMetricsNames} from '@datagrok-libraries/ml/src/typed-metrics';
11
13
 
12
14
 
13
15
  category('activityCliffs', async () => {
@@ -39,7 +41,7 @@ category('activityCliffs', async () => {
39
41
  const cliffsNum = DG.Test.isInBenchmark ? 6 : 3;
40
42
 
41
43
  await _testActivityCliffsOpen(actCliffsDf, DimReductionMethods.UMAP,
42
- 'sequence', 'Activity', 90, cliffsNum);
44
+ 'sequence', 'Activity', 90, cliffsNum, MmDistanceFunctionsNames.LEVENSHTEIN);
43
45
  });
44
46
 
45
47
  test('activityCliffsWithEmptyRows', async () => {
@@ -49,7 +51,7 @@ category('activityCliffs', async () => {
49
51
  viewList.push(actCliffsTableViewWithEmptyRows);
50
52
 
51
53
  await _testActivityCliffsOpen(actCliffsDfWithEmptyRows, DimReductionMethods.UMAP,
52
- 'sequence', 'Activity', 90, 3);
54
+ 'sequence', 'Activity', 90, 3, MmDistanceFunctionsNames.LEVENSHTEIN);
53
55
  });
54
56
 
55
57
  test('Helm', async () => {
@@ -57,6 +59,6 @@ category('activityCliffs', async () => {
57
59
  const view = grok.shell.addTableView(df);
58
60
 
59
61
  await _testActivityCliffsOpen(df, DimReductionMethods.UMAP,
60
- 'HELM', 'Activity', 90, 53);
62
+ 'HELM', 'Activity', 90, 53, BitArrayMetricsNames.Tanimoto);
61
63
  });
62
64
  });
@@ -4,14 +4,17 @@ import * as grok from 'datagrok-api/grok';
4
4
  import {expect} from '@datagrok-libraries/utils/src/test';
5
5
  import {activityCliffs, BYPASS_LARGE_DATA_WARNING} from '../package';
6
6
  import {DimReductionMethods} from '@datagrok-libraries/ml/src/reduce-dimensionality';
7
+ import {MmDistanceFunctionsNames} from '@datagrok-libraries/ml/src/macromolecule-distance-functions';
8
+ import {BitArrayMetrics} from '@datagrok-libraries/ml/src/typed-metrics';
7
9
 
8
10
  export async function _testActivityCliffsOpen(df: DG.DataFrame, drMethod: DimReductionMethods,
9
- seqColName: string, activityColName: string, similarityThr: number, tgtNumberCliffs: number
11
+ seqColName: string, activityColName: string, similarityThr: number, tgtNumberCliffs: number,
12
+ similarityMetric: MmDistanceFunctionsNames | BitArrayMetrics
10
13
  ): Promise<void> {
11
14
  await grok.data.detectSemanticTypes(df);
12
15
  const scatterPlot = await activityCliffs(
13
16
  df, df.getCol(seqColName), df.getCol(activityColName),
14
- similarityThr, drMethod, {[`${BYPASS_LARGE_DATA_WARNING}`]: true});
17
+ similarityThr, drMethod, similarityMetric, {[`${BYPASS_LARGE_DATA_WARNING}`]: true});
15
18
  // const scatterPlot = (await grok.functions.call('Bio:activityCliffs', {
16
19
  // table: df, molecules: df.getCol(colName), activities: df.getCol('Activity'),
17
20
  // similarity: 50, methodName: method
@@ -43,18 +43,29 @@ type PositionInBonds = {
43
43
 
44
44
  /** Translate HELM column into molfile column and append to the dataframe */
45
45
  export async function helm2mol(df: DG.DataFrame, helmCol: DG.Column<string>): Promise<void> {
46
- // const df = await _package.files.readCsv('./samples/helm-to-molfile.csv');
47
- // grok.shell.addTableView(df);
48
- // const helmCol = df.col('HELM');
49
- // if (!helmCol) {
50
- // grok.shell.error('HELM column not found');
51
- // return;
52
- // }
46
+ const molCol = await getMolColumnFromHelm(df, helmCol);
47
+ df.columns.add(molCol, true);
48
+ await grok.data.detectSemanticTypes(df);
49
+ }
50
+
51
+
52
+ /** Translate HELM column into molfile column and append to the dataframe */
53
+ export async function getMolColumnFromHelm(
54
+ df: DG.DataFrame, helmCol: DG.Column<string>
55
+ ): Promise<DG.Column<string>> {
53
56
  const converter = new HelmToMolfileConverter(helmCol, df);
54
57
  const molCol = await converter.convertToRdKitBeautifiedMolfileColumn();
55
58
  molCol.semType = DG.SEMTYPE.MOLECULE;
56
- df.columns.add(molCol, true);
57
- await grok.data.detectSemanticTypes(df);
59
+ return molCol;
60
+ }
61
+
62
+ export async function getSmilesColumnFromHelm(
63
+ df: DG.DataFrame, helmCol: DG.Column<string>
64
+ ): Promise<DG.Column<string>> {
65
+ const converter = new HelmToMolfileConverter(helmCol, df);
66
+ const smilesCol = await converter.convertToSmiles();
67
+ smilesCol.semType = DG.SEMTYPE.MOLECULE;
68
+ return smilesCol;
58
69
  }
59
70
 
60
71
  export class HelmToMolfileConverter {
@@ -62,9 +73,24 @@ export class HelmToMolfileConverter {
62
73
  this.helmColumn = helmColumn;
63
74
  }
64
75
 
65
- async convertToRdKitBeautifiedMolfileColumn(): Promise<DG.Column<string>> {
76
+ async convertToSmiles(): Promise<DG.Column<string>> {
77
+ const smiles = await this.getSmilesList();
78
+ const columnName = this.df.columns.getUnusedName(`smiles(${this.helmColumn.name})`);
79
+ return DG.Column.fromStrings(columnName, smiles.map((molecule) => {
80
+ if (molecule === null)
81
+ return '';
82
+ return molecule;
83
+ }));
84
+ }
85
+
86
+ private async getSmilesList(): Promise<string[]> {
66
87
  const molfilesV2K = (await this.convertToMolfileV2KColumn()).toList();
67
88
  const smiles = molfilesV2K.map((mol) => DG.chem.convert(mol, DG.chem.Notation.MolBlock, DG.chem.Notation.Smiles));
89
+ return smiles;
90
+ }
91
+
92
+ async convertToRdKitBeautifiedMolfileColumn(): Promise<DG.Column<string>> {
93
+ const smiles = await this.getSmilesList();
68
94
  const rdKitModule: RDModule = await grok.functions.call('Chem:getRdKitModule');
69
95
  const beautifiedMols = smiles.map((item) =>{
70
96
  if (item === '')
@@ -75,8 +101,7 @@ export class HelmToMolfileConverter {
75
101
  mol.normalize_depiction(1);
76
102
  mol.straighten_depiction(true);
77
103
  return mol;
78
- }
79
- );
104
+ });
80
105
  const columnName = this.df.columns.getUnusedName(`molfile(${this.helmColumn.name})`);
81
106
  return DG.Column.fromStrings(columnName, beautifiedMols.map((mol) => {
82
107
  if (mol === null)
@@ -8,9 +8,7 @@ import {IMonomerLib, Monomer} from '@datagrok-libraries/bio/src/types/index';
8
8
  import {MolfileHandler} from '@datagrok-libraries/chem-meta/src/parsing-utils/molfile-handler';
9
9
  import {
10
10
  createJsonMonomerLibFromSdf,
11
- getJsonMonomerLibForEnumerator,
12
11
  IMonomerLibHelper,
13
- isValidEnumeratorLib,
14
12
  } from '@datagrok-libraries/bio/src/monomer-works/monomer-utils';
15
13
  import {
16
14
  HELM_REQUIRED_FIELDS as REQ, HELM_OPTIONAL_FIELDS as OPT, HELM_POLYMER_TYPE
@@ -18,12 +16,8 @@ import {
18
16
 
19
17
  import {_package} from '../package';
20
18
 
21
- const _HELM_REQUIRED_FIELDS_ARRAY = [
22
- REQ.SYMBOL, REQ.NAME, REQ.MOLFILE, REQ.AUTHOR, REQ.ID,
23
- REQ.RGROUPS, REQ.SMILES, REQ.POLYMER_TYPE, REQ.MONOMER_TYPE, REQ.CREATE_DATE,
24
- ] as const;
19
+ import {PolyToolMonomerLibHandler} from '@datagrok-libraries/bio/src/utils/poly-tool/monomer-lib-handler';
25
20
 
26
- const _HELM_OPTIONAL_FIELDS_ARRAY = [OPT.NATURAL_ANALOG, OPT.META] as const;
27
21
  // -- Monomer libraries --
28
22
  export const LIB_STORAGE_NAME = 'Libraries';
29
23
  export const LIB_PATH = 'System:AppData/Bio/libraries/';
@@ -291,8 +285,9 @@ export class MonomerLibHelper implements IMonomerLibHelper {
291
285
  }
292
286
  const df = await fileSource.readCsv(fileName);
293
287
  const json = toJson(df);
294
- if (isValidEnumeratorLib(json))
295
- rawLibData = getJsonMonomerLibForEnumerator(json);
288
+ const polyToolMonomerLib = new PolyToolMonomerLibHandler(json);
289
+ if (polyToolMonomerLib.isValid())
290
+ rawLibData = polyToolMonomerLib.getJsonMonomerLib();
296
291
  else
297
292
  throw new Error('Invalid format of CSV monomer lib');
298
293
  } else {
@@ -1,17 +1,14 @@
1
-
2
1
  import * as grok from 'datagrok-api/grok';
3
2
  import * as ui from 'datagrok-api/ui';
4
3
  import * as DG from 'datagrok-api/dg';
5
4
 
6
5
  import {NOTATION} from '@datagrok-libraries/bio/src/utils/macromolecule';
7
6
  import {UnitsHandler} from '@datagrok-libraries/bio/src/utils/units-handler';
8
- import {HELM_POLYMER_TYPE} from '@datagrok-libraries/bio/src/utils/const';
9
- import {MonomerLibHelper} from '../../utils/monomer-lib';
10
7
  import {_package} from '../../package';
11
8
  import {addCommonTags} from './utils';
12
- import * as rxjs from 'rxjs';
13
- import {HELM_WRAPPER, ALL_MONOMERS, CYCLIZATION_TYPE, TRANSFORMATION_TYPE} from './const';
9
+ import {HELM_WRAPPER, ALL_MONOMERS, CYCLIZATION_TYPE} from './const';
14
10
  import {MetaData, ConnectionData} from './types';
11
+ import {getMolColumnFromHelm} from '../helm-to-molfile';
15
12
 
16
13
  abstract class TransformationBase {
17
14
  constructor(helmColumn: DG.Column<string>, meta: MetaData) {
@@ -44,15 +41,21 @@ class TransformationNCys extends TransformationBase {
44
41
  }
45
42
 
46
43
  protected hasTerminals(helm: string): boolean {
47
- if (! helm.includes(this.rightTerminal + HELM_WRAPPER.RIGHT))
48
- return false;
49
- if (this.leftTerminal === ALL_MONOMERS)
50
- return true;
51
- return helm.includes(HELM_WRAPPER.LEFT + this.leftTerminal);
44
+ // if (! helm.includes(this.rightTerminal + HELM_WRAPPER.RIGHT))
45
+ // return false;
46
+ // if (this.leftTerminal === ALL_MONOMERS)
47
+ // return true;
48
+ // return helm.includes(HELM_WRAPPER.LEFT + this.leftTerminal);
49
+ const positions = this.getLinkedPositions(helm);
50
+ return positions.every((el) => el > 0);
52
51
  }
53
52
 
54
53
  protected getLinkedPositions(helm: string): [number, number] {
55
- return [1, getNumberOfMonomers(helm)];
54
+ const seq = helm.replace(HELM_WRAPPER.LEFT, '').replace(HELM_WRAPPER.RIGHT, '');
55
+ const monomers = seq.split('.');
56
+ const start = 0;
57
+ const end = monomers.findIndex((el, idx) => el === this.rightTerminal && idx > start);
58
+ return [start + 1, end + 1];
56
59
  }
57
60
 
58
61
  protected getTransformedHelm(helm: string): string {
@@ -145,133 +148,28 @@ function getHelmCycle(helm: string, source: ConnectionData, target: ConnectionDa
145
148
  );
146
149
  }
147
150
 
148
- async function addTransformedColumn(
149
- molColumn: DG.Column<string>, meta: MetaData
151
+ export async function addTransformedColumn(
152
+ molColumn: DG.Column<string>, meta: MetaData, addHelm: boolean
150
153
  ): Promise<void> {
151
154
  const df = molColumn.dataFrame;
152
155
  const uh = UnitsHandler.getOrCreate(molColumn);
153
156
  const sourceHelmCol = uh.convert(NOTATION.HELM);
154
157
  const pt = PolymerTransformation.getInstance(sourceHelmCol, meta);
155
158
  const targetList = pt.transform();
156
- const colName = df.columns.getUnusedName(`${meta.transformationType}(` + molColumn.name + ')');
157
- const targetHelmCol = DG.Column.fromList('string', colName, targetList);
159
+ const helmColName = df.columns.getUnusedName(`${meta.transformationType}(` + molColumn.name + ')');
160
+ const targetHelmCol = DG.Column.fromList('string', helmColName, targetList);
158
161
 
159
162
  addCommonTags(targetHelmCol);
160
163
  targetHelmCol.setTag('units', NOTATION.HELM);
161
- targetHelmCol.setTag('cell.renderer', 'helm');
162
164
 
163
- df.columns.add(targetHelmCol);
164
- await grok.data.detectSemanticTypes(df);
165
- }
166
-
167
- export function getPolyToolDialog(): DG.Dialog {
168
- function getMonomerList(cyclizationType: CYCLIZATION_TYPE): string[] {
169
- if (cyclizationType === cyclizationTypes[0]) {
170
- return [ALL_MONOMERS].concat(
171
- monomerLib.getMonomerSymbolsByType(HELM_POLYMER_TYPE.PEPTIDE)
172
- );
173
- }
174
- if (cyclizationType === cyclizationTypes[1]) {
175
- return [ALL_MONOMERS].concat(
176
- monomerLib.getMonomerSymbolsByRGroup(3, HELM_POLYMER_TYPE.PEPTIDE)
177
- );
178
- }
179
- return ['C'];
180
- }
165
+ const molCol = await getMolColumnFromHelm(df, targetHelmCol);
166
+ molCol.name = df.columns.getUnusedName(`${meta.transformationType}_molfile(` + molColumn.name + ')');
181
167
 
182
- function updateMonomerList(): void {
183
- if (cyclizationTypeChoice.value === CYCLIZATION_TYPE.NCys) {
184
- monomerList1 = getMonomerList(CYCLIZATION_TYPE.NO);
185
- monomerList2 = getMonomerList(CYCLIZATION_TYPE.NCys);
186
- } else {
187
- monomerList1 = getMonomerList(cyclizationTypeChoice.value as CYCLIZATION_TYPE);
188
- monomerList2 = [...monomerList1];
189
- }
190
-
191
- leftTerminalChoice = ui.choiceInput(
192
- 'R1:', monomerList1[0], monomerList1, () => { onRGroupValueChange.next(); }
193
- );
194
- rightTerminalChoice = ui.choiceInput('R2:', monomerList2[0], monomerList2, () => { onRGroupValueChange.next(); });
195
- onRGroupValueChange.next();
196
- ui.empty(terminalControls);
197
- [leftTerminalChoice, rightTerminalChoice].forEach((el) => { terminalControls.appendChild(el.root); });
168
+ if (addHelm) {
169
+ targetHelmCol.setTag('cell.renderer', 'helm');
170
+ df.columns.add(targetHelmCol);
198
171
  }
172
+ df.columns.add(molCol, true);
199
173
 
200
- const onCyclizationChoice = new rxjs.Subject<string>();
201
- const onRGroupValueChange = new rxjs.Subject<string>();
202
- onCyclizationChoice.subscribe(() => {
203
- meta.cyclizationType = cyclizationTypeChoice.value!;
204
- updateMonomerList();
205
- });
206
- onRGroupValueChange.subscribe(() => {
207
- meta.rightTerminal = rightTerminalChoice.value!;
208
- meta.leftTerminal = leftTerminalChoice.value!;
209
- });
210
-
211
-
212
- const meta = {} as MetaData;
213
- const transformations = [TRANSFORMATION_TYPE.CYCLIZATION];
214
- const transformationChoice = ui.choiceInput(
215
- 'Modification', transformations[0], transformations, () => meta.transformationType = transformationChoice.value!
216
- );
217
-
218
- const cyclizationTypes = [CYCLIZATION_TYPE.NO, CYCLIZATION_TYPE.R3, CYCLIZATION_TYPE.NCys];
219
- const cyclizationTypeChoice = ui.choiceInput(
220
- 'Type', cyclizationTypes[0], cyclizationTypes, () => { onCyclizationChoice.next(); }
221
- );
222
-
223
- const monomerLib = MonomerLibHelper.instance.getBioLib();
224
- let monomerList1: string[] = [];
225
- let monomerList2: string[] = [];
226
- let leftTerminalChoice = ui.choiceInput(
227
- 'R1:', monomerList1[0], monomerList1, () => {
228
- meta.leftTerminal = leftTerminalChoice.value!;
229
- }
230
- );
231
- let rightTerminalChoice = ui.choiceInput('R2:', monomerList2[0], monomerList2, () => {
232
- meta.rightTerminal = rightTerminalChoice.value!;
233
- });
234
- const terminalControls = ui.divV([leftTerminalChoice.root, rightTerminalChoice.root]);
235
-
236
- function updateMeta() {
237
- meta.cyclizationType = cyclizationTypeChoice.value!;
238
- meta.leftTerminal = leftTerminalChoice.value!;
239
- meta.rightTerminal = rightTerminalChoice.value!;
240
- meta.transformationType = transformationChoice.value!;
241
- }
242
-
243
- updateMonomerList();
244
-
245
- updateMeta();
246
-
247
- const targetColumns = grok.shell.t.columns.bySemTypeAll(DG.SEMTYPE.MACROMOLECULE);
248
- if (!targetColumns)
249
- throw new Error('No dataframe with maceomolecule columns open');
250
-
251
-
252
- const targetColumnInput = ui.columnInput(
253
- 'Column', grok.shell.t, targetColumns[0], null,
254
- {filter: (col: DG.Column) => col.semType === DG.SEMTYPE.MACROMOLECULE}
255
- );
256
-
257
- const div = ui.div([
258
- targetColumnInput,
259
- transformationChoice,
260
- cyclizationTypeChoice,
261
- terminalControls,
262
- ]);
263
-
264
- const dialog = ui.dialog('Poly Tool')
265
- .add(div)
266
- .onOK(async () => {
267
- const molCol = targetColumnInput.value;
268
- if (!molCol) {
269
- grok.shell.warning('No marcomolecule column chosen!');
270
- return;
271
- }
272
- addTransformedColumn(molCol!, meta);
273
- }
274
- );
275
-
276
- return dialog;
174
+ await grok.data.detectSemanticTypes(df);
277
175
  }