@datagrok/bio 2.4.19 → 2.4.24

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/package.json CHANGED
@@ -5,7 +5,7 @@
5
5
  "name": "Leonid Stolbov",
6
6
  "email": "lstolbov@datagrok.ai"
7
7
  },
8
- "version": "2.4.19",
8
+ "version": "2.4.24",
9
9
  "description": "Bioinformatics support (import/export of sequences, conversion, visualization, analysis). [See more](https://github.com/datagrok-ai/public/blob/master/packages/Bio/README.md) for details.",
10
10
  "repository": {
11
11
  "type": "git",
@@ -14,10 +14,10 @@
14
14
  },
15
15
  "dependencies": {
16
16
  "@biowasm/aioli": "^3.1.0",
17
- "@datagrok-libraries/bio": "^5.29.3",
17
+ "@datagrok-libraries/bio": "^5.30.0",
18
18
  "@datagrok-libraries/chem-meta": "^1.0.1",
19
19
  "@datagrok-libraries/ml": "^6.3.23",
20
- "@datagrok-libraries/tutorials": "^1.3.1",
20
+ "@datagrok-libraries/tutorials": "^1.3.2",
21
21
  "@datagrok-libraries/utils": "^2.1.3",
22
22
  "cash-dom": "^8.0.0",
23
23
  "css-loader": "^6.7.3",
@@ -9,7 +9,7 @@ import {handleError} from './utils';
9
9
  import {SequenceDiversityViewer} from '../analysis/sequence-diversity-viewer';
10
10
  import {SequenceSimilarityViewer} from '../analysis/sequence-similarity-viewer';
11
11
 
12
- const dataFn: string = 'data/sample_FASTA_DNA.csv';
12
+ const dataFn: string = 'data/sample_FASTA_PT_activity.csv';
13
13
 
14
14
  export async function demoBio01UI() {
15
15
  let view: DG.TableView;
@@ -27,8 +27,12 @@ export async function demoBio01UI() {
27
27
  df = await _package.files.readCsv(dataFn);
28
28
  view = grok.shell.addTableView(df);
29
29
 
30
- view.grid.columns.byName('id')!.width = 0;
31
- view.grid.columns.byName('sequence')!.width = 500;
30
+ view.grid.columns.byName('cluster')!.visible = false;
31
+ view.grid.columns.byName('sequence_id')!.visible = false;
32
+ view.grid.columns.byName('sequence')!.width = 300;
33
+ view.grid.columns.byName('activity')!.visible = false;
34
+ view.grid.columns.byName('is_cliff')!.visible = false;
35
+
32
36
  // TODO: Fix column width
33
37
  }, {
34
38
  description: `Load dataset with macromolecules of 'fasta' notation, 'DNA' alphabet.`,
@@ -11,7 +11,7 @@ import {getDendrogramService, IDendrogramService} from '@datagrok-libraries/bio/
11
11
  import {demoSequenceSpace, handleError} from './utils';
12
12
  import {DemoScript} from '@datagrok-libraries/tutorials/src/demo-script';
13
13
 
14
- const dataFn = 'data/sample_FASTA_DNA.csv';
14
+ const dataFn = 'data/sample_FASTA_PT_activity.csv';
15
15
  const seqColName = 'sequence';
16
16
 
17
17
  export async function demoBio01aUI() {
@@ -21,7 +21,7 @@ export async function demoBio01aUI() {
21
21
  let df: DG.DataFrame;
22
22
  let spViewer: DG.ScatterPlotViewer;
23
23
 
24
- const method: string = 'UMAP';
24
+ const dimRedMethod: string = 'UMAP';
25
25
  const idRows: { [id: number]: number } = {};
26
26
  const embedCols: { [colName: string]: DG.Column<number> } = {};
27
27
 
@@ -38,6 +38,9 @@ export async function demoBio01aUI() {
38
38
  ]);
39
39
  view = grok.shell.addTableView(df);
40
40
  view.grid.props.rowHeight = 22;
41
+ view.grid.columns.byName('cluster')!.visible = false;
42
+ view.grid.columns.byName('sequence')!.width = 200;
43
+ view.grid.columns.byName('is_cliff')!.visible = false;
41
44
 
42
45
  grok.shell.windows.showContextPanel = false;
43
46
  grok.shell.windows.showProperties = false;
@@ -46,7 +49,7 @@ export async function demoBio01aUI() {
46
49
  delay: 2000,
47
50
  })
48
51
  .step('Build sequence space', async () => {
49
- spViewer = await demoSequenceSpace(view, df, seqColName, method);
52
+ spViewer = await demoSequenceSpace(view, df, seqColName, dimRedMethod);
50
53
  }, {
51
54
  description: `Reduce sequence space dimensionality to display on 2D representation.`,
52
55
  delay: 2000
@@ -71,7 +74,10 @@ export async function demoBio01aUI() {
71
74
  delay: 2000,
72
75
  })
73
76
  .step('Select a bunch of sequences', async () => {
74
- df.selection.init((idx: number) => [21, 9, 58].includes(idx));
77
+ const seqIdCol: DG.Column<string> = df.getCol('sequence_id');
78
+ df.selection.init((rowI: number) => {
79
+ return ['c0_seq120', 'c0_seq105', 'c0_seq121', 'c0_seq93'].includes(seqIdCol.get(rowI)!);
80
+ });
75
81
  df.currentRowIdx = 27;
76
82
  }, {
77
83
  description: 'Selecting a group of rows from a data frame to show their similarity and proximity to each other on a viewer..',
@@ -13,7 +13,7 @@ import {getDendrogramService, IDendrogramService} from '@datagrok-libraries/bio/
13
13
  import {handleError} from './utils';
14
14
  import {DemoScript} from '@datagrok-libraries/tutorials/src/demo-script';
15
15
 
16
- const dataFn: string = 'samples/sample_FASTA.csv';
16
+ const dataFn: string = 'data/sample_FASTA_PT_activity.csv';
17
17
 
18
18
  export async function demoBio01bUI() {
19
19
  let treeHelper: ITreeHelper;
@@ -23,7 +23,7 @@ export async function demoBio01bUI() {
23
23
  let view: DG.TableView;
24
24
  let activityCliffsViewer: DG.ScatterPlotViewer;
25
25
 
26
- const method: string = 'UMAP';
26
+ const dimRedMethod: string = 'UMAP';
27
27
  const idRows: { [id: number]: number } = {};
28
28
 
29
29
  try {
@@ -43,10 +43,9 @@ export async function demoBio01bUI() {
43
43
 
44
44
  view = grok.shell.addTableView(df);
45
45
  view.grid.props.rowHeight = 22;
46
- const uniProtKbGCol = view.grid.columns.byName('UniProtKB')!;
47
- uniProtKbGCol.width = 75;
48
- const lengthGCol = view.grid.columns.byName('Length')!;
49
- lengthGCol.width = 0;
46
+ view.grid.columns.byName('cluster')!.visible = false;
47
+ view.grid.columns.byName('sequence')!.width = 300;
48
+ view.grid.columns.byName('is_cliff')!.visible = false;
50
49
  }, {
51
50
  description: 'Load dataset with macromolecules of \'fasta\' notation, \'DNA\' alphabet.',
52
51
  delay: 2000,
@@ -54,7 +53,7 @@ export async function demoBio01bUI() {
54
53
  .step('Find activity cliffs', async () => {
55
54
  activityCliffsViewer = (await activityCliffs(
56
55
  df, df.getCol('Sequence'), df.getCol('Activity'),
57
- 80, method)) as DG.ScatterPlotViewer;
56
+ 80, dimRedMethod)) as DG.ScatterPlotViewer;
58
57
  view.dockManager.dock(activityCliffsViewer, DG.DOCK_TYPE.RIGHT, null, 'Activity Cliffs', 0.35);
59
58
 
60
59
  // Show grid viewer with the cliffs
@@ -86,7 +85,7 @@ export async function demoBio01bUI() {
86
85
  //cliffsDfGrid.dataFrame.currentRowIdx = -1; // reset
87
86
  const cliffsDfGrid: DG.Grid = activityCliffsViewer.dataFrame.temp[acTEMPS.cliffsDfGrid];
88
87
  //cliffsDfGrid.dataFrame.selection.init((i) => i == currentCliffIdx);
89
- cliffsDfGrid.dataFrame.currentRowIdx = 0;
88
+ if (cliffsDfGrid.dataFrame.rowCount > 0) cliffsDfGrid.dataFrame.currentRowIdx = 0;
90
89
  //cliffsDfGrid.dataFrame.selection.set(currentCliffIdx, true, true);
91
90
 
92
91
  // /* workaround to select rows of the cliff */
@@ -22,6 +22,7 @@ export async function demoBio05UI(): Promise<void> {
22
22
 
23
23
  const helmColName: string = 'HELM';
24
24
  const msaHelmColName: string = 'msa(HELM)';
25
+ const dimRedMethod: string = 'UMAP';
25
26
 
26
27
  try {
27
28
  const demoScript = new DemoScript(
@@ -37,7 +38,7 @@ export async function demoBio05UI(): Promise<void> {
37
38
  description: 'Load dataset with macromolecules of \'Helm\' notation.',
38
39
  delay: 2000,
39
40
  })
40
- .step('Align paptides with non-natural aminoacids with PepSeA', async () => {
41
+ .step('Align peptides with non-natural aminoacids with PepSeA', async () => {
41
42
  helmCol = df.getCol(helmColName);
42
43
  const method: string = pepseaMethods[0];
43
44
  const gapOpen: number = 1.53;
@@ -50,9 +51,8 @@ export async function demoBio05UI(): Promise<void> {
50
51
  delay: 2000,
51
52
  })
52
53
  .step('Build sequence space', async () => {
53
- const method: string = 'UMAP';
54
54
  ssViewer = (await sequenceSpaceTopMenu(df, msaHelmCol,
55
- 'UMAP', StringMetricsNames.Levenshtein, true)) as DG.ScatterPlotViewer;
55
+ dimRedMethod, StringMetricsNames.Levenshtein, true)) as DG.ScatterPlotViewer;
56
56
  view.dockManager.dock(ssViewer, DG.DOCK_TYPE.RIGHT, null, 'Sequence Space', 0.35);
57
57
  }, {
58
58
  description: 'Reduce sequence space dimensionality to display on 2D representation.',
package/src/demo/utils.ts CHANGED
@@ -52,18 +52,6 @@ export async function demoSequenceSpace(
52
52
  embedCol.init((rowI) => { return embedColData[rowI]; });
53
53
  }
54
54
 
55
- const rowCount: number = df.rowCount;
56
- const idCol: DG.Column = df.getCol('id');
57
- for (let idRowI = 0; idRowI < rowCount; idRowI++) {
58
- const id = idCol.get(idRowI);
59
- //idRows[id] = idRowI;
60
- }
61
-
62
- for (const embedColName of Object.values(EMBED_COL_NAMES)) {
63
- const embedCol: DG.Column<number> = df.getCol(embedColName);
64
- //embedCols[embedColName] = embedCol;
65
- }
66
-
67
55
  const t3: number = Date.now();
68
56
  _package.logger.debug('MLB: MlbVrSpaceBrowser.buildView(), postprocess reduceDimensionality ' +
69
57
  `ET: ${((t3 - t2) / 1000)} s`);
@@ -22,6 +22,7 @@ import './tests/substructure-filters-tests';
22
22
  import './tests/pepsea-tests';
23
23
  import './tests/viewers';
24
24
  import './tests/units-handler-tests';
25
+ import './tests/to-atomic-level-tests';
25
26
  import './tests/mm-distance-tests';
26
27
 
27
28
  // Tests hanging github CI
package/src/package.ts CHANGED
@@ -9,7 +9,7 @@ import {
9
9
  } from './utils/cell-renderer';
10
10
  import {VdRegionsViewer} from './viewers/vd-regions-viewer';
11
11
  import {SequenceAlignment} from './seq_align';
12
- import {getEmbeddingColsNames, sequenceSpaceByFingerprints} from './analysis/sequence-space';
12
+ import {getEmbeddingColsNames, sequenceSpaceByFingerprints, getSequenceSpace} from './analysis/sequence-space';
13
13
  import {getActivityCliffs} from '@datagrok-libraries/ml/src/viewers/activity-cliffs';
14
14
  import {
15
15
  createLinesGrid,
@@ -290,19 +290,23 @@ export async function activityCliffs(df: DG.DataFrame, macroMolecule: DG.Column,
290
290
  'separator': macroMolecule.getTag(bioTAGS.separator),
291
291
  'alphabet': macroMolecule.getTag(bioTAGS.alphabet),
292
292
  };
293
+ const uh = new UnitsHandler(macroMolecule);
294
+ let columnDistanceMetric = 'Tanimoto';
295
+ if (uh.isFasta())
296
+ columnDistanceMetric = uh.getDistanceFunctionName();
293
297
  const sp = await getActivityCliffs(
294
298
  df,
295
299
  macroMolecule,
296
300
  null,
297
301
  axesNames,
298
- 'Activity cliffs',
302
+ 'Activity cliffs', //scatterTitle
299
303
  activities,
300
304
  similarity,
301
- 'Tanimoto',
305
+ columnDistanceMetric, //similarityMetric
302
306
  methodName,
303
307
  DG.SEMTYPE.MACROMOLECULE,
304
308
  tags,
305
- sequenceSpaceByFingerprints,
309
+ getSequenceSpace,
306
310
  getChemSimilaritiesMatrix,
307
311
  createTooltipElement,
308
312
  createPropPanelElement,
@@ -353,7 +357,7 @@ export async function sequenceSpaceTopMenu(table: DG.DataFrame, macroMolecule: D
353
357
  embedAxesNames: embedColsNames,
354
358
  options: options
355
359
  };
356
- const sequenceSpaceRes = await sequenceSpaceByFingerprints(chemSpaceParams);
360
+ const sequenceSpaceRes = await getSequenceSpace(chemSpaceParams);
357
361
  const embeddings = sequenceSpaceRes.coordinates;
358
362
  for (const col of embeddings) {
359
363
  const listValues = col.toList();
@@ -407,9 +411,15 @@ export async function toAtomicLevel(df: DG.DataFrame, macroMolecule: DG.Column):
407
411
  }
408
412
  if (!checkInputColumnUI(macroMolecule, 'To Atomic Level'))
409
413
  return;
410
- const monomersLibFile = await _package.files.readAsText(HELM_CORE_LIB_FILENAME);
411
- const monomersLibObject: any[] = JSON.parse(monomersLibFile);
412
- await _toAtomicLevel(df, macroMolecule, monomersLibObject);
414
+ const monomerLib: IMonomerLib = (await getMonomerLibHelper()).getBioLib();
415
+ const atomicLevelRes = await _toAtomicLevel(df, macroMolecule, monomerLib);
416
+ if (atomicLevelRes.col !== null) {
417
+ df.columns.add(atomicLevelRes.col, true);
418
+ await grok.data.detectSemanticTypes(df);
419
+ }
420
+
421
+ if (atomicLevelRes.warnings && atomicLevelRes.warnings.length > 0)
422
+ grok.shell.warning(ui.list(atomicLevelRes.warnings));
413
423
  }
414
424
 
415
425
  //top-menu: Bio | Alignment | MSA...
@@ -673,6 +683,7 @@ export function bioSubstructureFilter(): BioSubstructureFilter {
673
683
  //meta.demoPath: Bioinformatics | Similarity, Diversity
674
684
  //description: Sequence similarity tracking and evaluation dataset diversity
675
685
  //meta.path: /apps/Tutorials/Demo/Bioinformatics/Similarity,%20Diversity
686
+ //meta.isDemoScript: True
676
687
  export async function demoBioSimilarityDiversity(): Promise<void> {
677
688
  await demoBio01UI();
678
689
  }
@@ -682,6 +693,7 @@ export async function demoBioSimilarityDiversity(): Promise<void> {
682
693
  //meta.demoPath: Bioinformatics | Sequence Space
683
694
  //description: Exploring sequence space of Macromolecules, comparison with hierarchical clustering results
684
695
  //meta.path: /apps/Tutorials/Demo/Bioinformatics/Sequence%20Space
696
+ //meta.isDemoScript: True
685
697
  export async function demoBioSequenceSpace(): Promise<void> {
686
698
  await demoBio01aUI();
687
699
  }
@@ -691,6 +703,7 @@ export async function demoBioSequenceSpace(): Promise<void> {
691
703
  //meta.demoPath: Bioinformatics | Activity Cliffs
692
704
  //description: Activity Cliffs analysis on Macromolecules data
693
705
  //meta.path: /apps/Tutorials/Demo/Bioinformatics/Activity%20Cliffs
706
+ //meta.isDemoScript: True
694
707
  export async function demoBioActivityCliffs(): Promise<void> {
695
708
  await demoBio01bUI();
696
709
  }
@@ -700,6 +713,7 @@ export async function demoBioActivityCliffs(): Promise<void> {
700
713
  //meta.demoPath: Bioinformatics | Atomic Level
701
714
  //description: Atomic level structure of Macromolecules
702
715
  //meta.path: /apps/Tutorials/Demo/Bioinformatics/Atomic%20Level
716
+ //meta.isDemoScript: True
703
717
  export async function demoBioAtomicLevel(): Promise<void> {
704
718
  await demoBio03UI();
705
719
  }
@@ -709,6 +723,7 @@ export async function demoBioAtomicLevel(): Promise<void> {
709
723
  //meta.demoPath: Bioinformatics | Helm, MSA, Sequence Space
710
724
  //description: MSA and composition analysis on Helm data
711
725
  //meta.path: /apps/Tutorials/Demo/Bioinformatics/Helm,%20MSA,%20Sequence%20Space
726
+ //meta.isDemoScript: True
712
727
  export async function demoBioHelmMsaSequenceSpace(): Promise<void> {
713
728
  await demoBio05UI();
714
729
  }
@@ -53,9 +53,9 @@ Y-N-R-Q-W-Y-V
53
53
  M-K-P-S-E-Y-V
54
54
  `,
55
55
  helmPt: `seq
56
- PEPTIDE1{F.W.P.H.E.Y}$$$
57
- PEPTIDE1{Y.N.R.Q.W.Y.V}$$$
58
- PEPTIDE1{M.K.P.S.E.Y.V}$$$
56
+ PEPTIDE1{F.W.P.H.E.Y}$$$$
57
+ PEPTIDE1{Y.N.R.Q.W.Y.V}$$$$
58
+ PEPTIDE1{M.K.P.S.E.Y.V}$$$$
59
59
  `,
60
60
  fastaDna: `seq
61
61
  ACGTC
@@ -68,9 +68,9 @@ C/A/G/T/G/T
68
68
  T/T/C/A/A/C
69
69
  `,
70
70
  helmDna: `seq
71
- DNA1{D(A)P.D(C)P.D(G)P.D(T)P.D(C)P}$$$
72
- DNA1{D(C)P.D(A)P.D(G)P.D(T)P.D(G)P.D(T)P}$$$
73
- DNA1{D(T)P.D(T)P.D(C)P.D(A)P.D(A)P.D(C)P}$$$
71
+ DNA1{D(A)P.D(C)P.D(G)P.D(T)P.D(C)P}$$$$
72
+ DNA1{D(C)P.D(A)P.D(G)P.D(T)P.D(G)P.D(T)P}$$$$
73
+ DNA1{D(T)P.D(T)P.D(C)P.D(A)P.D(A)P.D(C)P}$$$$
74
74
  `,
75
75
  fastaRna: `seq
76
76
  ACGUC
@@ -83,9 +83,9 @@ C*A*G*U*G*U
83
83
  U*U*C*A*A*C
84
84
  `,
85
85
  helmRna: `seq
86
- RNA1{R(A)P.R(C)P.R(G)P.R(U)P.R(C)P}$$$
87
- RNA1{R(C)P.R(A)P.R(G)P.R(U)P.R(G)P.R(U)P}$$$
88
- RNA1{R(U)P.R(U)P.R(C)P.R(A)P.R(A)P.R(C)P}$$$
86
+ RNA1{R(A)P.R(C)P.R(G)P.R(U)P.R(C)P}$$$$
87
+ RNA1{R(C)P.R(A)P.R(G)P.R(U)P.R(G)P.R(U)P}$$$$
88
+ RNA1{R(U)P.R(U)P.R(C)P.R(A)P.R(A)P.R(C)P}$$$$
89
89
  `,
90
90
  fastaGaps: `seq
91
91
  FW-PH-EYY
@@ -98,9 +98,9 @@ F/Y/N/R/Q/W/Y/V/
98
98
  F/K/P//Q//S/E/Y/V
99
99
  `,
100
100
  helmGaps: `seq
101
- PEPTIDE1{F.W.*.P.H.*.E.Y.Y}$$$
102
- PEPTIDE1{F.Y.N.R.Q.W.Y.V.*}$$$
103
- PEPTIDE1{F.K.P.*.Q.*.S.E.Y.V}$$$
101
+ PEPTIDE1{F.W.*.P.H.*.E.Y.Y}$$$$
102
+ PEPTIDE1{F.Y.N.R.Q.W.Y.V.*}$$$$
103
+ PEPTIDE1{F.K.P.*.Q.*.S.E.Y.V}$$$$
104
104
  `,
105
105
 
106
106
  fastaUn: `seq
@@ -114,24 +114,24 @@ meI-hHis-Aca-Cys_SEt-T-dK-Thr_PO3H2-Aca-Tyr_PO3H2
114
114
  Lys_Boc-hHis-Aca-Cys_SEt-T-dK-Thr_PO3H2-Aca-Tyr_PO3H2
115
115
  `,
116
116
  helmUn: `seq
117
- PEPTIDE1{meI.hHis.Aca.N.T.dE.Thr_PO3H2.Aca.D}$$$
118
- PEPTIDE1{meI.hHis.Aca.Cys_SEt.T.dK.Thr_PO3H2.Aca.Tyr_PO3H2}$$$
119
- PEPTIDE1{Lys_Boc.hHis.Aca.Cys_SEt.T.dK.Thr_PO3H2.Aca.Tyr_PO3H2}$$$
117
+ PEPTIDE1{meI.hHis.Aca.N.T.dE.Thr_PO3H2.Aca.D}$$$$
118
+ PEPTIDE1{meI.hHis.Aca.Cys_SEt.T.dK.Thr_PO3H2.Aca.Tyr_PO3H2}$$$$
119
+ PEPTIDE1{Lys_Boc.hHis.Aca.Cys_SEt.T.dK.Thr_PO3H2.Aca.Tyr_PO3H2}$$$$
120
120
  `,
121
121
  helmLoneDeoxyribose: `seq
122
- DNA1{D(A).D(C).D(G).D(T).D(C)}$$$
123
- DNA1{D(C).D(A).D(G).D(T).D(G).D(T)P}$$$
124
- DNA1{D(T).D(T).D(C).D(A).D(A).D(C)P}$$$
122
+ DNA1{D(A).D(C).D(G).D(T).D(C)}$$$$
123
+ DNA1{D(C).D(A).D(G).D(T).D(G).D(T)P}$$$$
124
+ DNA1{D(T).D(T).D(C).D(A).D(A).D(C)P}$$$$
125
125
  `,
126
126
  helmLoneRibose: `seq
127
- RNA1{R(A).R(C).R(G).R(U).R(C)}$$$
128
- RNA1{R(C).R(A).R(G).R(U).R(G).R(U)P}$$$
129
- RNA1{R(U).R(U).R(C).R(A).R(A).R(C)P}$$$
127
+ RNA1{R(A).R(C).R(G).R(U).R(C)}$$$$
128
+ RNA1{R(C).R(A).R(G).R(U).R(G).R(U)P}$$$$
129
+ RNA1{R(U).R(U).R(C).R(A).R(A).R(C)P}$$$$
130
130
  `,
131
131
  helmLonePhosphorus: `seq
132
- RNA1{P.P.R(A)P.R(C)P.R(G)P.R(U)P.R(C)P}$$$
133
- RNA1{P.P.R(C)P.R(A)P.P.R(G)P.R(U)P.R(G)P.R(U)P}$$$
134
- RNA1{P.R(U)P.R(U)P.R(C)P.R(A)P.R(A)P.R(C)P.P.P}$$$
132
+ RNA1{P.P.R(A)P.R(C)P.R(G)P.R(U)P.R(C)P}$$$$
133
+ RNA1{P.P.R(C)P.R(A)P.P.R(G)P.R(U)P.R(G)P.R(U)P}$$$$
134
+ RNA1{P.R(U)P.R(U)P.R(C)P.R(A)P.R(A)P.R(C)P.P.P}$$$$
135
135
  `,
136
136
  };
137
137
 
@@ -0,0 +1,187 @@
1
+ /* Do not change these import lines to match external modules in webpack configuration */
2
+ import * as grok from 'datagrok-api/grok';
3
+ import * as ui from 'datagrok-api/ui';
4
+ import * as DG from 'datagrok-api/dg';
5
+
6
+ import {before, after, category, test, expectArray} from '@datagrok-libraries/utils/src/test';
7
+
8
+ import {getMonomerLibHelper, toAtomicLevel} from '../package';
9
+ import {_toAtomicLevel} from '@datagrok-libraries/bio/src/monomer-works/to-atomic-level';
10
+ import {IMonomerLib} from '@datagrok-libraries/bio/src/types/index';
11
+ import {IMonomerLibHelper} from '@datagrok-libraries/bio/src/monomer-works/monomer-utils';
12
+ import {LIB_STORAGE_NAME} from '../utils/monomer-lib';
13
+
14
+ const appPath = 'System:AppData/Bio';
15
+ const fileSource = new DG.FileSource(appPath);
16
+
17
+ const testNames: { [k: string]: string } = {
18
+ PT: 'peptides fasta',
19
+ DNA: 'dna fasta',
20
+ MSA: 'msa separator',
21
+ };
22
+
23
+ const inputPath: { [k: string]: string } = {
24
+ PT: 'tests/to-atomic-level-peptides-fasta-input.csv',
25
+ DNA: 'tests/to-atomic-level-dna-fasta-input.csv',
26
+ MSA: 'tests/to-atomic-level-msa-separator-input.csv',
27
+ };
28
+
29
+ const outputPath: { [k: string]: string } = {
30
+ PT: 'tests/to-atomic-level-peptides-output.csv',
31
+ DNA: 'tests/to-atomic-level-dna-output.csv',
32
+ MSA: 'tests/to-atomic-level-msa-output.csv',
33
+ };
34
+
35
+ const inputColName = 'sequence';
36
+ const outputColName = 'molfile(sequence)';
37
+
38
+ category('toAtomicLevel', async () => {
39
+ const sourceDf: { [key: string]: DG.DataFrame } = {};
40
+ const targetDf: { [key: string]: DG.DataFrame } = {};
41
+
42
+ let monomerLibHelper: IMonomerLibHelper;
43
+ /** Backup actual user's monomer libraries settings */
44
+ let userLibrariesSettings: any = null;
45
+
46
+ before(async () => {
47
+ monomerLibHelper = await getMonomerLibHelper();
48
+ userLibrariesSettings = await grok.dapi.userDataStorage.get(LIB_STORAGE_NAME, true);
49
+ // Clear settings to test default
50
+ await grok.dapi.userDataStorage.put(LIB_STORAGE_NAME, {}, true);
51
+ await monomerLibHelper.loadLibraries(true);
52
+
53
+ for (const key in testNames) {
54
+ sourceDf[key] = await fileSource.readCsv(inputPath[key]);
55
+ await grok.data.detectSemanticTypes(sourceDf[key]);
56
+ targetDf[key] = await fileSource.readCsv(outputPath[key]);
57
+ }
58
+ });
59
+
60
+ after(async () => {
61
+ await grok.dapi.userDataStorage.put(LIB_STORAGE_NAME, userLibrariesSettings, true);
62
+ await monomerLibHelper.loadLibraries(true);
63
+ });
64
+
65
+ async function getTestResult(source: DG.DataFrame, target: DG.DataFrame): Promise<void> {
66
+ const inputCol = source.getCol(inputColName);
67
+ await toAtomicLevel(source, inputCol);
68
+ const obtainedCol = source.getCol(outputColName);
69
+ const expectedCol = target.getCol(outputColName);
70
+ const obtainedArray = [...obtainedCol.values()];
71
+ const expectedArray = [...expectedCol.values()];
72
+ expectArray(obtainedArray, expectedArray);
73
+ }
74
+
75
+ for (const key in testNames) {
76
+ test(`${testNames[key]}`, async () => {
77
+ await getTestResult(sourceDf[key], targetDf[key]);
78
+ }, {skipReason: 'GROK-13100'});
79
+ }
80
+
81
+ enum csvTests {
82
+ fastaDna = 'fastaDna',
83
+ fastaRna = 'fastaRna',
84
+ fastaPt = 'fastaPt',
85
+
86
+ separatorDna = 'separatorDna',
87
+ separatorRna = 'separatorRna',
88
+ separatorPt = 'separatorPt',
89
+ separatorUn = 'separatorUn',
90
+
91
+ helm = 'helm',
92
+ }
93
+
94
+ const csvData: { [key in csvTests]: string } = {
95
+ [csvTests.fastaDna]: `seq
96
+ ACGTC
97
+ CAGTGT
98
+ TTCAAC
99
+ `,
100
+ [csvTests.fastaRna]: `seq
101
+ ACGUC
102
+ CAGUGU
103
+ UUCAAC
104
+ `,
105
+ [csvTests.fastaPt]: `seq
106
+ FWPHEY
107
+ YNRQWYV
108
+ MKPSEYV
109
+ `,
110
+ [csvTests.separatorDna]: `seq
111
+ A/C/G/T/C
112
+ C/A/G/T/G/T
113
+ T/T/C/A/A/C
114
+ `,
115
+ [csvTests.separatorRna]: `seq
116
+ A*C*G*U*C
117
+ C*A*G*U*G*U
118
+ U*U*C*A*A*C
119
+ `,
120
+ [csvTests.separatorPt]: `seq
121
+ F-W-P-H-E-Y
122
+ Y-N-R-Q-W-Y-V
123
+ M-K-P-S-E-Y-V
124
+ `,
125
+ [csvTests.separatorUn]: `seq
126
+ meI-hHis-Aca-N-T-dE-Thr_PO3H2-Aca-D
127
+ meI-hHis-Aca-Cys_SEt-T-dK-Thr_PO3H2-Aca-Tyr_PO3H2
128
+ Lys_Boc-hHis-Aca-Cys_SEt-T-dK-Thr_PO3H2-Aca-Tyr_PO3H2
129
+ `,
130
+
131
+ [csvTests.helm]: `seq
132
+ PEPTIDE1{meI.D-gGlu.Aca.N.T.dE.Thr_PO3H2.Aca.D}$$$
133
+ PEPTIDE1{meI.hHis.Aca.Cys_SEt.T.dK.Thr_PO3H2.Aca.Tyr_PO3H2}$$$
134
+ PEPTIDE1{Lys_Boc.hHis.Aca.Cys_SEt.T.dK.Thr_PO3H2.Aca.Tyr_PO3H2}$$$
135
+ `,
136
+ };
137
+
138
+ /** Also detects semantic types
139
+ * @param {string} key
140
+ * @return {Promise<DG.DataFrame>}
141
+ */
142
+ async function readCsv(key: csvTests): Promise<DG.DataFrame> {
143
+ // Always recreate test data frame from CSV for reproducible detector behavior in tests.
144
+ const csv: string = csvData[key];
145
+ const df: DG.DataFrame = DG.DataFrame.fromCsv(csv);
146
+ await grok.data.detectSemanticTypes(df);
147
+ return df;
148
+ }
149
+
150
+ test('fastaDna', async () => {
151
+ await _testToAtomicLevel(await readCsv(csvTests.fastaDna), 'seq', monomerLibHelper);
152
+ });
153
+
154
+ test('fastaRna', async () => {
155
+ await _testToAtomicLevel(await readCsv(csvTests.fastaRna), 'seq', monomerLibHelper);
156
+ });
157
+
158
+ test('fastaPt', async () => {
159
+ await _testToAtomicLevel(await readCsv(csvTests.fastaPt), 'seq', monomerLibHelper);
160
+ });
161
+
162
+ test('separatorDna', async () => {
163
+ await _testToAtomicLevel(await readCsv(csvTests.separatorDna), 'seq', monomerLibHelper);
164
+ });
165
+
166
+ test('separatorDna', async () => {
167
+ await _testToAtomicLevel(await readCsv(csvTests.separatorRna), 'seq', monomerLibHelper);
168
+ });
169
+
170
+ test('separatorPt', async () => {
171
+ await _testToAtomicLevel(await readCsv(csvTests.separatorPt), 'seq', monomerLibHelper);
172
+ });
173
+
174
+ test('separatorUn', async () => {
175
+ await _testToAtomicLevel(await readCsv(csvTests.separatorUn), 'seq', monomerLibHelper);
176
+ });
177
+
178
+ test('helm', async () => {
179
+ await _testToAtomicLevel(await readCsv(csvTests.helm), 'seq', monomerLibHelper);
180
+ });
181
+ });
182
+
183
+ async function _testToAtomicLevel(df: DG.DataFrame, seqColName: string = 'seq', monomerLibHelper: IMonomerLibHelper) {
184
+ const seqCol: DG.Column<string> = df.getCol(seqColName);
185
+ const monomerLib: IMonomerLib = monomerLibHelper.getBioLib();
186
+ const resCol = await _toAtomicLevel(df, seqCol, monomerLib);
187
+ }
@@ -44,8 +44,8 @@ export async function multipleSequenceAlignmentUI(options: multipleSequenceAlgin
44
44
  let performAlignment: (() => Promise<DG.Column<string>>) | undefined;
45
45
 
46
46
  // TODO: allow only macromolecule columns to be chosen
47
- const colInput = ui.columnInput('Sequence', table, seqCol, () => {
48
- performAlignment = onColInputChange(
47
+ const colInput = ui.columnInput('Sequence', table, seqCol, async () => {
48
+ performAlignment = await onColInputChange(
49
49
  colInput.value, table, inputRootStyles, methodInput, clustersColInput, gapOpenInput, gapExtendInput);
50
50
  }
51
51
  ) as DG.InputBase<DG.Column<string>>;
@@ -55,7 +55,7 @@ export async function multipleSequenceAlignmentUI(options: multipleSequenceAlgin
55
55
  colInput.fireChanged();
56
56
  //if column is specified (from tests), run alignment and resolve with the result
57
57
  if (options.col) {
58
- performAlignment = onColInputChange(
58
+ performAlignment = await onColInputChange(
59
59
  options.col, table, inputRootStyles, methodInput, clustersColInput, gapOpenInput, gapExtendInput);
60
60
 
61
61
  await onDialogOk(colInput, table, performAlignment, resolve, reject);
@@ -105,7 +105,7 @@ async function onDialogOk(
105
105
  }
106
106
 
107
107
 
108
- function onColInputChange(
108
+ async function onColInputChange(
109
109
  col: DG.Column<string>,
110
110
  table: DG.DataFrame,
111
111
  inputRootStyles: CSSStyleDeclaration[],
@@ -113,7 +113,7 @@ function onColInputChange(
113
113
  clustersColInput: DG.InputBase<DG.Column<any> | null>,
114
114
  gapOpenInput: DG.InputBase<number | null>,
115
115
  gapExtendInput: DG.InputBase<number | null>
116
- ): (() => Promise<DG.Column<string>>) | undefined {
116
+ ): Promise<(() => Promise<DG.Column<string>>) | undefined> {
117
117
  try {
118
118
  if (col.semType !== DG.SEMTYPE.MACROMOLECULE)
119
119
  return;
@@ -136,6 +136,19 @@ function onColInputChange(
136
136
 
137
137
  return async () => await runPepsea(col, unusedName, methodInput.value!,
138
138
  gapOpenInput.value!, gapExtendInput.value!, clustersColInput.value);
139
+ } else if (checkInputColumnUI(col, col.name, [NOTATION.SEPARATOR], [ALPHABET.UN], false)) {
140
+ //if the column is separator with unknown alphabet, it might be helm. check if it can be converted to helm
141
+ const potentialColNC = new NotationConverter(col);
142
+ if (!await potentialColNC.checkHelmCompatibility())
143
+ return;
144
+ const helmCol = potentialColNC.convert(NOTATION.HELM);
145
+ for (const inputRootStyle of inputRootStyles)
146
+ inputRootStyle.removeProperty('display');
147
+ console.log(helmCol.toList());
148
+ // convert to helm and assign alignment function to PepSea
149
+
150
+ return async () => await runPepsea(helmCol, unusedName, methodInput.value!,
151
+ gapOpenInput.value!, gapExtendInput.value!, clustersColInput.value);
139
152
  } else {
140
153
  for (const inputRootStyle of inputRootStyles)
141
154
  inputRootStyle.display = 'none';