@datagrok/bio 2.4.19 → 2.4.23

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/package.json CHANGED
@@ -5,7 +5,7 @@
5
5
  "name": "Leonid Stolbov",
6
6
  "email": "lstolbov@datagrok.ai"
7
7
  },
8
- "version": "2.4.19",
8
+ "version": "2.4.23",
9
9
  "description": "Bioinformatics support (import/export of sequences, conversion, visualization, analysis). [See more](https://github.com/datagrok-ai/public/blob/master/packages/Bio/README.md) for details.",
10
10
  "repository": {
11
11
  "type": "git",
@@ -14,10 +14,10 @@
14
14
  },
15
15
  "dependencies": {
16
16
  "@biowasm/aioli": "^3.1.0",
17
- "@datagrok-libraries/bio": "^5.29.3",
17
+ "@datagrok-libraries/bio": "^5.30.0",
18
18
  "@datagrok-libraries/chem-meta": "^1.0.1",
19
19
  "@datagrok-libraries/ml": "^6.3.23",
20
- "@datagrok-libraries/tutorials": "^1.3.1",
20
+ "@datagrok-libraries/tutorials": "^1.3.2",
21
21
  "@datagrok-libraries/utils": "^2.1.3",
22
22
  "cash-dom": "^8.0.0",
23
23
  "css-loader": "^6.7.3",
@@ -9,7 +9,7 @@ import {handleError} from './utils';
9
9
  import {SequenceDiversityViewer} from '../analysis/sequence-diversity-viewer';
10
10
  import {SequenceSimilarityViewer} from '../analysis/sequence-similarity-viewer';
11
11
 
12
- const dataFn: string = 'data/sample_FASTA_DNA.csv';
12
+ const dataFn: string = 'data/sample_FASTA_PT_activity.csv';
13
13
 
14
14
  export async function demoBio01UI() {
15
15
  let view: DG.TableView;
@@ -27,8 +27,12 @@ export async function demoBio01UI() {
27
27
  df = await _package.files.readCsv(dataFn);
28
28
  view = grok.shell.addTableView(df);
29
29
 
30
- view.grid.columns.byName('id')!.width = 0;
31
- view.grid.columns.byName('sequence')!.width = 500;
30
+ view.grid.columns.byName('cluster')!.visible = false;
31
+ view.grid.columns.byName('sequence_id')!.visible = false;
32
+ view.grid.columns.byName('sequence')!.width = 300;
33
+ view.grid.columns.byName('activity')!.visible = false;
34
+ view.grid.columns.byName('is_cliff')!.visible = false;
35
+
32
36
  // TODO: Fix column width
33
37
  }, {
34
38
  description: `Load dataset with macromolecules of 'fasta' notation, 'DNA' alphabet.`,
@@ -11,7 +11,7 @@ import {getDendrogramService, IDendrogramService} from '@datagrok-libraries/bio/
11
11
  import {demoSequenceSpace, handleError} from './utils';
12
12
  import {DemoScript} from '@datagrok-libraries/tutorials/src/demo-script';
13
13
 
14
- const dataFn = 'data/sample_FASTA_DNA.csv';
14
+ const dataFn = 'data/sample_FASTA_PT_activity.csv';
15
15
  const seqColName = 'sequence';
16
16
 
17
17
  export async function demoBio01aUI() {
@@ -21,7 +21,7 @@ export async function demoBio01aUI() {
21
21
  let df: DG.DataFrame;
22
22
  let spViewer: DG.ScatterPlotViewer;
23
23
 
24
- const method: string = 'UMAP';
24
+ const dimRedMethod: string = 'UMAP';
25
25
  const idRows: { [id: number]: number } = {};
26
26
  const embedCols: { [colName: string]: DG.Column<number> } = {};
27
27
 
@@ -38,6 +38,9 @@ export async function demoBio01aUI() {
38
38
  ]);
39
39
  view = grok.shell.addTableView(df);
40
40
  view.grid.props.rowHeight = 22;
41
+ view.grid.columns.byName('cluster')!.visible = false;
42
+ view.grid.columns.byName('sequence')!.width = 200;
43
+ view.grid.columns.byName('is_cliff')!.visible = false;
41
44
 
42
45
  grok.shell.windows.showContextPanel = false;
43
46
  grok.shell.windows.showProperties = false;
@@ -46,7 +49,7 @@ export async function demoBio01aUI() {
46
49
  delay: 2000,
47
50
  })
48
51
  .step('Build sequence space', async () => {
49
- spViewer = await demoSequenceSpace(view, df, seqColName, method);
52
+ spViewer = await demoSequenceSpace(view, df, seqColName, dimRedMethod);
50
53
  }, {
51
54
  description: `Reduce sequence space dimensionality to display on 2D representation.`,
52
55
  delay: 2000
@@ -71,7 +74,10 @@ export async function demoBio01aUI() {
71
74
  delay: 2000,
72
75
  })
73
76
  .step('Select a bunch of sequences', async () => {
74
- df.selection.init((idx: number) => [21, 9, 58].includes(idx));
77
+ const seqIdCol: DG.Column<string> = df.getCol('sequence_id');
78
+ df.selection.init((rowI: number) => {
79
+ return ['c0_seq120', 'c0_seq105', 'c0_seq121', 'c0_seq93'].includes(seqIdCol.get(rowI)!);
80
+ });
75
81
  df.currentRowIdx = 27;
76
82
  }, {
77
83
  description: 'Selecting a group of rows from a data frame to show their similarity and proximity to each other on a viewer..',
@@ -13,7 +13,7 @@ import {getDendrogramService, IDendrogramService} from '@datagrok-libraries/bio/
13
13
  import {handleError} from './utils';
14
14
  import {DemoScript} from '@datagrok-libraries/tutorials/src/demo-script';
15
15
 
16
- const dataFn: string = 'samples/sample_FASTA.csv';
16
+ const dataFn: string = 'data/sample_FASTA_PT_activity.csv';
17
17
 
18
18
  export async function demoBio01bUI() {
19
19
  let treeHelper: ITreeHelper;
@@ -23,7 +23,7 @@ export async function demoBio01bUI() {
23
23
  let view: DG.TableView;
24
24
  let activityCliffsViewer: DG.ScatterPlotViewer;
25
25
 
26
- const method: string = 'UMAP';
26
+ const dimRedMethod: string = 'UMAP';
27
27
  const idRows: { [id: number]: number } = {};
28
28
 
29
29
  try {
@@ -43,10 +43,9 @@ export async function demoBio01bUI() {
43
43
 
44
44
  view = grok.shell.addTableView(df);
45
45
  view.grid.props.rowHeight = 22;
46
- const uniProtKbGCol = view.grid.columns.byName('UniProtKB')!;
47
- uniProtKbGCol.width = 75;
48
- const lengthGCol = view.grid.columns.byName('Length')!;
49
- lengthGCol.width = 0;
46
+ view.grid.columns.byName('cluster')!.visible = false;
47
+ view.grid.columns.byName('sequence')!.width = 300;
48
+ view.grid.columns.byName('is_cliff')!.visible = false;
50
49
  }, {
51
50
  description: 'Load dataset with macromolecules of \'fasta\' notation, \'DNA\' alphabet.',
52
51
  delay: 2000,
@@ -54,7 +53,7 @@ export async function demoBio01bUI() {
54
53
  .step('Find activity cliffs', async () => {
55
54
  activityCliffsViewer = (await activityCliffs(
56
55
  df, df.getCol('Sequence'), df.getCol('Activity'),
57
- 80, method)) as DG.ScatterPlotViewer;
56
+ 80, dimRedMethod)) as DG.ScatterPlotViewer;
58
57
  view.dockManager.dock(activityCliffsViewer, DG.DOCK_TYPE.RIGHT, null, 'Activity Cliffs', 0.35);
59
58
 
60
59
  // Show grid viewer with the cliffs
@@ -86,7 +85,7 @@ export async function demoBio01bUI() {
86
85
  //cliffsDfGrid.dataFrame.currentRowIdx = -1; // reset
87
86
  const cliffsDfGrid: DG.Grid = activityCliffsViewer.dataFrame.temp[acTEMPS.cliffsDfGrid];
88
87
  //cliffsDfGrid.dataFrame.selection.init((i) => i == currentCliffIdx);
89
- cliffsDfGrid.dataFrame.currentRowIdx = 0;
88
+ if (cliffsDfGrid.dataFrame.rowCount > 0) cliffsDfGrid.dataFrame.currentRowIdx = 0;
90
89
  //cliffsDfGrid.dataFrame.selection.set(currentCliffIdx, true, true);
91
90
 
92
91
  // /* workaround to select rows of the cliff */
@@ -22,6 +22,7 @@ export async function demoBio05UI(): Promise<void> {
22
22
 
23
23
  const helmColName: string = 'HELM';
24
24
  const msaHelmColName: string = 'msa(HELM)';
25
+ const dimRedMethod: string = 'UMAP';
25
26
 
26
27
  try {
27
28
  const demoScript = new DemoScript(
@@ -37,7 +38,7 @@ export async function demoBio05UI(): Promise<void> {
37
38
  description: 'Load dataset with macromolecules of \'Helm\' notation.',
38
39
  delay: 2000,
39
40
  })
40
- .step('Align paptides with non-natural aminoacids with PepSeA', async () => {
41
+ .step('Align peptides with non-natural aminoacids with PepSeA', async () => {
41
42
  helmCol = df.getCol(helmColName);
42
43
  const method: string = pepseaMethods[0];
43
44
  const gapOpen: number = 1.53;
@@ -50,9 +51,8 @@ export async function demoBio05UI(): Promise<void> {
50
51
  delay: 2000,
51
52
  })
52
53
  .step('Build sequence space', async () => {
53
- const method: string = 'UMAP';
54
54
  ssViewer = (await sequenceSpaceTopMenu(df, msaHelmCol,
55
- 'UMAP', StringMetricsNames.Levenshtein, true)) as DG.ScatterPlotViewer;
55
+ dimRedMethod, StringMetricsNames.Levenshtein, true)) as DG.ScatterPlotViewer;
56
56
  view.dockManager.dock(ssViewer, DG.DOCK_TYPE.RIGHT, null, 'Sequence Space', 0.35);
57
57
  }, {
58
58
  description: 'Reduce sequence space dimensionality to display on 2D representation.',
package/src/demo/utils.ts CHANGED
@@ -52,18 +52,6 @@ export async function demoSequenceSpace(
52
52
  embedCol.init((rowI) => { return embedColData[rowI]; });
53
53
  }
54
54
 
55
- const rowCount: number = df.rowCount;
56
- const idCol: DG.Column = df.getCol('id');
57
- for (let idRowI = 0; idRowI < rowCount; idRowI++) {
58
- const id = idCol.get(idRowI);
59
- //idRows[id] = idRowI;
60
- }
61
-
62
- for (const embedColName of Object.values(EMBED_COL_NAMES)) {
63
- const embedCol: DG.Column<number> = df.getCol(embedColName);
64
- //embedCols[embedColName] = embedCol;
65
- }
66
-
67
55
  const t3: number = Date.now();
68
56
  _package.logger.debug('MLB: MlbVrSpaceBrowser.buildView(), postprocess reduceDimensionality ' +
69
57
  `ET: ${((t3 - t2) / 1000)} s`);
@@ -22,6 +22,7 @@ import './tests/substructure-filters-tests';
22
22
  import './tests/pepsea-tests';
23
23
  import './tests/viewers';
24
24
  import './tests/units-handler-tests';
25
+ import './tests/to-atomic-level-tests';
25
26
  import './tests/mm-distance-tests';
26
27
 
27
28
  // Tests hanging github CI
package/src/package.ts CHANGED
@@ -9,7 +9,7 @@ import {
9
9
  } from './utils/cell-renderer';
10
10
  import {VdRegionsViewer} from './viewers/vd-regions-viewer';
11
11
  import {SequenceAlignment} from './seq_align';
12
- import {getEmbeddingColsNames, sequenceSpaceByFingerprints} from './analysis/sequence-space';
12
+ import {getEmbeddingColsNames, sequenceSpaceByFingerprints, getSequenceSpace} from './analysis/sequence-space';
13
13
  import {getActivityCliffs} from '@datagrok-libraries/ml/src/viewers/activity-cliffs';
14
14
  import {
15
15
  createLinesGrid,
@@ -290,19 +290,23 @@ export async function activityCliffs(df: DG.DataFrame, macroMolecule: DG.Column,
290
290
  'separator': macroMolecule.getTag(bioTAGS.separator),
291
291
  'alphabet': macroMolecule.getTag(bioTAGS.alphabet),
292
292
  };
293
+ const uh = new UnitsHandler(macroMolecule);
294
+ let columnDistanceMetric = 'Tanimoto';
295
+ if (uh.isFasta())
296
+ columnDistanceMetric = uh.getDistanceFunctionName();
293
297
  const sp = await getActivityCliffs(
294
298
  df,
295
299
  macroMolecule,
296
300
  null,
297
301
  axesNames,
298
- 'Activity cliffs',
302
+ 'Activity cliffs', //scatterTitle
299
303
  activities,
300
304
  similarity,
301
- 'Tanimoto',
305
+ columnDistanceMetric, //similarityMetric
302
306
  methodName,
303
307
  DG.SEMTYPE.MACROMOLECULE,
304
308
  tags,
305
- sequenceSpaceByFingerprints,
309
+ getSequenceSpace,
306
310
  getChemSimilaritiesMatrix,
307
311
  createTooltipElement,
308
312
  createPropPanelElement,
@@ -353,7 +357,7 @@ export async function sequenceSpaceTopMenu(table: DG.DataFrame, macroMolecule: D
353
357
  embedAxesNames: embedColsNames,
354
358
  options: options
355
359
  };
356
- const sequenceSpaceRes = await sequenceSpaceByFingerprints(chemSpaceParams);
360
+ const sequenceSpaceRes = await getSequenceSpace(chemSpaceParams);
357
361
  const embeddings = sequenceSpaceRes.coordinates;
358
362
  for (const col of embeddings) {
359
363
  const listValues = col.toList();
@@ -407,9 +411,15 @@ export async function toAtomicLevel(df: DG.DataFrame, macroMolecule: DG.Column):
407
411
  }
408
412
  if (!checkInputColumnUI(macroMolecule, 'To Atomic Level'))
409
413
  return;
410
- const monomersLibFile = await _package.files.readAsText(HELM_CORE_LIB_FILENAME);
411
- const monomersLibObject: any[] = JSON.parse(monomersLibFile);
412
- await _toAtomicLevel(df, macroMolecule, monomersLibObject);
414
+ const monomerLib: IMonomerLib = (await getMonomerLibHelper()).getBioLib();
415
+ const atomicLevelRes = await _toAtomicLevel(df, macroMolecule, monomerLib);
416
+ if (atomicLevelRes.col !== null) {
417
+ df.columns.add(atomicLevelRes.col, true);
418
+ await grok.data.detectSemanticTypes(df);
419
+ }
420
+
421
+ if (atomicLevelRes.warnings && atomicLevelRes.warnings.length > 0)
422
+ grok.shell.warning(ui.list(atomicLevelRes.warnings));
413
423
  }
414
424
 
415
425
  //top-menu: Bio | Alignment | MSA...
@@ -53,9 +53,9 @@ Y-N-R-Q-W-Y-V
53
53
  M-K-P-S-E-Y-V
54
54
  `,
55
55
  helmPt: `seq
56
- PEPTIDE1{F.W.P.H.E.Y}$$$
57
- PEPTIDE1{Y.N.R.Q.W.Y.V}$$$
58
- PEPTIDE1{M.K.P.S.E.Y.V}$$$
56
+ PEPTIDE1{F.W.P.H.E.Y}$$$$
57
+ PEPTIDE1{Y.N.R.Q.W.Y.V}$$$$
58
+ PEPTIDE1{M.K.P.S.E.Y.V}$$$$
59
59
  `,
60
60
  fastaDna: `seq
61
61
  ACGTC
@@ -68,9 +68,9 @@ C/A/G/T/G/T
68
68
  T/T/C/A/A/C
69
69
  `,
70
70
  helmDna: `seq
71
- DNA1{D(A)P.D(C)P.D(G)P.D(T)P.D(C)P}$$$
72
- DNA1{D(C)P.D(A)P.D(G)P.D(T)P.D(G)P.D(T)P}$$$
73
- DNA1{D(T)P.D(T)P.D(C)P.D(A)P.D(A)P.D(C)P}$$$
71
+ DNA1{D(A)P.D(C)P.D(G)P.D(T)P.D(C)P}$$$$
72
+ DNA1{D(C)P.D(A)P.D(G)P.D(T)P.D(G)P.D(T)P}$$$$
73
+ DNA1{D(T)P.D(T)P.D(C)P.D(A)P.D(A)P.D(C)P}$$$$
74
74
  `,
75
75
  fastaRna: `seq
76
76
  ACGUC
@@ -83,9 +83,9 @@ C*A*G*U*G*U
83
83
  U*U*C*A*A*C
84
84
  `,
85
85
  helmRna: `seq
86
- RNA1{R(A)P.R(C)P.R(G)P.R(U)P.R(C)P}$$$
87
- RNA1{R(C)P.R(A)P.R(G)P.R(U)P.R(G)P.R(U)P}$$$
88
- RNA1{R(U)P.R(U)P.R(C)P.R(A)P.R(A)P.R(C)P}$$$
86
+ RNA1{R(A)P.R(C)P.R(G)P.R(U)P.R(C)P}$$$$
87
+ RNA1{R(C)P.R(A)P.R(G)P.R(U)P.R(G)P.R(U)P}$$$$
88
+ RNA1{R(U)P.R(U)P.R(C)P.R(A)P.R(A)P.R(C)P}$$$$
89
89
  `,
90
90
  fastaGaps: `seq
91
91
  FW-PH-EYY
@@ -98,9 +98,9 @@ F/Y/N/R/Q/W/Y/V/
98
98
  F/K/P//Q//S/E/Y/V
99
99
  `,
100
100
  helmGaps: `seq
101
- PEPTIDE1{F.W.*.P.H.*.E.Y.Y}$$$
102
- PEPTIDE1{F.Y.N.R.Q.W.Y.V.*}$$$
103
- PEPTIDE1{F.K.P.*.Q.*.S.E.Y.V}$$$
101
+ PEPTIDE1{F.W.*.P.H.*.E.Y.Y}$$$$
102
+ PEPTIDE1{F.Y.N.R.Q.W.Y.V.*}$$$$
103
+ PEPTIDE1{F.K.P.*.Q.*.S.E.Y.V}$$$$
104
104
  `,
105
105
 
106
106
  fastaUn: `seq
@@ -114,24 +114,24 @@ meI-hHis-Aca-Cys_SEt-T-dK-Thr_PO3H2-Aca-Tyr_PO3H2
114
114
  Lys_Boc-hHis-Aca-Cys_SEt-T-dK-Thr_PO3H2-Aca-Tyr_PO3H2
115
115
  `,
116
116
  helmUn: `seq
117
- PEPTIDE1{meI.hHis.Aca.N.T.dE.Thr_PO3H2.Aca.D}$$$
118
- PEPTIDE1{meI.hHis.Aca.Cys_SEt.T.dK.Thr_PO3H2.Aca.Tyr_PO3H2}$$$
119
- PEPTIDE1{Lys_Boc.hHis.Aca.Cys_SEt.T.dK.Thr_PO3H2.Aca.Tyr_PO3H2}$$$
117
+ PEPTIDE1{meI.hHis.Aca.N.T.dE.Thr_PO3H2.Aca.D}$$$$
118
+ PEPTIDE1{meI.hHis.Aca.Cys_SEt.T.dK.Thr_PO3H2.Aca.Tyr_PO3H2}$$$$
119
+ PEPTIDE1{Lys_Boc.hHis.Aca.Cys_SEt.T.dK.Thr_PO3H2.Aca.Tyr_PO3H2}$$$$
120
120
  `,
121
121
  helmLoneDeoxyribose: `seq
122
- DNA1{D(A).D(C).D(G).D(T).D(C)}$$$
123
- DNA1{D(C).D(A).D(G).D(T).D(G).D(T)P}$$$
124
- DNA1{D(T).D(T).D(C).D(A).D(A).D(C)P}$$$
122
+ DNA1{D(A).D(C).D(G).D(T).D(C)}$$$$
123
+ DNA1{D(C).D(A).D(G).D(T).D(G).D(T)P}$$$$
124
+ DNA1{D(T).D(T).D(C).D(A).D(A).D(C)P}$$$$
125
125
  `,
126
126
  helmLoneRibose: `seq
127
- RNA1{R(A).R(C).R(G).R(U).R(C)}$$$
128
- RNA1{R(C).R(A).R(G).R(U).R(G).R(U)P}$$$
129
- RNA1{R(U).R(U).R(C).R(A).R(A).R(C)P}$$$
127
+ RNA1{R(A).R(C).R(G).R(U).R(C)}$$$$
128
+ RNA1{R(C).R(A).R(G).R(U).R(G).R(U)P}$$$$
129
+ RNA1{R(U).R(U).R(C).R(A).R(A).R(C)P}$$$$
130
130
  `,
131
131
  helmLonePhosphorus: `seq
132
- RNA1{P.P.R(A)P.R(C)P.R(G)P.R(U)P.R(C)P}$$$
133
- RNA1{P.P.R(C)P.R(A)P.P.R(G)P.R(U)P.R(G)P.R(U)P}$$$
134
- RNA1{P.R(U)P.R(U)P.R(C)P.R(A)P.R(A)P.R(C)P.P.P}$$$
132
+ RNA1{P.P.R(A)P.R(C)P.R(G)P.R(U)P.R(C)P}$$$$
133
+ RNA1{P.P.R(C)P.R(A)P.P.R(G)P.R(U)P.R(G)P.R(U)P}$$$$
134
+ RNA1{P.R(U)P.R(U)P.R(C)P.R(A)P.R(A)P.R(C)P.P.P}$$$$
135
135
  `,
136
136
  };
137
137
 
@@ -0,0 +1,187 @@
1
+ /* Do not change these import lines to match external modules in webpack configuration */
2
+ import * as grok from 'datagrok-api/grok';
3
+ import * as ui from 'datagrok-api/ui';
4
+ import * as DG from 'datagrok-api/dg';
5
+
6
+ import {before, after, category, test, expectArray} from '@datagrok-libraries/utils/src/test';
7
+
8
+ import {getMonomerLibHelper, toAtomicLevel} from '../package';
9
+ import {_toAtomicLevel} from '@datagrok-libraries/bio/src/monomer-works/to-atomic-level';
10
+ import {IMonomerLib} from '@datagrok-libraries/bio/src/types/index';
11
+ import {IMonomerLibHelper} from '@datagrok-libraries/bio/src/monomer-works/monomer-utils';
12
+ import {LIB_STORAGE_NAME} from '../utils/monomer-lib';
13
+
14
+ const appPath = 'System:AppData/Bio';
15
+ const fileSource = new DG.FileSource(appPath);
16
+
17
+ const testNames: { [k: string]: string } = {
18
+ PT: 'peptides fasta',
19
+ DNA: 'dna fasta',
20
+ MSA: 'msa separator',
21
+ };
22
+
23
+ const inputPath: { [k: string]: string } = {
24
+ PT: 'tests/to-atomic-level-peptides-fasta-input.csv',
25
+ DNA: 'tests/to-atomic-level-dna-fasta-input.csv',
26
+ MSA: 'tests/to-atomic-level-msa-separator-input.csv',
27
+ };
28
+
29
+ const outputPath: { [k: string]: string } = {
30
+ PT: 'tests/to-atomic-level-peptides-output.csv',
31
+ DNA: 'tests/to-atomic-level-dna-output.csv',
32
+ MSA: 'tests/to-atomic-level-msa-output.csv',
33
+ };
34
+
35
+ const inputColName = 'sequence';
36
+ const outputColName = 'molfile(sequence)';
37
+
38
+ category('toAtomicLevel', async () => {
39
+ const sourceDf: { [key: string]: DG.DataFrame } = {};
40
+ const targetDf: { [key: string]: DG.DataFrame } = {};
41
+
42
+ let monomerLibHelper: IMonomerLibHelper;
43
+ /** Backup actual user's monomer libraries settings */
44
+ let userLibrariesSettings: any = null;
45
+
46
+ before(async () => {
47
+ monomerLibHelper = await getMonomerLibHelper();
48
+ userLibrariesSettings = await grok.dapi.userDataStorage.get(LIB_STORAGE_NAME, true);
49
+ // Clear settings to test default
50
+ await grok.dapi.userDataStorage.put(LIB_STORAGE_NAME, {}, true);
51
+ await monomerLibHelper.loadLibraries(true);
52
+
53
+ for (const key in testNames) {
54
+ sourceDf[key] = await fileSource.readCsv(inputPath[key]);
55
+ await grok.data.detectSemanticTypes(sourceDf[key]);
56
+ targetDf[key] = await fileSource.readCsv(outputPath[key]);
57
+ }
58
+ });
59
+
60
+ after(async () => {
61
+ await grok.dapi.userDataStorage.put(LIB_STORAGE_NAME, userLibrariesSettings, true);
62
+ await monomerLibHelper.loadLibraries(true);
63
+ });
64
+
65
+ async function getTestResult(source: DG.DataFrame, target: DG.DataFrame): Promise<void> {
66
+ const inputCol = source.getCol(inputColName);
67
+ await toAtomicLevel(source, inputCol);
68
+ const obtainedCol = source.getCol(outputColName);
69
+ const expectedCol = target.getCol(outputColName);
70
+ const obtainedArray = [...obtainedCol.values()];
71
+ const expectedArray = [...expectedCol.values()];
72
+ expectArray(obtainedArray, expectedArray);
73
+ }
74
+
75
+ for (const key in testNames) {
76
+ test(`${testNames[key]}`, async () => {
77
+ await getTestResult(sourceDf[key], targetDf[key]);
78
+ }, {skipReason: 'GROK-13100'});
79
+ }
80
+
81
+ enum csvTests {
82
+ fastaDna = 'fastaDna',
83
+ fastaRna = 'fastaRna',
84
+ fastaPt = 'fastaPt',
85
+
86
+ separatorDna = 'separatorDna',
87
+ separatorRna = 'separatorRna',
88
+ separatorPt = 'separatorPt',
89
+ separatorUn = 'separatorUn',
90
+
91
+ helm = 'helm',
92
+ }
93
+
94
+ const csvData: { [key in csvTests]: string } = {
95
+ [csvTests.fastaDna]: `seq
96
+ ACGTC
97
+ CAGTGT
98
+ TTCAAC
99
+ `,
100
+ [csvTests.fastaRna]: `seq
101
+ ACGUC
102
+ CAGUGU
103
+ UUCAAC
104
+ `,
105
+ [csvTests.fastaPt]: `seq
106
+ FWPHEY
107
+ YNRQWYV
108
+ MKPSEYV
109
+ `,
110
+ [csvTests.separatorDna]: `seq
111
+ A/C/G/T/C
112
+ C/A/G/T/G/T
113
+ T/T/C/A/A/C
114
+ `,
115
+ [csvTests.separatorRna]: `seq
116
+ A*C*G*U*C
117
+ C*A*G*U*G*U
118
+ U*U*C*A*A*C
119
+ `,
120
+ [csvTests.separatorPt]: `seq
121
+ F-W-P-H-E-Y
122
+ Y-N-R-Q-W-Y-V
123
+ M-K-P-S-E-Y-V
124
+ `,
125
+ [csvTests.separatorUn]: `seq
126
+ meI-hHis-Aca-N-T-dE-Thr_PO3H2-Aca-D
127
+ meI-hHis-Aca-Cys_SEt-T-dK-Thr_PO3H2-Aca-Tyr_PO3H2
128
+ Lys_Boc-hHis-Aca-Cys_SEt-T-dK-Thr_PO3H2-Aca-Tyr_PO3H2
129
+ `,
130
+
131
+ [csvTests.helm]: `seq
132
+ PEPTIDE1{meI.D-gGlu.Aca.N.T.dE.Thr_PO3H2.Aca.D}$$$
133
+ PEPTIDE1{meI.hHis.Aca.Cys_SEt.T.dK.Thr_PO3H2.Aca.Tyr_PO3H2}$$$
134
+ PEPTIDE1{Lys_Boc.hHis.Aca.Cys_SEt.T.dK.Thr_PO3H2.Aca.Tyr_PO3H2}$$$
135
+ `,
136
+ };
137
+
138
+ /** Also detects semantic types
139
+ * @param {string} key
140
+ * @return {Promise<DG.DataFrame>}
141
+ */
142
+ async function readCsv(key: csvTests): Promise<DG.DataFrame> {
143
+ // Always recreate test data frame from CSV for reproducible detector behavior in tests.
144
+ const csv: string = csvData[key];
145
+ const df: DG.DataFrame = DG.DataFrame.fromCsv(csv);
146
+ await grok.data.detectSemanticTypes(df);
147
+ return df;
148
+ }
149
+
150
+ test('fastaDna', async () => {
151
+ await _testToAtomicLevel(await readCsv(csvTests.fastaDna), 'seq', monomerLibHelper);
152
+ });
153
+
154
+ test('fastaRna', async () => {
155
+ await _testToAtomicLevel(await readCsv(csvTests.fastaRna), 'seq', monomerLibHelper);
156
+ });
157
+
158
+ test('fastaPt', async () => {
159
+ await _testToAtomicLevel(await readCsv(csvTests.fastaPt), 'seq', monomerLibHelper);
160
+ });
161
+
162
+ test('separatorDna', async () => {
163
+ await _testToAtomicLevel(await readCsv(csvTests.separatorDna), 'seq', monomerLibHelper);
164
+ });
165
+
166
+ test('separatorDna', async () => {
167
+ await _testToAtomicLevel(await readCsv(csvTests.separatorRna), 'seq', monomerLibHelper);
168
+ });
169
+
170
+ test('separatorPt', async () => {
171
+ await _testToAtomicLevel(await readCsv(csvTests.separatorPt), 'seq', monomerLibHelper);
172
+ });
173
+
174
+ test('separatorUn', async () => {
175
+ await _testToAtomicLevel(await readCsv(csvTests.separatorUn), 'seq', monomerLibHelper);
176
+ });
177
+
178
+ test('helm', async () => {
179
+ await _testToAtomicLevel(await readCsv(csvTests.helm), 'seq', monomerLibHelper);
180
+ });
181
+ });
182
+
183
+ async function _testToAtomicLevel(df: DG.DataFrame, seqColName: string = 'seq', monomerLibHelper: IMonomerLibHelper) {
184
+ const seqCol: DG.Column<string> = df.getCol(seqColName);
185
+ const monomerLib: IMonomerLib = monomerLibHelper.getBioLib();
186
+ const resCol = await _toAtomicLevel(df, seqCol, monomerLib);
187
+ }