@datagrok/bio 2.4.31 → 2.4.40

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (100) hide show
  1. package/.eslintrc.json +6 -8
  2. package/README.md +22 -7
  3. package/css/msa.css +3 -0
  4. package/detectors.js +21 -12
  5. package/dist/1.js +2 -0
  6. package/dist/1.js.map +1 -0
  7. package/dist/18.js +2 -0
  8. package/dist/18.js.map +1 -0
  9. package/dist/190.js +2 -0
  10. package/dist/190.js.map +1 -0
  11. package/dist/452.js +2 -0
  12. package/dist/452.js.map +1 -0
  13. package/dist/729.js +2 -0
  14. package/dist/729.js.map +1 -0
  15. package/dist/package-test.js +1 -1
  16. package/dist/package-test.js.map +1 -1
  17. package/dist/package.js +1 -1
  18. package/dist/package.js.map +1 -1
  19. package/files/libraries/broken-lib.sdf +136 -0
  20. package/files/libraries/group1/mock-lib-3.json +74 -0
  21. package/files/libraries/mock-lib-2.json +48 -0
  22. package/files/tests/100_3_clustests.csv +100 -0
  23. package/files/tests/100_3_clustests_empty_vals.csv +100 -0
  24. package/files/tests/peptides_motif-with-random_10000.csv +9998 -0
  25. package/package.json +4 -4
  26. package/scripts/sequence_generator.py +164 -48
  27. package/src/analysis/sequence-activity-cliffs.ts +7 -9
  28. package/src/analysis/sequence-diversity-viewer.ts +8 -3
  29. package/src/analysis/sequence-search-base-viewer.ts +4 -3
  30. package/src/analysis/sequence-similarity-viewer.ts +13 -7
  31. package/src/analysis/sequence-space.ts +15 -12
  32. package/src/analysis/workers/mm-distance-array-service.ts +48 -0
  33. package/src/analysis/workers/mm-distance-array-worker.ts +29 -0
  34. package/src/analysis/workers/mm-distance-worker-creator.ts +6 -9
  35. package/src/apps/web-logo-app.ts +34 -0
  36. package/src/calculations/monomerLevelMols.ts +10 -12
  37. package/src/demo/bio01-similarity-diversity.ts +4 -5
  38. package/src/demo/bio01a-hierarchical-clustering-and-sequence-space.ts +6 -7
  39. package/src/demo/bio01b-hierarchical-clustering-and-activity-cliffs.ts +7 -8
  40. package/src/demo/bio03-atomic-level.ts +1 -4
  41. package/src/demo/bio05-helm-msa-sequence-space.ts +6 -4
  42. package/src/demo/utils.ts +3 -4
  43. package/src/package-test.ts +1 -2
  44. package/src/package.ts +135 -82
  45. package/src/seq_align.ts +482 -483
  46. package/src/substructure-search/substructure-search.ts +3 -3
  47. package/src/tests/Palettes-test.ts +1 -1
  48. package/src/tests/WebLogo-positions-test.ts +12 -35
  49. package/src/tests/_first-tests.ts +1 -1
  50. package/src/tests/activity-cliffs-tests.ts +10 -7
  51. package/src/tests/activity-cliffs-utils.ts +6 -5
  52. package/src/tests/bio-tests.ts +20 -25
  53. package/src/tests/checkInputColumn-tests.ts +5 -11
  54. package/src/tests/converters-test.ts +19 -37
  55. package/src/tests/detectors-benchmark-tests.ts +35 -37
  56. package/src/tests/detectors-tests.ts +29 -34
  57. package/src/tests/detectors-weak-and-likely-tests.ts +11 -21
  58. package/src/tests/fasta-export-tests.ts +3 -3
  59. package/src/tests/fasta-handler-test.ts +2 -3
  60. package/src/tests/lib-tests.ts +2 -4
  61. package/src/tests/mm-distance-tests.ts +25 -17
  62. package/src/tests/monomer-libraries-tests.ts +1 -1
  63. package/src/tests/msa-tests.ts +12 -9
  64. package/src/tests/pepsea-tests.ts +6 -3
  65. package/src/tests/renderers-test.ts +13 -11
  66. package/src/tests/sequence-space-test.ts +10 -8
  67. package/src/tests/sequence-space-utils.ts +6 -4
  68. package/src/tests/similarity-diversity-tests.ts +47 -61
  69. package/src/tests/splitters-test.ts +14 -20
  70. package/src/tests/to-atomic-level-tests.ts +9 -17
  71. package/src/tests/units-handler-splitted-tests.ts +106 -0
  72. package/src/tests/units-handler-tests.ts +22 -26
  73. package/src/tests/utils/sequences-generators.ts +6 -2
  74. package/src/tests/utils.ts +10 -4
  75. package/src/tests/viewers.ts +1 -1
  76. package/src/utils/atomic-works.ts +49 -57
  77. package/src/utils/cell-renderer.ts +25 -8
  78. package/src/utils/check-input-column.ts +19 -4
  79. package/src/utils/constants.ts +3 -3
  80. package/src/utils/convert.ts +56 -23
  81. package/src/utils/monomer-lib.ts +83 -64
  82. package/src/utils/multiple-sequence-alignment-ui.ts +35 -21
  83. package/src/utils/multiple-sequence-alignment.ts +2 -2
  84. package/src/utils/pepsea.ts +17 -7
  85. package/src/utils/save-as-fasta.ts +11 -4
  86. package/src/utils/ui-utils.ts +1 -1
  87. package/src/viewers/vd-regions-viewer.ts +21 -22
  88. package/src/viewers/web-logo-viewer.ts +189 -154
  89. package/src/widgets/bio-substructure-filter.ts +9 -6
  90. package/src/widgets/representations.ts +11 -12
  91. package/tsconfig.json +1 -1
  92. package/dist/258.js +0 -2
  93. package/dist/258.js.map +0 -1
  94. package/dist/457.js +0 -2
  95. package/dist/457.js.map +0 -1
  96. package/dist/562.js +0 -2
  97. package/dist/562.js.map +0 -1
  98. package/dist/925.js +0 -2
  99. package/dist/925.js.map +0 -1
  100. package/src/analysis/workers/mm-distance-worker.ts +0 -16
@@ -7,7 +7,6 @@ import {importFasta} from '../package';
7
7
  import {convertDo} from '../utils/convert';
8
8
  import * as C from '../utils/constants';
9
9
  import {generateLongSequence, generateManySequences, performanceTest} from './utils/sequences-generators';
10
- import {errorToConsole} from '@datagrok-libraries/utils/src/to-console';
11
10
  import {ALIGNMENT, ALPHABET, NOTATION, TAGS as bioTAGS} from '@datagrok-libraries/bio/src/utils/macromolecule';
12
11
  import {UnitsHandler} from '@datagrok-libraries/bio/src/utils/units-handler';
13
12
  import {multipleSequenceAlignmentUI} from '../utils/multiple-sequence-alignment-ui';
@@ -29,35 +28,38 @@ category('renderers', () => {
29
28
 
30
29
  test('long sequence performance ', async () => {
31
30
  performanceTest(generateLongSequence, 'Long sequences');
32
- });
31
+ }, {skipReason: 'GROK-13300'});
33
32
 
34
33
  test('many sequence performance', async () => {
35
34
  performanceTest(generateManySequences, 'Many sequences');
36
- });
35
+ }, {skipReason: 'GROK-13300'});
36
+ test('many sequence performance', async () => {
37
+ performanceTest(generateManySequences, 'Many sequences');
38
+ }, {skipReason: 'GROK-13300'});
37
39
 
38
40
  test('rendererMacromoleculeFasta', async () => {
39
41
  await _rendererMacromoleculeFasta();
40
- });
42
+ }, {skipReason: 'GROK-13300'});
41
43
 
42
44
  test('rendererMacromoleculeSeparator', async () => {
43
45
  await _rendererMacromoleculeSeparator();
44
- });
46
+ }, {skipReason: 'GROK-13300'});
45
47
 
46
48
  test('rendererMacromoleculeDifference', async () => {
47
49
  await _rendererMacromoleculeDifference();
48
- });
50
+ }, {skipReason: 'GROK-13300'});
49
51
 
50
52
  test('afterMsa', async () => {
51
53
  await _testAfterMsa();
52
- });
54
+ }, {skipReason: 'GROK-13300'});
53
55
 
54
56
  test('afterConvert', async () => {
55
57
  await _testAfterConvert();
56
- });
58
+ }, {skipReason: 'GROK-13300'});
57
59
 
58
60
  test('selectRendererBySemType', async () => {
59
61
  await _selectRendererBySemType();
60
- });
62
+ }, {skipReason: 'GROK-13300'});
61
63
 
62
64
  test('setRendererManually', async () => {
63
65
  await _setRendererManually();
@@ -156,7 +158,7 @@ category('renderers', () => {
156
158
  expect(msaSeqCol.getTag(DG.TAGS.CELL_RENDERER), 'sequence');
157
159
 
158
160
  // check newColumn with UnitsHandler constructor
159
- const uh: UnitsHandler = new UnitsHandler(msaSeqCol);
161
+ const _uh: UnitsHandler = UnitsHandler.getOrCreate(msaSeqCol);
160
162
 
161
163
  dfList.push(df);
162
164
  tvList.push(tv);
@@ -184,7 +186,7 @@ category('renderers', () => {
184
186
  expect(resCellRenderer, 'sequence');
185
187
 
186
188
  // check tgtCol with UnitsHandler constructor
187
- const uh: UnitsHandler = new UnitsHandler(tgtCol);
189
+ const _uh: UnitsHandler = UnitsHandler.getOrCreate(tgtCol);
188
190
  }
189
191
 
190
192
  async function _selectRendererBySemType() {
@@ -2,10 +2,10 @@ import * as grok from 'datagrok-api/grok';
2
2
  import * as ui from 'datagrok-api/ui';
3
3
  import * as DG from 'datagrok-api/dg';
4
4
 
5
- import {after, before, category, test, expect, delay} from '@datagrok-libraries/utils/src/test';
5
+ import {category, test} from '@datagrok-libraries/utils/src/test';
6
6
  import {readDataframe} from './utils';
7
7
  import {_testSequenceSpaceReturnsResult} from './sequence-space-utils';
8
- import { DimReductionMethods } from '@datagrok-libraries/ml/src/reduce-dimensionality';
8
+ import {DimReductionMethods} from '@datagrok-libraries/ml/src/reduce-dimensionality';
9
9
 
10
10
  category('sequenceSpace', async () => {
11
11
  let testFastaDf: DG.DataFrame;
@@ -14,18 +14,20 @@ category('sequenceSpace', async () => {
14
14
  let testHelmWithEmptyRowsTableView: DG.TableView;
15
15
 
16
16
  test('sequenceSpaceOpens', async () => {
17
- testFastaDf = await readDataframe('tests/sample_MSA_data.csv');
17
+ testFastaDf = await readDataframe(
18
+ DG.Test.isInBenchmark ? 'test/peptides_motif-with-random_10000.csv' : 'tests/100_3_clustests.csv',
19
+ );
18
20
  testFastaTableView = grok.shell.addTableView(testFastaDf);
19
- await _testSequenceSpaceReturnsResult(testFastaDf, DimReductionMethods.UMAP, 'MSA');
21
+ await _testSequenceSpaceReturnsResult(testFastaDf, DimReductionMethods.UMAP, 'sequence');
20
22
  grok.shell.closeTable(testFastaDf);
21
23
  testFastaTableView.close();
22
- }, {skipReason: 'GROK-12775'});
24
+ });
23
25
 
24
26
  test('sequenceSpaceWithEmptyRows', async () => {
25
- testHelmWithEmptyRows = await readDataframe('tests/sample_MSA_data_empty_vals.csv');
27
+ testHelmWithEmptyRows = await readDataframe('tests/100_3_clustests_empty_vals.csv');
26
28
  testHelmWithEmptyRowsTableView = grok.shell.addTableView(testHelmWithEmptyRows);
27
- await _testSequenceSpaceReturnsResult(testHelmWithEmptyRows, DimReductionMethods.UMAP, 'MSA');
29
+ await _testSequenceSpaceReturnsResult(testHelmWithEmptyRows, DimReductionMethods.UMAP, 'sequence');
28
30
  grok.shell.closeTable(testHelmWithEmptyRows);
29
31
  testHelmWithEmptyRowsTableView.close();
30
- }, {skipReason: 'GROK-12775'});
32
+ });
31
33
  });
@@ -2,10 +2,12 @@ import * as DG from 'datagrok-api/dg';
2
2
  import * as grok from 'datagrok-api/grok';
3
3
  import {expect} from '@datagrok-libraries/utils/src/test';
4
4
  import {sequenceSpaceTopMenu} from '../package';
5
- import { MmDistanceFunctionsNames } from '@datagrok-libraries/ml/src/macromolecule-distance-functions';
6
- import { DimReductionMethods } from '@datagrok-libraries/ml/src/reduce-dimensionality';
5
+ import {MmDistanceFunctionsNames} from '@datagrok-libraries/ml/src/macromolecule-distance-functions';
6
+ import {DimReductionMethods} from '@datagrok-libraries/ml/src/reduce-dimensionality';
7
7
 
8
- export async function _testSequenceSpaceReturnsResult(df: DG.DataFrame, algorithm: DimReductionMethods, colName: string) {
8
+ export async function _testSequenceSpaceReturnsResult(
9
+ df: DG.DataFrame, algorithm: DimReductionMethods, colName: string,
10
+ ) {
9
11
  // await grok.data.detectSemanticTypes(df);
10
12
  const col: DG.Column = df.getCol(colName);
11
13
  const semType: string = await grok.functions.call('Bio:detectMacromolecule', {col: col});
@@ -14,4 +16,4 @@ export async function _testSequenceSpaceReturnsResult(df: DG.DataFrame, algorith
14
16
 
15
17
  const sp = await sequenceSpaceTopMenu(df, df.col(colName)!, algorithm, MmDistanceFunctionsNames.LEVENSHTEIN, true);
16
18
  expect(sp != null, true);
17
- }
19
+ }
@@ -1,75 +1,73 @@
1
- import {after, before, category, test, expect, delay} from '@datagrok-libraries/utils/src/test';
1
+ import {after, before, category, test, expect, awaitCheck} from '@datagrok-libraries/utils/src/test';
2
2
  import * as DG from 'datagrok-api/dg';
3
- import {createTableView, readDataframe} from './utils';
3
+ import {createTableView} from './utils';
4
4
  import * as grok from 'datagrok-api/grok';
5
5
  import {SequenceSimilarityViewer} from '../analysis/sequence-similarity-viewer';
6
6
 
7
- let viewList: DG.ViewBase[];
8
- let dfList: DG.DataFrame[];
9
-
10
-
11
7
  category('similarity/diversity', async () => {
12
-
13
8
  before(async () => {
14
- viewList = [];
15
- dfList = [];
9
+ // grok.shell.closeAll();
16
10
  });
17
11
 
18
12
  after(async () => {
19
- for (const view of viewList) view.close();
20
- for (const df of dfList) grok.shell.closeTable(df);
13
+ grok.shell.closeAll();
21
14
  });
22
15
 
23
-
24
16
  test('similaritySearchViewer', async () => {
25
17
  await _testSimilaritySearchViewer();
26
18
  });
19
+
27
20
  test('diversitySearchViewer', async () => {
28
21
  await _testDiversitySearchViewer();
29
22
  });
30
23
  });
31
24
 
32
25
  async function _testSimilaritySearchViewer() {
33
- const molecules = await createTableView('tests/sample_MSA_data.csv');
34
- const viewer = molecules.addViewer('Sequence Similarity Search');
35
- await delay(100);
36
- const similaritySearchViewer = getSearchViewer(viewer, 'Sequence Similarity Search');
37
- viewList.push(similaritySearchViewer);
38
- viewList.push(molecules);
39
- if (!similaritySearchViewer.molCol)
40
- await waitForCompute(similaritySearchViewer);
41
- expect(similaritySearchViewer.fingerprint, 'Morgan');
42
- expect(similaritySearchViewer.distanceMetric, 'Tanimoto');
43
- expect(similaritySearchViewer.scores!.get(0), DG.FLOAT_NULL);
44
- expect(similaritySearchViewer.idxs!.get(0), 0);
45
- expect(similaritySearchViewer.molCol!.get(0),
46
- 'D-Tyr_Et/Tyr_ab-dehydroMe/dV/E/N/D-Orn/D-aThr//Phe_4Me');
47
- expect(similaritySearchViewer.scores!.get(1), 0.4722222089767456);
48
- expect(similaritySearchViewer.idxs!.get(1), 11);
49
- expect(similaritySearchViewer.molCol!.get(1),
50
- 'meI/hHis//Aca/meM/Tyr_ab-dehydroMe/dV/E/N/D-Orn/D-aThr//Phe_4Me');
51
- const waiter = waitForCompute(similaritySearchViewer); /* subscribe for computeCompleted event before start compute */
52
- molecules.dataFrame.currentRowIdx = 1;
53
- await waiter;
54
- expect(similaritySearchViewer.targetMoleculeIdx, 1);
55
- expect(similaritySearchViewer.molCol!.get(0),
56
- 'meI/hHis/Aca/Cys_SEt/T/dK/Thr_PO3H2/Aca/Tyr_PO3H2/D-Chg/dV/Phe_ab-dehydro/N/D-Orn/D-aThr//Phe_4Me');
57
- }
26
+ try {
27
+ const molecules = await createTableView('tests/sample_MSA_data.csv');
28
+ const viewer = molecules.addViewer('Sequence Similarity Search');
29
+ await awaitCheck(() => getSearchViewer(viewer, 'Sequence Similarity Search') !== undefined,
30
+ 'Sequence Similarity Search has not been created', 5000);
31
+ const similaritySearchViewer: SequenceSimilarityViewer = getSearchViewer(viewer, 'Sequence Similarity Search');
32
+ await awaitCheck(() => similaritySearchViewer.root.getElementsByClassName('d4-grid').length !== 0,
33
+ 'Sequence Similarity Search has not been created', 5000);
34
+ expect(similaritySearchViewer.fingerprint, 'Morgan');
35
+ expect(similaritySearchViewer.distanceMetric, 'Tanimoto');
36
+ expect(similaritySearchViewer.scores!.get(0), DG.FLOAT_NULL);
37
+ expect(similaritySearchViewer.idxs!.get(0), 0);
38
+ expect(similaritySearchViewer.molCol!.get(0),
39
+ 'D-Tyr_Et/Tyr_ab-dehydroMe/dV/E/N/D-Orn/D-aThr//Phe_4Me');
40
+ expect(similaritySearchViewer.scores!.get(1), 0.4722222089767456);
41
+ expect(similaritySearchViewer.idxs!.get(1), 11);
42
+ expect(similaritySearchViewer.molCol!.get(1),
43
+ 'meI/hHis//Aca/meM/Tyr_ab-dehydroMe/dV/E/N/D-Orn/D-aThr//Phe_4Me');
44
+ molecules.dataFrame.currentRowIdx = 1;
45
+ await awaitCheck(() => similaritySearchViewer.targetMoleculeIdx === 1, 'Target molecule has not been changed', 5000);
46
+ await awaitCheck(() => similaritySearchViewer.molCol!.get(0) ===
47
+ 'meI/hHis/Aca/Cys_SEt/T/dK/Thr_PO3H2/Aca/Tyr_PO3H2/D-Chg/dV/Phe_ab-dehydro/N/D-Orn/D-aThr//Phe_4Me',
48
+ 'Incorrect first similar molecule', 5000);
58
49
 
50
+ } finally {
51
+ grok.shell.closeAll();
52
+ }
53
+ }
59
54
 
60
55
  async function _testDiversitySearchViewer() {
61
- const molecules = await createTableView('tests/sample_MSA_data.csv');
62
- const viewer = molecules.addViewer('Sequence Diversity Search');
63
- await delay(10);
64
- const diversitySearchviewer = getSearchViewer(viewer, 'Sequence Diversity Search');
65
- viewList.push(diversitySearchviewer);
66
- viewList.push(molecules);
67
- if (!diversitySearchviewer.renderMolIds)
68
- await waitForCompute(diversitySearchviewer);
69
- expect(diversitySearchviewer.fingerprint, 'Morgan');
70
- expect(diversitySearchviewer.distanceMetric, 'Tanimoto');
71
- expect(diversitySearchviewer.initialized, true);
72
- expect(diversitySearchviewer.renderMolIds.length > 0, true);
56
+ try {
57
+ const molecules = await createTableView('tests/sample_MSA_data.csv');
58
+ const viewer = molecules.addViewer('Sequence Diversity Search');
59
+ await awaitCheck(() => getSearchViewer(viewer, 'Sequence Diversity Search') !== undefined,
60
+ 'Sequence Diversity Search has not been created', 5000);
61
+ const diversitySearchviewer = getSearchViewer(viewer, 'Sequence Diversity Search');
62
+ await awaitCheck(() => diversitySearchviewer.root.getElementsByClassName('d4-grid').length !== 0,
63
+ 'Sequence Diversity Search has not been created', 5000);
64
+ expect(diversitySearchviewer.fingerprint, 'Morgan');
65
+ expect(diversitySearchviewer.distanceMetric, 'Tanimoto');
66
+ expect(diversitySearchviewer.initialized, true);
67
+ expect(diversitySearchviewer.renderMolIds.length > 0, true);
68
+ } finally {
69
+ grok.shell.closeAll();
70
+ }
73
71
  }
74
72
 
75
73
  function getSearchViewer(viewer: DG.Viewer, name: string) {
@@ -80,15 +78,3 @@ function getSearchViewer(viewer: DG.Viewer, name: string) {
80
78
  }
81
79
  }
82
80
 
83
- async function waitForCompute(viewer: SequenceSimilarityViewer) {
84
- const t = new Promise((resolve, reject) => {
85
- viewer.computeCompleted.subscribe(async (_: any) => {
86
- try {
87
- resolve(true);
88
- } catch (error) {
89
- reject(error);
90
- }
91
- });
92
- });
93
- await t;
94
- }
@@ -2,66 +2,58 @@ import * as grok from 'datagrok-api/grok';
2
2
  import * as ui from 'datagrok-api/ui';
3
3
  import * as DG from 'datagrok-api/dg';
4
4
 
5
- import {after, before, category, test, expect, expectArray, expectObject, delay} from '@datagrok-libraries/utils/src/test';
5
+ import {after, before, category, test, expect, expectArray, delay} from '@datagrok-libraries/utils/src/test';
6
6
  import * as C from '../utils/constants';
7
7
  import {_package, getHelmMonomers} from '../package';
8
- import {errorToConsole} from '@datagrok-libraries/utils/src/to-console';
9
8
  import {TAGS as bioTAGS, splitterAsFasta, splitterAsHelm} from '@datagrok-libraries/bio/src/utils/macromolecule';
10
9
 
11
10
 
12
- category('splitters', () => {
13
- let tvList: DG.TableView[];
14
- let dfList: DG.DataFrame[];
15
-
11
+ category('splitters', async () => {
16
12
  before(async () => {
17
- tvList = [];
18
- dfList = [];
19
13
  });
20
14
 
21
15
  after(async () => {
22
- dfList.forEach((df: DG.DataFrame) => { grok.shell.closeTable(df); });
23
- tvList.forEach((tv: DG.TableView) => tv.close());
24
16
  });
25
17
 
26
- const helm1 = 'PEPTIDE1{meI.hHis.Aca.N.T.dE.Thr_PO3H2.Aca.D-Tyr_Et.Tyr_ab-dehydroMe.dV.E.N.D-Orn.D-aThr.Phe_4Me}$$$';
18
+ const _helm1 = 'PEPTIDE1{meI.hHis.Aca.N.T.dE.Thr_PO3H2.Aca.D-Tyr_Et.Tyr_ab-dehydroMe.dV.E.N.D-Orn.D-aThr.Phe_4Me}$$$';
27
19
 
28
- const helm2 = 'PEPTIDE1{meI.hHis.Hcy.Q.T.W.Q.Phe_4NH2.D-Tyr_Et.Tyr_ab-dehydroMe.dV.E.N.N.meK}$$$';
20
+ const _helm2 = 'PEPTIDE1{meI.hHis.Hcy.Q.T.W.Q.Phe_4NH2.D-Tyr_Et.Tyr_ab-dehydroMe.dV.E.N.N.meK}$$$';
29
21
 
30
22
  const data: { [key: string]: [string, string[]] } = {
31
23
  fastaMulti: [
32
24
  'M[MeI]YKETLL[MeF]PKTDFPMRGGL[MeA]',
33
25
  ['M', 'MeI', 'Y', 'K', 'E', 'T', 'L', 'L', 'MeF', 'P',
34
- 'K', 'T', 'D', 'F', 'P', 'M', 'R', 'G', 'G', 'L', 'MeA']
26
+ 'K', 'T', 'D', 'F', 'P', 'M', 'R', 'G', 'G', 'L', 'MeA'],
35
27
  ],
36
28
  helm1: [
37
29
  'PEPTIDE1{meI.hHis.Aca.N.T.dE.Thr_PO3H2.Aca.D-Tyr_Et.Tyr_ab-dehydroMe.dV.E.N.D-Orn.D-aThr.Phe_4Me}$$$',
38
30
  ['meI', 'hHis', 'Aca', 'N', 'T', 'dE', 'Thr_PO3H2', 'Aca', 'D-Tyr_Et',
39
- 'Tyr_ab-dehydroMe', 'dV', 'E', 'N', 'D-Orn', 'D-aThr', 'Phe_4Me']
31
+ 'Tyr_ab-dehydroMe', 'dV', 'E', 'N', 'D-Orn', 'D-aThr', 'Phe_4Me'],
40
32
  ],
41
33
  helm2: [
42
34
  'PEPTIDE1{meI.hHis.Aca.N.T.dK.Thr_PO3H2.Aca.D-Tyr_Et.D-Dap.dV.E.N.pnG.Phe_4Me}$$$',
43
35
  ['meI', 'hHis', 'Aca', 'N', 'T', 'dK', 'Thr_PO3H2', 'Aca',
44
- 'D-Tyr_Et', 'D-Dap', 'dV', 'E', 'N', 'pnG', 'Phe_4Me']
36
+ 'D-Tyr_Et', 'D-Dap', 'dV', 'E', 'N', 'pnG', 'Phe_4Me'],
45
37
  ],
46
38
  // HELM editor dialog returns HELM string with multichar monomer names in square brackets
47
39
  helm3: [
48
40
  'PEPTIDE1{[meI].[hHis].[Aca].N.T.[dK].[Thr_PO3H2].[Aca].[D-Tyr_Et].[D-Dap].[dV].E.N.[pnG].[Phe_4Me]}$$$',
49
41
  ['meI', 'hHis', 'Aca', 'N', 'T', 'dK', 'Thr_PO3H2', 'Aca',
50
- 'D-Tyr_Et', 'D-Dap', 'dV', 'E', 'N', 'pnG', 'Phe_4Me']
42
+ 'D-Tyr_Et', 'D-Dap', 'dV', 'E', 'N', 'pnG', 'Phe_4Me'],
51
43
  ],
52
44
 
53
45
  testHelm1: [
54
46
  'RNA1{R(U)P.R(T)P.R(G)P.R(C)P.R(A)}$$$$',
55
- ['R(U)P', 'R(T)P', 'R(G)P', 'R(C)P', 'R(A)']
47
+ ['R(U)P', 'R(T)P', 'R(G)P', 'R(C)P', 'R(A)'],
56
48
  ],
57
49
 
58
50
  testHelm2: [
59
51
  'RNA1{P.R(U)P.R(T)}$$$$',
60
- ['P', 'R(U)P', 'R(T)']
52
+ ['P', 'R(U)P', 'R(T)'],
61
53
  ],
62
54
  testHelm3: [
63
55
  'RNA1{P.R(U).P.R(T)}$$$$',
64
- ['P', 'R(U)', 'P', 'R(T)']
56
+ ['P', 'R(U)', 'P', 'R(T)'],
65
57
  ],
66
58
  };
67
59
 
@@ -85,7 +77,8 @@ category('splitters', () => {
85
77
  seqCol.semType = semType;
86
78
  seqCol.setTag(bioTAGS.aligned, C.MSA);
87
79
 
88
- const tv: DG.TableView = grok.shell.addTableView(df);
80
+ const _tv: DG.TableView = grok.shell.addTableView(df);
81
+ await delay(500); // needed to account for table adding
89
82
  // call to calculate 'cell.renderer' tag
90
83
  await grok.data.detectSemanticTypes(df);
91
84
 
@@ -99,6 +92,7 @@ category('splitters', () => {
99
92
  PEPTIDE1{hHis.N.T}$$$,5.30751
100
93
  PEPTIDE1{hHis.Aca.Cys_SEt}$$$,5.72388
101
94
  `);
95
+ await grok.data.detectSemanticTypes(df);
102
96
  const expectedMonomerList = ['hHis', 'Aca', 'Cys_SEt', 'N', 'T'];
103
97
 
104
98
  const helmCol: DG.Column = df.getCol('HELM');
@@ -95,44 +95,36 @@ category('toAtomicLevel', async () => {
95
95
  [csvTests.fastaDna]: `seq
96
96
  ACGTC
97
97
  CAGTGT
98
- TTCAAC
99
- `,
98
+ TTCAAC`,
100
99
  [csvTests.fastaRna]: `seq
101
100
  ACGUC
102
101
  CAGUGU
103
- UUCAAC
104
- `,
102
+ UUCAAC`,
105
103
  [csvTests.fastaPt]: `seq
106
104
  FWPHEY
107
105
  YNRQWYV
108
- MKPSEYV
109
- `,
106
+ MKPSEYV`,
110
107
  [csvTests.separatorDna]: `seq
111
108
  A/C/G/T/C
112
109
  C/A/G/T/G/T
113
- T/T/C/A/A/C
114
- `,
110
+ T/T/C/A/A/C`,
115
111
  [csvTests.separatorRna]: `seq
116
112
  A*C*G*U*C
117
113
  C*A*G*U*G*U
118
- U*U*C*A*A*C
119
- `,
114
+ U*U*C*A*A*C`,
120
115
  [csvTests.separatorPt]: `seq
121
116
  F-W-P-H-E-Y
122
117
  Y-N-R-Q-W-Y-V
123
- M-K-P-S-E-Y-V
124
- `,
118
+ M-K-P-S-E-Y-V`,
125
119
  [csvTests.separatorUn]: `seq
126
120
  meI-hHis-Aca-N-T-dE-Thr_PO3H2-Aca-D
127
121
  meI-hHis-Aca-Cys_SEt-T-dK-Thr_PO3H2-Aca-Tyr_PO3H2
128
- Lys_Boc-hHis-Aca-Cys_SEt-T-dK-Thr_PO3H2-Aca-Tyr_PO3H2
129
- `,
122
+ Lys_Boc-hHis-Aca-Cys_SEt-T-dK-Thr_PO3H2-Aca-Tyr_PO3H2`,
130
123
 
131
124
  [csvTests.helm]: `seq
132
125
  PEPTIDE1{meI.D-gGlu.Aca.N.T.dE.Thr_PO3H2.Aca.D}$$$
133
126
  PEPTIDE1{meI.hHis.Aca.Cys_SEt.T.dK.Thr_PO3H2.Aca.Tyr_PO3H2}$$$
134
- PEPTIDE1{Lys_Boc.hHis.Aca.Cys_SEt.T.dK.Thr_PO3H2.Aca.Tyr_PO3H2}$$$
135
- `,
127
+ PEPTIDE1{Lys_Boc.hHis.Aca.Cys_SEt.T.dK.Thr_PO3H2.Aca.Tyr_PO3H2}$$$`,
136
128
  };
137
129
 
138
130
  /** Also detects semantic types
@@ -183,5 +175,5 @@ PEPTIDE1{Lys_Boc.hHis.Aca.Cys_SEt.T.dK.Thr_PO3H2.Aca.Tyr_PO3H2}$$$
183
175
  async function _testToAtomicLevel(df: DG.DataFrame, seqColName: string = 'seq', monomerLibHelper: IMonomerLibHelper) {
184
176
  const seqCol: DG.Column<string> = df.getCol(seqColName);
185
177
  const monomerLib: IMonomerLib = monomerLibHelper.getBioLib();
186
- const resCol = await _toAtomicLevel(df, seqCol, monomerLib);
178
+ const _resCol = await _toAtomicLevel(df, seqCol, monomerLib);
187
179
  }
@@ -0,0 +1,106 @@
1
+ import * as grok from 'datagrok-api/grok';
2
+ import * as ui from 'datagrok-api/ui';
3
+ import * as DG from 'datagrok-api/dg';
4
+
5
+ import {category, test, expect, expectArray} from '@datagrok-libraries/utils/src/test';
6
+ import {UnitsHandler} from '@datagrok-libraries/bio/src/utils/units-handler';
7
+
8
+ category('UnitsHandler', () => {
9
+ const data: { [testName: string]: { src: { csv: string }, tgt: { splitted: string[][] } } } = {
10
+ fasta: {
11
+ src: {
12
+ csv: `seq
13
+ ACGTC
14
+ CAGTGT
15
+ TTCAAC`
16
+ },
17
+ tgt: {
18
+ splitted: [
19
+ ['A', 'C', 'G', 'T', 'C'],
20
+ ['C', 'A', 'G', 'T', 'G', 'T'],
21
+ ['T', 'T', 'C', 'A', 'A', 'C']
22
+ ]
23
+ }
24
+ },
25
+ fastaMsa: {
26
+ src: {
27
+ csv: `seq
28
+ AC-GT-CT
29
+ CAC-T-GT
30
+ ACCGTACT`,
31
+ },
32
+ tgt: {
33
+ splitted: [
34
+ //@formatter:off
35
+ ['A', 'C', '' , 'G', 'T', '' , 'C', 'T'],
36
+ ['C', 'A', 'C', '' , 'T', '' , 'G', 'T'],
37
+ ['A', 'C', 'C', 'G', 'T', 'A', 'C', 'T'],
38
+ //@formatter:on
39
+ ]
40
+ }
41
+ },
42
+ separator: {
43
+ src: {
44
+ csv: `seq
45
+ abc-dfgg-abc1-cfr3-rty-wert
46
+ rut12-her2-rty-wert-abc-abc1-dfgg
47
+ rut12-rty-her2-abc-cfr3-wert-rut12`,
48
+ },
49
+ tgt: {
50
+ splitted: [
51
+ ['abc', 'dfgg', 'abc1', 'cfr3', 'rty', 'wert'],
52
+ ['rut12', 'her2', 'rty', 'wert', 'abc', 'abc1', 'dfgg'],
53
+ ['rut12', 'rty', 'her2', 'abc', 'cfr3', 'wert', 'rut12']
54
+ ]
55
+ }
56
+ },
57
+
58
+ separatorMsa: {
59
+ src: {
60
+ csv: `seq
61
+ abc-dfgg-abc1-cfr3-rty-wert
62
+ rut12-her2-rty--abc1-dfgg
63
+ rut12-rty-her2---wert`
64
+ },
65
+ tgt: {
66
+ splitted: [
67
+ ['abc', 'dfgg', 'abc1', 'cfr3', 'rty', 'wert'],
68
+ ['rut12', 'her2', 'rty', '', 'abc1', 'dfgg'],
69
+ ['rut12', 'rty', 'her2', '', '', 'wert'],
70
+ ]
71
+ }
72
+ },
73
+ helm: {
74
+ src: {
75
+ csv: `seq
76
+ PEPTIDE1{meI.hHis.Aca.N.T.dE.Thr_PO3H2.Aca.D-Tyr_Et}$$$$
77
+ PEPTIDE1{meI.hHis.Aca.Cys_SEt.T.dK.Thr_PO3H2.Aca}$$$$
78
+ PEPTIDE1{Lys_Boc.hHis.Aca.Cys_SEt.T.dK.Thr_PO3H2.Aca}$$$$
79
+ PEPTIDE1{meI.hHis.Aca.Cys_SEt.T.dK.Thr_PO3H2}$$$$`
80
+ },
81
+ tgt: {
82
+ splitted: [
83
+ ['meI', 'hHis', 'Aca', 'N', 'T', 'dE', 'Thr_PO3H2', 'Aca', 'D-Tyr_Et'],
84
+ ['meI', 'hHis', 'Aca', 'Cys_SEt', 'T', 'dK', 'Thr_PO3H2', 'Aca'],
85
+ ['Lys_Boc', 'hHis', 'Aca', 'Cys_SEt', 'T', 'dK', 'Thr_PO3H2', 'Aca'],
86
+ ['meI', 'hHis', 'Aca', 'Cys_SEt', 'T', 'dK', 'Thr_PO3H2'],
87
+ ]
88
+ }
89
+ }
90
+ };
91
+
92
+ for (const [testName, testData] of Object.entries(data)) {
93
+ test(`splitted-${testName}`, async () => {
94
+ const df: DG.DataFrame = DG.DataFrame.fromCsv(testData.src.csv);
95
+ const col: DG.Column = df.getCol('seq');
96
+
97
+ const semType = await grok.functions.call('Bio:detectMacromolecule', {col: col});
98
+ if (semType) col.semType = semType;
99
+ expect(col.semType, DG.SEMTYPE.MACROMOLECULE);
100
+
101
+ const uh = UnitsHandler.getOrCreate(col);
102
+ const splitted: string[][] = uh.splitted;
103
+ expectArray(splitted, testData.tgt.splitted);
104
+ });
105
+ }
106
+ });
@@ -2,75 +2,71 @@ import * as grok from 'datagrok-api/grok';
2
2
  import * as ui from 'datagrok-api/ui';
3
3
  import * as DG from 'datagrok-api/dg';
4
4
 
5
- import {after, before, category, test, expect, expectObject} from '@datagrok-libraries/utils/src/test';
5
+ import {category, test, expect} from '@datagrok-libraries/utils/src/test';
6
6
  import {UnitsHandler} from '@datagrok-libraries/bio/src/utils/units-handler';
7
7
  import {ALPHABET, NOTATION, TAGS} from '@datagrok-libraries/bio/src/utils/macromolecule';
8
8
 
9
9
  const seqDna = `seq
10
10
  ACGTC
11
11
  CAGTGT
12
- TTCAAC
13
- `;
12
+ TTCAAC`;
14
13
 
15
14
  const seqDnaMsa = `seq
16
15
  AC-GT-CT
17
16
  CAC-T-GT
18
- ACCGTACT
19
- `;
17
+ ACCGTACT`;
20
18
 
21
19
  const seqUn = `seq
22
20
  abc-dfgg-abc1-cfr3-rty-wert
23
21
  rut12-her2-rty-wert-abc-abc1-dfgg
24
- rut12-rty-her2-abc-cfr3-wert-rut12
25
- `;
22
+ rut12-rty-her2-abc-cfr3-wert-rut12`;
26
23
 
27
24
  const seqHelm = `seq
28
25
  PEPTIDE1{meI.hHis.Aca.N.T.dE.Thr_PO3H2.Aca.D-Tyr_Et.Tyr_ab-dehydroMe.dV.E.N.D-Orn.D-aThr.Phe_4Me}$$$$
29
26
  PEPTIDE1{meI.hHis.Aca.Cys_SEt.T.dK.Thr_PO3H2.Aca.Tyr_PO3H2.D-Chg.dV.Phe_ab-dehydro.N.D-Orn.D-aThr.Phe_4Me}$$$$
30
27
  PEPTIDE1{Lys_Boc.hHis.Aca.Cys_SEt.T.dK.Thr_PO3H2.Aca.Tyr_PO3H2.D-Chg.dV.Thr_PO3H2.N.D-Orn.D-aThr.Phe_4Me}$$$$
31
- PEPTIDE1{meI.hHis.Aca.Cys_SEt.T.dK.Thr_PO3H2.Aca.Tyr_PO3H2.D-Chg.dV.Thr_PO3H2.N.D-Orn.D-aThr.Phe_4Me}$$$$
32
- `;
28
+ PEPTIDE1{meI.hHis.Aca.Cys_SEt.T.dK.Thr_PO3H2.Aca.Tyr_PO3H2.D-Chg.dV.Thr_PO3H2.N.D-Orn.D-aThr.Phe_4Me}$$$$`;
33
29
 
34
- category('UnitsHandler', () =>{
35
- test('Seq-Fasta', async () =>{
36
- const [df, uh] = await loadCsvWithDetection(seqDna);
30
+ category('UnitsHandler', () => {
31
+ test('Seq-Fasta', async () => {
32
+ const [_df, uh] = await loadCsvWithDetection(seqDna);
37
33
  expect(uh.notation, NOTATION.FASTA);
38
34
  expect(uh.isMsa(), false);
39
35
  });
40
36
 
41
- test('Seq-Fasta-MSA', async () =>{
42
- const [df, uh] = await loadCsvWithDetection(seqDnaMsa);
37
+ test('Seq-Fasta-MSA', async () => {
38
+ const [_df, uh] = await loadCsvWithDetection(seqDnaMsa);
43
39
  expect(uh.notation, NOTATION.FASTA);
44
40
  expect(uh.isMsa(), true);
45
41
  });
46
42
 
47
- test('Seq-Fasta-units', async () =>{
48
- const [df, uh] = await loadCsvWithTag(seqDna, DG.TAGS.UNITS, NOTATION.FASTA);
43
+ test('Seq-Fasta-units', async () => {
44
+ const [_df, uh] = await loadCsvWithTag(seqDna, DG.TAGS.UNITS, NOTATION.FASTA);
49
45
  expect(uh.notation, NOTATION.FASTA);
50
46
  expect(uh.isMsa(), false);
51
47
  });
52
48
 
53
- test('Seq-Fasta-MSA-units', async () =>{
54
- const [df, uh] = await loadCsvWithTag(seqDnaMsa, DG.TAGS.UNITS, NOTATION.FASTA);
49
+ test('Seq-Fasta-MSA-units', async () => {
50
+ const [_df, uh] = await loadCsvWithTag(seqDnaMsa, DG.TAGS.UNITS, NOTATION.FASTA);
55
51
  expect(uh.notation, NOTATION.FASTA);
56
52
  expect(uh.isMsa(), true);
57
53
  });
58
54
 
59
- test('Seq-Helm', async () =>{
60
- const [df, uh] = await loadCsvWithTag(seqHelm, DG.TAGS.UNITS, NOTATION.HELM);
55
+ test('Seq-Helm', async () => {
56
+ const [_df, uh] = await loadCsvWithTag(seqHelm, DG.TAGS.UNITS, NOTATION.HELM);
61
57
  expect(uh.notation, NOTATION.HELM);
62
58
  expect(uh.isHelm(), true);
63
59
  });
64
60
 
65
- test('Seq-UN', async () =>{
66
- const [df, uh] = await loadCsvWithTag(seqUn, DG.TAGS.UNITS, NOTATION.SEPARATOR);
61
+ test('Seq-UN', async () => {
62
+ const [_df, uh] = await loadCsvWithTag(seqUn, DG.TAGS.UNITS, NOTATION.SEPARATOR);
67
63
  expect(uh.notation, NOTATION.SEPARATOR);
68
64
  expect(uh.separator, '-');
69
65
  expect(uh.alphabet, ALPHABET.UN);
70
66
  });
71
67
 
72
- test('Seq-UN-auto', async () =>{
73
- const [df, uh] = await loadCsvWithDetection(seqUn);
68
+ test('Seq-UN-auto', async () => {
69
+ const [_df, uh] = await loadCsvWithDetection(seqUn);
74
70
  expect(uh.notation, NOTATION.SEPARATOR);
75
71
  expect(uh.separator, '-');
76
72
  expect(uh.alphabet, ALPHABET.UN);
@@ -79,7 +75,7 @@ category('UnitsHandler', () =>{
79
75
  async function loadCsvWithDetection(csv: string): Promise<[df: DG.DataFrame, uh: UnitsHandler]> {
80
76
  const df = DG.DataFrame.fromCsv(csv);
81
77
  await grok.data.detectSemanticTypes(df);
82
- const uh = new UnitsHandler(df.getCol('seq'));
78
+ const uh = UnitsHandler.getOrCreate(df.getCol('seq'));
83
79
  return [df, uh];
84
80
  }
85
81
 
@@ -91,7 +87,7 @@ category('UnitsHandler', () =>{
91
87
  col.semType = DG.SEMTYPE.MACROMOLECULE;
92
88
  if (value === NOTATION.SEPARATOR)
93
89
  col.setTag(TAGS.separator, '-');
94
- const uh = new UnitsHandler(df.getCol('seq'));
90
+ const uh = UnitsHandler.getOrCreate(df.getCol('seq'));
95
91
  return [df, uh];
96
92
  }
97
93
  });