@datagrok/bio 2.11.5 → 2.11.7

This diff compares the contents of two publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those package versions as they appear in their respective public registries.
package/package.json CHANGED
@@ -5,7 +5,7 @@
5
5
  "name": "Leonid Stolbov",
6
6
  "email": "lstolbov@datagrok.ai"
7
7
  },
8
- "version": "2.11.5",
8
+ "version": "2.11.7",
9
9
  "description": "Bioinformatics support (import/export of sequences, conversion, visualization, analysis). [See more](https://github.com/datagrok-ai/public/blob/master/packages/Bio/README.md) for details.",
10
10
  "repository": {
11
11
  "type": "git",
@@ -34,7 +34,7 @@
34
34
  ],
35
35
  "dependencies": {
36
36
  "@biowasm/aioli": "^3.1.0",
37
- "@datagrok-libraries/bio": "^5.39.1",
37
+ "@datagrok-libraries/bio": "^5.39.2",
38
38
  "@datagrok-libraries/chem-meta": "^1.0.1",
39
39
  "@datagrok-libraries/ml": "^6.3.53",
40
40
  "@datagrok-libraries/tutorials": "^1.3.6",
package/src/package.ts CHANGED
@@ -75,6 +75,7 @@ import {GetRegionApp} from './apps/get-region-app';
75
75
  import {GetRegionFuncEditor} from './utils/get-region-func-editor';
76
76
  import {sequenceToMolfile} from './utils/sequence-to-mol';
77
77
  import {errInfo} from './utils/err-info';
78
+ import {detectMacromoleculeProbeDo} from './utils/detect-macromolecule-probe';
78
79
 
79
80
  import {SHOW_SCATTERPLOT_PROGRESS} from '@datagrok-libraries/ml/src/functionEditors/seq-space-base-editor';
80
81
  import {DIMENSIONALITY_REDUCER_TERMINATE_EVENT}
@@ -427,7 +428,6 @@ export async function activityCliffs(df: DG.DataFrame, macroMolecule: DG.Column<
427
428
  };
428
429
  }
429
430
 
430
-
431
431
  const runCliffs = async () => {
432
432
  const sp = await getActivityCliffs(
433
433
  df,
@@ -457,13 +457,13 @@ export async function activityCliffs(df: DG.DataFrame, macroMolecule: DG.Column<
457
457
  return;
458
458
  }
459
459
 
460
+ const pi = DG.TaskBarProgressIndicator.create(`Running sequence activity cliffs ...`);
460
461
  return new Promise<DG.Viewer | undefined>((resolve, reject) => {
461
462
  if (df.rowCount > fastRowCount && !options?.[BYPASS_LARGE_DATA_WARNING]) {
462
463
  ui.dialog().add(ui.divText(`Activity cliffs analysis might take several minutes.
463
464
  Do you want to continue?`))
464
465
  .onOK(async () => {
465
- //const progressBar = DG.TaskBarProgressIndicator.create(`Running sequence activity cliffs ...`);
466
- runCliffs().then((res) => resolve(res)).catch((err) => reject(err)).finally(() => {});
466
+ runCliffs().then((res) => resolve(res)).catch((err) => reject(err));
467
467
  })
468
468
  .onCancel(() => { resolve(undefined); })
469
469
  .show();
@@ -474,7 +474,7 @@ export async function activityCliffs(df: DG.DataFrame, macroMolecule: DG.Column<
474
474
  const [errMsg, errStack] = errInfo(err);
475
475
  _package.logger.error(errMsg, undefined, errStack);
476
476
  throw err;
477
- });
477
+ }).finally(() => { pi.close(); });
478
478
  }
479
479
 
480
480
  //top-menu: Bio | Analyze | Sequence Space...
@@ -526,6 +526,7 @@ export async function sequenceSpaceTopMenu(
526
526
  const progress = (_nEpoch / epochsLength * 100);
527
527
  pg.update(progress, `Running sequence space ... ${progress.toFixed(0)}%`);
528
528
  }
529
+
529
530
  const embedColsNames = getEmbeddingColsNames(table);
530
531
  const withoutEmptyValues = DG.DataFrame.fromColumns([macroMolecule]).clone();
531
532
  const emptyValsIdxs = removeEmptyStringRows(withoutEmptyValues, macroMolecule);
@@ -1109,3 +1110,14 @@ export async function sdfToJsonLib(table: DG.DataFrame) {
1109
1110
  const jsonMonomerLibrary = JSON.stringify(_jsonMonomerLibrary);
1110
1111
  DG.Utils.download(`${table.name}.json`, jsonMonomerLibrary);
1111
1112
  }
1113
+
1114
+ // -- Utils --
1115
+
1116
+ //name: detectMacromoleculeProbe
1117
+ //input: file file
1118
+ //input: string colName = ''
1119
+ //input: int probeCount = 100
1120
+ export async function detectMacromoleculeProbe(file: DG.FileInfo, colName: string, probeCount: number): Promise<void> {
1121
+ const csv: string = await file.readAsString();
1122
+ await detectMacromoleculeProbeDo(csv, colName, probeCount);
1123
+ }
@@ -41,79 +41,79 @@ category('converters', () => {
41
41
  }
42
42
 
43
43
  const _csvTxts: { [key: string]: string } = {
44
- fastaPt: `seq
45
- FWPHEY
46
- YNRQWYV
47
- MKPSEYV`,
48
- separatorPt: `seq
49
- F-W-P-H-E-Y
50
- Y-N-R-Q-W-Y-V
51
- M-K-P-S-E-Y-V`,
52
- helmPt: `seq
53
- PEPTIDE1{F.W.P.H.E.Y}$$$$
54
- PEPTIDE1{Y.N.R.Q.W.Y.V}$$$$
55
- PEPTIDE1{M.K.P.S.E.Y.V}$$$$`,
56
- fastaDna: `seq
57
- ACGTC
58
- CAGTGT
59
- TTCAAC`,
60
- separatorDna: `seq
61
- A/C/G/T/C
62
- C/A/G/T/G/T
63
- T/T/C/A/A/C`,
64
- helmDna: `seq
65
- RNA1{d(A)p.d(C)p.d(G)p.d(T)p.d(C)p}$$$$
66
- RNA1{d(C)p.d(A)p.d(G)p.d(T)p.d(G)p.d(T)p}$$$$
67
- RNA1{d(T)p.d(T)p.d(C)p.d(A)p.d(A)p.d(C)p}$$$$`,
68
- fastaRna: `seq
69
- ACGUC
70
- CAGUGU
71
- UUCAAC`,
72
- separatorRna: `seq
73
- A*C*G*U*C
74
- C*A*G*U*G*U
75
- U*U*C*A*A*C`,
76
- helmRna: `seq
77
- RNA1{r(A)p.r(C)p.r(G)p.r(U)p.r(C)p}$$$$
78
- RNA1{r(C)p.r(A)p.r(G)p.r(U)p.r(G)p.r(U)p}$$$$
79
- RNA1{r(U)p.r(U)p.r(C)p.r(A)p.r(A)p.r(C)p}$$$$`,
80
- fastaGaps: `seq
81
- FW-PH-EYY
82
- FYNRQWYV-
83
- FKP-Q-SEYV`,
84
- separatorGaps: `seq
85
- F/W//P/H//E/Y/Y
86
- F/Y/N/R/Q/W/Y/V/
87
- F/K/P//Q//S/E/Y/V`,
88
- helmGaps: `seq
89
- PEPTIDE1{F.W.*.P.H.*.E.Y.Y}$$$$
90
- PEPTIDE1{F.Y.N.R.Q.W.Y.V.*}$$$$
91
- PEPTIDE1{F.K.P.*.Q.*.S.E.Y.V}$$$$`,
44
+ [Samples.fastaPt]: `seq
45
+ FWPHEYFWPHEY
46
+ YNRQWYVYNRQWYV
47
+ MKPSEYVMKPSEYV`,
48
+ [Samples.separatorPt]: `seq
49
+ F-W-P-H-E-Y-F-W-P-H-E-Y
50
+ Y-N-R-Q-W-Y-V-Y-N-R-Q-W-Y-V
51
+ M-K-P-S-E-Y-V-M-K-P-S-E-Y-V`,
52
+ [Samples.helmPt]: `seq
53
+ PEPTIDE1{F.W.P.H.E.Y.F.W.P.H.E.Y}$$$$
54
+ PEPTIDE1{Y.N.R.Q.W.Y.V.Y.N.R.Q.W.Y.V}$$$$
55
+ PEPTIDE1{M.K.P.S.E.Y.V.M.K.P.S.E.Y.V}$$$$`,
56
+ [Samples.fastaDna]: `seq
57
+ ACGTCACGTC
58
+ CAGTGTCAGTGT
59
+ TTCAACTTCAAC`,
60
+ [Samples.separatorDna]: `seq
61
+ A/C/G/T/C/A/C/G/T/C
62
+ C/A/G/T/G/T/C/A/G/T/G/T
63
+ T/T/C/A/A/C/T/T/C/A/A/C`,
64
+ [Samples.helmDna]: `seq
65
+ RNA1{d(A)p.d(C)p.d(G)p.d(T)p.d(C)p.d(A)p.d(C)p.d(G)p.d(T)p.d(C)p}$$$$
66
+ RNA1{d(C)p.d(A)p.d(G)p.d(T)p.d(G)p.d(T)p.d(C)p.d(A)p.d(G)p.d(T)p.d(G)p.d(T)p}$$$$
67
+ RNA1{d(T)p.d(T)p.d(C)p.d(A)p.d(A)p.d(C)p.d(T)p.d(T)p.d(C)p.d(A)p.d(A)p.d(C)p}$$$$`,
68
+ [Samples.fastaRna]: `seq
69
+ ACGUCACGUC
70
+ CAGUGUCAGUGU
71
+ UUCAACUUCAAC`,
72
+ [Samples.separatorRna]: `seq
73
+ A*C*G*U*C*A*C*G*U*C
74
+ C*A*G*U*G*U*C*A*G*U*G*U
75
+ U*U*C*A*A*C*U*U*C*A*A*C`,
76
+ [Samples.helmRna]: `seq
77
+ RNA1{r(A)p.r(C)p.r(G)p.r(U)p.r(C)p.r(A)p.r(C)p.r(G)p.r(U)p.r(C)p}$$$$
78
+ RNA1{r(C)p.r(A)p.r(G)p.r(U)p.r(G)p.r(U)p.r(C)p.r(A)p.r(G)p.r(U)p.r(G)p.r(U)p}$$$$
79
+ RNA1{r(U)p.r(U)p.r(C)p.r(A)p.r(A)p.r(C)p.r(U)p.r(U)p.r(C)p.r(A)p.r(A)p.r(C)p}$$$$`,
80
+ [Samples.fastaGaps]: `seq
81
+ FW-PH-EYYFW-PH-EYY
82
+ FYNRQWYV-FYNRQWYV-
83
+ FKP-Q-SEYVFKP-Q-SEYV`,
84
+ [Samples.separatorGaps]: `seq
85
+ F/W//P/H//E/Y/Y/F/W//P/H//E/Y/Y
86
+ F/Y/N/R/Q/W/Y/V//F/Y/N/R/Q/W/Y/V/
87
+ F/K/P//Q//S/E/Y/V/F/K/P//Q//S/E/Y/V`,
88
+ [Samples.helmGaps]: `seq
89
+ PEPTIDE1{F.W.*.P.H.*.E.Y.Y.F.W.*.P.H.*.E.Y.Y}$$$$
90
+ PEPTIDE1{F.Y.N.R.Q.W.Y.V.*.F.Y.N.R.Q.W.Y.V.*}$$$$
91
+ PEPTIDE1{F.K.P.*.Q.*.S.E.Y.V.F.K.P.*.Q.*.S.E.Y.V}$$$$`,
92
92
 
93
- fastaUn: `seq
94
- [meI][hHis][Aca]NT[dE][Thr_PO3H2][Aca]D
95
- [meI][hHis][Aca][Cys_SEt]T[dK][Thr_PO3H2][Aca][Tyr_PO3H2]
96
- [Lys_Boc][hHis][Aca][Cys_SEt]T[dK][Thr_PO3H2][Aca][Tyr_PO3H2]`,
97
- separatorUn: `seq
98
- meI-hHis-Aca-N-T-dE-Thr_PO3H2-Aca-D
99
- meI-hHis-Aca-Cys_SEt-T-dK-Thr_PO3H2-Aca-Tyr_PO3H2
100
- Lys_Boc-hHis-Aca-Cys_SEt-T-dK-Thr_PO3H2-Aca-Tyr_PO3H2`,
101
- helmUn: `seq
102
- PEPTIDE1{meI.hHis.Aca.N.T.dE.Thr_PO3H2.Aca.D}$$$$
103
- PEPTIDE1{meI.hHis.Aca.Cys_SEt.T.dK.Thr_PO3H2.Aca.Tyr_PO3H2}$$$$
104
- PEPTIDE1{Lys_Boc.hHis.Aca.Cys_SEt.T.dK.Thr_PO3H2.Aca.Tyr_PO3H2}$$$$`,
105
- helmLoneDeoxyribose: `seq
106
- RNA1{d(A).d(C).d(G).d(T).d(C)}$$$$
107
- RNA1{d(C).d(A).d(G).d(T).d(G).d(T)p}$$$$
108
- RNA1{d(T).d(T).d(C).d(A).d(A).d(C)p}$$$$`,
109
- helmLoneRibose: `seq
110
- RNA1{r(A).r(C).r(G).r(U).r(C)}$$$$
111
- RNA1{r(C).r(A).r(G).r(U).r(G).r(U)p}$$$$
112
- RNA1{r(U).r(U).r(C).r(A).r(A).r(C)p}$$$$`,
113
- helmLonePhosphorus: `seq
114
- RNA1{p.p.r(A)p.r(C)p.r(G)p.r(U)p.r(C)p}$$$$
115
- RNA1{p.p.r(C)p.r(A)p.p.r(G)p.r(U)p.r(G)p.r(U)p}$$$$
116
- RNA1{p.r(U)p.r(U)p.r(C)p.r(A)p.r(A)p.r(C)p.p.p}$$$$`,
93
+ [Samples.fastaUn]: `seq
94
+ [meI][hHis][Aca]NT[dE][Thr_PO3H2][Aca]D[meI][hHis][Aca]NT[dE][Thr_PO3H2][Aca]D
95
+ [meI][hHis][Aca][Cys_SEt]T[dK][Thr_PO3H2][Aca][Tyr_PO3H2][meI][hHis][Aca][Cys_SEt]T[dK][Thr_PO3H2][Aca][Tyr_PO3H2]
96
+ [Lys_Boc][hHis][Aca][Cys_SEt]T[dK][Thr_PO3H2][Aca][Tyr_PO3H2][Lys_Boc][hHis][Aca][Cys_SEt]T[dK][Thr_PO3H2][Aca]`,
97
+ [Samples.separatorUn]: `seq
98
+ meI-hHis-Aca-N-T-dE-Thr_PO3H2-Aca-D-meI-hHis-Aca-N-T-dE-Thr_PO3H2-Aca-D
99
+ meI-hHis-Aca-Cys_SEt-T-dK-Thr_PO3H2-Aca-Tyr_PO3H2-meI-hHis-Aca-Cys_SEt-T-dK-Thr_PO3H2-Aca-Tyr_PO3H2
100
+ Lys_Boc-hHis-Aca-Cys_SEt-T-dK-Thr_PO3H2-Aca-Tyr_PO3H2-Lys_Boc-hHis-Aca-Cys_SEt-T-dK-Thr_PO3H2-Aca`,
101
+ [Samples.helmUn]: `seq
102
+ PEPTIDE1{meI.hHis.Aca.N.T.dE.Thr_PO3H2.Aca.D.meI.hHis.Aca.N.T.dE.Thr_PO3H2.Aca.D}$$$$
103
+ PEPTIDE1{meI.hHis.Aca.Cys_SEt.T.dK.Thr_PO3H2.Aca.Tyr_PO3H2.meI.hHis.Aca.Cys_SEt.T.dK.Thr_PO3H2.Aca.Tyr_PO3H2}$$$$
104
+ PEPTIDE1{Lys_Boc.hHis.Aca.Cys_SEt.T.dK.Thr_PO3H2.Aca.Tyr_PO3H2.Lys_Boc.hHis.Aca.Cys_SEt.T.dK.Thr_PO3H2.Aca}$$$$`,
105
+ [Samples.helmLoneDeoxyribose]: `seq
106
+ RNA1{d(A).d(C).d(G).d(T).d(C).d(A).d(C).d(G).d(T).d(C)}$$$$
107
+ RNA1{d(C).d(A).d(G).d(T).d(G).d(T)p.d(C).d(A).d(G).d(T).d(G).d(T)p}$$$$
108
+ RNA1{d(T).d(T).d(C).d(A).d(A).d(C)p.d(T).d(T).d(C).d(A).d(A).d(C)p}$$$$`,
109
+ [Samples.helmLoneRibose]: `seq
110
+ RNA1{r(A).r(C).r(G).r(U).r(C).r(A).r(C).r(G).r(U).r(C)}$$$$
111
+ RNA1{r(C).r(A).r(G).r(U).r(G).r(U)p.r(C).r(A).r(G).r(U).r(G).r(U)p}$$$$
112
+ RNA1{r(U).r(U).r(C).r(A).r(A).r(C)p.r(U).r(U).r(C).r(A).r(A).r(C)p}$$$$`,
113
+ [Samples.helmLonePhosphorus]: `seq
114
+ RNA1{p.p.r(A)p.r(C)p.r(G)p.r(U)p.r(C)p.r(A)p.r(C)p.r(G)p.r(U)p.r(C)p}$$$$
115
+ RNA1{p.p.r(C)p.r(A)p.p.r(G)p.r(U)p.r(G)p.r(U)p.r(C)p.r(A)p.p.r(G)p.r(U)p.r(G)p.r(U)p}$$$$
116
+ RNA1{p.r(U)p.r(U)p.r(C)p.r(A)p.r(A)p.r(C)p.r(U)p.r(U)p.r(C)p.r(A)p.r(A)p.r(C)p.p.p}$$$$`,
117
117
  };
118
118
 
119
119
  /** Also detects semantic types
@@ -39,11 +39,11 @@ category('detectorsBenchmark', () => {
39
39
  });
40
40
 
41
41
  test('separatorDnaShorts50Many1E6', async () => {
42
- await detectMacromoleculeBenchmark(10, NOTATION.SEPARATOR, ALPHABET.DNA, 50, 1E6, '/');
42
+ await detectMacromoleculeBenchmark(20, NOTATION.SEPARATOR, ALPHABET.DNA, 50, 1E6, '/');
43
43
  });
44
44
 
45
45
  test('separatorDnaLong1e6Few50', async () => {
46
- await detectMacromoleculeBenchmark(10, NOTATION.SEPARATOR, ALPHABET.DNA, 1E6, 50, '/');
46
+ await detectMacromoleculeBenchmark(20, NOTATION.SEPARATOR, ALPHABET.DNA, 1E6, 50, '/');
47
47
  });
48
48
 
49
49
  async function detectMacromoleculeBenchmark(
@@ -71,63 +71,63 @@ CCCCN1C(=O)CN=C(c2cc(F)ccc12)C3CCCCC3
71
71
  C1CCCCC1
72
72
  CCCCCC`;
73
73
  [csvTests.fastaDna1]: string = `seq
74
- ACGTC
75
- CAGTGT
76
- TTCAAC`;
74
+ ACGTCACGTC
75
+ CAGTGTCAGTGT
76
+ TTCAACTTCAAC`;
77
77
  [csvTests.fastaRna1]: string = `seq
78
- ACGUC
79
- CAGUGU
80
- UUCAAC`;
78
+ ACGUCACGUC
79
+ CAGUGUCAGUGU
80
+ UUCAACUUCAAC`;
81
81
  /** Pure amino acids sequence */
82
82
  [csvTests.fastaPt1]: string = `seq
83
83
  FWPHEY
84
84
  YNRQWYV
85
85
  MKPSEYV`;
86
86
  [csvTests.fastaUn]: string = `seq
87
- [meI][hHis][Aca]NT[dE][Thr_PO3H2][Aca]D
88
- [meI][hHis][Aca][Cys_SEt]T[dK][Thr_PO3H2][Aca][Tyr_PO3H2]
89
- [Lys_Boc][hHis][Aca][Cys_SEt]T[dK][Thr_PO3H2][Aca][Tyr_PO3H2]`;
87
+ [meI][hHis][Aca]NT[dE][Thr_PO3H2][Aca]DN
88
+ [meI][hHis][Aca][Cys_SEt]T[dK][Thr_PO3H2][Aca][Tyr_PO3H2][Aca]
89
+ [Lys_Boc][hHis][Aca][Cys_SEt]T[dK][Thr_PO3H2][Aca][Tyr_PO3H2][Aca]`;
90
90
  [csvTests.sepDna]: string = `seq
91
- A*C*G*T*C
92
- C*A*G*T*G*T
93
- T*T*C*A*A*C`;
91
+ A*C*G*T*C*A*C*G*T*C
92
+ C*A*G*T*G*T*C*A*G*T*G*T
93
+ T*T*C*A*A*C*T*T*C*A*A*C`;
94
94
  [csvTests.sepRna]: string = `seq
95
- A*C*G*U*C
96
- C*A*G*U*G*U
97
- U*U*C*A*A*C`;
95
+ A*C*G*U*C*A*C*G*U*C
96
+ C*A*G*U*G*U*C*A*G*U*G*U
97
+ U*U*C*A*A*C*U*U*C*A*A*C`;
98
98
  [csvTests.sepPt]: string = `seq
99
- F-W-P-H-E-Y
100
- Y-N-R-Q-W-Y-V
101
- M-K-P-S-E-Y-V`;
99
+ F-W-P-H-E-Y-F-W-P-H-E-Y
100
+ Y-N-R-Q-W-Y-V-Y-N-R-Q-W-Y-V
101
+ M-K-P-S-E-Y-V-M-K-P-S-E-Y-V`;
102
102
  [csvTests.sepUn1]: string = `seq
103
- abc-dfgg-abc1-cfr3-rty-wert
104
- rut12-her2-rty-wert-abc-abc1-dfgg
105
- rut12-rty-her2-abc-cfr3-wert-rut12`;
103
+ abc-dfgg-abc1-cfr3-rty-wert-cfr3-rty-wert
104
+ rut12-her2-rty-wert-abc-abc1-dfgg-abc-abc1-dfgg
105
+ rut12-rty-her2-abc-cfr3-wert-rut12-cfr3-wert-rut12`;
106
106
  [csvTests.sepUn2]: string = `seq
107
- abc/dfgg/abc1/cfr3/rty/wert
108
- rut12/her2/rty/wert//abc/abc1/dfgg
109
- rut12/rty/her2/abc/cfr3//wert/rut12`;
107
+ abc/dfgg/abc1/cfr3/rty/wert/abc/dfgg/abc1/cfr3/rty/wert
108
+ rut12/her2/rty/wert//abc/abc1/dfgg/rut12/her2/rty/wert//abc/abc1/dfgg
109
+ rut12/rty/her2/abc/cfr3//wert/rut12/rut12/rty/her2/abc/cfr3//wert/rut12`;
110
110
  [csvTests.sepMsaDna1]: string = `seq
111
- A-C--G-T--C-T
112
- C-A-C--T--G-T
113
- A-C-C-G-T-A-C-T`;
111
+ A-C--G-T--C-T-A-C--G-T--C-T
112
+ C-A-C--T--G-T-C-A-C--T--G-T
113
+ A-C-C-G-T-A-C-T-A-C-C-G-T-A-C-T`;
114
114
  [csvTests.sepMsaUnWEmpty]: string = `seq
115
- m1-M-m3-mon4-mon5-N-T-MON8-N9
116
- m1-mon2-m3-mon4-mon5-Num--MON8-N9
115
+ m1-M-m3-mon4-mon5-N-T-MON8-N9-m1-M-m3-mon4-mon5-N-T-MON8-N9
116
+ m1-mon2-m3-mon4-mon5-Num--MON8-N9-m1-mon2-m3-mon4-mon5-Num--MON8-N9
117
117
 
118
- mon1-M-mon3-mon4-mon5---MON8-N9`;
118
+ mon1-M-mon3-mon4-mon5---MON8-N9-mon1-M-mon3-mon4-mon5---MON8-N9`;
119
119
  [csvTests.sepComplex]: string = `seq
120
120
  Ac(1)-F-K(AEEA-AEEA-R-Ac)-L-mF-V-Y-mNle-D-W-N-mF-C(1)-G-NH2
121
121
  Ac(1)-F-K(AEEA-ARRA-W-Ac)-L-mF-V-Y-mNle-D-W-N-mF-C(1)-G-NH2
122
122
  Ac(1)-F-K(AEEA-AEEA-Ac)-L-mF-V-Y-mNle-D-W-N-mF-C(1)-G-NH2`;
123
123
  [csvTests.fastaMsaDna1]: string = `seq
124
- AC-GT-CT
125
- CAC-T-GT
126
- ACCGTACT`;
124
+ AC-GT-CTAC-GT-CT
125
+ CAC-T-GTCAC-T-GT
126
+ ACCGTACTACCGTACT`;
127
127
  [csvTests.fastaMsaPt1]: string = `seq
128
- FWR-WYV-KHP
129
- YNR-WYV-KHP
130
- MWRSWY-CKHP`;
128
+ FWR-WYV-KHPFWR-WYV-KHP
129
+ YNR-WYV-KHPYNR-WYV-KHP
130
+ MWRSWY-CKHPMWRSWY-CKHP`;
131
131
  }();
132
132
 
133
133
  const enum Samples {
@@ -28,35 +28,35 @@ category('detectors:weak-and-likely', () => {
28
28
 
29
29
  const csvData: { [name: string]: string } = {
30
30
  [csvTests.fastaDnaWeak1]: `id,colName
31
- 1,TTTTT
32
- 2,TTTTT
33
- 3,TTTTT
34
- 4,TTTTT`,
31
+ 1,TTTTTTTTTT
32
+ 2,TTTTTTTTTT
33
+ 3,TTTTTTTTTT
34
+ 4,TTTTTTTTTT`,
35
35
  [csvTests.fastaDnaWeak1LikelyName]: `id,seq
36
- 1,TTTTT
37
- 2,TTTTT
38
- 3,TTTTT
39
- 4,TTTTT`,
36
+ 1,TTTTTTT
37
+ 2,TTTTTTT
38
+ 3,TTTTTTT
39
+ 4,TTTTTTT`,
40
40
  [csvTests.fastaRnaWeak1]: `id,colName
41
- 1,UUUUU
42
- 2,UUUUU
43
- 3,UUUUU
44
- 4,UUUUU`,
41
+ 1,UUUUUUUUUU
42
+ 2,UUUUUUUUUU
43
+ 3,UUUUUUUUUU
44
+ 4,UUUUUUUUUU`,
45
45
  [csvTests.fastaRnaWeak1LikelyName]: `id,seq
46
- 1,UUUUU
47
- 2,UUUUU
48
- 3,UUUUU
49
- 4,UUUUU`,
46
+ 1,UUUUUUU
47
+ 2,UUUUUUU
48
+ 3,UUUUUUU
49
+ 4,UUUUUUU`,
50
50
  [csvTests.fastaPtWeak1]: `id,colName
51
- 1,SLSLSPGK
52
- 2,SLSLSPGK
53
- 3,SLSLSPGK
54
- 4,SLSLSPGK`,
51
+ 1,SLSLSPGKSLSLSPGK
52
+ 2,SLSLSPGKSLSLSPGK
53
+ 3,SLSLSPGKSLSLSPGK
54
+ 4,SLSLSPGKSLSLSPGK`,
55
55
  [csvTests.fastaPtWeak1LikelyName]: `id,seq
56
- 1,SLSLSPGK
57
- 2,SLSLSPGK
58
- 3,SLSLSPGK
59
- 4,SLSLSPGK`,
56
+ 1,SLSLSPGKSLSLSPGK
57
+ 2,SLSLSPGKSLSLSPGK
58
+ 3,SLSLSPGKSLSLSPGK
59
+ 4,SLSLSPGKSLSLSPGK`,
60
60
  [csvTests.fastaUn1]: `id,colName
61
61
  1,word
62
62
  2,other
@@ -27,19 +27,20 @@ category('Distance', async () => {
27
27
  const prot6 = 'FWRRRRY';
28
28
 
29
29
  const protTable = `seq
30
- FWRWYVKHP
31
- YNRWYVKHP
32
- MWRSWYCKHP`;
30
+ FWRWYVKHPFWRWYVKHP
31
+ YNRWYVKHPYNRWYVKHP
32
+ MWRSWYCKHPMWRSWYCKHP`;
33
33
 
34
34
  const DNATable = `seq
35
- ATAACG
36
- ATCGA
37
- ATCGA`;
35
+ ATAACGATAACG
36
+ ATCGAATCGA
37
+ ATCGAATCGA`;
38
38
 
39
39
  const MSATable = `seq
40
- ATAAC
41
- ATCGA
42
- ATCGA`;
40
+ ATAACATAAC
41
+ ATCGAATCGA
42
+ ATCGAATCGA`;
43
+
43
44
  test('protein-distance-function', async () => {
44
45
  const uh = await _initMacromoleculeColumn(protTable);
45
46
  const distFunc = uh.getDistanceFunctionName();
@@ -1,96 +1,120 @@
1
1
  import * as grok from 'datagrok-api/grok';
2
- import * as ui from 'datagrok-api/ui';
3
2
  import * as DG from 'datagrok-api/dg';
4
3
 
5
- import wu from 'wu';
6
-
7
- import {category, test, expect, expectArray} from '@datagrok-libraries/utils/src/test';
4
+ import {category, expect, expectArray, test} from '@datagrok-libraries/utils/src/test';
8
5
  import {GapSymbols, UnitsHandler} from '@datagrok-libraries/bio/src/utils/units-handler';
9
6
  import {NOTATION} from '@datagrok-libraries/bio/src/utils/macromolecule';
10
7
  import {ISeqSplitted} from '@datagrok-libraries/bio/src/utils/macromolecule/types';
11
8
 
9
+ enum Tests {
10
+ fasta = 'fasta',
11
+ fastaMsa = 'fastaMsa',
12
+ separator = 'separator',
13
+ separatorMsa = 'separatorMsa',
14
+ helm = 'helm',
15
+ }
16
+
12
17
  category('UnitsHandler', () => {
13
18
  const fG = GapSymbols[NOTATION.FASTA];
14
19
  const hG = GapSymbols[NOTATION.HELM];
15
20
  const sG = GapSymbols[NOTATION.SEPARATOR];
16
- const data: { [testName: string]: { src: { csv: string }, tgt: { splitted: (string[] | string)[] } } } = {
17
- fasta: {
21
+ const data: {
22
+ [testName: string]: {
23
+ src: { csv: string },
24
+ tgt: { notation: NOTATION, separator?: string, splitted: (string[] | string)[] }
25
+ }
26
+ } = {
27
+ [Tests.fasta]: {
18
28
  src: {
19
29
  csv: `seq
20
- ACGTC
21
- CAGTGT
22
- TTCAAC`
30
+ ACGTCACGTC
31
+ CAGTGTCAGTGT
32
+ TTCAACTTCAAC`
23
33
  },
24
34
  tgt: {
35
+ notation: NOTATION.FASTA,
25
36
  splitted: [
26
- 'ACGTC',
27
- 'CAGTGT',
28
- 'TTCAAC',
37
+ 'ACGTCACGTC',
38
+ 'CAGTGTCAGTGT',
39
+ 'TTCAACTTCAAC',
29
40
  ]
30
41
  }
31
42
  },
32
- fastaMsa: {
43
+ [Tests.fastaMsa]: {
33
44
  src: {
34
45
  csv: `seq
35
- AC-GT-CT
36
- CAC-T-GT
37
- ACCGTACT`,
46
+ AC-GT-CTAC-GT-CT
47
+ CAC-T-GTCAC-T-GT
48
+ ACCGTACTACCGTACT`,
38
49
  },
39
50
  tgt: {
51
+ notation: NOTATION.FASTA,
40
52
  splitted: [
41
53
  //@formatter:off
42
- 'AC-GT-CT',
43
- 'CAC-T-GT',
44
- 'ACCGTACT',
54
+ 'AC-GT-CTAC-GT-CT',
55
+ 'CAC-T-GTCAC-T-GT',
56
+ 'ACCGTACTACCGTACT',
45
57
  //@formatter:on
46
58
  ]
47
59
  }
48
60
  },
49
- separator: {
61
+ [Tests.separator]: {
50
62
  src: {
51
63
  csv: `seq
52
- abc-dfgg-abc1-cfr3-rty-wert
53
- rut12-her2-rty-wert-abc-abc1-dfgg
54
- rut12-rty-her2-abc-cfr3-wert-rut12`,
64
+ abc-dfgg-abc1-cfr3-rty-wert-abc-dfgg-abc1-cfr3-rty-wert
65
+ rut12-her2-rty-wert-abc-abc1-dfgg-rut12-her2-rty-wert-abc
66
+ rut12-rty-her2-abc-cfr3-wert-rut12-rut12-rty-her2-abc-cfr3`,
55
67
  },
56
68
  tgt: {
69
+ notation: NOTATION.SEPARATOR,
70
+ separator: '-',
57
71
  splitted: [
58
- ['abc', 'dfgg', 'abc1', 'cfr3', 'rty', 'wert'],
59
- ['rut12', 'her2', 'rty', 'wert', 'abc', 'abc1', 'dfgg'],
60
- ['rut12', 'rty', 'her2', 'abc', 'cfr3', 'wert', 'rut12']
72
+ ['abc', 'dfgg', 'abc1', 'cfr3', 'rty', 'wert', 'abc', 'dfgg', 'abc1', 'cfr3', 'rty', 'wert'],
73
+ ['rut12', 'her2', 'rty', 'wert', 'abc', 'abc1', 'dfgg', 'rut12', 'her2', 'rty', 'wert', 'abc'],
74
+ ['rut12', 'rty', 'her2', 'abc', 'cfr3', 'wert', 'rut12', 'rut12', 'rty', 'her2', 'abc', 'cfr3']
61
75
  ]
62
76
  }
63
77
  },
64
78
 
65
- separatorMsa: {
79
+ [Tests.separatorMsa]: {
66
80
  src: {
67
81
  csv: `seq
68
- abc-dfgg-abc1-cfr3-rty-wert
69
- rut12-her2-rty--abc1-dfgg
70
- rut12-rty-her2---wert`
82
+ rut0-dfgg-abc1-cfr3-rty-wert-abc-dfgg-abc1-cfr3-rty-wert
83
+ rut1-her2-rty--abc1-dfgg-rut12-her2-rty--abc1-dfgg
84
+ rut2-rty-her2---wert-rut12-rty-her2---wert
85
+ \"rut3-rty-her2-\"\"-\"\"-\"\"-\"\"-wert-rut12-rty-her2-\"\"-\"\"-\"\"-\"\"-wert\"
86
+ \"\"\"-\"\"-rut4-her2-wert-rut12-rty-her2-wert\"
87
+ \"rut5-rty-her2-wert-rut12-rty-her2-wert-\"\"-\"\"\"`
71
88
  },
72
89
  tgt: {
90
+ notation: NOTATION.SEPARATOR,
91
+ separator: '-',
73
92
  splitted: [
74
- ['abc', 'dfgg', 'abc1', 'cfr3', 'rty', 'wert'],
75
- ['rut12', 'her2', 'rty', sG, 'abc1', 'dfgg'],
76
- ['rut12', 'rty', 'her2', sG, sG, 'wert'],
93
+ ['rut0', 'dfgg', 'abc1', 'cfr3', 'rty', 'wert', 'abc', 'dfgg', 'abc1', 'cfr3', 'rty', 'wert'],
94
+ ['rut1', 'her2', 'rty', sG, 'abc1', 'dfgg', 'rut12', 'her2', 'rty', sG, 'abc1', 'dfgg'],
95
+ ['rut2', 'rty', 'her2', sG, sG, 'wert', 'rut12', 'rty', 'her2', sG, sG, 'wert'],
96
+ ['rut3', 'rty', 'her2', sG, sG, 'wert', 'rut12', 'rty', 'her2', sG, sG, 'wert'],
97
+ [sG, 'rut4', 'her2', 'wert', 'rut12', 'rty', 'her2', 'wert'],
98
+ ['rut5', 'rty', 'her2', 'wert', 'rut12', 'rty', 'her2', 'wert', sG],
77
99
  ]
78
100
  }
79
101
  },
80
- helm: {
102
+
103
+ [Tests.helm]: {
81
104
  src: {
82
105
  csv: `seq
83
- PEPTIDE1{meI.hHis.Aca.N.T.dE.Thr_PO3H2.Aca.D-Tyr_Et}$$$$
84
- PEPTIDE1{meI.hHis.Aca.Cys_SEt.T.dK.Thr_PO3H2.Aca}$$$$
85
- PEPTIDE1{Lys_Boc.hHis.Aca.Cys_SEt.T.dK.Thr_PO3H2.Aca}$$$$
86
- PEPTIDE1{meI.hHis.Aca.Cys_SEt.T.dK.Thr_PO3H2}$$$$`
106
+ PEPTIDE1{meI.hHis.Aca.N.T.dE.Thr_PO3H2.Aca.D-Tyr_Et.Thr_PO3H2.Aca.D-Tyr_Et}$$$$
107
+ PEPTIDE1{meI.hHis.Aca.Cys_SEt.T.dK.Thr_PO3H2.Aca.dK.Thr_PO3H2.Aca}$$$$
108
+ PEPTIDE1{Lys_Boc.hHis.Aca.Cys_SEt.T.dK.Thr_PO3H2.Aca.dK.Thr_PO3H2.Aca}$$$$
109
+ PEPTIDE1{meI.hHis.Aca.Cys_SEt.T.dK.Thr_PO3H2.T.dK.Thr_PO3H2}$$$$`
87
110
  },
88
111
  tgt: {
112
+ notation: NOTATION.HELM,
89
113
  splitted: [
90
- ['meI', 'hHis', 'Aca', 'N', 'T', 'dE', 'Thr_PO3H2', 'Aca', 'D-Tyr_Et'],
91
- ['meI', 'hHis', 'Aca', 'Cys_SEt', 'T', 'dK', 'Thr_PO3H2', 'Aca'],
92
- ['Lys_Boc', 'hHis', 'Aca', 'Cys_SEt', 'T', 'dK', 'Thr_PO3H2', 'Aca'],
93
- ['meI', 'hHis', 'Aca', 'Cys_SEt', 'T', 'dK', 'Thr_PO3H2'],
114
+ ['meI', 'hHis', 'Aca', 'N', 'T', 'dE', 'Thr_PO3H2', 'Aca', 'D-Tyr_Et', 'Thr_PO3H2', 'Aca', 'D-Tyr_Et'],
115
+ ['meI', 'hHis', 'Aca', 'Cys_SEt', 'T', 'dK', 'Thr_PO3H2', 'Aca', 'dK', 'Thr_PO3H2', 'Aca'],
116
+ ['Lys_Boc', 'hHis', 'Aca', 'Cys_SEt', 'T', 'dK', 'Thr_PO3H2', 'Aca', 'dK', 'Thr_PO3H2', 'Aca'],
117
+ ['meI', 'hHis', 'Aca', 'Cys_SEt', 'T', 'dK', 'Thr_PO3H2', 'T', 'dK', 'Thr_PO3H2'],
94
118
  ]
95
119
  }
96
120
  }
@@ -106,8 +130,11 @@ PEPTIDE1{meI.hHis.Aca.Cys_SEt.T.dK.Thr_PO3H2}$$$$`
106
130
  expect(col.semType, DG.SEMTYPE.MACROMOLECULE);
107
131
 
108
132
  const uh = UnitsHandler.getOrCreate(col);
133
+ expect(uh.notation, testData.tgt.notation);
134
+ expect(uh.separator === testData.tgt.separator, true);
135
+
109
136
  const resSplitted: ISeqSplitted[] = uh.splitted;
110
137
  expectArray(resSplitted, testData.tgt.splitted);
111
- });
138
+ }, testName == Tests.separatorMsa ? {skipReason: '#2468 CSV row starting with the quote character'} : undefined);
112
139
  }
113
140
  });
@@ -7,19 +7,19 @@ import {UnitsHandler} from '@datagrok-libraries/bio/src/utils/units-handler';
7
7
  import {ALPHABET, NOTATION, TAGS} from '@datagrok-libraries/bio/src/utils/macromolecule';
8
8
 
9
9
  const seqDna = `seq
10
- ACGTC
11
- CAGTGT
12
- TTCAAC`;
10
+ ACGTCACGTC
11
+ CAGTGTCAGTGT
12
+ TTCAACTTCAAC`;
13
13
 
14
14
  const seqDnaMsa = `seq
15
- AC-GT-CT
16
- CAC-T-GT
17
- ACCGTACT`;
15
+ AC-GT-CTAC-GT-CT
16
+ CAC-T-GTCAC-T-GT
17
+ ACCGTACTACCGTACT`;
18
18
 
19
19
  const seqUn = `seq
20
- abc-dfgg-abc1-cfr3-rty-wert
21
- rut12-her2-rty-wert-abc-abc1-dfgg
22
- rut12-rty-her2-abc-cfr3-wert-rut12`;
20
+ abc-dfgg-abc1-cfr3-rty-wert-abc-dfgg-abc1-cfr3-rty-wert
21
+ rut12-her2-rty-wert-abc-abc1-dfgg-rut12-her2-rty-wert-abc-abc1-dfgg
22
+ rut12-rty-her2-abc-cfr3-wert-rut12-rut12-rty-her2-abc-cfr3-wert-rut12`;
23
23
 
24
24
  const seqHelm = `seq
25
25
  PEPTIDE1{meI.hHis.Aca.N.T.dE.Thr_PO3H2.Aca.D-Tyr_Et.Tyr_ab-dehydroMe.dV.E.N.D-Orn.D-aThr.Phe_4Me}$$$$