@datagrok/bio 2.11.3 → 2.11.6

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -28,35 +28,35 @@ category('detectors:weak-and-likely', () => {
28
28
 
29
29
  const csvData: { [name: string]: string } = {
30
30
  [csvTests.fastaDnaWeak1]: `id,colName
31
- 1,TTTTT
32
- 2,TTTTT
33
- 3,TTTTT
34
- 4,TTTTT`,
31
+ 1,TTTTTTTTTT
32
+ 2,TTTTTTTTTT
33
+ 3,TTTTTTTTTT
34
+ 4,TTTTTTTTTT`,
35
35
  [csvTests.fastaDnaWeak1LikelyName]: `id,seq
36
- 1,TTTTT
37
- 2,TTTTT
38
- 3,TTTTT
39
- 4,TTTTT`,
36
+ 1,TTTTTTT
37
+ 2,TTTTTTT
38
+ 3,TTTTTTT
39
+ 4,TTTTTTT`,
40
40
  [csvTests.fastaRnaWeak1]: `id,colName
41
- 1,UUUUU
42
- 2,UUUUU
43
- 3,UUUUU
44
- 4,UUUUU`,
41
+ 1,UUUUUUUUUU
42
+ 2,UUUUUUUUUU
43
+ 3,UUUUUUUUUU
44
+ 4,UUUUUUUUUU`,
45
45
  [csvTests.fastaRnaWeak1LikelyName]: `id,seq
46
- 1,UUUUU
47
- 2,UUUUU
48
- 3,UUUUU
49
- 4,UUUUU`,
46
+ 1,UUUUUUU
47
+ 2,UUUUUUU
48
+ 3,UUUUUUU
49
+ 4,UUUUUUU`,
50
50
  [csvTests.fastaPtWeak1]: `id,colName
51
- 1,SLSLSPGK
52
- 2,SLSLSPGK
53
- 3,SLSLSPGK
54
- 4,SLSLSPGK`,
51
+ 1,SLSLSPGKSLSLSPGK
52
+ 2,SLSLSPGKSLSLSPGK
53
+ 3,SLSLSPGKSLSLSPGK
54
+ 4,SLSLSPGKSLSLSPGK`,
55
55
  [csvTests.fastaPtWeak1LikelyName]: `id,seq
56
- 1,SLSLSPGK
57
- 2,SLSLSPGK
58
- 3,SLSLSPGK
59
- 4,SLSLSPGK`,
56
+ 1,SLSLSPGKSLSLSPGK
57
+ 2,SLSLSPGKSLSLSPGK
58
+ 3,SLSLSPGKSLSLSPGK
59
+ 4,SLSLSPGKSLSLSPGK`,
60
60
  [csvTests.fastaUn1]: `id,colName
61
61
  1,word
62
62
  2,other
@@ -27,19 +27,20 @@ category('Distance', async () => {
27
27
  const prot6 = 'FWRRRRY';
28
28
 
29
29
  const protTable = `seq
30
- FWRWYVKHP
31
- YNRWYVKHP
32
- MWRSWYCKHP`;
30
+ FWRWYVKHPFWRWYVKHP
31
+ YNRWYVKHPYNRWYVKHP
32
+ MWRSWYCKHPMWRSWYCKHP`;
33
33
 
34
34
  const DNATable = `seq
35
- ATAACG
36
- ATCGA
37
- ATCGA`;
35
+ ATAACGATAACG
36
+ ATCGAATCGA
37
+ ATCGAATCGA`;
38
38
 
39
39
  const MSATable = `seq
40
- ATAAC
41
- ATCGA
42
- ATCGA`;
40
+ ATAACATAAC
41
+ ATCGAATCGA
42
+ ATCGAATCGA`;
43
+
43
44
  test('protein-distance-function', async () => {
44
45
  const uh = await _initMacromoleculeColumn(protTable);
45
46
  const distFunc = uh.getDistanceFunctionName();
@@ -1,10 +1,7 @@
1
1
  import * as grok from 'datagrok-api/grok';
2
- import * as ui from 'datagrok-api/ui';
3
2
  import * as DG from 'datagrok-api/dg';
4
3
 
5
- import wu from 'wu';
6
-
7
- import {category, test, expect, expectArray} from '@datagrok-libraries/utils/src/test';
4
+ import {category, expect, expectArray, test} from '@datagrok-libraries/utils/src/test';
8
5
  import {GapSymbols, UnitsHandler} from '@datagrok-libraries/bio/src/utils/units-handler';
9
6
  import {NOTATION} from '@datagrok-libraries/bio/src/utils/macromolecule';
10
7
  import {ISeqSplitted} from '@datagrok-libraries/bio/src/utils/macromolecule/types';
@@ -17,31 +14,31 @@ category('UnitsHandler', () => {
17
14
  fasta: {
18
15
  src: {
19
16
  csv: `seq
20
- ACGTC
21
- CAGTGT
22
- TTCAAC`
17
+ ACGTCACGTC
18
+ CAGTGTCAGTGT
19
+ TTCAACTTCAAC`
23
20
  },
24
21
  tgt: {
25
22
  splitted: [
26
- 'ACGTC',
27
- 'CAGTGT',
28
- 'TTCAAC',
23
+ 'ACGTCACGTC',
24
+ 'CAGTGTCAGTGT',
25
+ 'TTCAACTTCAAC',
29
26
  ]
30
27
  }
31
28
  },
32
29
  fastaMsa: {
33
30
  src: {
34
31
  csv: `seq
35
- AC-GT-CT
36
- CAC-T-GT
37
- ACCGTACT`,
32
+ AC-GT-CTAC-GT-CT
33
+ CAC-T-GTCAC-T-GT
34
+ ACCGTACTACCGTACT`,
38
35
  },
39
36
  tgt: {
40
37
  splitted: [
41
38
  //@formatter:off
42
- 'AC-GT-CT',
43
- 'CAC-T-GT',
44
- 'ACCGTACT',
39
+ 'AC-GT-CTAC-GT-CT',
40
+ 'CAC-T-GTCAC-T-GT',
41
+ 'ACCGTACTACCGTACT',
45
42
  //@formatter:on
46
43
  ]
47
44
  }
@@ -49,15 +46,15 @@ ACCGTACT`,
49
46
  separator: {
50
47
  src: {
51
48
  csv: `seq
52
- abc-dfgg-abc1-cfr3-rty-wert
53
- rut12-her2-rty-wert-abc-abc1-dfgg
54
- rut12-rty-her2-abc-cfr3-wert-rut12`,
49
+ abc-dfgg-abc1-cfr3-rty-wert-abc-dfgg-abc1-cfr3-rty-wert
50
+ rut12-her2-rty-wert-abc-abc1-dfgg-rut12-her2-rty-wert-abc
51
+ rut12-rty-her2-abc-cfr3-wert-rut12-rut12-rty-her2-abc-cfr3`,
55
52
  },
56
53
  tgt: {
57
54
  splitted: [
58
- ['abc', 'dfgg', 'abc1', 'cfr3', 'rty', 'wert'],
59
- ['rut12', 'her2', 'rty', 'wert', 'abc', 'abc1', 'dfgg'],
60
- ['rut12', 'rty', 'her2', 'abc', 'cfr3', 'wert', 'rut12']
55
+ ['abc', 'dfgg', 'abc1', 'cfr3', 'rty', 'wert', 'abc', 'dfgg', 'abc1', 'cfr3', 'rty', 'wert'],
56
+ ['rut12', 'her2', 'rty', 'wert', 'abc', 'abc1', 'dfgg', 'rut12', 'her2', 'rty', 'wert', 'abc'],
57
+ ['rut12', 'rty', 'her2', 'abc', 'cfr3', 'wert', 'rut12', 'rut12', 'rty', 'her2', 'abc', 'cfr3']
61
58
  ]
62
59
  }
63
60
  },
@@ -65,32 +62,32 @@ rut12-rty-her2-abc-cfr3-wert-rut12`,
65
62
  separatorMsa: {
66
63
  src: {
67
64
  csv: `seq
68
- abc-dfgg-abc1-cfr3-rty-wert
69
- rut12-her2-rty--abc1-dfgg
70
- rut12-rty-her2---wert`
65
+ abc-dfgg-abc1-cfr3-rty-wert-abc-dfgg-abc1-cfr3-rty-wert
66
+ rut12-her2-rty--abc1-dfgg-rut12-her2-rty--abc1-dfgg
67
+ rut12-rty-her2---wert-rut12-rty-her2---wert`
71
68
  },
72
69
  tgt: {
73
70
  splitted: [
74
- ['abc', 'dfgg', 'abc1', 'cfr3', 'rty', 'wert'],
75
- ['rut12', 'her2', 'rty', sG, 'abc1', 'dfgg'],
76
- ['rut12', 'rty', 'her2', sG, sG, 'wert'],
71
+ ['abc', 'dfgg', 'abc1', 'cfr3', 'rty', 'wert', 'abc', 'dfgg', 'abc1', 'cfr3', 'rty', 'wert'],
72
+ ['rut12', 'her2', 'rty', sG, 'abc1', 'dfgg', 'rut12', 'her2', 'rty', sG, 'abc1', 'dfgg'],
73
+ ['rut12', 'rty', 'her2', sG, sG, 'wert', 'rut12', 'rty', 'her2', sG, sG, 'wert'],
77
74
  ]
78
75
  }
79
76
  },
80
77
  helm: {
81
78
  src: {
82
79
  csv: `seq
83
- PEPTIDE1{meI.hHis.Aca.N.T.dE.Thr_PO3H2.Aca.D-Tyr_Et}$$$$
84
- PEPTIDE1{meI.hHis.Aca.Cys_SEt.T.dK.Thr_PO3H2.Aca}$$$$
85
- PEPTIDE1{Lys_Boc.hHis.Aca.Cys_SEt.T.dK.Thr_PO3H2.Aca}$$$$
86
- PEPTIDE1{meI.hHis.Aca.Cys_SEt.T.dK.Thr_PO3H2}$$$$`
80
+ PEPTIDE1{meI.hHis.Aca.N.T.dE.Thr_PO3H2.Aca.D-Tyr_Et.Thr_PO3H2.Aca.D-Tyr_Et}$$$$
81
+ PEPTIDE1{meI.hHis.Aca.Cys_SEt.T.dK.Thr_PO3H2.Aca.dK.Thr_PO3H2.Aca}$$$$
82
+ PEPTIDE1{Lys_Boc.hHis.Aca.Cys_SEt.T.dK.Thr_PO3H2.Aca.dK.Thr_PO3H2.Aca}$$$$
83
+ PEPTIDE1{meI.hHis.Aca.Cys_SEt.T.dK.Thr_PO3H2.T.dK.Thr_PO3H2}$$$$`
87
84
  },
88
85
  tgt: {
89
86
  splitted: [
90
- ['meI', 'hHis', 'Aca', 'N', 'T', 'dE', 'Thr_PO3H2', 'Aca', 'D-Tyr_Et'],
91
- ['meI', 'hHis', 'Aca', 'Cys_SEt', 'T', 'dK', 'Thr_PO3H2', 'Aca'],
92
- ['Lys_Boc', 'hHis', 'Aca', 'Cys_SEt', 'T', 'dK', 'Thr_PO3H2', 'Aca'],
93
- ['meI', 'hHis', 'Aca', 'Cys_SEt', 'T', 'dK', 'Thr_PO3H2'],
87
+ ['meI', 'hHis', 'Aca', 'N', 'T', 'dE', 'Thr_PO3H2', 'Aca', 'D-Tyr_Et', 'Thr_PO3H2', 'Aca', 'D-Tyr_Et'],
88
+ ['meI', 'hHis', 'Aca', 'Cys_SEt', 'T', 'dK', 'Thr_PO3H2', 'Aca', 'dK', 'Thr_PO3H2', 'Aca'],
89
+ ['Lys_Boc', 'hHis', 'Aca', 'Cys_SEt', 'T', 'dK', 'Thr_PO3H2', 'Aca', 'dK', 'Thr_PO3H2', 'Aca'],
90
+ ['meI', 'hHis', 'Aca', 'Cys_SEt', 'T', 'dK', 'Thr_PO3H2', 'T', 'dK', 'Thr_PO3H2'],
94
91
  ]
95
92
  }
96
93
  }
@@ -7,19 +7,19 @@ import {UnitsHandler} from '@datagrok-libraries/bio/src/utils/units-handler';
7
7
  import {ALPHABET, NOTATION, TAGS} from '@datagrok-libraries/bio/src/utils/macromolecule';
8
8
 
9
9
  const seqDna = `seq
10
- ACGTC
11
- CAGTGT
12
- TTCAAC`;
10
+ ACGTCACGTC
11
+ CAGTGTCAGTGT
12
+ TTCAACTTCAAC`;
13
13
 
14
14
  const seqDnaMsa = `seq
15
- AC-GT-CT
16
- CAC-T-GT
17
- ACCGTACT`;
15
+ AC-GT-CTAC-GT-CT
16
+ CAC-T-GTCAC-T-GT
17
+ ACCGTACTACCGTACT`;
18
18
 
19
19
  const seqUn = `seq
20
- abc-dfgg-abc1-cfr3-rty-wert
21
- rut12-her2-rty-wert-abc-abc1-dfgg
22
- rut12-rty-her2-abc-cfr3-wert-rut12`;
20
+ abc-dfgg-abc1-cfr3-rty-wert-abc-dfgg-abc1-cfr3-rty-wert
21
+ rut12-her2-rty-wert-abc-abc1-dfgg-rut12-her2-rty-wert-abc-abc1-dfgg
22
+ rut12-rty-her2-abc-cfr3-wert-rut12-rut12-rty-her2-abc-cfr3-wert-rut12`;
23
23
 
24
24
  const seqHelm = `seq
25
25
  PEPTIDE1{meI.hHis.Aca.N.T.dE.Thr_PO3H2.Aca.D-Tyr_Et.Tyr_ab-dehydroMe.dV.E.N.D-Orn.D-aThr.Phe_4Me}$$$$
@@ -0,0 +1,44 @@
1
+ import * as grok from 'datagrok-api/grok';
2
+ import * as ui from 'datagrok-api/ui';
3
+ import * as DG from 'datagrok-api/dg';
4
+
5
+
6
+ import {_package} from '../package';
7
+ import {delay} from '@datagrok-libraries/utils/src/test';
8
+
9
+ type IDetectorReport = { categoriesSample: any[], rejectReason: string };
10
+
11
+ type IDetectorDebugStore = { last: IDetectorReport };
12
+
13
+ export async function detectMacromoleculeProbeDo(
14
+ csv: string, colName: string | undefined, probeCount: number
15
+ ): Promise<void> {
16
+ const pi = DG.TaskBarProgressIndicator.create(`detectMacromolecule probe ...`);
17
+ try {
18
+ let progressLast = 0;
19
+ const store: IDetectorDebugStore = await grok.functions.call('Bio:detectMacromoleculeEnableStore');
20
+ let failCount: number = 0;
21
+ for (let i = 0; i < probeCount; ++i) {
22
+ const df: DG.DataFrame = DG.DataFrame.fromCsv(csv);
23
+ const seqCol = colName ? df.getCol(colName) : df.columns.byIndex(0);
24
+
25
+ const detectRes: string = await grok.functions.call('Bio:detectMacromolecule', {col: seqCol});
26
+ if (detectRes !== DG.SEMTYPE.MACROMOLECULE) {
27
+ ++failCount;
28
+ console.warn(`Reject reason: ${store.last.rejectReason}`);
29
+ }
30
+ const progress = i / probeCount;
31
+ if ((progress - progressLast) >= 0.1) {
32
+ progressLast = progress;
33
+ pi.update(100 * progress, `detectMacromolecule probe ${failCount}/${i}/${probeCount} ...`);
34
+ await delay(0);
35
+ }
36
+ }
37
+ if (failCount > 0)
38
+ grok.shell.warning(`detectMacromolecule failed ${failCount} / ${probeCount}`);
39
+ else
40
+ grok.shell.info(`detectMacromolecule success ${probeCount}`);
41
+ } finally {
42
+ pi.close();
43
+ }
44
+ }
@@ -8,9 +8,7 @@ import {IMonomerLib, Monomer} from '@datagrok-libraries/bio/src/types/index';
8
8
  import {MolfileHandler} from '@datagrok-libraries/chem-meta/src/parsing-utils/molfile-handler';
9
9
  import {
10
10
  createJsonMonomerLibFromSdf,
11
- getJsonMonomerLibForEnumerator,
12
11
  IMonomerLibHelper,
13
- isValidEnumeratorLib,
14
12
  } from '@datagrok-libraries/bio/src/monomer-works/monomer-utils';
15
13
  import {
16
14
  HELM_REQUIRED_FIELDS as REQ, HELM_OPTIONAL_FIELDS as OPT, HELM_POLYMER_TYPE
@@ -18,12 +16,8 @@ import {
18
16
 
19
17
  import {_package} from '../package';
20
18
 
21
- const _HELM_REQUIRED_FIELDS_ARRAY = [
22
- REQ.SYMBOL, REQ.NAME, REQ.MOLFILE, REQ.AUTHOR, REQ.ID,
23
- REQ.RGROUPS, REQ.SMILES, REQ.POLYMER_TYPE, REQ.MONOMER_TYPE, REQ.CREATE_DATE,
24
- ] as const;
19
+ import {PolyToolMonomerLibHandler} from '@datagrok-libraries/bio/src/utils/poly-tool/monomer-lib-handler';
25
20
 
26
- const _HELM_OPTIONAL_FIELDS_ARRAY = [OPT.NATURAL_ANALOG, OPT.META] as const;
27
21
  // -- Monomer libraries --
28
22
  export const LIB_STORAGE_NAME = 'Libraries';
29
23
  export const LIB_PATH = 'System:AppData/Bio/libraries/';
@@ -291,8 +285,9 @@ export class MonomerLibHelper implements IMonomerLibHelper {
291
285
  }
292
286
  const df = await fileSource.readCsv(fileName);
293
287
  const json = toJson(df);
294
- if (isValidEnumeratorLib(json))
295
- rawLibData = getJsonMonomerLibForEnumerator(json);
288
+ const polyToolMonomerLib = new PolyToolMonomerLibHandler(json);
289
+ if (polyToolMonomerLib.isValid())
290
+ rawLibData = polyToolMonomerLib.getJsonMonomerLib();
296
291
  else
297
292
  throw new Error('Invalid format of CSV monomer lib');
298
293
  } else {