@datagrok/bio 2.0.25 → 2.0.27

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,7 @@
1
+ seq
2
+ A
3
+ G
4
+ C
5
+ U
6
+ AG
7
+ CU
package/package.json CHANGED
@@ -5,7 +5,7 @@
5
5
  "name": "Leonid Stolbov",
6
6
  "email": "lstolbov@datagrok.ai"
7
7
  },
8
- "version": "2.0.25",
8
+ "version": "2.0.27",
9
9
  "description": "Bio is a [package](https://datagrok.ai/help/develop/develop#packages) for the [Datagrok](https://datagrok.ai) platform",
10
10
  "repository": {
11
11
  "type": "git",
@@ -14,15 +14,10 @@
14
14
  },
15
15
  "dependencies": {
16
16
  "@biowasm/aioli": "^3.1.0",
17
- "@datagrok-libraries/bio": "^5.2.0",
17
+ "@datagrok-libraries/bio": "^5.5.0",
18
18
  "@datagrok-libraries/chem-meta": "1.0.1",
19
19
  "@datagrok-libraries/ml": "^6.2.0",
20
20
  "@datagrok-libraries/utils": "^1.11.1",
21
- "@deck.gl/core": "^8.7.5",
22
- "@deck.gl/layers": "^8.7.5",
23
- "@luma.gl/constants": "^8.5.10",
24
- "@luma.gl/core": "^8.5.10",
25
- "@phylocanvas/phylocanvas.gl": "^1.44.0",
26
21
  "cash-dom": "^8.0.0",
27
22
  "datagrok-api": "^1.7.0",
28
23
  "dayjs": "^1.11.4",
@@ -51,6 +46,10 @@
51
46
  "webpack": "^5.64.1",
52
47
  "webpack-cli": "^4.6.0"
53
48
  },
49
+ "grokDependencies": {
50
+ "@datagrok/chem": "1.3.16",
51
+ "@datagrok/helm": "latest"
52
+ },
54
53
  "scripts": {
55
54
  "link-api": "npm link datagrok-api",
56
55
  "link-bio": "npm link @datagrok-libraries/bio",
package/setup.sh CHANGED
@@ -31,7 +31,7 @@ for dir in ${dirs[@]}; do
31
31
  npm run link-all
32
32
  fi
33
33
  echo -e $GREEN npm run build in$(pwd) $NO_COLOR
34
- npm run build
34
+ npm run build || exit
35
35
  done
36
36
 
37
37
  cd $package_dir
@@ -8,17 +8,20 @@ import {getHelmMonomers} from '../package';
8
8
 
9
9
  const V2000_ATOM_NAME_POS = 31;
10
10
 
11
- export async function getMonomericMols(mcol: DG.Column, pattern: boolean = false): Promise<DG.Column> {
11
+ export async function getMonomericMols(mcol: DG.Column,
12
+ pattern: boolean = false, monomersDict?: Map<string, string>): Promise<DG.Column> {
12
13
  const separator: string = mcol.tags[C.TAGS.SEPARATOR];
13
14
  const units: string = mcol.tags[DG.TAGS.UNITS];
14
15
  const splitter = bio.getSplitter(units, separator);
15
16
  let molV3000Array;
16
- const monomersDict = new Map();
17
+ monomersDict ??= new Map();
17
18
  const monomers = units === 'helm' ?
18
19
  getHelmMonomers(mcol) : Object.keys(bio.getStats(mcol, 0, splitter).freq).filter((it) => it !== '');
19
20
 
20
- for (let i = 0; i < monomers.length; i++)
21
- monomersDict.set(monomers[i], `${i + 1}`);
21
+ for (let i = 0; i < monomers.length; i++) {
22
+ if (!monomersDict.has(monomers[i]))
23
+ monomersDict.set(monomers[i], `${monomersDict.size + 1}`);
24
+ }
22
25
 
23
26
  if (units === 'helm') {
24
27
  molV3000Array = await grok.functions.call('HELM:getMolFiles', {col: mcol});
@@ -2,7 +2,6 @@ import * as DG from 'datagrok-api/dg';
2
2
 
3
3
  import {runTests, TestContext, tests} from '@datagrok-libraries/utils/src/test';
4
4
 
5
- import './tests/WebLogo-test';
6
5
  import './tests/Palettes-test';
7
6
  import './tests/detectors-test';
8
7
  import './tests/msa-tests';
@@ -13,9 +12,11 @@ import './tests/renderers-test';
13
12
  import './tests/convert-test';
14
13
  import './tests/fasta-handler-test';
15
14
  import './tests/fasta-export-tests';
15
+ import './tests/bio-tests';
16
16
  import './tests/WebLogo-positions-test';
17
17
  import './tests/checkInputColumn-tests';
18
18
  import './tests/similarity-diversity-tests';
19
+ import './tests/substructure-filter-tests';
19
20
 
20
21
  export const _package = new DG.Package();
21
22
  export {tests};
package/src/package.ts CHANGED
@@ -13,11 +13,16 @@ import {SequenceAlignment, Aligned} from './seq_align';
13
13
  import {getEmbeddingColsNames, sequenceSpace} from './analysis/sequence-space';
14
14
  import {getActivityCliffs} from '@datagrok-libraries/ml/src/viewers/activity-cliffs';
15
15
  import {createPropPanelElement, createTooltipElement, getSimilaritiesMarix} from './analysis/sequence-activity-cliffs';
16
- import {createJsonMonomerLibFromSdf, encodeMonomers, getMolfilesFromSeq, HELM_CORE_LIB_FILENAME} from './utils/utils';
16
+ import {createJsonMonomerLibFromSdf, encodeMonomers, getMolfilesFromSeq} from '@datagrok-libraries/bio/src/utils/monomer-utils';
17
+ import {HELM_CORE_LIB_FILENAME} from '@datagrok-libraries/bio/src/utils/const';
17
18
  import {getMacroMol} from './utils/atomic-works';
18
19
  import {MacromoleculeSequenceCellRenderer} from './utils/cell-renderer';
19
20
  import {convert} from './utils/convert';
20
21
  import {getMacroMolColumnPropertyPanel, representationsWidget} from './widgets/representations';
22
+ import {TAGS} from '@datagrok-libraries/bio/src/utils/macromolecule';
23
+ import {ALPHABET, NOTATION} from '@datagrok-libraries/bio/src/utils/macromolecule'
24
+ import {_toAtomicLevel} from '@datagrok-libraries/bio/src/utils/to-atomic-level';
25
+ import {FastaFileHandler} from '@datagrok-libraries/bio/src/utils/fasta-handler';
21
26
  import {removeEmptyStringRows} from '@datagrok-libraries/utils/src/dataframe-utils';
22
27
  import {
23
28
  generateManySequences,
@@ -99,7 +104,7 @@ export function checkInputColumn(
99
104
  ) {
100
105
  const notationAdd = allowedNotations.length == 0 ? 'any notation' :
101
106
  (`notation${allowedNotations.length > 1 ? 's' : ''} ${allowedNotations.map((n) => `"${n}"`).join(', ')} `);
102
- msg = `${name} analysis is allowed for Macromolecules with ${notationAdd}.`;
107
+ msg = `${name} + ' analysis is allowed for Macromolecules with notation ${notationAdd}.`;
103
108
  res = false;
104
109
  } else if (!uh.isHelm()) {
105
110
  // alphabet is not specified for 'helm' notation
@@ -169,10 +174,10 @@ export async function activityCliffs(df: DG.DataFrame, macroMolecule: DG.Column,
169
174
  'SPE': {cycles: 2000, lambda: 1.0, dlambda: 0.0005},
170
175
  };
171
176
  const tags = {
172
- 'units': macroMolecule.tags['units'],
173
- 'aligned': macroMolecule.tags['aligned'],
174
- 'separator': macroMolecule.tags['separator'],
175
- 'alphabet': macroMolecule.tags['alphabet'],
177
+ 'units': macroMolecule.getTag(DG.TAGS.UNITS),
178
+ 'aligned': macroMolecule.getTag(TAGS.aligned),
179
+ 'separator': macroMolecule.getTag(TAGS.separator),
180
+ 'alphabet': macroMolecule.getTag(TAGS.alphabet),
176
181
  };
177
182
  const sp = await getActivityCliffs(
178
183
  df,
@@ -247,20 +252,11 @@ export async function toAtomicLevel(df: DG.DataFrame, macroMolecule: DG.Column):
247
252
  }
248
253
  if (!checkInputColumnUi(macroMolecule, 'To Atomic Level'))
249
254
  return;
250
-
251
255
  const monomersLibFile = await _package.files.readAsText(HELM_CORE_LIB_FILENAME);
252
256
  const monomersLibObject: any[] = JSON.parse(monomersLibFile);
253
- const atomicCodes = getMolfilesFromSeq(macroMolecule, monomersLibObject);
254
- const result = await getMacroMol(atomicCodes!);
255
-
256
- const col = DG.Column.fromStrings('regenerated', result);
257
- col.semType = DG.SEMTYPE.MOLECULE;
258
- col.tags[DG.TAGS.UNITS] = 'molblock';
259
- df.columns.add(col, true);
260
- await grok.data.detectSemanticTypes(df);
257
+ _toAtomicLevel(df, macroMolecule, monomersLibObject);
261
258
  }
262
259
 
263
-
264
260
  //top-menu: Bio | MSA...
265
261
  //name: MSA
266
262
  //input: dataframe table
@@ -528,5 +524,3 @@ export function saveAsFasta() {
528
524
  export function bioSubstructureFilter(): BioSubstructureFilter {
529
525
  return new BioSubstructureFilter();
530
526
  }
531
-
532
-
@@ -8,6 +8,14 @@ import {getMonomericMols} from '../calculations/monomerLevelMols';
8
8
  import {BitSet} from 'datagrok-api/dg';
9
9
  import {updateDivInnerHTML} from '../utils/ui-utils';
10
10
 
11
+ export const MONOMER_MOLS_COL = 'monomeric-mols';
12
+
13
+ const enum MONOMERIC_COL_TAGS{
14
+ MONOMERIC_MOLS = 'monomeric-mols',
15
+ LAST_INVALIDATED_VERSION = 'last-invalidated-version',
16
+ MONOMERS_DICT = 'monomers-dict'
17
+ }
18
+
11
19
  /**
12
20
  * Searches substructure in each row of Macromolecule column
13
21
  *
@@ -67,29 +75,48 @@ export function substructureSearchDialog(col: DG.Column): void {
67
75
  .show();
68
76
  }
69
77
 
70
- export function linearSubstructureSearch(substructure: string, col: DG.Column): DG.BitSet {
71
- const lowerCaseSubstr = substructure.toLowerCase();
78
+ export function linearSubstructureSearch(substructure: string, col: DG.Column, separator?: string): DG.BitSet {
79
+ const re = separator ? prepareSubstructureRegex(substructure, separator) : substructure;
72
80
  const resultArray = DG.BitSet.create(col.length);
73
81
  for (let i = 0; i < col.length; i++) {
74
- const macromolecule = col.get(i).toLowerCase();
75
- if (macromolecule.indexOf(lowerCaseSubstr) !== -1)
82
+ const macromolecule = col.get(i);
83
+ if (macromolecule.match(re) || macromolecule === substructure)
76
84
  resultArray.set(i, true, false);
77
85
  }
78
86
  return resultArray;
79
87
  }
80
88
 
89
+ function prepareSubstructureRegex(substructure: string, separator: string) {
90
+ const char = `${separator}`.replace(/[\-\[\]\/\{\}\(\)\*\+\?\.\\\^\$\|]/g, '\\$&');
91
+ const startsWithSep = substructure.charAt(0) === separator;
92
+ const endsWithSep = substructure.charAt(substructure.length - 1) === separator;
93
+ const substrWithoutSep = substructure.replace(new RegExp(`^${char}|${char}$`, 'g'), '');
94
+ const re = startsWithSep ? endsWithSep ? `${char}${substrWithoutSep}${char}` :
95
+ `${char}${substrWithoutSep}${char}|${char}${substrWithoutSep}$` :
96
+ endsWithSep ? `^${substrWithoutSep}${char}|${char}${substrWithoutSep}${char}` :
97
+ `^${substrWithoutSep}${char}|${char}${substrWithoutSep}${char}|${char}${substrWithoutSep}$`;
98
+ return re;
99
+ }
100
+
81
101
  export async function helmSubstructureSearch(substructure: string, col: DG.Column): Promise<BitSet> {
82
- const helmColWithSubstructure = DG.Column.string('helm', col.length + 1)
83
- .init((i) => i === col.length ? substructure : col.get(i));
84
- helmColWithSubstructure.setTag(DG.TAGS.UNITS, bio.NOTATION.HELM);
85
- const monomericMolsCol = await getMonomericMols(helmColWithSubstructure, true);
86
- const molSubstructure = monomericMolsCol.get(col.length);
87
- const monomericMolsDf = DG.DataFrame.fromColumns([monomericMolsCol]);
88
- monomericMolsDf.rows.removeAt(col.length);
102
+ if (col.version !== col.temp[MONOMERIC_COL_TAGS.LAST_INVALIDATED_VERSION])
103
+ await invalidateHelmMols(col);
104
+ const substructureCol = DG.Column.string('helm', 1).init((i) => substructure);
105
+ substructureCol.setTag(DG.TAGS.UNITS, bio.NOTATION.HELM);
106
+ const substructureMolsCol =
107
+ await getMonomericMols(substructureCol, true, col.temp[MONOMERIC_COL_TAGS.MONOMERS_DICT]);
89
108
  const matchesCol = await grok.functions.call('Chem:searchSubstructure', {
90
- molStringsColumn: monomericMolsDf.columns.byIndex(0),
91
- molString: molSubstructure,
109
+ molStringsColumn: col.temp[MONOMERIC_COL_TAGS.MONOMERIC_MOLS],
110
+ molString: substructureMolsCol.get(0),
92
111
  molBlockFailover: '',
93
112
  });
94
113
  return matchesCol.get(0);
95
114
  }
115
+
116
+ export async function invalidateHelmMols(col: DG.Column) {
117
+ const monomersDict = new Map();
118
+ const monomericMolsCol = await getMonomericMols(col, true, monomersDict);
119
+ col.temp[MONOMERIC_COL_TAGS.MONOMERIC_MOLS] = monomericMolsCol;
120
+ col.temp[MONOMERIC_COL_TAGS.MONOMERS_DICT] = monomersDict;
121
+ col.temp[MONOMERIC_COL_TAGS.LAST_INVALIDATED_VERSION] = col.version;
122
+ }
@@ -5,7 +5,7 @@ import * as bio from '@datagrok-libraries/bio';
5
5
 
6
6
  import {after, before, category, test, expect, expectObject} from '@datagrok-libraries/utils/src/test';
7
7
 
8
- category('WebLogo', () => {
8
+ category('bio', () => {
9
9
  const csvDfN1: string = `seq
10
10
  ACGTCT
11
11
  CAGTGT
@@ -47,7 +47,21 @@ XZJ{}2
47
47
  `;
48
48
 
49
49
  // anonymous functions specified in test() registering must return Promise<any>
50
- test('testGetStats', async () => { await _testGetStats(csvDfN1); });
50
+ test('testGetStatsHelm1', async () => {
51
+ const csv = `seq
52
+ PEPTIDE1{meI}$$$$
53
+ `;
54
+ const df: DG.DataFrame = DG.DataFrame.fromCsv(csv);
55
+ const seqCol: DG.Column = df.getCol('seq')!;
56
+ const stats = bio.getStats(seqCol, 1, bio.splitterAsHelm);
57
+
58
+ expectObject(stats.freq, {
59
+ 'meI': 1
60
+ });
61
+ expect(stats.sameLength, true);
62
+ });
63
+
64
+ test('testGetStatsN1', async () => { await _testGetStats(csvDfN1); });
51
65
  test('testGetAlphabetSimilarity', async () => { await _testGetAlphabetSimilarity(); });
52
66
 
53
67
  test('testPickupPaletteN1', async () => { await _testPickupPaletteN1(csvDfN1); });
@@ -8,10 +8,9 @@ import {after, before, category, test, expect, expectArray} from '@datagrok-libr
8
8
 
9
9
  import {checkInputColumn, multipleSequenceAlignmentAny} from '../package';
10
10
  import {UNITS} from 'datagrok-api/dg';
11
-
11
+ import {ALPHABET, NOTATION} from '@datagrok-libraries/bio/src/utils/macromolecule';
12
12
 
13
13
  category('checkInputColumn', () => {
14
-
15
14
  const csv = `seq
16
15
  seq1,
17
16
  seq2,
@@ -22,7 +21,7 @@ seq4`;
22
21
  const func: DG.Func = DG.Func.find({package: 'Bio', name: 'multipleSequenceAlignmentAny'})[0];
23
22
  const funcInputColumnProperty: DG.Property = func.inputs.find((i) => i.name == 'sequence')!;
24
23
 
25
- let k = 11;
24
+ const k = 11;
26
25
 
27
26
  const df: DG.DataFrame = DG.DataFrame.fromCsv(csv);
28
27
  const col: DG.Column = df.getCol('seq');
@@ -42,7 +41,7 @@ seq4`;
42
41
  const df: DG.DataFrame = DG.DataFrame.fromCsv(csv);
43
42
  const col: DG.Column = df.getCol('seq');
44
43
  col.semType = DG.SEMTYPE.MACROMOLECULE;
45
- col.setTag(DG.TAGS.UNITS, 'helm');
44
+ col.setTag(DG.TAGS.UNITS, NOTATION.HELM);
46
45
  col.setTag(bio.TAGS.alphabetSize, '11');
47
46
  col.setTag(bio.TAGS.alphabetIsMultichar, 'true');
48
47
 
@@ -57,7 +56,7 @@ seq4`;
57
56
  const df: DG.DataFrame = DG.DataFrame.fromCsv(csv);
58
57
  const col: DG.Column = df.getCol('seq');
59
58
  col.semType = DG.SEMTYPE.MACROMOLECULE;
60
- col.setTag(DG.TAGS.UNITS, 'fasta');
59
+ col.setTag(DG.TAGS.UNITS, NOTATION.FASTA);
61
60
  col.setTag(bio.TAGS.alphabet, 'UN');
62
61
  col.setTag(bio.TAGS.alphabetSize, '11');
63
62
  col.setTag(bio.TAGS.alphabetIsMultichar, 'true');
@@ -73,6 +72,6 @@ seq4`;
73
72
  test('testGetActionFunctionMeta', async () => {
74
73
  const func: DG.Func = DG.Func.find({package: 'Bio', name: 'multipleSequenceAlignmentAny'})[0];
75
74
  const sequenceInput: DG.Property = func.inputs.find((i) => i.name == 'sequence')!;
76
- let k = 11;
75
+ const k = 11;
77
76
  });
78
- });
77
+ });
@@ -6,7 +6,6 @@ import * as bio from '@datagrok-libraries/bio';
6
6
  import {after, before, category, test, expect, expectObject} from '@datagrok-libraries/utils/src/test';
7
7
 
8
8
  import {importFasta} from '../package';
9
- import {UnitsHandler} from '@datagrok-libraries/bio/src/utils/units-handler';
10
9
 
11
10
  type DfReaderFunc = () => Promise<DG.DataFrame>;
12
11
 
@@ -200,44 +199,49 @@ MWRSWY-CKHP
200
199
  test('NegativeSmiles', async () => { await _testNeg(readCsv('csvDfSmiles', csvDfSmiles), 'col1'); });
201
200
 
202
201
  test('Dna1', async () => {
203
- await _testPos(readCsv('csvDfDna1', csvDfDna1), 'seq', 'fasta', 'SEQ', 'DNA', 4, false);
202
+ await _testPos(readCsv('csvDfDna1', csvDfDna1), 'seq', bio.NOTATION.FASTA, bio.ALIGNMENT.SEQ, bio.ALPHABET.DNA, 4, false);
204
203
  });
205
204
  test('Rna1', async () => {
206
- await _testPos(readCsv('csvDfRna1', csvDfRna1), 'seq', 'fasta', 'SEQ', 'RNA', 4, false);
205
+ await _testPos(readCsv('csvDfRna1', csvDfRna1), 'seq', bio.NOTATION.FASTA, bio.ALIGNMENT.SEQ, bio.ALPHABET.RNA, 4, false);
207
206
  });
208
207
  test('AA1', async () => {
209
- await _testPos(readCsv('csvDfPt1', csvDfPt1), 'seq', 'fasta', 'SEQ', 'PT', 20, false);
208
+ await _testPos(readCsv('csvDfPt1', csvDfPt1), 'seq', bio.NOTATION.FASTA, bio.ALIGNMENT.SEQ, bio.ALPHABET.PT, 20, false);
210
209
  });
211
210
  test('MsaDna1', async () => {
212
- await _testPos(readCsv('csvDfMsaDna1', csvDfMsaDna1), 'seq', 'fasta', 'SEQ.MSA', 'DNA', 4, false);
211
+ await _testPos(readCsv('csvDfMsaDna1', csvDfMsaDna1), 'seq', bio.NOTATION.FASTA, bio.ALIGNMENT.SEQ_MSA, bio.ALPHABET.DNA, 4, false);
213
212
  });
214
213
 
215
214
  test('MsaAA1', async () => {
216
- await _testPos(readCsv('csvDfMsaPt1', csvDfMsaPt1), 'seq', 'fasta', 'SEQ.MSA', 'PT', 20, false);
215
+ await _testPos(readCsv('csvDfMsaPt1', csvDfMsaPt1), 'seq', bio.NOTATION.FASTA,
216
+ bio.ALIGNMENT.SEQ_MSA, bio.ALPHABET.PT, 20, false);
217
217
  });
218
218
 
219
219
  test('SepDna', async () => {
220
- await _testPos(readCsv('csvDfSepDna', csvDfSepDna), 'seq', 'separator', 'SEQ', 'DNA', 4, false, '*');
220
+ await _testPos(readCsv('csvDfSepDna', csvDfSepDna), 'seq', bio.NOTATION.SEPARATOR, bio.ALIGNMENT.SEQ, bio.ALPHABET.DNA, 4, false, '*');
221
221
  });
222
222
  test('SepRna', async () => {
223
- await _testPos(readCsv('csvDfSepRna', csvDfSepRna), 'seq', 'separator', 'SEQ', 'RNA', 4, false, '*');
223
+ await _testPos(readCsv('csvDfSepRna', csvDfSepRna), 'seq', bio.NOTATION.SEPARATOR, bio.ALIGNMENT.SEQ, bio.ALPHABET.RNA, 4, false, '*');
224
224
  });
225
225
  test('SepPt', async () => {
226
- await _testPos(readCsv('csvDfSepPt', csvDfSepPt), 'seq', 'separator', 'SEQ', 'PT', 20, false, '-');
226
+ await _testPos(readCsv('csvDfSepPt', csvDfSepPt), 'seq',
227
+ bio.NOTATION.SEPARATOR, bio.ALIGNMENT.SEQ, bio.ALPHABET.PT, 20, false, '-');
227
228
  });
228
229
  test('SepUn1', async () => {
229
- await _testPos(readCsv('csvDfSepUn1', csvDfSepUn1), 'seq', 'separator', 'SEQ', 'UN', 8, true, '-');
230
+ await _testPos(readCsv('csvDfSepUn1', csvDfSepUn1), 'seq',
231
+ bio.NOTATION.SEPARATOR, bio.ALIGNMENT.SEQ, bio.ALPHABET.UN, 8, true, '-');
230
232
  });
231
233
  test('SepUn2', async () => {
232
- await _testPos(readCsv('csvDfSepUn2', csvDfSepUn2), 'seq', 'separator', 'SEQ', 'UN', 9, true, '/');
234
+ await _testPos(readCsv('csvDfSepUn2', csvDfSepUn2), 'seq',
235
+ bio.NOTATION.SEPARATOR, bio.ALIGNMENT.SEQ, bio.ALPHABET.UN, 9, true, '/');
233
236
  });
234
237
 
235
238
  test('SepMsaN1', async () => {
236
- await _testPos(readCsv('csvDfSepMsaDna1', csvDfSepMsaDna1), 'seq', 'separator', 'SEQ.MSA', 'DNA', 4, false, '-');
239
+ await _testPos(readCsv('csvDfSepMsaDna1', csvDfSepMsaDna1), 'seq',
240
+ bio.NOTATION.SEPARATOR, bio.ALIGNMENT.SEQ_MSA, bio.ALPHABET.DNA, 4, false, '-');
237
241
  });
238
242
 
239
243
  test('SamplesFastaCsvPt', async () => {
240
- await _testPos(readSamples(Samples.fastaCsv), 'sequence', 'fasta', 'SEQ', 'PT', 20, false);
244
+ await _testPos(readSamples(Samples.fastaCsv), 'sequence', bio.NOTATION.FASTA, bio.ALIGNMENT.SEQ, bio.ALPHABET.PT, 20, false);
241
245
  });
242
246
  test('SamplesFastaCsvNegativeEntry', async () => {
243
247
  await _testNeg(readSamples(Samples.fastaCsv), 'Entry');
@@ -250,7 +254,8 @@ MWRSWY-CKHP
250
254
  });
251
255
 
252
256
  test('SamplesFastaFastaPt', async () => {
253
- await _testPos(readSamples(Samples.fastaFasta, readFileFasta), 'sequence', 'fasta', 'SEQ', 'PT', 20, false);
257
+ await _testPos(readSamples(Samples.fastaFasta, readFileFasta),
258
+ 'sequence', bio.NOTATION.FASTA, bio.ALIGNMENT.SEQ, bio.ALPHABET.PT, 20, false);
254
259
  });
255
260
 
256
261
  // peptidesComplex contains monomers with spaces in AlignedSequence columns, which are forbidden
@@ -268,7 +273,8 @@ MWRSWY-CKHP
268
273
  });
269
274
 
270
275
  test('samplesMsaComplexUn', async () => {
271
- await _testPos(readSamples(Samples.msaComplex), 'MSA', 'separator', 'SEQ.MSA', 'UN', 161, true, '/');
276
+ await _testPos(readSamples(Samples.msaComplex), 'MSA',
277
+ bio.NOTATION.SEPARATOR, bio.ALIGNMENT.SEQ_MSA, bio.ALPHABET.UN, 161, true, '/');
272
278
  });
273
279
  test('samplesMsaComplexNegativeActivity', async () => {
274
280
  await _testNeg(readSamples(Samples.msaComplex), 'Activity');
@@ -283,7 +289,7 @@ MWRSWY-CKHP
283
289
  });
284
290
 
285
291
  test('samplesHelmCsvHELM', async () => {
286
- await _testPos(readSamples(Samples.helmCsv), 'HELM', 'helm', null, null, 160, true, null);
292
+ await _testPos(readSamples(Samples.helmCsv), 'HELM', bio.NOTATION.HELM, null, null, 160, true, null);
287
293
  });
288
294
 
289
295
  test('samplesHelmCsvNegativeActivity', async () => {
@@ -299,7 +305,7 @@ MWRSWY-CKHP
299
305
  await _testNeg(readSamples(Samples.testHelmCsv), 'Test type');
300
306
  });
301
307
  test('samplesTestHelmPositiveHelmString', async () => {
302
- await _testPos(readSamples(Samples.testHelmCsv), 'HELM string', 'helm', null, null, 9, true, null);
308
+ await _testPos(readSamples(Samples.testHelmCsv), 'HELM string', bio.NOTATION.HELM, null, null, 9, true, null);
303
309
  });
304
310
  test('samplesTestHelmNegativeValid', async () => {
305
311
  await _testNeg(readSamples(Samples.testHelmCsv), 'Valid?');
@@ -331,7 +337,7 @@ MWRSWY-CKHP
331
337
  });
332
338
 
333
339
  test('samplesFastaPtPosSequence', async () => {
334
- await _testPos(readSamples(Samples.fastaPtCsv), 'sequence', 'fasta', 'SEQ', 'PT', 20, false);
340
+ await _testPos(readSamples(Samples.fastaPtCsv), 'sequence', bio.NOTATION.FASTA, bio.ALIGNMENT.SEQ, bio.ALPHABET.PT, 20, false);
335
341
  });
336
342
 
337
343
  test('samplesTestCerealNegativeCerealName', async () => {
@@ -383,7 +389,7 @@ export async function _testNeg(readDf: DfReaderFunc, colName: string) {
383
389
  throw new Error(msg);
384
390
  // col.semType = '';
385
391
  // col.setTag(DG.TAGS.UNITS, '');
386
- // col.setTag('separator', '');
392
+ // col.setTag(NOTATION.SEPARATOR, '');
387
393
  }
388
394
  }
389
395
 
@@ -406,7 +412,7 @@ export async function _testPos(
406
412
  if (separator)
407
413
  expect(col.getTag(bio.TAGS.separator), separator);
408
414
 
409
- const uh = new UnitsHandler(col);
415
+ const uh = new bio.UnitsHandler(col);
410
416
  expect(uh.getAlphabetSize(), alphabetSize);
411
417
  expect(uh.getAlphabetIsMultichar(), alphabetIsMultichar);
412
418
  if (!uh.isHelm()) {
@@ -103,7 +103,7 @@ category('renderers', () => {
103
103
  async function _rendererMacromoleculeDifference() {
104
104
  const seqDiffCol: DG.Column = DG.Column.fromStrings('SequencesDiff',
105
105
  ['meI/hHis/Aca/N/T/dK/Thr_PO3H2/Aca#D-Tyr_Et/Tyr_ab-dehydroMe/meN/E/N/dV']);
106
- seqDiffCol.tags[DG.TAGS.UNITS] = 'separator';
106
+ seqDiffCol.tags[DG.TAGS.UNITS] = bio.NOTATION.SEPARATOR;
107
107
  seqDiffCol.tags[TAGS.SEPARATOR] = '/';
108
108
  seqDiffCol.semType = SEM_TYPES.MACROMOLECULE_DIFFERENCE;
109
109
  const df = DG.DataFrame.fromColumns([seqDiffCol]);
@@ -139,7 +139,7 @@ category('renderers', () => {
139
139
  `cell.renderer="${srcSeqCol!.getTag(DG.TAGS.CELL_RENDERER)}"`);
140
140
  expect(srcSeqCol.semType, DG.SEMTYPE.MACROMOLECULE);
141
141
  expect(srcSeqCol.getTag(DG.TAGS.UNITS), bio.NOTATION.FASTA);
142
- expect(srcSeqCol.getTag(bio.TAGS.aligned), 'SEQ');
142
+ expect(srcSeqCol.getTag(bio.TAGS.aligned), bio.ALIGNMENT.SEQ);
143
143
  expect(srcSeqCol.getTag(bio.TAGS.alphabet), bio.ALPHABET.PT);
144
144
  expect(srcSeqCol.getTag(DG.TAGS.CELL_RENDERER), 'sequence');
145
145
 
@@ -148,7 +148,7 @@ category('renderers', () => {
148
148
 
149
149
  expect(msaSeqCol.semType, DG.SEMTYPE.MACROMOLECULE);
150
150
  expect(msaSeqCol.getTag(DG.TAGS.UNITS), bio.NOTATION.FASTA);
151
- expect(msaSeqCol.getTag(bio.TAGS.aligned), 'SEQ.MSA');
151
+ expect(msaSeqCol.getTag(bio.TAGS.aligned), bio.ALIGNMENT.SEQ_MSA);
152
152
  expect(msaSeqCol.getTag(bio.TAGS.alphabet), bio.ALPHABET.PT);
153
153
  expect(msaSeqCol.getTag(DG.TAGS.CELL_RENDERER), 'sequence');
154
154
 
@@ -190,7 +190,7 @@ category('renderers', () => {
190
190
  /**/
191
191
  const seqDiffCol: DG.Column = DG.Column.fromStrings('SequencesDiff',
192
192
  ['meI/hHis/Aca/N/T/dK/Thr_PO3H2/Aca#D-Tyr_Et/Tyr_ab-dehydroMe/meN/E/N/dV']);
193
- seqDiffCol.tags[DG.TAGS.UNITS] = 'separator';
193
+ seqDiffCol.tags[DG.TAGS.UNITS] = bio.NOTATION.SEPARATOR;
194
194
  seqDiffCol.tags[TAGS.SEPARATOR] = '/';
195
195
  seqDiffCol.semType = SEM_TYPES.MACROMOLECULE_DIFFERENCE;
196
196
  const df = DG.DataFrame.fromColumns([seqDiffCol]);
@@ -206,10 +206,12 @@ category('renderers', () => {
206
206
  `view renderer has set to '${renderer}' instead of correct 'MacromoleculeDifference'.`);
207
207
  }
208
208
 
209
+ /** GROK-11212 Do not overwrite / recalculate 'cell.renderer' tag that has been set programmatically
210
+ * https://reddata.atlassian.net/browse/GROK-11212 */
209
211
  async function _setRendererManually() {
210
212
  const seqDiffCol: DG.Column = DG.Column.fromStrings('SequencesDiff',
211
213
  ['meI/hHis/Aca/N/T/dK/Thr_PO3H2/Aca#D-Tyr_Et/Tyr_ab-dehydroMe/meN/E/N/dV']);
212
- seqDiffCol.tags[DG.TAGS.UNITS] = 'separator';
214
+ seqDiffCol.tags[DG.TAGS.UNITS] = bio.NOTATION.SEPARATOR;
213
215
  seqDiffCol.tags[TAGS.SEPARATOR] = '/';
214
216
  seqDiffCol.semType = SEM_TYPES.MACROMOLECULE;
215
217
  const tgtCellRenderer = 'MacromoleculeDifference';
@@ -0,0 +1,61 @@
1
+ import {after, before, category, test, expect, delay} from '@datagrok-libraries/utils/src/test';
2
+ import * as DG from 'datagrok-api/dg';
3
+ import * as grok from 'datagrok-api/grok';
4
+ import {readDataframe} from './utils';
5
+ import {BioSubstructureFilter, HelmFilter, SeparatorFilter} from '../widgets/bio-substructure-filter';
6
+
7
+
8
+ category('substructureFilters', async () => {
9
+ test('fasta', async () => {
10
+ const fasta = await readDataframe('tests/filter_FASTA.csv');
11
+ const filter = new BioSubstructureFilter();
12
+ await grok.data.detectSemanticTypes(fasta);
13
+ filter.attach(fasta);
14
+ filter.bioFilter!.substructure = 'MD';
15
+ await delay(100);
16
+ expect(filter.dataFrame!.filter.trueCount, 3);
17
+ expect(filter.dataFrame!.filter.get(0), true);
18
+ expect(filter.dataFrame!.filter.get(3), true);
19
+ expect(filter.dataFrame!.filter.get(8), true);
20
+ expect(filter.dataFrame!.filter.get(1), false);
21
+ });
22
+
23
+ test('separator', async () => {
24
+ const msa = await readDataframe('tests/filter_MSA.csv');
25
+ const filter = new BioSubstructureFilter();
26
+ await grok.data.detectSemanticTypes(msa);
27
+ filter.attach(msa);
28
+ filter.bioFilter!.substructure = 'meI';
29
+ await delay(100);
30
+ expect(filter.dataFrame!.filter.trueCount, 7);
31
+ expect(filter.dataFrame!.filter.get(2), false);
32
+ filter.bioFilter!.substructure = '/meI';
33
+ await delay(100);
34
+ expect(filter.dataFrame!.filter.trueCount, 0);
35
+ filter.bioFilter!.substructure = 'meI-hHis';
36
+ (filter.bioFilter! as SeparatorFilter).separatorInput.value = '-';
37
+ await delay(100);
38
+ expect(filter.dataFrame!.filter.trueCount, 7);
39
+ expect(filter.dataFrame!.filter.get(2), false);
40
+ });
41
+
42
+ test('helm', async () => {
43
+ const helm = await readDataframe('tests/filter_HELM.csv');
44
+ const helmTableView = grok.shell.addTableView(helm);
45
+ const filter = new BioSubstructureFilter();
46
+ await grok.data.detectSemanticTypes(helm);
47
+ filter.attach(helm);
48
+ (filter.bioFilter! as HelmFilter).helmSubstructure = 'PEPTIDE1{C}$$$$V2.0';
49
+ filter.bioFilter!.onChanged.next();
50
+ await delay(1000);
51
+ expect(filter.dataFrame!.filter.trueCount, 2);
52
+ expect(filter.dataFrame!.filter.get(0), true);
53
+ expect(filter.dataFrame!.filter.get(3), true);
54
+ (filter.bioFilter! as HelmFilter).helmSubstructure = 'PEPTIDE1{A.C}$$$$V2.0';
55
+ filter.bioFilter!.onChanged.next();
56
+ await delay(100);
57
+ expect(filter.dataFrame!.filter.trueCount, 1);
58
+ expect(filter.dataFrame!.filter.get(3), true);
59
+ helmTableView.close();
60
+ });
61
+ });
@@ -1,6 +1,8 @@
1
- import * as DG from 'datagrok-api/dg';
2
1
  import * as grok from 'datagrok-api/grok';
3
- import {DataFrame} from 'datagrok-api/dg';
2
+ import * as ui from 'datagrok-api/ui';
3
+ import * as DG from 'datagrok-api/dg';
4
+ import * as bio from '@datagrok-libraries/bio';
5
+
4
6
 
5
7
  export function generateManySequences(): DG.Column[] {
6
8
  let columns: DG.Column[] = [];
@@ -19,10 +21,10 @@ export function generateLongSequence(): DG.Column[] {
19
21
 
20
22
  export function setTagsMacromolecule(col: DG.Column) {
21
23
  col.semType = DG.SEMTYPE.MACROMOLECULE;
22
- col.setTag('units', 'separator');
23
- col.setTag('aligned', 'SEQ.MSA');
24
- col.setTag('alphabet', 'UN');
25
- col.setTag('separator', '/');
24
+ col.setTag(DG.TAGS.UNITS, bio.NOTATION.SEPARATOR);
25
+ col.setTag(bio.TAGS.aligned, bio.ALIGNMENT.SEQ_MSA);
26
+ col.setTag(bio.TAGS.alphabet, bio.ALPHABET.UN);
27
+ col.setTag(bio.TAGS.separator, '/');
26
28
  return col;
27
29
  }
28
30
 
@@ -1,6 +1,8 @@
1
1
  import * as grok from 'datagrok-api/grok';
2
- import * as ui from 'datagrok-api/ui';
3
2
  import * as DG from 'datagrok-api/dg';
3
+ import * as ui from 'datagrok-api/ui';
4
+
5
+ import {printLeftOrCentered, DrawStyle} from '@datagrok-libraries/bio/src/utils/cell-renderer';
4
6
  import * as bio from '@datagrok-libraries/bio';
5
7
  import * as C from './constants';
6
8
 
@@ -44,7 +46,7 @@ export class MacromoleculeSequenceCellRenderer extends DG.GridCellRenderer {
44
46
  }
45
47
 
46
48
  onMouseMove(gridCell: DG.GridCell, e: MouseEvent): void {
47
- if (gridCell.cell.column.getTag(bio.TAGS.aligned) !== 'SEQ.MSA')
49
+ if (gridCell.cell.column.getTag(bio.TAGS.aligned) !== bio.ALIGNMENT.SEQ_MSA)
48
50
  return;
49
51
 
50
52
  const maxLengthWordsSum = gridCell.cell.column.temp['bio-sum-maxLengthWords'];