@datagrok/bio 2.11.42 → 2.12.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (61)
  1. package/CHANGELOG.md +14 -0
  2. package/README.md +1 -1
  3. package/detectors.js +11 -11
  4. package/dist/36.js +1 -1
  5. package/dist/36.js.map +1 -1
  6. package/dist/413.js +1 -1
  7. package/dist/413.js.map +1 -1
  8. package/dist/590.js +1 -1
  9. package/dist/590.js.map +1 -1
  10. package/dist/709.js +1 -1
  11. package/dist/709.js.map +1 -1
  12. package/dist/895.js +1 -1
  13. package/dist/895.js.map +1 -1
  14. package/dist/package-test.js +3 -3
  15. package/dist/package-test.js.map +1 -1
  16. package/dist/package.js +2 -2
  17. package/dist/package.js.map +1 -1
  18. package/files/tests/libraries/HELMmonomerSchema.json +1 -1
  19. package/package.json +11 -11
  20. package/src/analysis/sequence-activity-cliffs.ts +9 -9
  21. package/src/analysis/sequence-diversity-viewer.ts +3 -3
  22. package/src/analysis/sequence-search-base-viewer.ts +2 -2
  23. package/src/analysis/sequence-similarity-viewer.ts +10 -10
  24. package/src/analysis/sequence-space.ts +26 -23
  25. package/src/calculations/monomerLevelMols.ts +13 -11
  26. package/src/package.ts +12 -15
  27. package/src/tests/WebLogo-layout-tests.ts +5 -2
  28. package/src/tests/WebLogo-positions-test.ts +5 -5
  29. package/src/tests/bio-tests.ts +13 -6
  30. package/src/tests/converters-test.ts +4 -4
  31. package/src/tests/detectors-benchmark-tests.ts +5 -5
  32. package/src/tests/detectors-tests.ts +13 -13
  33. package/src/tests/fasta-export-tests.ts +10 -4
  34. package/src/tests/mm-distance-tests.ts +10 -10
  35. package/src/tests/msa-tests.ts +8 -15
  36. package/src/tests/renderers-monomer-placer.ts +3 -3
  37. package/src/tests/renderers-test.ts +6 -8
  38. package/src/tests/splitters-test.ts +14 -13
  39. package/src/tests/substructure-filters-tests.ts +143 -1
  40. package/src/tests/to-atomic-level-tests.ts +2 -2
  41. package/src/tests/units-handler-get-region.ts +4 -4
  42. package/src/tests/units-handler-splitted-tests.ts +19 -17
  43. package/src/tests/units-handler-tests.ts +32 -32
  44. package/src/utils/cell-renderer.ts +40 -34
  45. package/src/utils/check-input-column.ts +5 -5
  46. package/src/utils/context-menu.ts +9 -6
  47. package/src/utils/convert.ts +9 -9
  48. package/src/utils/get-region-func-editor.ts +11 -11
  49. package/src/utils/get-region.ts +10 -12
  50. package/src/utils/macromolecule-column-widget.ts +4 -3
  51. package/src/utils/monomer-lib/library-file-manager/event-manager.ts +1 -1
  52. package/src/utils/multiple-sequence-alignment-ui.ts +6 -6
  53. package/src/utils/pepsea.ts +1 -0
  54. package/src/utils/poly-tool/transformation.ts +3 -3
  55. package/src/utils/poly-tool/ui.ts +46 -135
  56. package/src/utils/save-as-fasta.ts +14 -15
  57. package/src/utils/sequence-to-mol.ts +4 -4
  58. package/src/viewers/web-logo-viewer.ts +46 -54
  59. package/src/widgets/bio-substructure-filter-types.ts +19 -45
  60. package/src/widgets/bio-substructure-filter.ts +45 -23
  61. package/src/widgets/composition-analysis-widget.ts +8 -8
@@ -21,7 +21,7 @@
21
21
  },
22
22
  "id": {
23
23
  "description": "Unique ID for the monomer. There is no meaning associated with this ID value.",
24
- "type": "integer"
24
+ "type": ["string", "integer"]
25
25
  },
26
26
  "rgroups": {
27
27
  "description": "An array of the monomer R groups and required information.",
package/package.json CHANGED
@@ -2,10 +2,10 @@
2
2
  "name": "@datagrok/bio",
3
3
  "friendlyName": "Bio",
4
4
  "author": {
5
- "name": "Leonid Stolbov",
6
- "email": "lstolbov@datagrok.ai"
5
+ "name": "Aleksandr Tanas",
6
+ "email": "atanas@datagrok.ai"
7
7
  },
8
- "version": "2.11.42",
8
+ "version": "2.12.1",
9
9
  "description": "Bioinformatics support (import/export of sequences, conversion, visualization, analysis). [See more](https://github.com/datagrok-ai/public/blob/master/packages/Bio/README.md) for details.",
10
10
  "repository": {
11
11
  "type": "git",
@@ -34,13 +34,13 @@
34
34
  ],
35
35
  "dependencies": {
36
36
  "@biowasm/aioli": "^3.1.0",
37
- "@datagrok-libraries/bio": "5.39.29",
38
- "@datagrok-libraries/chem-meta": "^1.2.1",
39
- "@datagrok-libraries/ml": "^6.4.10",
40
- "@datagrok-libraries/tutorials": "^1.3.11",
37
+ "@datagrok-libraries/bio": "5.40.0",
38
+ "@datagrok-libraries/chem-meta": "^1.2.3",
39
+ "@datagrok-libraries/ml": "^6.4.12",
40
+ "@datagrok-libraries/tutorials": "^1.3.12",
41
41
  "ajv": "^8.12.0",
42
42
  "ajv-errors": "^3.0.0",
43
- "@datagrok-libraries/utils": "^4.1.44",
43
+ "@datagrok-libraries/utils": "^4.2.0",
44
44
  "@datagrok-libraries/math": "^1.0.7",
45
45
  "cash-dom": "^8.0.0",
46
46
  "css-loader": "^6.7.3",
@@ -54,9 +54,9 @@
54
54
  "wu": "latest"
55
55
  },
56
56
  "devDependencies": {
57
- "@datagrok/chem": "^1.8.11",
58
- "@datagrok/dendrogram": "^1.2.22",
59
- "@datagrok/helm": "^2.1.27",
57
+ "@datagrok/chem": "^1.9.0",
58
+ "@datagrok/dendrogram": "^1.2.27",
59
+ "@datagrok/helm": "^2.1.30",
60
60
  "@types/node": "^17.0.24",
61
61
  "@types/wu": "latest",
62
62
  "@typescript-eslint/eslint-plugin": "latest",
@@ -2,13 +2,15 @@ import * as grok from 'datagrok-api/grok';
2
2
  import * as ui from 'datagrok-api/ui';
3
3
  import * as DG from 'datagrok-api/dg';
4
4
 
5
+ import wu from 'wu';
6
+
5
7
  import {ITooltipAndPanelParams} from '@datagrok-libraries/ml/src/viewers/activity-cliffs';
6
8
  import {getSimilarityFromDistance} from '@datagrok-libraries/ml/src/distance-metrics-methods';
7
9
  import {AvailableMetrics, DistanceMetricsSubjects, StringMetricsNames} from '@datagrok-libraries/ml/src/typed-metrics';
8
10
  import {drawMoleculeDifferenceOnCanvas} from '../utils/cell-renderer';
9
11
  import {invalidateMols, MONOMERIC_COL_TAGS} from '../substructure-search/substructure-search';
10
12
  import {TAGS as bioTAGS} from '@datagrok-libraries/bio/src/utils/macromolecule';
11
- import {UnitsHandler} from '@datagrok-libraries/bio/src/utils/units-handler';
13
+ import {SeqHandler} from '@datagrok-libraries/bio/src/utils/seq-handler';
12
14
  import {ISeqSplitted} from '@datagrok-libraries/bio/src/utils/macromolecule/types';
13
15
 
14
16
  export async function getDistances(col: DG.Column, seq: string): Promise<Array<number>> {
@@ -97,19 +99,16 @@ export function createPropPanelElement(params: ITooltipAndPanelParams): HTMLDivE
97
99
 
98
100
  propPanel.append(ui.divText(params.seqCol.name, {style: {fontWeight: 'bold'}}));
99
101
 
100
- const sequencesArray = new Array<string>(2);
101
102
  const activitiesArray = new Array<number>(2);
102
103
  params.points.forEach((molIdx, idx) => {
103
- sequencesArray[idx] = params.seqCol.get(molIdx);
104
104
  activitiesArray[idx] = params.activityCol.get(molIdx);
105
105
  });
106
106
 
107
107
  const molDifferences: { [key: number]: HTMLCanvasElement } = {};
108
- const uh = UnitsHandler.getOrCreate(params.seqCol);
109
- const splitter = uh.getSplitter();
110
- const subParts1 = splitter(sequencesArray[0]);
111
- const subParts2 = splitter(sequencesArray[1]);
112
- const canvas = createDifferenceCanvas(subParts1, subParts2, uh.units, molDifferences);
108
+ const sh = SeqHandler.forColumn(params.seqCol);
109
+ const subParts1 = sh.getSplitted(params.points[0]); // splitter(sequencesArray[0], {uh, rowIdx: -1});
110
+ const subParts2 = sh.getSplitted(params.points[1]); // splitter(sequencesArray[1], {uh, rowIdx: -1});
111
+ const canvas = createDifferenceCanvas(subParts1, subParts2, sh.units, molDifferences);
113
112
  propPanel.append(ui.div(canvas, {style: {width: '300px', overflow: 'scroll'}}));
114
113
 
115
114
  propPanel.append(createDifferencesWithPositions(molDifferences));
@@ -135,7 +134,8 @@ export function createDifferenceCanvas(
135
134
  const canvas = document.createElement('canvas');
136
135
  const context = canvas.getContext('2d');
137
136
  canvas.height = 30;
138
- drawMoleculeDifferenceOnCanvas(context!, 0, 0, 0, 30, subParts1, subParts2, units, true, molDifferences);
137
+ drawMoleculeDifferenceOnCanvas(context!, 0, 0, 0, 30,
138
+ wu(subParts1.canonicals).toArray(), wu(subParts2.canonicals).toArray(), units, true, molDifferences);
139
139
  return canvas;
140
140
  }
141
141
 
@@ -7,7 +7,7 @@ import {SequenceSearchBaseViewer} from './sequence-search-base-viewer';
7
7
  import {getMonomericMols} from '../calculations/monomerLevelMols';
8
8
  import {updateDivInnerHTML} from '../utils/ui-utils';
9
9
  import {Subject} from 'rxjs';
10
- import {UnitsHandler} from '@datagrok-libraries/bio/src/utils/units-handler';
10
+ import {SeqHandler} from '@datagrok-libraries/bio/src/utils/seq-handler';
11
11
  import {getEncodedSeqSpaceCol} from './sequence-space';
12
12
  import {MmDistanceFunctionsNames} from '@datagrok-libraries/ml/src/macromolecule-distance-functions';
13
13
  import {DistanceMatrixService, dmLinearIndex} from '@datagrok-libraries/ml/src/distance-matrix';
@@ -29,8 +29,8 @@ export class SequenceDiversityViewer extends SequenceSearchBaseViewer {
29
29
  return;
30
30
  if (this.dataFrame) {
31
31
  if (computeData && this.moleculeColumn) {
32
- const uh = UnitsHandler.getOrCreate(this.moleculeColumn);
33
- await (uh.isFasta() ? this.computeByMM() : this.computeByChem());
32
+ const sh = SeqHandler.forColumn(this.moleculeColumn);
33
+ await (sh.isFasta() ? this.computeByMM() : this.computeByChem());
34
34
 
35
35
  const diverseColumnName: string = this.diverseColumnLabel != null ? this.diverseColumnLabel :
36
36
  `diverse (${this.moleculeColumnName})`;
@@ -14,7 +14,7 @@ export class SequenceSearchBaseViewer extends DG.JsViewer {
14
14
  fingerprint: string;
15
15
  metricsProperties = ['distanceMetric', 'fingerprint'];
16
16
  fingerprintChoices = ['Morgan', 'Pattern'];
17
- moleculeColumn?: DG.Column | null;
17
+ moleculeColumn?: DG.Column<string>;
18
18
  moleculeColumnName: string;
19
19
  initialized: boolean = false;
20
20
  tags = [DG.TAGS.UNITS, bioTAGS.aligned, bioTAGS.separator, bioTAGS.alphabet];
@@ -51,7 +51,7 @@ export class SequenceSearchBaseViewer extends DG.JsViewer {
51
51
  .subscribe((_: any) => this.render(false)));
52
52
  this.subs.push(DG.debounce(ui.onSizeChanged(this.root), 50)
53
53
  .subscribe((_: any) => this.render(false)));
54
- this.moleculeColumn = this.dataFrame.columns.bySemType(DG.SEMTYPE.MACROMOLECULE);
54
+ this.moleculeColumn = this.dataFrame.columns.bySemType(DG.SEMTYPE.MACROMOLECULE) as DG.Column<string>;
55
55
  this.moleculeColumnName = this.moleculeColumn?.name!;
56
56
  this.getProperty('limit')!.fromOptions({min: 1, max: this.dataFrame.rowCount});
57
57
  }
@@ -7,7 +7,7 @@ import {getMonomericMols} from '../calculations/monomerLevelMols';
7
7
  import {createDifferenceCanvas, createDifferencesWithPositions} from './sequence-activity-cliffs';
8
8
  import {updateDivInnerHTML} from '../utils/ui-utils';
9
9
  import {Subject} from 'rxjs';
10
- import {UnitsHandler} from '@datagrok-libraries/bio/src/utils/units-handler';
10
+ import {SeqHandler} from '@datagrok-libraries/bio/src/utils/seq-handler';
11
11
  import {alignSequencePair} from '@datagrok-libraries/bio/src/utils/macromolecule/alignment';
12
12
  import {KnnResult, SparseMatrixService} from '@datagrok-libraries/ml/src/distance-matrix/sparse-matrix-service';
13
13
  import {getEncodedSeqSpaceCol} from './sequence-space';
@@ -50,9 +50,9 @@ export class SequenceSimilarityViewer extends SequenceSearchBaseViewer {
50
50
  this.curIdx = this.dataFrame!.currentRowIdx == -1 ? 0 : this.dataFrame!.currentRowIdx;
51
51
  if (computeData && !this.gridSelect) {
52
52
  this.targetMoleculeIdx = this.dataFrame!.currentRowIdx == -1 ? 0 : this.dataFrame!.currentRowIdx;
53
- const uh = UnitsHandler.getOrCreate(this.moleculeColumn!);
53
+ const sh = SeqHandler.forColumn(this.moleculeColumn!);
54
54
 
55
- await (!uh.isHelm() ? this.computeByMM() : this.computeByChem());
55
+ await (!sh.isHelm() ? this.computeByMM() : this.computeByChem());
56
56
  const similarColumnName: string = this.similarColumnLabel != null ? this.similarColumnLabel :
57
57
  `similar (${this.moleculeColumnName})`;
58
58
  this.molCol = DG.Column.string(similarColumnName,
@@ -121,13 +121,13 @@ export class SequenceSimilarityViewer extends SequenceSearchBaseViewer {
121
121
  const propPanel = ui.div();
122
122
  const molDifferences: { [key: number]: HTMLCanvasElement } = {};
123
123
  const molColName = this.molCol?.name!;
124
- const col = resDf.col(molColName)!;
125
- const uh = UnitsHandler.getOrCreate(col);
126
- const splitter = uh.getSplitter();
127
- const subParts1 = splitter(this.moleculeColumn!.get(this.targetMoleculeIdx));
128
- const subParts2 = splitter(resDf.get(molColName, resDf.currentRowIdx));
129
- const alignment = alignSequencePair(Array.from(subParts1), Array.from(subParts2));
130
- const canvas = createDifferenceCanvas(alignment.seq1Splitted, alignment.seq2Splitted, uh.units, molDifferences);
124
+ const resCol: DG.Column<string> = resDf.col(molColName)!;
125
+ const molColSh = SeqHandler.forColumn(this.moleculeColumn!);
126
+ const resSh = SeqHandler.forColumn(resCol);
127
+ const subParts1 = molColSh.getSplitted(this.targetMoleculeIdx);
128
+ const subParts2 = resSh.getSplitted(resDf.currentRowIdx);
129
+ const alignment = alignSequencePair(subParts1, subParts2);
130
+ const canvas = createDifferenceCanvas(alignment.seq1Splitted, alignment.seq2Splitted, resSh.units, molDifferences);
131
131
  propPanel.append(ui.div(canvas, {style: {width: '300px', overflow: 'scroll'}}));
132
132
  if (subParts1.length !== subParts2.length) {
133
133
  propPanel.append(ui.divV([
@@ -1,8 +1,10 @@
1
+ import * as ui from 'datagrok-api/ui';
1
2
  import * as DG from 'datagrok-api/dg';
2
3
  import * as grok from 'datagrok-api/grok';
4
+
3
5
  import {BitArrayMetrics} from '@datagrok-libraries/ml/src/typed-metrics';
4
6
  import {mmDistanceFunctionArgs} from '@datagrok-libraries/ml/src/macromolecule-distance-functions/types';
5
- import {UnitsHandler} from '@datagrok-libraries/bio/src/utils/units-handler';
7
+ import {SeqHandler} from '@datagrok-libraries/bio/src/utils/seq-handler';
6
8
  import {getMonomerSubstitutionMatrix} from '@datagrok-libraries/bio/src/monomer-works/monomer-utils';
7
9
  import {MmDistanceFunctionsNames} from '@datagrok-libraries/ml/src/macromolecule-distance-functions';
8
10
 
@@ -13,32 +15,35 @@ export interface ISequenceSpaceResult {
13
15
 
14
16
  export async function getEncodedSeqSpaceCol(
15
17
  seqCol: DG.Column, similarityMetric: BitArrayMetrics | MmDistanceFunctionsNames, fingerprintType: string = 'Morgan'
16
- ): Promise<{seqList:string[], options: {[_:string]: any}}> {
17
- // encodes sequences using utf charachters to also support multichar and non fasta sequences
18
- const ncUH = UnitsHandler.getOrCreate(seqCol);
19
- const seqList = seqCol.toList();
20
- const splitter = ncUH.getSplitter();
21
- const seqColLength = seqList.length;
18
+ ): Promise<{ seqList: string[], options: { [_: string]: any } }> {
19
+ // encodes sequences using utf characters to also support multichar and non fasta sequences
20
+ const rowCount = seqCol.length;
21
+ const sh = SeqHandler.forColumn(seqCol);
22
+ const encList = Array<string>(rowCount);
22
23
  let charCodeCounter = 36;
23
24
  const charCodeMap = new Map<string, string>();
24
- for (let i = 0; i < seqColLength; i++) {
25
- const seq = seqList[i];
26
- if (seqList[i] === null || seqCol.isNone(i)) {
27
- seqList[i] = null;
25
+ const seqColCats = seqCol.categories;
26
+ const seqColRawData = seqCol.getRawData();
27
+ for (let rowIdx = 0; rowIdx < rowCount; rowIdx++) {
28
+ const catI = seqColRawData[rowIdx];
29
+ const seq = seqColCats[catI];
30
+ if (seq === null || seqCol.isNone(rowIdx)) {
31
+ // @ts-ignore
32
+ encList[rowIdx] = null;
28
33
  continue;
29
34
  }
30
- seqList[i] = '';
31
- const splittedSeq = splitter(seq);
35
+ encList[rowIdx] = '';
36
+ const splittedSeq = sh.getSplitted(rowIdx);
32
37
  for (let j = 0; j < splittedSeq.length; j++) {
33
- const char = splittedSeq[j];
38
+ const char = splittedSeq.getCanonical(j);
34
39
  if (!charCodeMap.has(char)) {
35
40
  charCodeMap.set(char, String.fromCharCode(charCodeCounter));
36
41
  charCodeCounter++;
37
42
  }
38
- seqList[i] += charCodeMap.get(char)!;
43
+ encList[rowIdx] += charCodeMap.get(char)!;
39
44
  }
40
45
  }
41
- let options = {};
46
+ let options = {} as mmDistanceFunctionArgs;
42
47
  if (similarityMetric === MmDistanceFunctionsNames.MONOMER_CHEMICAL_DISTANCE) {
43
48
  const monomers = Array.from(charCodeMap.keys());
44
49
  const monomerRes = await getMonomerSubstitutionMatrix(monomers, fingerprintType);
@@ -48,13 +53,12 @@ export async function getEncodedSeqSpaceCol(
48
53
  monomerRes.scoringMatrix[i][j] = 1 - val;
49
54
  });
50
55
  });
51
- const monomerHashToMatrixMap: {[_: string]: number} = {};
56
+ const monomerHashToMatrixMap: { [_: string]: number } = {};
52
57
  Object.entries(monomerRes.alphabetIndexes).forEach(([key, value]) => {
53
58
  monomerHashToMatrixMap[charCodeMap.get(key)!] = value;
54
59
  });
55
60
  // sets distance function args in place.
56
- options = {scoringMatrix: monomerRes.scoringMatrix,
57
- alphabetIndexes: monomerHashToMatrixMap} satisfies mmDistanceFunctionArgs;
61
+ options = {scoringMatrix: monomerRes.scoringMatrix, alphabetIndexes: monomerHashToMatrixMap};
58
62
  } else if (similarityMetric === MmDistanceFunctionsNames.NEEDLEMANN_WUNSCH) {
59
63
  const monomers = Array.from(charCodeMap.keys());
60
64
  const monomerRes = await getMonomerSubstitutionMatrix(monomers, fingerprintType);
@@ -64,13 +68,12 @@ export async function getEncodedSeqSpaceCol(
64
68
  // monomerRes.scoringMatrix[i][j] = 1 - val;
65
69
  // });
66
70
  // });
67
- const monomerHashToMatrixMap: {[_: string]: number} = {};
71
+ const monomerHashToMatrixMap: { [_: string]: number } = {};
68
72
  Object.entries(monomerRes.alphabetIndexes).forEach(([key, value]) => {
69
73
  monomerHashToMatrixMap[charCodeMap.get(key)!] = value;
70
74
  });
71
75
  // sets distance function args in place.
72
- options = {scoringMatrix: monomerRes.scoringMatrix,
73
- alphabetIndexes: monomerHashToMatrixMap} satisfies mmDistanceFunctionArgs;
76
+ options = {scoringMatrix: monomerRes.scoringMatrix, alphabetIndexes: monomerHashToMatrixMap};
74
77
  }
75
- return {seqList, options};
78
+ return {seqList: encList, options};
76
79
  }
@@ -5,33 +5,32 @@ import * as DG from 'datagrok-api/dg';
5
5
  import wu from 'wu';
6
6
 
7
7
  import {getHelmMonomers} from '../package';
8
- import {UnitsHandler} from '@datagrok-libraries/bio/src/utils/units-handler';
9
- import {ISeqSplitted} from '@datagrok-libraries/bio/src/utils/macromolecule/types';
8
+ import {SeqHandler} from '@datagrok-libraries/bio/src/utils/seq-handler';
9
+ import {GAP_SYMBOL, ISeqSplitted} from '@datagrok-libraries/bio/src/utils/macromolecule/types';
10
10
 
11
11
  const V2000_ATOM_NAME_POS = 31;
12
12
 
13
13
  export async function getMonomericMols(
14
14
  mcol: DG.Column<string>, pattern: boolean = false, monomersDict?: Map<string, string>
15
15
  ): Promise<DG.Column> {
16
- const uh = UnitsHandler.getOrCreate(mcol);
16
+ const sh = SeqHandler.forColumn(mcol);
17
17
  let molV3000Array;
18
18
  monomersDict ??= new Map();
19
- const monomers = uh.isHelm() ?
20
- getHelmMonomers(mcol) : Object.keys(uh.stats.freq).filter((it) => it !== '');
19
+ const monomers = sh.isHelm() ?
20
+ getHelmMonomers(mcol) : Object.keys(sh.stats.freq).filter((it) => it !== '');
21
21
 
22
22
  for (let i = 0; i < monomers.length; i++) {
23
23
  if (!monomersDict.has(monomers[i]))
24
24
  monomersDict.set(monomers[i], `${monomersDict.size + 1}`);
25
25
  }
26
26
 
27
- if (uh.isHelm()) {
27
+ if (sh.isHelm()) {
28
28
  molV3000Array = await grok.functions.call('HELM:getMolFiles', {col: mcol});
29
29
  molV3000Array = changeV2000ToV3000(molV3000Array, monomersDict, pattern);
30
30
  } else {
31
31
  molV3000Array = new Array<string>(mcol.length);
32
32
  for (let i = 0; i < mcol.length; i++) {
33
- const sequenceMonomers = wu(uh.splitted[i]).filter((it) => it !== '').toArray();
34
- const molV3000 = molV3000FromNonHelmSequence(sequenceMonomers, monomersDict, pattern);
33
+ const molV3000 = molV3000FromNonHelmSequence(sh.getSplitted(i), monomersDict, pattern);
35
34
  molV3000Array[i] = molV3000;
36
35
  }
37
36
  }
@@ -51,9 +50,12 @@ M V30 BEGIN CTAB
51
50
  molV3000 += 'M V30 BEGIN ATOM\n';
52
51
 
53
52
  for (let atomRowI = 0; atomRowI < monomers.length; atomRowI++) {
54
- molV3000 += pattern ?
55
- `M V30 ${atomRowI + 1} R${monomersDict.get(monomers[atomRowI])} 0.000 0.000 0 0\n` :
56
- `M V30 ${atomRowI + 1} At 0.000 0.000 0 0 MASS=${monomersDict.get(monomers[atomRowI])}\n`;
53
+ const cm: string = monomers.getCanonical(atomRowI);
54
+ if (cm !== GAP_SYMBOL) {
55
+ molV3000 += pattern ?
56
+ `M V30 ${atomRowI + 1} R${monomersDict.get(cm)} 0.000 0.000 0 0\n` :
57
+ `M V30 ${atomRowI + 1} At 0.000 0.000 0 0 MASS=${monomersDict.get(cm)}\n`;
58
+ }
57
59
  }
58
60
 
59
61
  molV3000 += 'M V30 END ATOM\n';
package/src/package.ts CHANGED
@@ -12,7 +12,7 @@ import {BitArrayMetrics, KnownMetrics} from '@datagrok-libraries/ml/src/typed-me
12
12
  import {
13
13
  TAGS as bioTAGS,
14
14
  } from '@datagrok-libraries/bio/src/utils/macromolecule';
15
- import {UnitsHandler} from '@datagrok-libraries/bio/src/utils/units-handler';
15
+ import {SeqHandler} from '@datagrok-libraries/bio/src/utils/seq-handler';
16
16
  import {IMonomerLib} from '@datagrok-libraries/bio/src/types';
17
17
  import {SeqPalette} from '@datagrok-libraries/bio/src/seq-palettes';
18
18
  import {FastaFileHandler} from '@datagrok-libraries/bio/src/utils/fasta-handler';
@@ -59,7 +59,7 @@ import {BioPackage, BioPackageProperties} from './package-types';
59
59
  import {getCompositionAnalysisWidget} from './widgets/composition-analysis-widget';
60
60
  import {MacromoleculeColumnWidget} from './utils/macromolecule-column-widget';
61
61
  import {addCopyMenuUI} from './utils/context-menu';
62
- import {PolyTool} from './utils/poly-tool/ui';
62
+ import {getPolyToolDialog} from './utils/poly-tool/ui';
63
63
  import {PolyToolCsvLibHandler} from './utils/poly-tool/csv-to-json-monomer-lib-converter';
64
64
  import {_setPeptideColumn} from './utils/poly-tool/utils';
65
65
  import {getRegionDo} from './utils/get-region';
@@ -165,11 +165,11 @@ export function getBioLib(): IMonomerLib {
165
165
  return MonomerLibManager.instance.getBioLib();
166
166
  }
167
167
 
168
- //name: getUnitsHandler
168
+ //name: getSeqHandler
169
169
  //input: column sequence { semType: Macromolecule }
170
170
  //output: object result
171
- export function getUnitsHandler(sequence: DG.Column<string>): UnitsHandler {
172
- return UnitsHandler.getOrCreate(sequence);
171
+ export function getSeqHandler(sequence: DG.Column<string>): SeqHandler {
172
+ return SeqHandler.forColumn(sequence);
173
173
  }
174
174
 
175
175
  // -- Panels --
@@ -471,9 +471,8 @@ export async function activityCliffs(table: DG.DataFrame, molecules: DG.Column<s
471
471
  })
472
472
  .onCancel(() => { resolve(undefined); })
473
473
  .show();
474
- } else {
474
+ } else
475
475
  runCliffs().then((res) => resolve(res)).catch((err) => reject(err));
476
- }
477
476
  }).catch((err: any) => {
478
477
  const [errMsg, errStack] = errInfo(err);
479
478
  _package.logger.error(errMsg, undefined, errStack);
@@ -611,7 +610,7 @@ export async function compositionAnalysis(): Promise<void> {
611
610
  if (col.semType != DG.SEMTYPE.MACROMOLECULE)
612
611
  return false;
613
612
 
614
- const _colUH = UnitsHandler.getOrCreate(col);
613
+ const _colSh = SeqHandler.forColumn(col);
615
614
  // TODO: prevent for cyclic, branched or multiple chains in Helm
616
615
  return true;
617
616
  });
@@ -630,7 +629,7 @@ export async function compositionAnalysis(): Promise<void> {
630
629
  return;
631
630
  } else if (colList.length > 1) {
632
631
  const colListNames: string [] = colList.map((col) => col.name);
633
- const selectedCol = colList.find((c) => { return UnitsHandler.getOrCreate(c).isMsa(); });
632
+ const selectedCol = colList.find((c) => { return SeqHandler.forColumn(c).isMsa(); });
634
633
  const colInput: DG.InputBase = ui.choiceInput(
635
634
  'Column', selectedCol ? selectedCol.name : colListNames[0], colListNames);
636
635
  ui.dialog({
@@ -647,9 +646,8 @@ export async function compositionAnalysis(): Promise<void> {
647
646
  await handler(col);
648
647
  })
649
648
  .show();
650
- } else {
649
+ } else
651
650
  col = colList[0];
652
- }
653
651
 
654
652
  if (!col)
655
653
  return;
@@ -690,10 +688,9 @@ export function convertDialog() {
690
688
  //name: polyTool
691
689
  //description: Perform cyclization of polymers
692
690
  export async function polyTool(): Promise<void> {
693
- const polytool = new PolyTool();
694
691
  let dialog: DG.Dialog;
695
692
  try {
696
- dialog = await polytool.getPolyToolDialog();
693
+ dialog = await getPolyToolDialog();
697
694
  dialog.show();
698
695
  } catch (err: any) {
699
696
  grok.shell.warning('To run PolyTool, open a dataframe with macromolecules');
@@ -773,8 +770,8 @@ export async function splitToMonomersTopMenu(table: DG.DataFrame, sequence: DG.C
773
770
  //name: Bio: getHelmMonomers
774
771
  //input: column sequence {semType: Macromolecule}
775
772
  export function getHelmMonomers(sequence: DG.Column<string>): string[] {
776
- const uh = UnitsHandler.getOrCreate(sequence);
777
- const stats = uh.stats;
773
+ const sh = SeqHandler.forColumn(sequence);
774
+ const stats = sh.stats;
778
775
  return Object.keys(stats.freq);
779
776
  }
780
777
 
@@ -4,10 +4,10 @@ import * as ui from 'datagrok-api/ui';
4
4
 
5
5
  import wu from 'wu';
6
6
 
7
- import {category, expect, test, testViewer} from '@datagrok-libraries/utils/src/test';
7
+ import {category, delay, expect, test} from '@datagrok-libraries/utils/src/test';
8
8
 
9
9
  import {awaitGrid} from './utils';
10
- import {WebLogoViewer} from '../viewers/web-logo-viewer';
10
+ import {Debounces, WebLogoViewer} from '../viewers/web-logo-viewer';
11
11
 
12
12
  import {_package} from '../package-test';
13
13
 
@@ -20,12 +20,15 @@ category('WebLogo-layout', () => {
20
20
  const wlViewer = await df.plot.fromType('WebLogo',
21
21
  {sequenceColumnName: col.name}) as unknown as WebLogoViewer;
22
22
  view.dockManager.dock(wlViewer);
23
+
24
+ await delay(Debounces.render * 2);
23
25
  await wlViewer.awaitRendered();
24
26
  await awaitGrid(view.grid);
25
27
 
26
28
  const viewLayout = view.saveLayout();
27
29
  const viewLayoutJsonStr = viewLayout.toJson();
28
30
  view.loadLayout(viewLayout);
31
+ await delay(Debounces.render * 2);
29
32
  await wlViewer.awaitRendered();
30
33
  await awaitGrid(view.grid);
31
34
 
@@ -3,15 +3,15 @@ import * as DG from 'datagrok-api/dg';
3
3
 
4
4
  import {category, expect, expectArray, test, testEvent} from '@datagrok-libraries/utils/src/test';
5
5
  import {ALPHABET, NOTATION, TAGS as bioTAGS} from '@datagrok-libraries/bio/src/utils/macromolecule';
6
+ import {SeqHandler} from '@datagrok-libraries/bio/src/utils/seq-handler';
7
+ import {GAP_SYMBOL} from '@datagrok-libraries/bio/src/utils/macromolecule/types';
8
+
6
9
  import {
7
10
  countForMonomerAtPosition,
8
11
  PositionInfo as PI,
9
12
  PositionMonomerInfo as PMI,
10
13
  WebLogoViewer,
11
14
  } from '../viewers/web-logo-viewer';
12
- import {UnitsHandler} from '@datagrok-libraries/bio/src/utils/units-handler';
13
-
14
- import {GAP_SYMBOL} from '../const';
15
15
 
16
16
  const g: string = GAP_SYMBOL;
17
17
 
@@ -182,8 +182,8 @@ ATC-G-TTGC--
182
182
  }
183
183
 
184
184
  const atPI1: PI = resPosList[1];
185
- const uh = UnitsHandler.getOrCreate(seqCol);
186
- const countAt1 = countForMonomerAtPosition(df, uh, df.filter, 'G', atPI1);
185
+ const sh = SeqHandler.forColumn(seqCol);
186
+ const countAt1 = countForMonomerAtPosition(df, sh, df.filter, 'G', atPI1);
187
187
  expect(countAt1, 5);
188
188
  await wlViewer.awaitRendered();
189
189
  });
@@ -4,16 +4,11 @@ import * as DG from 'datagrok-api/dg';
4
4
 
5
5
  import {category, test, expect, expectObject, expectArray} from '@datagrok-libraries/utils/src/test';
6
6
  import {
7
- getAlphabetSimilarity,
8
- monomerToShort,
9
- pickUpPalette,
10
- splitterAsFasta,
11
- splitterAsHelm,
7
+ NOTATION, getAlphabetSimilarity, monomerToShort, pickUpPalette, splitterAsFasta, splitterAsHelm,
12
8
  } from '@datagrok-libraries/bio/src/utils/macromolecule';
13
9
  import {Nucleotides, NucleotidesPalettes} from '@datagrok-libraries/bio/src/nucleotides';
14
10
  import {AminoacidsPalettes} from '@datagrok-libraries/bio/src/aminoacids';
15
11
  import {UnknownSeqPalette} from '@datagrok-libraries/bio/src/unknown';
16
- import {UnitsHandler} from '@datagrok-libraries/bio/src/utils/units-handler';
17
12
  import {getStatsForCol} from '@datagrok-libraries/bio/src/utils/macromolecule/utils';
18
13
 
19
14
  import {GAP_SYMBOL} from '../const';
@@ -63,6 +58,8 @@ XZJ{}2
63
58
  PEPTIDE1{meI}$$$$`;
64
59
  const df: DG.DataFrame = DG.DataFrame.fromCsv(csv);
65
60
  const seqCol: DG.Column = df.getCol('seq')!;
61
+ seqCol.semType = DG.SEMTYPE.MACROMOLECULE;
62
+ seqCol.setTag(DG.TAGS.UNITS, NOTATION.HELM);
66
63
  const stats = getStatsForCol(seqCol, 1, splitterAsHelm);
67
64
 
68
65
  expectObject(stats.freq, {
@@ -132,6 +129,8 @@ category('WebLogo.monomerToShort', () => {
132
129
  export async function _testGetStats(csvDfN1: string) {
133
130
  const dfN1: DG.DataFrame = DG.DataFrame.fromCsv(csvDfN1);
134
131
  const seqCol: DG.Column = dfN1.col('seq')!;
132
+ seqCol.semType = DG.SEMTYPE.MACROMOLECULE;
133
+ seqCol.setTag(DG.TAGS.UNITS, NOTATION.FASTA);
135
134
  const stats = getStatsForCol(seqCol, 5, splitterAsFasta);
136
135
 
137
136
  expectObject(stats.freq, {
@@ -160,6 +159,8 @@ export async function _testGetAlphabetSimilarity() {
160
159
  export async function _testPickupPaletteN1(csvDfN1: string) {
161
160
  const df: DG.DataFrame = DG.DataFrame.fromCsv(csvDfN1);
162
161
  const col: DG.Column = df.col('seq')!;
162
+ col.semType = DG.SEMTYPE.MACROMOLECULE;
163
+ col.setTag(DG.TAGS.UNITS, NOTATION.FASTA);
163
164
  const cp = pickUpPalette(col);
164
165
 
165
166
  expect(cp instanceof NucleotidesPalettes, true);
@@ -168,6 +169,8 @@ export async function _testPickupPaletteN1(csvDfN1: string) {
168
169
  export async function _testPickupPaletteN1e(csvDfN1e: string) {
169
170
  const df: DG.DataFrame = DG.DataFrame.fromCsv(csvDfN1e);
170
171
  const col: DG.Column = df.col('seq')!;
172
+ col.semType = DG.SEMTYPE.MACROMOLECULE;
173
+ col.setTag(DG.TAGS.UNITS, NOTATION.FASTA);
171
174
  const cp = pickUpPalette(col);
172
175
 
173
176
  expect(cp instanceof NucleotidesPalettes, true);
@@ -176,6 +179,8 @@ export async function _testPickupPaletteN1e(csvDfN1e: string) {
176
179
  export async function _testPickupPaletteAA1(csvDfAA1: string) {
177
180
  const df: DG.DataFrame = DG.DataFrame.fromCsv(csvDfAA1);
178
181
  const col: DG.Column = df.col('seq')!;
182
+ col.semType = DG.SEMTYPE.MACROMOLECULE;
183
+ col.setTag(DG.TAGS.UNITS, NOTATION.FASTA);
179
184
  const cp = pickUpPalette(col);
180
185
 
181
186
  expect(cp instanceof AminoacidsPalettes, true);
@@ -184,6 +189,8 @@ export async function _testPickupPaletteAA1(csvDfAA1: string) {
184
189
  export async function _testPickupPaletteX(csvDfX: string) {
185
190
  const df: DG.DataFrame = DG.DataFrame.fromCsv(csvDfX);
186
191
  const col: DG.Column = df.col('seq')!;
192
+ col.semType = DG.SEMTYPE.MACROMOLECULE;
193
+ col.setTag(DG.TAGS.UNITS, NOTATION.FASTA);
187
194
  const cp = pickUpPalette(col);
188
195
 
189
196
  expect(cp instanceof UnknownSeqPalette, true);
@@ -5,7 +5,7 @@ import {category, expect, expectArray, test} from '@datagrok-libraries/utils/src
5
5
 
6
6
  import {ConverterFunc} from './types';
7
7
  import {NOTATION} from '@datagrok-libraries/bio/src/utils/macromolecule';
8
- import {UnitsHandler} from '@datagrok-libraries/bio/src/utils/units-handler';
8
+ import {SeqHandler} from '@datagrok-libraries/bio/src/utils/seq-handler';
9
9
 
10
10
  // import {mmSemType} from '../const';
11
11
  // import {importFasta} from '../package';
@@ -133,8 +133,8 @@ RNA1{p.r(U)p.r(U)p.r(C)p.r(A)p.r(A)p.r(C)p.r(U)p.r(U)p.r(C)p.r(A)p.r(A)p.r(C)p.p
133
133
  throw new Error(`Argument 'separator' is mandatory for target notation '${tgtNotation.toString()}'.`);
134
134
 
135
135
  return function(srcCol: DG.Column): DG.Column {
136
- const converterUH = UnitsHandler.getOrCreate(srcCol);
137
- const resCol = converterUH.convert(tgtNotation, tgtSeparator);
136
+ const converterSh = SeqHandler.forColumn(srcCol);
137
+ const resCol = converterSh.convert(tgtNotation, tgtSeparator);
138
138
  expect(resCol.getTag(DG.TAGS.UNITS), tgtNotation);
139
139
  return resCol;
140
140
  };
@@ -152,7 +152,7 @@ RNA1{p.r(U)p.r(U)p.r(C)p.r(A)p.r(A)p.r(C)p.r(U)p.r(U)p.r(C)p.r(A)p.r(A)p.r(C)p.p
152
152
  const tgtCol: DG.Column = tgtDf.getCol('seq');
153
153
 
154
154
  expectArray(resCol.toList(), tgtCol.toList());
155
- const _uh: UnitsHandler = UnitsHandler.getOrCreate(resCol);
155
+ const _sh: SeqHandler = SeqHandler.forColumn(resCol);
156
156
  }
157
157
 
158
158
  // FASTA tests
@@ -4,7 +4,7 @@ import * as DG from 'datagrok-api/dg';
4
4
 
5
5
  import {before, category, test, expect} from '@datagrok-libraries/utils/src/test';
6
6
  import {ALPHABET, getAlphabet, NOTATION} from '@datagrok-libraries/bio/src/utils/macromolecule';
7
- import {UnitsHandler} from '@datagrok-libraries/bio/src/utils/units-handler';
7
+ import {SeqHandler} from '@datagrok-libraries/bio/src/utils/seq-handler';
8
8
 
9
9
  category('detectorsBenchmark', () => {
10
10
  let detectFunc: DG.Func;
@@ -124,11 +124,11 @@ category('detectorsBenchmark', () => {
124
124
  }
125
125
 
126
126
  function checkDetectorRes(col: DG.Column, tgt: TgtType): void {
127
- const uh = UnitsHandler.getOrCreate(col);
127
+ const sh = SeqHandler.forColumn(col);
128
128
  expect(col.semType === tgt.semType, true);
129
- expect(uh.notation === tgt.notation, true);
130
- expect(uh.alphabet === tgt.alphabet, true);
131
- expect(uh.separator === tgt.separator, true);
129
+ expect(sh.notation === tgt.notation, true);
130
+ expect(sh.alphabet === tgt.alphabet, true);
131
+ expect(sh.separator === tgt.separator, true);
132
132
  }
133
133
  });
134
134