@datagrok/bio 2.8.3 → 2.8.6

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
@@ -24,12 +24,12 @@ RUN savedAptMark="$(apt-mark showmanual)" ; \
24
24
  ; \
25
25
  apt-mark auto '.*' > /dev/null ; \
26
26
  [ -z "$savedAptMark" ] || apt-mark manual $savedAptMark ; \
27
- wget https://mafft.cbrc.jp/alignment/software/mafft_7.520-1_amd64.deb -O mafft.deb; \
28
- apt install -y ./mafft.deb; \
29
- rm -rf mafft.deb; \
27
+ wget https://mafft.cbrc.jp/alignment/software/mafft_7.520-1_amd64.deb -O mafft.deb; \
28
+ apt install -y ./mafft.deb; \
29
+ rm -rf mafft.deb; \
30
30
  wget https://github.com/Merck/PepSeA/archive/refs/heads/main.zip -O PepSeA.zip; \
31
31
  unzip -q PepSeA.zip -d /opt ; \
32
- rm -rf PepSeA.zip; \
32
+ rm -rf PepSeA.zip; \
33
33
  chown -R grok:grok /opt/PepSeA-main ; \
34
34
  apt-get purge -y --auto-remove -o APT::AutoRemove::RecommendsImportant=false ; \
35
35
  apt-get clean ; \
@@ -57,6 +57,7 @@ def distout():\n\
57
57
  \n\
58
58
  return distout_output\n' >> /opt/PepSeA-main/alignment/api.py;
59
59
 
60
+ # It is important to run the Docker container as a non-root user and not as root
60
61
  USER grok:grok
61
62
 
62
63
  # The `source` command does not work in Docker, because it will apply only to one layer
package/package.json CHANGED
@@ -5,7 +5,7 @@
5
5
  "name": "Leonid Stolbov",
6
6
  "email": "lstolbov@datagrok.ai"
7
7
  },
8
- "version": "2.8.3",
8
+ "version": "2.8.6",
9
9
  "description": "Bioinformatics support (import/export of sequences, conversion, visualization, analysis). [See more](https://github.com/datagrok-ai/public/blob/master/packages/Bio/README.md) for details.",
10
10
  "repository": {
11
11
  "type": "git",
@@ -34,7 +34,7 @@
34
34
  ],
35
35
  "dependencies": {
36
36
  "@biowasm/aioli": "^3.1.0",
37
- "@datagrok-libraries/bio": "^5.34.1",
37
+ "@datagrok-libraries/bio": "^5.36.0",
38
38
  "@datagrok-libraries/chem-meta": "^1.0.1",
39
39
  "@datagrok-libraries/ml": "^6.3.39",
40
40
  "@datagrok-libraries/tutorials": "^1.3.6",
@@ -70,11 +70,12 @@
70
70
  "link-bio": "npm link @datagrok-libraries/bio",
71
71
  "link-ml": "npm link @datagrok-libraries/ml",
72
72
  "link-utils": "npm link @datagrok-libraries/utils",
73
- "link-all": "npm link datagrok-api @datagrok-libraries/utils @datagrok-libraries/bio @datagrok-libraries/ml",
73
+ "link-all": "npm link @datagrok-libraries/chem-meta datagrok-api @datagrok-libraries/utils @datagrok-libraries/ml @datagrok-libraries/bio @datagrok-libraries/tutorials",
74
74
  "debug-sequences1": "webpack && grok publish",
75
75
  "release-sequences1": "webpack && grok publish --release",
76
76
  "build-sequences1": "webpack",
77
77
  "build": "webpack",
78
+ "build-all": "npm --prefix ./../../libraries/chem-meta run build && npm --prefix ./../../js-api run build && npm --prefix ./../../libraries/utils run build && npm --prefix ./../../libraries/ml run build && npm --prefix ./../../libraries/bio run build && npm --prefix ./../../libraries/tutorials run build && npm run build",
78
79
  "debug-sequences1-local": "webpack && grok publish local",
79
80
  "release-sequences1-local": "webpack && grok publish local --release",
80
81
  "debug-sequences1-dev": "webpack && grok publish dev",
@@ -7,7 +7,9 @@ import {getSimilarityFromDistance} from '@datagrok-libraries/ml/src/distance-met
7
7
  import {AvailableMetrics, DistanceMetricsSubjects, StringMetricsNames} from '@datagrok-libraries/ml/src/typed-metrics';
8
8
  import {drawMoleculeDifferenceOnCanvas} from '../utils/cell-renderer';
9
9
  import {invalidateMols, MONOMERIC_COL_TAGS} from '../substructure-search/substructure-search';
10
- import {getSplitter, TAGS as bioTAGS} from '@datagrok-libraries/bio/src/utils/macromolecule';
10
+ import {TAGS as bioTAGS} from '@datagrok-libraries/bio/src/utils/macromolecule';
11
+ import {UnitsHandler} from '@datagrok-libraries/bio/src/utils/units-handler';
12
+ import {ISeqSplitted} from '@datagrok-libraries/bio/src/utils/macromolecule/types';
11
13
 
12
14
  export async function getDistances(col: DG.Column, seq: string): Promise<Array<number>> {
13
15
  const stringArray = col.toList();
@@ -103,12 +105,11 @@ export function createPropPanelElement(params: ITooltipAndPanelParams): HTMLDivE
103
105
  });
104
106
 
105
107
  const molDifferences: { [key: number]: HTMLCanvasElement } = {};
106
- const units = params.seqCol.getTag(DG.TAGS.UNITS);
107
- const separator = params.seqCol.getTag(bioTAGS.separator);
108
- const splitter = getSplitter(units, separator);
108
+ const uh = UnitsHandler.getOrCreate(params.seqCol);
109
+ const splitter = uh.getSplitter();
109
110
  const subParts1 = splitter(sequencesArray[0]);
110
111
  const subParts2 = splitter(sequencesArray[1]);
111
- const canvas = createDifferenceCanvas(subParts1, subParts2, units, molDifferences);
112
+ const canvas = createDifferenceCanvas(subParts1, subParts2, uh.units, molDifferences);
112
113
  propPanel.append(ui.div(canvas, {style: {width: '300px', overflow: 'scroll'}}));
113
114
 
114
115
  propPanel.append(createDifferencesWithPositions(molDifferences));
@@ -127,8 +128,8 @@ function createPropPanelField(name: string, value: number): HTMLDivElement {
127
128
  }
128
129
 
129
130
  export function createDifferenceCanvas(
130
- subParts1: string[],
131
- subParts2: string[],
131
+ subParts1: ISeqSplitted,
132
+ subParts2: ISeqSplitted,
132
133
  units: string,
133
134
  molDifferences: { [key: number]: HTMLCanvasElement }): HTMLCanvasElement {
134
135
  const canvas = document.createElement('canvas');
@@ -7,7 +7,6 @@ import {getMonomericMols} from '../calculations/monomerLevelMols';
7
7
  import {createDifferenceCanvas, createDifferencesWithPositions} from './sequence-activity-cliffs';
8
8
  import {updateDivInnerHTML} from '../utils/ui-utils';
9
9
  import {Subject} from 'rxjs';
10
- import {TAGS as bioTAGS, getSplitter} from '@datagrok-libraries/bio/src/utils/macromolecule';
11
10
  import {UnitsHandler} from '@datagrok-libraries/bio/src/utils/units-handler';
12
11
  import {calcMmDistanceMatrix, dmLinearIndex} from './workers/mm-distance-worker-creator';
13
12
  import {calculateMMDistancesArray} from './workers/mm-distance-array-service';
@@ -106,9 +105,10 @@ export class SequenceSimilarityViewer extends SequenceSearchBaseViewer {
106
105
  const linearizeFunc = dmLinearIndex(len);
107
106
  // array that keeps track of the indexes and scores together
108
107
  const indexWScore = Array(len).fill(0)
109
- .map((_, i) => ({idx: i, score: i === this.targetMoleculeIdx ? 1 :
110
- this.preComputeDistanceMatrix ? 1 - this.mmDistanceMatrix[linearizeFunc(this.targetMoleculeIdx, i)] :
111
- 1 - distanceArray[i]
108
+ .map((_, i) => ({
109
+ idx: i, score: i === this.targetMoleculeIdx ? 1 :
110
+ this.preComputeDistanceMatrix ? 1 - this.mmDistanceMatrix[linearizeFunc(this.targetMoleculeIdx, i)] :
111
+ 1 - distanceArray[i]
112
112
  }));
113
113
  indexWScore.sort((a, b) => b.score - a.score);
114
114
  // get the most similar molecules
@@ -122,12 +122,12 @@ export class SequenceSimilarityViewer extends SequenceSearchBaseViewer {
122
122
  const propPanel = ui.div();
123
123
  const molDifferences: { [key: number]: HTMLCanvasElement } = {};
124
124
  const molColName = this.molCol?.name!;
125
- const units = resDf.col(molColName)!.getTag(DG.TAGS.UNITS);
126
- const separator = resDf.col(molColName)!.getTag(bioTAGS.separator);
127
- const splitter = getSplitter(units, separator);
125
+ const col = resDf.col(molColName)!;
126
+ const uh = UnitsHandler.getOrCreate(col);
127
+ const splitter = uh.getSplitter();
128
128
  const subParts1 = splitter(this.moleculeColumn!.get(this.targetMoleculeIdx));
129
129
  const subParts2 = splitter(resDf.get(molColName, resDf.currentRowIdx));
130
- const canvas = createDifferenceCanvas(subParts1, subParts2, units, molDifferences);
130
+ const canvas = createDifferenceCanvas(subParts1, subParts2, uh.units, molDifferences);
131
131
  propPanel.append(ui.div(canvas, {style: {width: '300px', overflow: 'scroll'}}));
132
132
  if (subParts1.length !== subParts2.length) {
133
133
  propPanel.append(ui.divV([
@@ -2,7 +2,12 @@ import * as grok from 'datagrok-api/grok';
2
2
  import * as ui from 'datagrok-api/ui';
3
3
  import * as DG from 'datagrok-api/dg';
4
4
 
5
+ import wu from 'wu';
6
+
5
7
  import {IWebLogoViewer} from '@datagrok-libraries/bio/src/viewers/web-logo';
8
+
9
+ import {PROPS as wlPROPS} from '../viewers/web-logo-viewer';
10
+
6
11
  import {_package} from '../package';
7
12
 
8
13
  export class WebLogoApp {
@@ -11,7 +16,7 @@ export class WebLogoApp {
11
16
  df: DG.DataFrame;
12
17
  view: DG.TableView;
13
18
 
14
- constructor() {}
19
+ constructor(private readonly urlParams: URLSearchParams) {}
15
20
 
16
21
  async init(df: DG.DataFrame, funcName: string): Promise<void> {
17
22
  this._funcName = funcName;
@@ -23,12 +28,27 @@ export class WebLogoApp {
23
28
  // -- View --
24
29
 
25
30
  async buildView(): Promise<void> {
26
- this.view = grok.shell.addTableView(this.df);
27
- this.view.path = this.view.basePath = `func/${_package.name}.${this._funcName}`;
31
+ const urlParamsTxt = wu(this.urlParams.entries())
32
+ .map(([key, value]) => `${key}=${encodeURIComponent(value)}`)
33
+ .toArray().join('&');
28
34
 
29
- const viewer: DG.Viewer & IWebLogoViewer = (await this.view.dataFrame.plot.fromType('WebLogo', {
30
- sequenceColumnName: 'sequence',
31
- }));
35
+ this.view = grok.shell.addTableView(this.df);
36
+ this.view.path = this.view.basePath = `func/${_package.name}.${this._funcName}?${urlParamsTxt}`;
37
+
38
+ const options: { [p: string]: any } = {sequenceColumnName: 'sequence'};
39
+ for (const [optName, optValue] of this.urlParams.entries()) {
40
+ switch (optName) {
41
+ // boolean
42
+ case wlPROPS.fixWidth:
43
+ case wlPROPS.fitArea:
44
+ options[optName] = ((v) => { return ['1', 'on', 'true'].includes(v.toLowerCase()); })(optValue);
45
+ break;
46
+ default:
47
+ options[optName] = optValue;
48
+ }
49
+ }
50
+ const viewer: DG.Viewer & IWebLogoViewer = (await this.view.dataFrame.plot
51
+ .fromType('WebLogo', options)) as DG.Viewer & IWebLogoViewer;
32
52
  this.view.dockManager.dock(viewer, DG.DOCK_TYPE.DOWN, null, 'WebLogo', 0.35);
33
53
  }
34
54
  }
@@ -1,9 +1,12 @@
1
1
  import * as grok from 'datagrok-api/grok';
2
2
  import * as ui from 'datagrok-api/ui';
3
3
  import * as DG from 'datagrok-api/dg';
4
+
5
+ import wu from 'wu';
6
+
4
7
  import {getHelmMonomers} from '../package';
5
- import {TAGS as bioTAGS, getSplitter, NOTATION} from '@datagrok-libraries/bio/src/utils/macromolecule';
6
8
  import {UnitsHandler} from '@datagrok-libraries/bio/src/utils/units-handler';
9
+ import {ISeqSplitted} from '@datagrok-libraries/bio/src/utils/macromolecule/types';
7
10
 
8
11
  const V2000_ATOM_NAME_POS = 31;
9
12
 
@@ -27,7 +30,7 @@ export async function getMonomericMols(
27
30
  } else {
28
31
  molV3000Array = new Array<string>(mcol.length);
29
32
  for (let i = 0; i < mcol.length; i++) {
30
- const sequenceMonomers = uh.splitted[i].filter((it) => it !== '');
33
+ const sequenceMonomers = wu(uh.splitted[i]).filter((it) => it !== '').toArray();
31
34
  const molV3000 = molV3000FromNonHelmSequence(sequenceMonomers, monomersDict, pattern);
32
35
  molV3000Array[i] = molV3000;
33
36
  }
@@ -36,7 +39,7 @@ export async function getMonomericMols(
36
39
  }
37
40
 
38
41
  function molV3000FromNonHelmSequence(
39
- monomers: Array<string>, monomersDict: Map<string, string>, pattern: boolean = false) {
42
+ monomers: ISeqSplitted, monomersDict: Map<string, string>, pattern: boolean = false) {
40
43
  let molV3000 = `
41
44
  Datagrok macromolecule handler
42
45
 
package/src/package.ts CHANGED
@@ -764,7 +764,8 @@ export function bioSubstructureFilter(): BioSubstructureFilter {
764
764
  export async function webLogoLargeApp(): Promise<void> {
765
765
  const pi = DG.TaskBarProgressIndicator.create('WebLogo');
766
766
  try {
767
- const app = new WebLogoApp();
767
+ const urlParams = new URLSearchParams(window.location.search);
768
+ const app = new WebLogoApp(urlParams);
768
769
  const df: DG.DataFrame = await _package.files.readCsv('data/sample_PT_100000x5.csv');
769
770
  await grok.data.detectSemanticTypes(df);
770
771
  await app.init(df, 'webLogoLargeApp');
@@ -136,7 +136,7 @@ RNA1{P.R(U)P.R(U)P.R(C)P.R(A)P.R(A)P.R(C)P.P.P}$$$$`,
136
136
  return function(srcCol: DG.Column): DG.Column {
137
137
  const converter = new NotationConverter(srcCol);
138
138
  const resCol = converter.convert(tgtNotation, tgtSeparator);
139
- expect(resCol.getTag('units'), tgtNotation);
139
+ expect(resCol.getTag(DG.TAGS.UNITS), tgtNotation);
140
140
  return resCol;
141
141
  };
142
142
  }
@@ -7,7 +7,6 @@ import {ALIGNMENT, ALPHABET, NOTATION, TAGS as bioTAGS} from '@datagrok-librarie
7
7
  import {runKalign} from '../utils/multiple-sequence-alignment';
8
8
  import {multipleSequenceAlignmentUI} from '../utils/multiple-sequence-alignment-ui';
9
9
  import {awaitContainerStart} from './utils';
10
- //import * as grok from 'datagrok-api/grok';
11
10
 
12
11
  category('MSA', async () => {
13
12
  //table = await grok.data.files.openTable('Demo:Files/bio/peptides.csv');
@@ -113,8 +112,8 @@ async function _testMsaIsCorrect(srcCsv: string, tgtCsv: string): Promise<void>
113
112
  srcCol.semType = semType;
114
113
 
115
114
  const tgtCol: DG.Column = tgtDf.getCol('seq')!;
116
- const msaCol: DG.Column = await runKalign(srcCol, true);
117
- expectArray(msaCol.toList(), tgtCol.toList());
115
+ const resCol: DG.Column = await runKalign(srcCol, true);
116
+ expectArray(resCol.toList(), tgtCol.toList());
118
117
  }
119
118
 
120
119
  async function _testMSAOnColumn(
@@ -1,6 +1,8 @@
1
1
  import * as grok from 'datagrok-api/grok';
2
2
  import * as DG from 'datagrok-api/dg';
3
3
 
4
+ import $ from 'cash-dom';
5
+
4
6
  import {category, expect, test, awaitCheck, delay} from '@datagrok-libraries/utils/src/test';
5
7
 
6
8
  import {importFasta} from '../package';
@@ -47,8 +49,12 @@ category('renderers', () => {
47
49
  await _selectRendererBySemType();
48
50
  });
49
51
 
50
- test('setRendererManually', async () => {
51
- await _setRendererManually();
52
+ test('setRendererManuallyBeforeAddColumn', async () => {
53
+ await _setRendererManuallyBeforeAddColumn();
54
+ }, {skipReason: 'GROK-11212'});
55
+
56
+ test('setRendererManuallyAfterAddColumn', async () => {
57
+ await _setRendererManuallyAfterAddColumn();
52
58
  }, {skipReason: 'GROK-11212'});
53
59
 
54
60
  async function _rendererMacromoleculeFasta() {
@@ -204,7 +210,7 @@ category('renderers', () => {
204
210
 
205
211
  /** GROK-11212 Do not overwrite / recalculate 'cell.renderer' tag that has been set programmatically
206
212
  * https://reddata.atlassian.net/browse/GROK-11212 */
207
- async function _setRendererManually() {
213
+ async function _setRendererManuallyBeforeAddColumn() {
208
214
  const seqDiffCol: DG.Column = DG.Column.fromStrings('SequencesDiff',
209
215
  ['meI/hHis/Aca/N/T/dK/Thr_PO3H2/Aca#D-Tyr_Et/Tyr_ab-dehydroMe/meN/E/N/dV']);
210
216
  seqDiffCol.setTag(DG.TAGS.UNITS, NOTATION.SEPARATOR);
@@ -227,4 +233,32 @@ category('renderers', () => {
227
233
  `instead of manual '${tgtCellRenderer}'.`);
228
234
  }
229
235
  }
236
+
237
+ /** GROK-11212 Do not overwrite / recalculate 'cell.renderer' tag that has been set programmatically
238
+ * https://reddata.atlassian.net/browse/GROK-11212 */
239
+ async function _setRendererManuallyAfterAddColumn() {
240
+ const seqDiffCol: DG.Column = DG.Column.fromStrings('SequencesDiff',
241
+ ['meI/hHis/Aca/N/T/dK/Thr_PO3H2/Aca#D-Tyr_Et/Tyr_ab-dehydroMe/meN/E/N/dV']);
242
+ seqDiffCol.setTag(DG.TAGS.UNITS, NOTATION.SEPARATOR);
243
+ seqDiffCol.setTag(bioTAGS.separator, '/');
244
+ seqDiffCol.setTag(bioTAGS.aligned, 'SEQ');
245
+ seqDiffCol.setTag(bioTAGS.alphabet, 'UN');
246
+ seqDiffCol.setTag(bioTAGS.alphabetIsMultichar, 'true');
247
+ seqDiffCol.semType = DG.SEMTYPE.MACROMOLECULE;
248
+ const tgtCellRenderer = 'MacromoleculeDifference';
249
+ const df = DG.DataFrame.fromColumns([seqDiffCol]);
250
+ await grok.data.detectSemanticTypes(df);
251
+ const tv = grok.shell.addTableView(df);
252
+ await awaitCheck(() => $(tv.root).find('.d4-grid canvas').length > 0, 'View grid canvas not found', 200);
253
+
254
+ seqDiffCol.setTag(DG.TAGS.CELL_RENDERER, tgtCellRenderer);
255
+ await awaitCheck(() => $(tv.root).find('.d4-grid canvas').length > 0, 'View grid canvas not found', 200);
256
+
257
+ const resCellRenderer = seqDiffCol.getTag(DG.TAGS.CELL_RENDERER);
258
+ if (resCellRenderer !== tgtCellRenderer) { // this is value of MacromoleculeDifferenceCR.cellType
259
+ throw new Error(`Tag 'cell.renderer' has been manually set to '${tgtCellRenderer}' for column ` +
260
+ `but after df was added as table, tag 'cell.renderer' has reset to '${resCellRenderer}' ` +
261
+ `instead of manual '${tgtCellRenderer}'.`);
262
+ }
263
+ }
230
264
  });
@@ -14,8 +14,16 @@ import {
14
14
  } from '@datagrok-libraries/utils/src/test';
15
15
  import * as C from '../utils/constants';
16
16
  import {_package, getHelmMonomers} from '../package';
17
- import {TAGS as bioTAGS, splitterAsFasta, splitterAsHelm} from '@datagrok-libraries/bio/src/utils/macromolecule';
17
+ import {
18
+ TAGS as bioTAGS,
19
+ splitterAsFasta,
20
+ splitterAsHelm,
21
+ NOTATION
22
+ } from '@datagrok-libraries/bio/src/utils/macromolecule';
18
23
  import {splitToMonomersUI} from '../utils/split-to-monomers';
24
+ import {SEMTYPE} from 'datagrok-api/dg';
25
+ import {UnitsHandler} from '@datagrok-libraries/bio/src/utils/units-handler';
26
+ import {TAGS} from '../utils/constants';
19
27
 
20
28
 
21
29
  category('splitters', async () => {
@@ -35,6 +43,12 @@ category('splitters', async () => {
35
43
  ['M', 'MeI', 'Y', 'K', 'E', 'T', 'L', 'L', 'MeF', 'P',
36
44
  'K', 'T', 'D', 'F', 'P', 'M', 'R', 'G', 'G', 'L', 'MeA'],
37
45
  ],
46
+ fastaFromHelm: [
47
+ '[meI][Pip][dK][Thr_PO3H2][L-hArg(Et,Et)][D-Tyr_Et][Tyr_ab-dehydroMe][dV]EN[D-Orn][D-aThr][Phe_4Me]',
48
+ ['meI', 'Pip', 'dK', 'Thr_PO3H2', 'L-hArg(Et,Et)', 'D-Tyr_Et', 'Tyr_ab-dehydroMe', 'dV', 'E', 'N', 'D-Orn',
49
+ 'D-aThr', 'Phe_4Me'],
50
+ ],
51
+
38
52
  helm1: [
39
53
  'PEPTIDE1{meI.hHis.Aca.N.T.dE.Thr_PO3H2.Aca.D-Tyr_Et.Tyr_ab-dehydroMe.dV.E.N.D-Orn.D-aThr.Phe_4Me}$$$',
40
54
  ['meI', 'hHis', 'Aca', 'N', 'T', 'dE', 'Thr_PO3H2', 'Aca', 'D-Tyr_Et',
@@ -68,6 +82,7 @@ category('splitters', async () => {
68
82
  };
69
83
 
70
84
  test('fastaMulti', async () => { await _testFastaSplitter(data.fastaMulti[0], data.fastaMulti[1]); });
85
+ test('fastaFromHelm', async () => { await _testFastaSplitter(data.fastaFromHelm[0], data.fastaFromHelm[1]); });
71
86
 
72
87
  test('helm1', async () => { await _testHelmSplitter(data.helm1[0], data.helm1[1]); });
73
88
  test('helm2', async () => { await _testHelmSplitter(data.helm2[0], data.helm2[1]); });
@@ -78,6 +93,7 @@ category('splitters', async () => {
78
93
  test('testHelm2', async () => { await _testHelmSplitter(data.testHelm2[0], data.testHelm2[1]); });
79
94
  test('testHelm3', async () => { await _testHelmSplitter(data.testHelm3[0], data.testHelm3[1]); });
80
95
 
96
+
81
97
  test('splitToMonomers', async () => {
82
98
  const df: DG.DataFrame = await grok.dapi.files.readCsv('System:AppData/Bio/samples/MSA.csv');
83
99
 
@@ -122,6 +138,16 @@ PEPTIDE1{hHis.Aca.Cys_SEt}$$$,5.72388
122
138
  throw new Error(msgs.join(' '));
123
139
  }
124
140
  });
141
+
142
+ // test('helmAsFasta', async () => {
143
+ // // The columns can't be empty for UnitsHandler
144
+ // /* eslint-disable max-len */
145
+ // const srcSeq = '[meI][Pip][dK][Thr_PO3H2][L-hArg(Et,Et)][D-Tyr_Et][Tyr_ab-dehydroMe][dV]EN[D-Orn][D-aThr][Phe_4Me]';
146
+ // const tgtSeqA = ['meI', 'Pip', 'dK', 'Thr_PO3H2', 'L-hArg(Et,Et)', 'D-Tyr_Et', 'Tyr_ab-dehydroMe', 'dV', 'E', 'N', 'D-Orn', 'D-aThr', 'Phe_4Me'];
147
+ // /* eslint-enable max-len */
148
+ // const resSeqA = splitterAsFasta(srcSeq);
149
+ // expectArray(resSeqA, tgtSeqA);
150
+ // });
125
151
  });
126
152
 
127
153
  export async function _testFastaSplitter(src: string, tgt: string[]) {
@@ -2,11 +2,18 @@ import * as grok from 'datagrok-api/grok';
2
2
  import * as ui from 'datagrok-api/ui';
3
3
  import * as DG from 'datagrok-api/dg';
4
4
 
5
+ import wu from 'wu';
6
+
5
7
  import {category, test, expect, expectArray} from '@datagrok-libraries/utils/src/test';
6
- import {UnitsHandler} from '@datagrok-libraries/bio/src/utils/units-handler';
8
+ import {GapSymbols, UnitsHandler} from '@datagrok-libraries/bio/src/utils/units-handler';
9
+ import {NOTATION} from '@datagrok-libraries/bio/src/utils/macromolecule';
10
+ import {ISeqSplitted} from '@datagrok-libraries/bio/src/utils/macromolecule/types';
7
11
 
8
12
  category('UnitsHandler', () => {
9
- const data: { [testName: string]: { src: { csv: string }, tgt: { splitted: string[][] } } } = {
13
+ const fG = GapSymbols[NOTATION.FASTA];
14
+ const hG = GapSymbols[NOTATION.HELM];
15
+ const sG = GapSymbols[NOTATION.SEPARATOR];
16
+ const data: { [testName: string]: { src: { csv: string }, tgt: { splitted: (string[] | string)[] } } } = {
10
17
  fasta: {
11
18
  src: {
12
19
  csv: `seq
@@ -16,9 +23,9 @@ TTCAAC`
16
23
  },
17
24
  tgt: {
18
25
  splitted: [
19
- ['A', 'C', 'G', 'T', 'C'],
20
- ['C', 'A', 'G', 'T', 'G', 'T'],
21
- ['T', 'T', 'C', 'A', 'A', 'C']
26
+ 'ACGTC',
27
+ 'CAGTGT',
28
+ 'TTCAAC',
22
29
  ]
23
30
  }
24
31
  },
@@ -32,9 +39,9 @@ ACCGTACT`,
32
39
  tgt: {
33
40
  splitted: [
34
41
  //@formatter:off
35
- ['A', 'C', '' , 'G', 'T', '' , 'C', 'T'],
36
- ['C', 'A', 'C', '' , 'T', '' , 'G', 'T'],
37
- ['A', 'C', 'C', 'G', 'T', 'A', 'C', 'T'],
42
+ 'AC-GT-CT',
43
+ 'CAC-T-GT',
44
+ 'ACCGTACT',
38
45
  //@formatter:on
39
46
  ]
40
47
  }
@@ -65,8 +72,8 @@ rut12-rty-her2---wert`
65
72
  tgt: {
66
73
  splitted: [
67
74
  ['abc', 'dfgg', 'abc1', 'cfr3', 'rty', 'wert'],
68
- ['rut12', 'her2', 'rty', '', 'abc1', 'dfgg'],
69
- ['rut12', 'rty', 'her2', '', '', 'wert'],
75
+ ['rut12', 'her2', 'rty', sG, 'abc1', 'dfgg'],
76
+ ['rut12', 'rty', 'her2', sG, sG, 'wert'],
70
77
  ]
71
78
  }
72
79
  },
@@ -99,8 +106,8 @@ PEPTIDE1{meI.hHis.Aca.Cys_SEt.T.dK.Thr_PO3H2}$$$$`
99
106
  expect(col.semType, DG.SEMTYPE.MACROMOLECULE);
100
107
 
101
108
  const uh = UnitsHandler.getOrCreate(col);
102
- const splitted: string[][] = uh.splitted;
103
- expectArray(splitted, testData.tgt.splitted);
109
+ const resSplitted: ISeqSplitted[] = uh.splitted;
110
+ expectArray(resSplitted, testData.tgt.splitted);
104
111
  });
105
112
  }
106
113
  });
@@ -41,25 +41,25 @@ category('UnitsHandler', () => {
41
41
  });
42
42
 
43
43
  test('Seq-Fasta-units', async () => {
44
- const [_df, uh] = await loadCsvWithTag(seqDna, DG.TAGS.UNITS, NOTATION.FASTA);
44
+ const [_df, uh] = await loadCsvWithDetection(seqDna);
45
45
  expect(uh.notation, NOTATION.FASTA);
46
46
  expect(uh.isMsa(), false);
47
47
  });
48
48
 
49
49
  test('Seq-Fasta-MSA-units', async () => {
50
- const [_df, uh] = await loadCsvWithTag(seqDnaMsa, DG.TAGS.UNITS, NOTATION.FASTA);
50
+ const [_df, uh] = await loadCsvWithDetection(seqDnaMsa);
51
51
  expect(uh.notation, NOTATION.FASTA);
52
52
  expect(uh.isMsa(), true);
53
53
  });
54
54
 
55
55
  test('Seq-Helm', async () => {
56
- const [_df, uh] = await loadCsvWithTag(seqHelm, DG.TAGS.UNITS, NOTATION.HELM);
56
+ const [_df, uh] = await loadCsvWithDetection(seqHelm);
57
57
  expect(uh.notation, NOTATION.HELM);
58
58
  expect(uh.isHelm(), true);
59
59
  });
60
60
 
61
61
  test('Seq-UN', async () => {
62
- const [_df, uh] = await loadCsvWithTag(seqUn, DG.TAGS.UNITS, NOTATION.SEPARATOR);
62
+ const [_df, uh] = await loadCsvWithDetection(seqUn);
63
63
  expect(uh.notation, NOTATION.SEPARATOR);
64
64
  expect(uh.separator, '-');
65
65
  expect(uh.alphabet, ALPHABET.UN);
@@ -79,15 +79,15 @@ category('UnitsHandler', () => {
79
79
  return [df, uh];
80
80
  }
81
81
 
82
- async function loadCsvWithTag(csv: string, tag: string, value: string):
83
- Promise<[df: DG.DataFrame, uh: UnitsHandler]> {
84
- const df = DG.DataFrame.fromCsv(csv);
85
- const col = df.getCol('seq');
86
- col.setTag(tag, value);
87
- col.semType = DG.SEMTYPE.MACROMOLECULE;
88
- if (value === NOTATION.SEPARATOR)
89
- col.setTag(TAGS.separator, '-');
90
- const uh = UnitsHandler.getOrCreate(df.getCol('seq'));
91
- return [df, uh];
92
- }
82
+ // async function loadCsvWithTag(csv: string, tag: string, value: string):
83
+ // Promise<[df: DG.DataFrame, uh: UnitsHandler]> {
84
+ // const df = DG.DataFrame.fromCsv(csv);
85
+ // const col = df.getCol('seq');
86
+ // col.setTag(tag, value);
87
+ // col.semType = DG.SEMTYPE.MACROMOLECULE;
88
+ // if (value === NOTATION.SEPARATOR)
89
+ // col.setTag(TAGS.separator, '-');
90
+ // const uh = UnitsHandler.getOrCreate(df.getCol('seq'));
91
+ // return [df, uh];
92
+ // }
93
93
  });
@@ -2,13 +2,12 @@ import * as grok from 'datagrok-api/grok';
2
2
  import * as DG from 'datagrok-api/dg';
3
3
  import * as ui from 'datagrok-api/ui';
4
4
 
5
- import {_package, getBioLib} from '../package';
5
+ import wu from 'wu';
6
+
6
7
  import {printLeftOrCentered, DrawStyle} from '@datagrok-libraries/bio/src/utils/cell-renderer';
7
- import * as C from './constants';
8
8
  import {MonomerPlacer} from '@datagrok-libraries/bio/src/utils/cell-renderer-monomer-placer';
9
9
  import {
10
10
  getPaletteByType,
11
- getSplitter,
12
11
  monomerToShort,
13
12
  MonomerToShortFunc,
14
13
  NOTATION,
@@ -18,8 +17,16 @@ import {
18
17
  import {SeqPalette} from '@datagrok-libraries/bio/src/seq-palettes';
19
18
  import {UnknownSeqPalettes} from '@datagrok-libraries/bio/src/unknown';
20
19
  import {UnitsHandler} from '@datagrok-libraries/bio/src/utils/units-handler';
21
- import {Temps as mmcrTemps, Tags as mmcrTags,
22
- tempTAGS, rendererSettingsChangedState} from '../utils/cell-renderer-consts';
20
+
21
+ import {
22
+ Temps as mmcrTemps, Tags as mmcrTags,
23
+ tempTAGS, rendererSettingsChangedState
24
+ } from '../utils/cell-renderer-consts';
25
+ import * as C from './constants';
26
+
27
+ import {_package, getBioLib} from '../package';
28
+ import {ISeqSplitted} from '@datagrok-libraries/bio/src/utils/macromolecule/types';
29
+
23
30
 
24
31
  type TempType = { [tagName: string]: any };
25
32
 
@@ -30,20 +37,21 @@ function getUpdatedWidth(grid: DG.Grid | null, g: CanvasRenderingContext2D, x: n
30
37
  return grid ? Math.min(grid.canvas.width - x, w) : g.canvas.width - x;
31
38
  }
32
39
 
33
- export function processSequence(subParts: string[]): [string[], boolean] {
34
- const simplified = !subParts.some((amino, index) =>
40
+ export function processSequence(subParts: ISeqSplitted): [string[], boolean] {
41
+ const simplified = !wu.enumerate(subParts).some(([amino, index]) =>
35
42
  amino.length > 1 &&
36
43
  index != 0 &&
37
44
  index != subParts.length - 1);
38
45
 
39
46
  const text: string[] = [];
40
47
  const gap = simplified ? '' : ' ';
41
- subParts.forEach((amino: string, index) => {
48
+ for (const [amino, index] of wu.enumerate(subParts)) {
49
+ let aminoRes = amino;
42
50
  if (index < subParts.length)
43
- amino += `${amino ? '' : '-'}${gap}`;
51
+ aminoRes += `${amino ? '' : '-'}${gap}`;
44
52
 
45
- text.push(amino);
46
- });
53
+ text.push(aminoRes);
54
+ }
47
55
  return [text, simplified];
48
56
  }
49
57
 
@@ -188,11 +196,12 @@ export class MacromoleculeSequenceCellRenderer extends DG.GridCellRenderer {
188
196
 
189
197
  const separator = tableCol.getTag(bioTAGS.separator) ?? '';
190
198
  const splitLimit = w / 5;
191
- const splitterFunc: SplitterFunc = getSplitter(units, separator, splitLimit);
199
+ const uh = UnitsHandler.getOrCreate(tableCol);
200
+ const splitterFunc: SplitterFunc = uh.getSplitter(splitLimit);
192
201
 
193
202
  const tempReferenceSequence: string | null = tableColTemp[tempTAGS.referenceSequence];
194
203
  const tempCurrentWord: string | null = tableColTemp[tempTAGS.currentWord];
195
- const referenceSequence: string[] = splitterFunc(
204
+ const referenceSequence: ISeqSplitted = splitterFunc(
196
205
  ((tempReferenceSequence != null) && (tempReferenceSequence != '')) ?
197
206
  tempReferenceSequence : tempCurrentWord ?? '');
198
207
 
@@ -226,7 +235,7 @@ export class MacromoleculeSequenceCellRenderer extends DG.GridCellRenderer {
226
235
  // maxLengthWords = colTemp[tempTAGS.bioMaxLengthWords];
227
236
  // }
228
237
 
229
- const subParts: string[] = splitterFunc(value);
238
+ const subParts: ISeqSplitted = splitterFunc(value);
230
239
  /* let x1 = x; */
231
240
  let color = undefinedColor;
232
241
  let drawStyle = DrawStyle.classic;
@@ -234,7 +243,7 @@ export class MacromoleculeSequenceCellRenderer extends DG.GridCellRenderer {
234
243
  if (aligned && aligned.includes('MSA') && units == NOTATION.SEPARATOR)
235
244
  drawStyle = DrawStyle.MSA;
236
245
 
237
- for (const [index, amino] of subParts.entries()) {
246
+ for (const [amino, index] of wu.enumerate(subParts)) {
238
247
  color = palette.get(amino);
239
248
  g.fillStyle = undefinedColor;
240
249
  const last = index === subParts.length - 1;
@@ -281,16 +290,16 @@ export class MacromoleculeDifferenceCellRenderer extends DG.GridCellRenderer {
281
290
  _cellStyle: DG.GridCellStyle): void {
282
291
  const grid = gridCell.grid;
283
292
  const cell = gridCell.cell;
293
+ const tableCol = gridCell.tableColumn as DG.Column<string>;
284
294
  const s: string = cell.value ?? '';
285
- const separator = gridCell.tableColumn!.tags[bioTAGS.separator];
286
- const units: string = gridCell.tableColumn!.tags[DG.TAGS.UNITS];
287
295
  w = getUpdatedWidth(grid, g, x, w);
288
296
  //TODO: can this be replaced/merged with splitSequence?
289
297
  const [s1, s2] = s.split('#');
290
- const splitter = getSplitter(units, separator);
298
+ const uh = UnitsHandler.getOrCreate(tableCol);
299
+ const splitter = uh.getSplitter();
291
300
  const subParts1 = splitter(s1);
292
301
  const subParts2 = splitter(s2);
293
- drawMoleculeDifferenceOnCanvas(g, x, y, w, h, subParts1, subParts2, units);
302
+ drawMoleculeDifferenceOnCanvas(g, x, y, w, h, subParts1, subParts2, uh.units);
294
303
  }
295
304
  }
296
305
 
@@ -300,14 +309,14 @@ export function drawMoleculeDifferenceOnCanvas(
300
309
  y: number,
301
310
  w: number,
302
311
  h: number,
303
- subParts1: string [],
304
- subParts2: string [],
312
+ subParts1: ISeqSplitted,
313
+ subParts2: ISeqSplitted,
305
314
  units: string,
306
315
  fullStringLength?: boolean,
307
316
  molDifferences?: { [key: number]: HTMLCanvasElement },
308
317
  ): void {
309
318
  if (subParts1.length !== subParts2.length) {
310
- const sequences: IComparedSequences = fillShorterSequence(subParts1, subParts2);
319
+ const sequences: IComparedSequences = fillShorterSequence(wu(subParts1).toArray(), wu(subParts2).toArray());
311
320
  subParts1 = sequences.subParts1;
312
321
  subParts2 = sequences.subParts2;
313
322
  }