@datagrok/bio 1.11.0 → 1.11.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/package.json CHANGED
@@ -5,7 +5,7 @@
5
5
  "name": "Leonid Stolbov",
6
6
  "email": "lstolbov@datagrok.ai"
7
7
  },
8
- "version": "1.11.0",
8
+ "version": "1.11.3",
9
9
  "description": "Bio is a [package](https://datagrok.ai/help/develop/develop#packages) for the [Datagrok](https://datagrok.ai) platform",
10
10
  "repository": {
11
11
  "type": "git",
@@ -14,12 +14,12 @@
14
14
  },
15
15
  "dependencies": {
16
16
  "@biowasm/aioli": ">=2.4.0",
17
- "@datagrok-libraries/bio": "^4.2.0",
17
+ "@datagrok-libraries/bio": "^4.2.1",
18
18
  "@datagrok-libraries/chem-meta": "1.0.0",
19
- "@datagrok-libraries/ml": "^4.0.0",
19
+ "@datagrok-libraries/ml": "^6.0.0",
20
20
  "@datagrok-libraries/utils": "^1.6.2",
21
21
  "cash-dom": "latest",
22
- "datagrok-api": "^1.6.6",
22
+ "datagrok-api": "^1.6.7",
23
23
  "dayjs": "^1.11.4",
24
24
  "openchemlib": "6.0.1",
25
25
  "rxjs": "^6.5.5",
@@ -48,6 +48,7 @@
48
48
  "link-api": "npm link datagrok-api",
49
49
  "link-bio": "npm link @datagrok-libraries/bio",
50
50
  "link-ml": "npm link @datagrok-libraries/ml",
51
+ "link-utils": "npm link @datagrok-libraries/utils",
51
52
  "link-all": "npm link datagrok-api @datagrok-libraries/utils @datagrok-libraries/bio @datagrok-libraries/ml",
52
53
  "debug-sequences1": "grok publish",
53
54
  "release-sequences1": "grok publish --release",
@@ -69,7 +70,11 @@
69
70
  "Developers"
70
71
  ],
71
72
  "sources": [
72
- "css/helm.css"
73
+ "css/helm.css",
74
+ "https://ajax.googleapis.com/ajax/libs/dojo/1.10.4/dojo/dojo.js",
75
+ "helm/JSDraw/Scilligence.JSDraw2.Lite.js",
76
+ "helm/JSDraw/Scilligence.JSDraw2.Resources.js",
77
+ "helm/JSDraw/Pistoia.HELM-uncompressed.js"
73
78
  ],
74
79
  "category": "Bioinformatics"
75
80
  }
package/src/package.ts CHANGED
@@ -15,7 +15,7 @@ import {Aminoacids} from '@datagrok-libraries/bio/src/aminoacids';
15
15
  import {getEmbeddingColsNames, sequenceSpace} from './utils/sequence-space';
16
16
  import {AvailableMetrics} from '@datagrok-libraries/ml/src/typed-metrics';
17
17
  import {getActivityCliffs} from '@datagrok-libraries/ml/src/viewers/activity-cliffs';
18
- import {drawSequences, sequenceGetSimilarities} from './utils/sequence-activity-cliffs';
18
+ import {createPropPanelElement, createTooltipElement, getSimilaritiesMarix} from './utils/sequence-activity-cliffs';
19
19
  import {createJsonMonomerLibFromSdf, encodeMonomers, getMolfilesFromSeq, HELM_CORE_LIB_FILENAME} from './utils/utils';
20
20
  import {getMacroMol} from './utils/atomic-works';
21
21
  import {MacromoleculeSequenceCellRenderer} from './utils/cell-renderer';
@@ -180,8 +180,9 @@ export async function activityCliffs(df: DG.DataFrame, macroMolecule: DG.Column,
180
180
  DG.SEMTYPE.MACROMOLECULE,
181
181
  tags,
182
182
  sequenceSpace,
183
- sequenceGetSimilarities,
184
- drawSequences,
183
+ getSimilaritiesMarix,
184
+ createTooltipElement,
185
+ createPropPanelElement,
185
186
  (options as any)[methodName]);
186
187
  return sp;
187
188
  }
@@ -4,7 +4,7 @@ import * as grok from 'datagrok-api/grok';
4
4
  import * as ui from 'datagrok-api/ui';
5
5
  import * as DG from 'datagrok-api/dg';
6
6
 
7
- import {checkInputColumn} from '../package';
7
+ import {checkInputColumn, multipleSequenceAlignmentAny} from '../package';
8
8
  import {UNITS} from 'datagrok-api/dg';
9
9
  import {ALPHABET, UnitsHandler} from '@datagrok-libraries/bio/src/utils/units-handler';
10
10
 
@@ -17,53 +17,56 @@ seq2,
17
17
  seq3,
18
18
  seq4`;
19
19
 
20
- category('MSA', () => {
20
+ test('testMsaPos', async () => {
21
+ const func: DG.Func = DG.Func.find({package: 'Bio', name: 'multipleSequenceAlignmentAny'})[0];
22
+ const funcInputColumnProperty: DG.Property = func.inputs.find((i) => i.name == 'sequence')!;
21
23
 
22
- test('testMsaPos', async () => {
23
- const func: DG.Func = DG.Func.find({package: 'Bio', name: 'multipleSequenceAlignmentAny'})[0];
24
- const funcInputColumnProperty: DG.Property = func.inputs.find((i) => i.name == 'sequence')!;
24
+ let k = 11;
25
25
 
26
- let k = 11;
26
+ const df: DG.DataFrame = DG.DataFrame.fromCsv(csv);
27
+ const col: DG.Column = df.getCol('seq');
28
+ col.semType = DG.SEMTYPE.MACROMOLECULE;
29
+ col.setTag(DG.TAGS.UNITS, 'fasta');
30
+ col.setTag(UnitsHandler.TAGS.alphabet, ALPHABET.DNA);
27
31
 
28
- const df: DG.DataFrame = DG.DataFrame.fromCsv(csv);
29
- const col: DG.Column = df.getCol('seq');
30
- col.semType = DG.SEMTYPE.MACROMOLECULE;
31
- col.setTag(DG.TAGS.UNITS, 'fasta');
32
- col.setTag(UnitsHandler.TAGS.alphabet, ALPHABET.DNA);
32
+ const [res, msg]: [boolean, string] = checkInputColumn(
33
+ col, 'Test', ['fasta',], ['DNA', 'RNA', 'PT']);
33
34
 
34
- const [res, msg]: [boolean, string] = checkInputColumn(
35
- col, 'Test', ['fasta',], ['DNA', 'RNA', 'PT']);
35
+ expect(res, true);
36
+ });
36
37
 
37
- expect(res, true);
38
- });
38
+ test('testMsaNegHelm', async () => {
39
+ const df: DG.DataFrame = DG.DataFrame.fromCsv(csv);
40
+ const col: DG.Column = df.getCol('seq');
41
+ col.semType = DG.SEMTYPE.MACROMOLECULE;
42
+ col.setTag(DG.TAGS.UNITS, 'helm');
43
+ col.setTag(UnitsHandler.TAGS.alphabetSize, '11');
44
+ col.setTag(UnitsHandler.TAGS.alphabetIsMultichar, 'true');
39
45
 
40
- test('testMsaNegHelm', async () => {
41
- const df: DG.DataFrame = DG.DataFrame.fromCsv(csv);
42
- const col: DG.Column = df.getCol('seq');
43
- col.semType = DG.SEMTYPE.MACROMOLECULE;
44
- col.setTag(DG.TAGS.UNITS, 'helm');
45
- col.setTag(UnitsHandler.TAGS.alphabetSize, '11');
46
- col.setTag(UnitsHandler.TAGS.alphabetIsMultichar, 'true');
46
+ const [res, msg]: [boolean, string] = checkInputColumn(
47
+ col, 'Test', ['fasta',], ['DNA', 'RNA', 'PT']);
47
48
 
48
- const [res, msg]: [boolean, string] = checkInputColumn(
49
- col, 'Test', ['fasta',], ['DNA', 'RNA', 'PT']);
49
+ expect(res, false);
50
+ });
50
51
 
51
- expect(res, false);
52
- });
52
+ test('testMsaNegUN', async () => {
53
+ const df: DG.DataFrame = DG.DataFrame.fromCsv(csv);
54
+ const col: DG.Column = df.getCol('seq');
55
+ col.semType = DG.SEMTYPE.MACROMOLECULE;
56
+ col.setTag(DG.TAGS.UNITS, 'fasta');
57
+ col.setTag(UnitsHandler.TAGS.alphabet, 'UN');
58
+ col.setTag(UnitsHandler.TAGS.alphabetSize, '11');
59
+ col.setTag(UnitsHandler.TAGS.alphabetIsMultichar, 'true');
53
60
 
54
- test('testMsaNegUN', async () => {
55
- const df: DG.DataFrame = DG.DataFrame.fromCsv(csv);
56
- const col: DG.Column = df.getCol('seq');
57
- col.semType = DG.SEMTYPE.MACROMOLECULE;
58
- col.setTag(DG.TAGS.UNITS, 'fasta');
59
- col.setTag(UnitsHandler.TAGS.alphabet, 'UN');
60
- col.setTag(UnitsHandler.TAGS.alphabetSize, '11');
61
- col.setTag(UnitsHandler.TAGS.alphabetIsMultichar, 'true');
61
+ const [res, msg]: [boolean, string] = checkInputColumn(
62
+ col, 'Test', ['fasta',], ['DNA', 'RNA', 'PT']);
62
63
 
63
- const [res, msg]: [boolean, string] = checkInputColumn(
64
- col, 'Test', ['fasta',], ['DNA', 'RNA', 'PT']);
64
+ expect(res, false);
65
+ });
65
66
 
66
- expect(res, false);
67
- });
67
+ test('testGetActionFunctionMeta', async () => {
68
+ const func: DG.Func = DG.Func.find({package: 'Bio', name: 'multipleSequenceAlignmentAny'})[0];
69
+ const sequenceInput: DG.Property = func.inputs.find((i) => i.name == 'sequence')!;
70
+ let k = 11;
68
71
  });
69
72
  });
@@ -5,7 +5,7 @@ import * as DG from 'datagrok-api/dg';
5
5
 
6
6
  import {ConverterFunc} from './types';
7
7
  import {NotationConverter} from '@datagrok-libraries/bio/src/utils/notation-converter';
8
- import {NOTATION} from '@datagrok-libraries/bio/src/utils/units-handler';
8
+ import {NOTATION, UnitsHandler} from '@datagrok-libraries/bio/src/utils/units-handler';
9
9
 
10
10
  // import {mmSemType} from '../const';
11
11
  // import {importFasta} from '../package';
@@ -145,14 +145,17 @@ RNA1{P.R(U)P.R(U)P.R(C)P.R(A)P.R(A)P.R(C)P.P.P}$$$
145
145
 
146
146
  async function _testConvert(srcKey: string, converter: ConverterFunc, tgtKey: string) {
147
147
  const srcDf: DG.DataFrame = await readCsv(srcKey);
148
- const srcCol: DG.Column = srcDf.col('seq')!;
148
+ const srcCol: DG.Column = srcDf.getCol('seq');
149
149
 
150
+ // conversion results
150
151
  const resCol: DG.Column = converter(srcCol);
151
152
 
153
+ // The correct reference data to compare conversion results with.
152
154
  const tgtDf: DG.DataFrame = await readCsv(tgtKey);
153
- const tgtCol: DG.Column = tgtDf.col('seq')!;
155
+ const tgtCol: DG.Column = tgtDf.getCol('seq');
154
156
 
155
157
  expectArray(resCol.toList(), tgtCol.toList());
158
+ const uh: UnitsHandler = new UnitsHandler(resCol);
156
159
  }
157
160
 
158
161
  // FASTA tests
@@ -3,9 +3,9 @@ import {after, before, category, delay, expect, test} from '@datagrok-libraries/
3
3
  import * as grok from 'datagrok-api/grok';
4
4
  import * as DG from 'datagrok-api/dg';
5
5
  import {importFasta, multipleSequenceAlignmentAny} from '../package';
6
- import {readDataframe} from './utils';
7
6
  import {convertDo} from '../utils/convert';
8
7
  import {ALPHABET, NOTATION, UnitsHandler} from '@datagrok-libraries/bio/src/utils/units-handler';
8
+ import {SEM_TYPES, TAGS} from '../utils/constants';
9
9
 
10
10
  category('renderers', () => {
11
11
  let tvList: DG.TableView[];
@@ -30,33 +30,45 @@ category('renderers', () => {
30
30
  await _testAfterConvert();
31
31
  });
32
32
 
33
+ test('setRenderer', async () => {
34
+ await _setRendererManually();
35
+ });
36
+
33
37
  async function _testAfterMsa() {
34
38
  const fastaTxt: string = await grok.dapi.files.readAsText('System:AppData/Bio/samples/sample_FASTA.fasta');
35
39
  const df: DG.DataFrame = importFasta(fastaTxt)[0];
40
+
41
+ const srcSeqCol: DG.Column = df.getCol('sequence');
42
+ const semType: string = await grok.functions.call('Bio:detectMacromolecule', {col: srcSeqCol});
43
+ if (semType)
44
+ srcSeqCol.semType = semType;
45
+
36
46
  const tv: DG.TableView = grok.shell.addTableView(df);
47
+ // call to calculate 'cell.renderer' tag
37
48
  await grok.data.detectSemanticTypes(df);
38
- console.log('Bio: tests/renderers/afterMsa, table view');
39
49
 
40
- const srcSeqCol: DG.Column | null = df.col('sequence');
41
- expect(srcSeqCol !== null, true);
50
+ console.log('Bio: tests/renderers/afterMsa, table view');
42
51
 
43
52
  console.log('Bio: tests/renderers/afterMsa, src before test ' +
44
53
  `semType="${srcSeqCol!.semType}", units="${srcSeqCol!.getTag(DG.TAGS.UNITS)}", ` +
45
54
  `cell.renderer="${srcSeqCol!.getTag(DG.TAGS.CELL_RENDERER)}"`);
46
- expect(srcSeqCol!.semType, DG.SEMTYPE.MACROMOLECULE);
47
- expect(srcSeqCol!.getTag(DG.TAGS.UNITS), NOTATION.FASTA);
48
- expect(srcSeqCol!.getTag(UnitsHandler.TAGS.aligned), 'SEQ');
49
- expect(srcSeqCol!.getTag(UnitsHandler.TAGS.alphabet), ALPHABET.PT);
50
- expect(srcSeqCol!.getTag(DG.TAGS.CELL_RENDERER), 'sequence');
55
+ expect(srcSeqCol.semType, DG.SEMTYPE.MACROMOLECULE);
56
+ expect(srcSeqCol.getTag(DG.TAGS.UNITS), NOTATION.FASTA);
57
+ expect(srcSeqCol.getTag(UnitsHandler.TAGS.aligned), 'SEQ');
58
+ expect(srcSeqCol.getTag(UnitsHandler.TAGS.alphabet), ALPHABET.PT);
59
+ expect(srcSeqCol.getTag(DG.TAGS.CELL_RENDERER), 'sequence');
51
60
 
52
- const msaSeqCol: DG.Column | null = await multipleSequenceAlignmentAny(df, srcSeqCol!);
61
+ const msaSeqCol: DG.Column = (await multipleSequenceAlignmentAny(df, srcSeqCol!))!;
53
62
  tv.grid.invalidate();
54
63
 
55
- expect(msaSeqCol!.semType, DG.SEMTYPE.MACROMOLECULE);
56
- expect(msaSeqCol!.getTag(DG.TAGS.UNITS), NOTATION.FASTA);
57
- expect(msaSeqCol!.getTag(UnitsHandler.TAGS.aligned), 'SEQ.MSA');
58
- expect(msaSeqCol!.getTag(UnitsHandler.TAGS.alphabet), ALPHABET.PT);
59
- expect(msaSeqCol!.getTag(DG.TAGS.CELL_RENDERER), 'sequence');
64
+ expect(msaSeqCol.semType, DG.SEMTYPE.MACROMOLECULE);
65
+ expect(msaSeqCol.getTag(DG.TAGS.UNITS), NOTATION.FASTA);
66
+ expect(msaSeqCol.getTag(UnitsHandler.TAGS.aligned), 'SEQ.MSA');
67
+ expect(msaSeqCol.getTag(UnitsHandler.TAGS.alphabet), ALPHABET.PT);
68
+ expect(msaSeqCol.getTag(DG.TAGS.CELL_RENDERER), 'sequence');
69
+
70
+ // check newColumn with UnitsHandler constructor
71
+ const uh: UnitsHandler = new UnitsHandler(msaSeqCol);
60
72
 
61
73
  dfList.push(df);
62
74
  tvList.push(tv);
@@ -66,13 +78,35 @@ category('renderers', () => {
66
78
  const csv: string = await grok.dapi.files.readAsText('System:AppData/Bio/samples/sample_FASTA_PT.csv');
67
79
  const df: DG.DataFrame = DG.DataFrame.fromCsv(csv);
68
80
  const tv: DG.TableView = grok.shell.addTableView(df);
69
- await grok.data.detectSemanticTypes(df);
70
81
 
71
82
  const srcCol: DG.Column = df.col('sequence')!;
83
+ // await grok.data.detectSemanticTypes(df);
84
+ const semType: string = await grok.functions.call('Bio:detectMacromolecule', {col: srcCol});
85
+ if (semType)
86
+ srcCol.semType = semType;
87
+ await grok.data.detectSemanticTypes(df);
88
+
72
89
  const tgtCol: DG.Column = await convertDo(srcCol, NOTATION.SEPARATOR, '/');
73
90
  expect(tgtCol.getTag(DG.TAGS.CELL_RENDERER), 'sequence');
74
91
 
92
+ // check tgtCol with UnitsHandler constructor
93
+ const uh: UnitsHandler = new UnitsHandler(tgtCol);
94
+
75
95
  tvList.push(tv);
76
96
  dfList.push(df);
77
- };
97
+ }
98
+
99
+ async function _setRendererManually() {
100
+ const df = DG.DataFrame.fromColumns([DG.Column.fromStrings(
101
+ 'SequencesDiff', ['meI/hHis/Aca/N/T/dK/Thr_PO3H2/Aca#D-Tyr_Et/Tyr_ab-dehydroMe/meN/E/N/dV'])]);
102
+ df.col('SequencesDiff')!.tags[DG.TAGS.UNITS] = 'separator';
103
+ df.col('SequencesDiff')!.tags[TAGS.SEPARATOR] = '/';
104
+ df.col('SequencesDiff')!.semType = SEM_TYPES.MACROMOLECULE_DIFFERENCE;
105
+ const tw = grok.shell.addTableView(df);
106
+ await delay(100);
107
+ const renderer = tw.dataFrame.col('SequencesDiff')?.getTag(DG.TAGS.CELL_RENDERER);
108
+ if (renderer !== 'MacromoleculeDifferenceCR')
109
+ throw new Error(`Units 'separator', separator '/' and semType 'MacromoleculeDifference' have been ` +
110
+ `manually set on column but after df aws added as table view renderer has been reset to '${renderer}'`);
111
+ }
78
112
  });
@@ -1,10 +1,15 @@
1
1
  import * as DG from 'datagrok-api/dg';
2
2
  import * as grok from 'datagrok-api/grok';
3
- import { expect } from '@datagrok-libraries/utils/src/test';
4
- import { sequenceSpaceTopMenu } from '../package';
3
+ import {expect} from '@datagrok-libraries/utils/src/test';
4
+ import {sequenceSpaceTopMenu} from '../package';
5
5
 
6
6
  export async function _testSequenceSpaceReturnsResult(df: DG.DataFrame, algorithm: string, colName: string) {
7
- await grok.data.detectSemanticTypes(df);
7
+ // await grok.data.detectSemanticTypes(df);
8
+ const col: DG.Column = df.getCol(colName);
9
+ const semType: string = await grok.functions.call('Bio:detectMacromolecule', {col: col});
10
+ if (semType)
11
+ col.semType = semType;
12
+
8
13
  const sp = await sequenceSpaceTopMenu(df, df.col(colName)!, algorithm, 'Levenshtein', true);
9
14
  expect(sp != null, true);
10
15
  }
@@ -1,27 +1,22 @@
1
1
  import * as DG from 'datagrok-api/dg';
2
2
  import * as grok from 'datagrok-api/grok';
3
+ import {DataFrame} from 'datagrok-api/dg';
3
4
 
4
- export function generateManySequences(): string {
5
- let csvData = `MSA,Activity
6
- meI/hHis/Aca/N/T/dE/Thr_PO3H2/Aca/D-Tyr_Et/Tyr_ab-dehydroMe/dV/E/N/D-Orn/D-aThr//Phe_4Me,5.30751`;
7
- for (let i = 0; i < 10 ** 6; i++) {
8
- csvData += `\n meI/hHis/Aca/N/T/dE/Thr_PO3H2/Aca/D-Tyr_Et/Tyr_ab-dehydroMe/dV/E/N/D-Orn/D-aThr//Phe_4Me,5.30751`;
9
- }
10
- return csvData;
5
+ export function generateManySequences(): DG.Column[] {
6
+ let columns: DG.Column[] = [];
7
+ columns.push(DG.Column.fromList('string', 'MSA', new Array(10 ** 6).fill('meI/hHis/Aca/N/T/dE/Thr_PO3H2/Aca/D-Tyr_Et/Tyr_ab-dehydroMe/dV/E/N/D-Orn/D-aThr//Phe_4Me')));
8
+ columns.push(DG.Column.fromList('string', 'Activity', new Array(10 ** 6).fill('5.30751')));
9
+ return columns;
11
10
  }
12
11
 
13
- export function generateLongSequence(): string {
14
- let longSequence = `meI/hHis/Aca/N/T/dE/Thr_PO3H2/Aca/D-Tyr_Et/Tyr_ab-dehydroMe/dV/E/N/D-Orn/D-aThr`;
15
- for (let i = 0; i < 10 ** 5; i++) {
16
- longSequence += `/Aca/N/T/dE/Thr_PO3H2/Aca/D-Tyr_Et/Tyr_ab-dehydroMe/dV/dv`;
17
- }
18
- longSequence += `//Phe_4Me,5.30751`;
19
- let csvData = `MSA,Activity `;
20
- for (let i = 0; i <= 10 ** 1 * 4; i++) {
21
- csvData += `\n ${longSequence}`;
22
- }
23
- return csvData;
12
+ export function generateLongSequence(): DG.Column[] {
13
+ let columns: DG.Column[] = [];
14
+ const longSequence = `meI/hHis/Aca/N/T/dE/Thr_PO3H2/Aca/D-Tyr_Et/Tyr_ab-dehydroMe/dV/E/N/D-Orn/D-aThr`.repeat(10 ** 5);
15
+ columns.push(DG.Column.fromList('string', 'MSA', new Array(10 ** 2).fill(longSequence)));
16
+ columns.push(DG.Column.fromList('string', 'Activity', new Array(10 ** 2).fill('7.30751')));
17
+ return columns;
24
18
  }
19
+
25
20
  export function setTagsMacromolecule(col: DG.Column) {
26
21
  col.semType = DG.SEMTYPE.MACROMOLECULE;
27
22
  col.setTag('units', 'separator');
@@ -31,10 +26,10 @@ export function setTagsMacromolecule(col: DG.Column) {
31
26
  return col;
32
27
  }
33
28
 
34
- export function performanceTest(generateFunc: () => string,testName: string) {
29
+ export function performanceTest(generateFunc: () => DG.Column[], testName: string) {
30
+ const columns = generateFunc();
31
+ const df: DG.DataFrame = DG.DataFrame.fromColumns(columns);
35
32
  const startTime: number = Date.now();
36
- const csv = generateFunc();
37
- const df: DG.DataFrame = DG.DataFrame.fromCsv(csv);
38
33
  const col: DG.Column = df.columns.byName('MSA');
39
34
  setTagsMacromolecule(col);
40
35
  grok.shell.addTableView(df);
@@ -2,7 +2,7 @@ import * as C from './constants';
2
2
  import * as DG from 'datagrok-api/dg';
3
3
  import {AminoacidsPalettes} from '@datagrok-libraries/bio/src/aminoacids';
4
4
  import {NucleotidesPalettes} from '@datagrok-libraries/bio/src/nucleotides';
5
- import {UnknownSeqPalette, UnknownSeqPalettes} from '@datagrok-libraries/bio/src/unknown';
5
+ import {UnknownSeqPalettes} from '@datagrok-libraries/bio/src/unknown';
6
6
  import {SplitterFunc, WebLogo} from '@datagrok-libraries/bio/src/viewers/web-logo';
7
7
  import {SeqPalette} from '@datagrok-libraries/bio/src/seq-palettes';
8
8
  import * as ui from 'datagrok-api/ui';
@@ -14,7 +14,7 @@ const monomerToShortFunction: (amino: string, maxLengthOfMonomer: number) => str
14
14
  const gapRenderer = 5;
15
15
 
16
16
 
17
- function getPalleteByType(paletteType: string): SeqPalette {
17
+ function getPaletteByType(paletteType: string): SeqPalette {
18
18
  switch (paletteType) {
19
19
  case 'PT':
20
20
  return AminoacidsPalettes.GrokGroups;
@@ -30,6 +30,10 @@ function getPalleteByType(paletteType: string): SeqPalette {
30
30
  }
31
31
  }
32
32
 
33
+ function getUpdatedWidth(grid: DG.Grid | null, g: CanvasRenderingContext2D, x: number, w: number): number {
34
+ return grid ? Math.min(grid.canvas.width - x, w) : g.canvas.width - x;
35
+ }
36
+
33
37
  export function processSequence(subParts: string[]): [string[], boolean] {
34
38
  const simplified = !subParts.some((amino, index) =>
35
39
  amino.length > 1 &&
@@ -63,8 +67,7 @@ export class MacromoleculeSequenceCellRenderer extends DG.GridCellRenderer {
63
67
  }
64
68
  const maxLengthWordsSum = gridCell.cell.column.temp['bio-sum-maxLengthWords'];
65
69
  const maxIndex = gridCell.cell.column.temp['bio-maxIndex'];
66
- //@ts-ignore
67
- const argsX = e.layerX - gridCell.gridColumn.left + (gridCell.gridColumn.left - gridCell.bounds.x);
70
+ const argsX = e.offsetX - gridCell.gridColumn.left + (gridCell.gridColumn.left - gridCell.bounds.x);
68
71
  let left = 0;
69
72
  let right = maxIndex;
70
73
  let found = false;
@@ -109,23 +112,22 @@ export class MacromoleculeSequenceCellRenderer extends DG.GridCellRenderer {
109
112
  g: CanvasRenderingContext2D, x: number, y: number, w: number, h: number, gridCell: DG.GridCell,
110
113
  cellStyle: DG.GridCellStyle
111
114
  ): void {
112
- const grid = gridCell.gridRow !== -1 ? gridCell.grid : undefined;
115
+ const grid = gridCell.gridRow !== -1 ? gridCell.grid : null;
113
116
  const cell = gridCell.cell;
114
- const [type, subtype, paletteType] = gridCell.cell.column.getTag(DG.TAGS.UNITS).split(':');
117
+ const paletteType = gridCell.cell.column.getTag(C.TAGS.ALPHABET);
115
118
  const minDistanceRenderer = 50;
116
- w = grid ? Math.min(grid.canvas.width - x, w) : g.canvas.width - x;
119
+ w = getUpdatedWidth(grid, g, x, w);
117
120
  g.save();
118
121
  g.beginPath();
119
122
  g.rect(x, y, w, h);
120
123
  g.clip();
121
124
  g.font = '12px monospace';
122
125
  g.textBaseline = 'top';
123
- const s: string = cell.value ?? '';
124
126
 
125
127
  //TODO: can this be replaced/merged with splitSequence?
126
128
  const units = gridCell.cell.column.getTag(DG.TAGS.UNITS);
127
129
 
128
- const palette = getPalleteByType(paletteType);
130
+ const palette = getPaletteByType(paletteType);
129
131
 
130
132
  const separator = gridCell.cell.column.getTag('separator') ?? '';
131
133
  const splitLimit = gridCell.bounds.width / 5;
@@ -184,10 +186,8 @@ export class MacromoleculeSequenceCellRenderer extends DG.GridCellRenderer {
184
186
  g.fillStyle = undefinedColor;
185
187
  let last = index === subParts.length - 1;
186
188
  x1 = printLeftOrCentered(x1, y, w, h, g, monomerToShortFunction(amino, maxLengthOfMonomer), color, 0, true, 1.0, separator, last, drawStyle, maxLengthWords, index, gridCell);
187
- if (x1 - minDistanceRenderer - gridCell.gridColumn.left + (gridCell.gridColumn.left - gridCell.bounds.x) > gridCell.bounds.width) {
188
- return false;
189
- }
190
- return true;
189
+ return x1 - minDistanceRenderer - gridCell.gridColumn.left + (gridCell.gridColumn.left - gridCell.bounds.x) <= gridCell.bounds.width;
190
+
191
191
  });
192
192
 
193
193
  g.restore();
@@ -226,7 +226,7 @@ export class MonomerCellRenderer extends DG.GridCellRenderer {
226
226
  g.font = `12px monospace`;
227
227
  g.textBaseline = 'top';
228
228
 
229
- const palette = getPalleteByType(gridCell.tableColumn!.tags[C.TAGS.ALPHABET]);
229
+ const palette = getPaletteByType(gridCell.tableColumn!.tags[C.TAGS.ALPHABET]);
230
230
  const s: string = gridCell.cell.value ? gridCell.cell.value : '-';
231
231
  const color = palette.get(s);
232
232
 
@@ -262,7 +262,7 @@ export class MacromoleculeDifferenceCellRenderer extends DG.GridCellRenderer {
262
262
  const grid = gridCell.grid;
263
263
  const cell = gridCell.cell;
264
264
 
265
- w = grid ? Math.min(grid.canvas.width - x, w) : g.canvas.width - x;
265
+ w = getUpdatedWidth(grid, g, w, x);
266
266
  g.save();
267
267
  g.beginPath();
268
268
  g.rect(x, y, w, h);
@@ -286,7 +286,7 @@ export class MacromoleculeDifferenceCellRenderer extends DG.GridCellRenderer {
286
286
 
287
287
  let palette: SeqPalette = UnknownSeqPalettes.Color;
288
288
  if (units != 'HELM')
289
- palette = getPalleteByType(units.substring(units.length - 2));
289
+ palette = getPaletteByType(units.substring(units.length - 2));
290
290
 
291
291
  const vShift = 7;
292
292
  for (let i = 0; i < subParts1.length; i++) {
@@ -5,7 +5,7 @@ import $ from 'cash-dom';
5
5
 
6
6
  import {Subscription} from 'rxjs';
7
7
  import {NotationConverter} from '@datagrok-libraries/bio/src/utils/notation-converter';
8
- import {NOTATION} from '@datagrok-libraries/bio/src/utils/units-handler';
8
+ import {NOTATION, UnitsHandler} from '@datagrok-libraries/bio/src/utils/units-handler';
9
9
 
10
10
 
11
11
  let convertDialog: DG.Dialog | null = null;
@@ -85,6 +85,9 @@ export async function convertDo(
85
85
  const converter = new NotationConverter(srcCol);
86
86
  const newColumn = converter.convert(targetNotation, separator);
87
87
  srcCol.dataFrame.columns.add(newColumn);
88
+
89
+ // call to calculate 'cell.renderer' tag
88
90
  await grok.data.detectSemanticTypes(srcCol.dataFrame);
91
+
89
92
  return newColumn;
90
93
  }
@@ -3,18 +3,115 @@ import * as DG from 'datagrok-api/dg';
3
3
  import * as ui from 'datagrok-api/ui';
4
4
  import {getSimilarityFromDistance} from '@datagrok-libraries/utils/src/similarity-metrics';
5
5
  import {AvailableMetrics} from '@datagrok-libraries/ml/src/typed-metrics';
6
+ import * as grok from 'datagrok-api/grok';
7
+ import { SplitterFunc, WebLogo } from '@datagrok-libraries/bio/src/viewers/web-logo';
8
+ import { UnitsHandler } from '@datagrok-libraries/bio/src/utils/units-handler';
6
9
 
7
- export async function sequenceGetSimilarities(col: DG.Column, seq: string): Promise<DG.Column | null> {
10
+ export async function getDistances(col: DG.Column, seq: string): Promise<Array<number>> {
8
11
  const stringArray = col.toList();
9
- const distances = new Array(stringArray.length).fill(0.0);
10
- for (let i = 0; i < stringArray.length; ++i)
11
- distances[i] = stringArray[i] ? getSimilarityFromDistance(AvailableMetrics['String']['Levenshtein'](stringArray[i], seq)) : 0;
12
- return DG.Column.fromList(DG.COLUMN_TYPE.FLOAT, 'distances', distances);
12
+ const distances = new Array(stringArray.length).fill(0);
13
+ for (let i = 0; i < stringArray.length; ++i) {
14
+ const distance = stringArray[i] ? AvailableMetrics['String']['Levenshtein'](stringArray[i], seq) : null;
15
+ distances[i] = distance ? distance/Math.max((stringArray[i] as string).length, seq.length) : null;
16
+ }
17
+ return distances;
13
18
  }
14
19
 
15
- export function drawSequences(params: ITooltipAndPanelParams) {
16
- params.line.mols.forEach((mol: number, index: number) => {
17
- ui.empty(params.hosts[index]);
18
- params.hosts[index].append(ui.divText(params.seqCol.get(mol)));
20
+ export async function getSimilaritiesMarix(dim: number, seqCol: DG.Column, df: DG.DataFrame, colName: string, simArr: DG.Column[])
21
+ : Promise<DG.Column[]> {
22
+
23
+ const distances = new Array(simArr.length).fill(null);
24
+ for (let i = 0; i != dim - 1; ++i) {
25
+ const seq: string = seqCol.get(i);
26
+ df.rows.removeAt(0, 1, false);
27
+ distances[i] = (await getDistances(df.col(colName)!, seq))!;
28
+ }
29
+
30
+ for (let i = 0; i < distances.length; i++) {
31
+ for (let j = 0; j < distances[i].length; j++) {
32
+ distances[i][j] = getSimilarityFromDistance(distances[i][j]);
33
+ }
34
+ simArr[i] = DG.Column.fromList(DG.COLUMN_TYPE.FLOAT, 'distances', distances[i]);
35
+ }
36
+ return simArr;
37
+ }
38
+
39
+ export function createTooltipElement(params: ITooltipAndPanelParams): HTMLDivElement {
40
+ const tooltipElement = ui.divH([]);
41
+ const columnNames = ui.divV([
42
+ ui.divText(params.seqCol.name),
43
+ ui.divText(params.activityCol.name),
44
+ ]);
45
+ columnNames.style.fontWeight = 'bold';
46
+ columnNames.style.display = 'flex';
47
+ columnNames.style.justifyContent = 'space-between';
48
+ tooltipElement.append(columnNames);
49
+ params.line.mols.forEach((molIdx: number, idx: number) => {
50
+ const activity = ui.divText(params.activityCol.get(molIdx).toFixed(2));
51
+ activity.style.display = 'flex';
52
+ activity.style.justifyContent = 'left';
53
+ activity.style.paddingLeft = '30px';
54
+ tooltipElement.append(ui.divV([
55
+ ui.divText(params.seqCol.get(molIdx)),
56
+ activity,
57
+ ]));
19
58
  });
59
+ return tooltipElement;
20
60
  }
61
+
62
+ function moleculeInfo(df: DG.DataFrame, idx: number, seqColName: string): HTMLElement {
63
+ let dict: {[key: string]: string} = {};
64
+ for (let col of df.columns) {
65
+ if(col.name !== seqColName) {
66
+ dict[col.name] = df.get(col.name, idx);
67
+ }
68
+ }
69
+ return ui.tableFromMap(dict);
70
+ }
71
+
72
+
73
+ export function createPropPanelElement(params: ITooltipAndPanelParams): HTMLDivElement {
74
+ const propPanel = ui.divV([]);
75
+ const columnNames = ui.divH([
76
+ ui.divText(params.seqCol.name),
77
+ ui.divText(params.activityCol.name),
78
+ ]);
79
+ columnNames.style.fontWeight = 'bold';
80
+ columnNames.style.justifyContent = 'space-between';
81
+ propPanel.append(columnNames);
82
+ const hosts: HTMLDivElement[] = [];
83
+ params.line.mols.forEach((molIdx: number, hostIdx: number) => {
84
+ const activity = ui.divText(params.activityCol.get(molIdx).toFixed(2));
85
+ activity.style.paddingLeft = '15px';
86
+ activity.style.paddingLeft = '10px';
87
+ const molHost = ui.divText(params.seqCol.get(molIdx));
88
+ if (params.df.currentRowIdx === molIdx) {
89
+ molHost.style.border = 'solid 1px lightgrey';
90
+ }
91
+ //@ts-ignore
92
+ ui.tooltip.bind(molHost, () => moleculeInfo(params.df, molIdx, params.seqCol.name));
93
+ molHost.onclick = () => {
94
+ const obj = grok.shell.o;
95
+ molHost.style.border = 'solid 1px lightgrey';
96
+ params.df.currentRowIdx = molIdx;
97
+ hosts.forEach((h, i) => {
98
+ if (i !== hostIdx) {
99
+ h.style.border = '';
100
+ }
101
+ })
102
+ setTimeout(() => {
103
+ grok.shell.o = obj
104
+ }, 1000);
105
+ };
106
+ propPanel.append(ui.divH([
107
+ molHost,
108
+ activity,
109
+ ]));
110
+ hosts.push(molHost);
111
+ });
112
+ propPanel.append(ui.divH([
113
+ ui.divText(`Cliff: `, {style: {fontWeight: 'bold', paddingRight: '5px'}}),
114
+ ui.divText(params.sali!.toFixed(2))
115
+ ]));
116
+ return propPanel;
117
+ }