@datagrok/bio 1.7.14 → 1.7.19
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/detectors.js +5 -4
- package/dist/package-test.js +371 -203
- package/dist/package.js +236 -166
- package/files/tests/testDmvOffices.csv +514 -0
- package/package.json +5 -9
- package/setup +35 -0
- package/setup-unlink-clean +22 -0
- package/setup-unlink-clean.cmd +15 -0
- package/setup.cmd +15 -19
- package/src/package-test.ts +1 -0
- package/src/package.ts +52 -83
- package/src/tests/WebLogo-positions-test.ts +68 -0
- package/src/tests/activity-cliffs-tests.ts +2 -2
- package/src/tests/detectors-test.ts +10 -1
- package/src/tests/msa-tests.ts +45 -20
- package/src/tests/utils.ts +0 -12
- package/src/utils/cell-renderer.ts +16 -29
- package/src/utils/multiple-sequence-alignment.ts +7 -11
- package/src/utils/utils.ts +6 -6
- package/src/widgets/representations.ts +1 -0
- package/{test-Bio-34f75e5127b8-92b3a565.html → test-Bio-4f0c8bae6479-ddc1ded2.html} +8 -11
package/setup.cmd
CHANGED
|
@@ -1,19 +1,15 @@
|
|
|
1
|
-
|
|
2
|
-
|
|
3
|
-
|
|
4
|
-
|
|
5
|
-
|
|
6
|
-
|
|
7
|
-
|
|
8
|
-
|
|
9
|
-
|
|
10
|
-
|
|
11
|
-
|
|
12
|
-
cd
|
|
13
|
-
call npm
|
|
14
|
-
call npm link
|
|
15
|
-
call npm
|
|
16
|
-
cd ../../packages/Bio
|
|
17
|
-
call npm install
|
|
18
|
-
call npm link datagrok-api @datagrok-libraries/bio @datagrok-libraries/utils @datagrok-libraries/ml
|
|
19
|
-
webpack
|
|
1
|
+
call setup-unlink-clean.cmd
|
|
2
|
+
|
|
3
|
+
set package_dir=%cd%
|
|
4
|
+
|
|
5
|
+
set dirs=^
|
|
6
|
+
\..\..\js-api\ ^
|
|
7
|
+
\..\..\libraries\utils\ ^
|
|
8
|
+
\..\..\libraries\ml\ ^
|
|
9
|
+
\..\..\libraries\bio\ ^
|
|
10
|
+
\
|
|
11
|
+
|
|
12
|
+
for %%p in (%dirs%) do cd %package_dir%\%%p & call npm install
|
|
13
|
+
for %%p in (%dirs%) do cd %package_dir%\%%p & call npm link
|
|
14
|
+
for %%p in (%dirs%) do cd %package_dir%\%%p & call npm run link-all
|
|
15
|
+
for %%p in (%dirs%) do cd %package_dir%\%%p & call npm run build
|
package/src/package-test.ts
CHANGED
package/src/package.ts
CHANGED
|
@@ -22,14 +22,21 @@ import {MacromoleculeSequenceCellRenderer} from './utils/cell-renderer';
|
|
|
22
22
|
import {convert} from './utils/convert';
|
|
23
23
|
import {lru} from './utils/cell-renderer';
|
|
24
24
|
import {representationsWidget} from './widgets/representations';
|
|
25
|
+
import {UnitsHandler} from '@datagrok-libraries/bio/src/utils/units-handler';
|
|
26
|
+
import {FastaFileHandler} from '@datagrok-libraries/bio/src/utils/fasta-handler';
|
|
27
|
+
|
|
28
|
+
|
|
25
29
|
|
|
26
30
|
//tags: init
|
|
27
31
|
export async function initBio(): Promise<void> {
|
|
28
32
|
// apparently HELMWebEditor requires dojo to be initialized first
|
|
33
|
+
if (DG.Func.find({package: 'Helm', name: 'initHelm'}) != null) {
|
|
34
|
+
grok.functions.call('Helm:initHelp');
|
|
35
|
+
}
|
|
29
36
|
return new Promise((resolve, reject) => {
|
|
30
37
|
// @ts-ignore
|
|
31
38
|
dojo.ready(function() { resolve(null); });
|
|
32
|
-
});
|
|
39
|
+
});
|
|
33
40
|
}
|
|
34
41
|
|
|
35
42
|
//name: Lru
|
|
@@ -240,26 +247,53 @@ export async function compositionAnalysis(): Promise<void> {
|
|
|
240
247
|
// Higher priority for columns with MSA data to show with WebLogo.
|
|
241
248
|
const tv = grok.shell.tv;
|
|
242
249
|
const df = tv.dataFrame;
|
|
250
|
+
//@ts-ignore
|
|
251
|
+
const colList: DG.Column[] = df.columns.toList().filter((col) => {
|
|
252
|
+
if (col.semType != DG.SEMTYPE.MACROMOLECULE)
|
|
253
|
+
return false;
|
|
254
|
+
|
|
255
|
+
const colUH = new UnitsHandler(col);
|
|
256
|
+
// TODO: prevent for cyclic, branched or multiple chains in Helm
|
|
257
|
+
return true;
|
|
258
|
+
});
|
|
259
|
+
|
|
260
|
+
const handler = async (col: DG.Column) => {
|
|
261
|
+
if (!checkInputColumn(col, 'Composition'))
|
|
262
|
+
return;
|
|
263
|
+
|
|
264
|
+
const wlViewer = tv.addViewer('WebLogo', {sequenceColumnName: col.name});
|
|
265
|
+
grok.shell.tv.dockManager.dock(wlViewer, DG.DOCK_TYPE.DOWN, null, 'Composition analysis', 0.25);
|
|
266
|
+
};
|
|
243
267
|
|
|
244
|
-
|
|
245
|
-
if (
|
|
268
|
+
let col: DG.Column | null = null;
|
|
269
|
+
if (colList.length == 0) {
|
|
246
270
|
grok.shell.error('Current table does not contain sequences');
|
|
247
271
|
return;
|
|
272
|
+
} else if (colList.length > 1) {
|
|
273
|
+
const colListNames: string [] = colList.map((col) => col.name);
|
|
274
|
+
const colInput: DG.InputBase = ui.choiceInput('Column', colListNames[0], colListNames);
|
|
275
|
+
ui.dialog({
|
|
276
|
+
title: 'R-Groups Analysis',
|
|
277
|
+
helpUrl: '/help/domains/bio/macromolecules.md#composition-analysis'
|
|
278
|
+
})
|
|
279
|
+
.add(ui.div([
|
|
280
|
+
colInput,
|
|
281
|
+
]))
|
|
282
|
+
.onOK(async () => {
|
|
283
|
+
const col: DG.Column | null = colList.find((col) => col.name == colInput.value) ?? null;
|
|
284
|
+
|
|
285
|
+
if (col)
|
|
286
|
+
await handler(col);
|
|
287
|
+
})
|
|
288
|
+
.show();
|
|
289
|
+
} else {
|
|
290
|
+
col = colList[0];
|
|
248
291
|
}
|
|
249
292
|
|
|
250
|
-
if (!
|
|
293
|
+
if (!col)
|
|
251
294
|
return;
|
|
252
295
|
|
|
253
|
-
|
|
254
|
-
const units = col.getTag(DG.TAGS.UNITS);
|
|
255
|
-
if (!allowedNotations.some((n) => units.toUpperCase().startsWith(n.toUpperCase()))) {
|
|
256
|
-
grok.shell.warning('Composition analysis is allowed for ' +
|
|
257
|
-
`notation${allowedNotations.length > 1 ? 's' : ''} ${allowedNotations.map((n) => `"${n}"`).join(', ')}.`);
|
|
258
|
-
return;
|
|
259
|
-
}
|
|
260
|
-
|
|
261
|
-
const wlViewer = tv.addViewer('WebLogo', {sequenceColumnName: col.name});
|
|
262
|
-
grok.shell.tv.dockManager.dock(wlViewer, DG.DOCK_TYPE.DOWN, null, 'Composition analysis', 0.25);
|
|
296
|
+
await handler(col);
|
|
263
297
|
}
|
|
264
298
|
|
|
265
299
|
//top-menu: Bio | Sdf to Json lib...
|
|
@@ -269,17 +303,6 @@ export async function sdfToJsonLib(table: DG.DataFrame) {
|
|
|
269
303
|
const jsonMonomerLibrary = createJsonMonomerLibFromSdf(table);
|
|
270
304
|
}
|
|
271
305
|
|
|
272
|
-
// helper function for importFasta
|
|
273
|
-
function parseMacromolecule(
|
|
274
|
-
fileContent: string,
|
|
275
|
-
startOfSequence: number,
|
|
276
|
-
endOfSequence: number
|
|
277
|
-
): string {
|
|
278
|
-
const seq = fileContent.slice(startOfSequence, endOfSequence);
|
|
279
|
-
const seqArray = seq.split(/\s/);
|
|
280
|
-
return seqArray.join('');
|
|
281
|
-
}
|
|
282
|
-
|
|
283
306
|
//name: Representations
|
|
284
307
|
//tags: panel, widgets
|
|
285
308
|
//input: cell macroMolecule {semType: Macromolecule}
|
|
@@ -287,7 +310,7 @@ function parseMacromolecule(
|
|
|
287
310
|
export async function peptideMolecule(macroMolecule: DG.Cell): Promise<DG.Widget> {
|
|
288
311
|
const monomersLibFile = await _package.files.readAsText(HELM_CORE_LIB_FILENAME);
|
|
289
312
|
const monomersLibObject: any[] = JSON.parse(monomersLibFile);
|
|
290
|
-
|
|
313
|
+
|
|
291
314
|
return representationsWidget(macroMolecule, monomersLibObject);
|
|
292
315
|
}
|
|
293
316
|
|
|
@@ -298,63 +321,8 @@ export async function peptideMolecule(macroMolecule: DG.Cell): Promise<DG.Widget
|
|
|
298
321
|
//input: string fileContent
|
|
299
322
|
//output: list tables
|
|
300
323
|
export function importFasta(fileContent: string): DG.DataFrame [] {
|
|
301
|
-
const
|
|
302
|
-
|
|
303
|
-
const sequencesArray: string[] = [];
|
|
304
|
-
let startOfSequence = 0;
|
|
305
|
-
let match; // match.index is the beginning of the matched line
|
|
306
|
-
while (match = regex.exec(fileContent)) {
|
|
307
|
-
const description = fileContent.substring(match.index + 1, regex.lastIndex);
|
|
308
|
-
descriptionsArray.push(description);
|
|
309
|
-
if (startOfSequence !== 0)
|
|
310
|
-
sequencesArray.push(parseMacromolecule(fileContent, startOfSequence, match.index));
|
|
311
|
-
startOfSequence = regex.lastIndex + 1;
|
|
312
|
-
}
|
|
313
|
-
sequencesArray.push(parseMacromolecule(fileContent, startOfSequence, -1));
|
|
314
|
-
const descriptionsArrayCol = DG.Column.fromStrings('description', descriptionsArray);
|
|
315
|
-
const sequenceCol = DG.Column.fromStrings('sequence', sequencesArray);
|
|
316
|
-
sequenceCol.semType = 'Macromolecule';
|
|
317
|
-
const stats: SeqColStats = WebLogo.getStats(sequenceCol, 5, WebLogo.splitterAsFasta);
|
|
318
|
-
const seqType = stats.sameLength ? 'SEQ.MSA' : 'SEQ';
|
|
319
|
-
|
|
320
|
-
const PeptideFastaAlphabet = new Set([
|
|
321
|
-
'G', 'L', 'Y', 'S', 'E', 'Q', 'D', 'N', 'F', 'A',
|
|
322
|
-
'K', 'R', 'H', 'C', 'V', 'P', 'W', 'I', 'M', 'T',
|
|
323
|
-
]);
|
|
324
|
-
|
|
325
|
-
const DnaFastaAlphabet = new Set(['A', 'C', 'G', 'T']);
|
|
326
|
-
|
|
327
|
-
const RnaFastaAlphabet = new Set(['A', 'C', 'G', 'U']);
|
|
328
|
-
|
|
329
|
-
//const SmilesRawAlphabet = new Set([
|
|
330
|
-
// 'O', 'C', 'c', 'N', 'S', 'F', '(', ')',
|
|
331
|
-
// '1', '2', '3', '4', '5', '6', '7',
|
|
332
|
-
// '+', '-', '@', '[', ']', '/', '\\', '#', '=']);
|
|
333
|
-
|
|
334
|
-
const alphabetCandidates: [string, Set<string>][] = [
|
|
335
|
-
['PT', PeptideFastaAlphabet],
|
|
336
|
-
['DNA', DnaFastaAlphabet],
|
|
337
|
-
['RNA', RnaFastaAlphabet],
|
|
338
|
-
];
|
|
339
|
-
|
|
340
|
-
//const alphabetCandidates: [string, Set<string>][] = [
|
|
341
|
-
// ['NT', new Set(Object.keys(Nucleotides.Names))],
|
|
342
|
-
// ['PT', new Set(Object.keys(Aminoacids.Names))],
|
|
343
|
-
//];
|
|
344
|
-
|
|
345
|
-
// Calculate likelihoods for alphabet_candidates
|
|
346
|
-
const alphabetCandidatesSim: number[] = alphabetCandidates.map(
|
|
347
|
-
(c) => WebLogo.getAlphabetSimilarity(stats.freq, c[1]));
|
|
348
|
-
const maxCos = Math.max(...alphabetCandidatesSim);
|
|
349
|
-
const alphabet = maxCos > 0.65 ? alphabetCandidates[alphabetCandidatesSim.indexOf(maxCos)][0] : 'UN';
|
|
350
|
-
sequenceCol.semType = DG.SEMTYPE.MACROMOLECULE;
|
|
351
|
-
const units: string = `fasta:${seqType}:${alphabet}`;
|
|
352
|
-
sequenceCol.setTag(DG.TAGS.UNITS, units);
|
|
353
|
-
|
|
354
|
-
return [DG.DataFrame.fromColumns([
|
|
355
|
-
descriptionsArrayCol,
|
|
356
|
-
sequenceCol,
|
|
357
|
-
])];
|
|
324
|
+
const ffh = new FastaFileHandler(fileContent);
|
|
325
|
+
return ffh.importFasta();
|
|
358
326
|
}
|
|
359
327
|
|
|
360
328
|
//name: Bio | Convert ...
|
|
@@ -388,6 +356,7 @@ export async function testDetectMacromolecule(path: string): Promise<DG.DataFram
|
|
|
388
356
|
const pi = DG.TaskBarProgressIndicator.create('Test detectMacromolecule...');
|
|
389
357
|
|
|
390
358
|
const fileList = await grok.dapi.files.list(path, true, '');
|
|
359
|
+
//@ts-ignore
|
|
391
360
|
const fileListToTest = fileList.filter((fi) => fi.fileName.endsWith('.csv'));
|
|
392
361
|
|
|
393
362
|
let readyCount = 0;
|
|
@@ -0,0 +1,68 @@
|
|
|
1
|
+
import {after, before, category, test, expect, expectObject} from '@datagrok-libraries/utils/src/test';
|
|
2
|
+
|
|
3
|
+
import * as grok from 'datagrok-api/grok';
|
|
4
|
+
import * as ui from 'datagrok-api/ui';
|
|
5
|
+
import * as DG from 'datagrok-api/dg';
|
|
6
|
+
import {PositionInfo, PositionMonomerInfo, WebLogo} from '@datagrok-libraries/bio/src/viewers/web-logo';
|
|
7
|
+
|
|
8
|
+
category('WebLogo-positions', () => {
|
|
9
|
+
let tvList: DG.TableView[];
|
|
10
|
+
let dfList: DG.DataFrame[];
|
|
11
|
+
|
|
12
|
+
const csvDf1 = `seq
|
|
13
|
+
ATC-G-TTGC--
|
|
14
|
+
ATC-G-TTGC--
|
|
15
|
+
-TC-G-TTGC--
|
|
16
|
+
-TC-GCTTGC--
|
|
17
|
+
-TC-GCTTGC--`;
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
const resShrinkEmptyTailDf1: PositionInfo[] = [];
|
|
21
|
+
|
|
22
|
+
before(async () => {
|
|
23
|
+
tvList = [];
|
|
24
|
+
dfList = [];
|
|
25
|
+
});
|
|
26
|
+
|
|
27
|
+
after(async () => {
|
|
28
|
+
dfList.forEach((df: DG.DataFrame) => { grok.shell.closeTable(df); });
|
|
29
|
+
tvList.forEach((tv: DG.TableView) => tv.close());
|
|
30
|
+
});
|
|
31
|
+
|
|
32
|
+
test('allPositions', async () => {
|
|
33
|
+
const df: DG.DataFrame = DG.DataFrame.fromCsv(csvDf1);
|
|
34
|
+
const tv: DG.TableView = grok.shell.addTableView(df);
|
|
35
|
+
|
|
36
|
+
const wlViewer: WebLogo = await df.plot.fromType('WebLogo') as unknown as WebLogo;
|
|
37
|
+
tv.dockManager.dock(wlViewer.root, DG.DOCK_TYPE.DOWN);
|
|
38
|
+
|
|
39
|
+
tvList.push(tv);
|
|
40
|
+
dfList.push(df);
|
|
41
|
+
|
|
42
|
+
const positions: PositionInfo[] = wlViewer['positions'];
|
|
43
|
+
|
|
44
|
+
const resAllDf1: PositionInfo[] = [
|
|
45
|
+
new PositionInfo('1', {'A': new PositionMonomerInfo(2), '-': new PositionMonomerInfo(3)}),
|
|
46
|
+
new PositionInfo('2', {'T': new PositionMonomerInfo(5)}),
|
|
47
|
+
new PositionInfo('3', {'C': new PositionMonomerInfo(5)}),
|
|
48
|
+
new PositionInfo('4', {'-': new PositionMonomerInfo(5)}),
|
|
49
|
+
new PositionInfo('5', {'G': new PositionMonomerInfo(5)}),
|
|
50
|
+
new PositionInfo('6', {'-': new PositionMonomerInfo(3), 'C': new PositionMonomerInfo(2)}),
|
|
51
|
+
new PositionInfo('7', {'T': new PositionMonomerInfo(5)}),
|
|
52
|
+
new PositionInfo('8', {'T': new PositionMonomerInfo(5)}),
|
|
53
|
+
new PositionInfo('9', {'G': new PositionMonomerInfo(5)}),
|
|
54
|
+
new PositionInfo('10', {'C': new PositionMonomerInfo(5)}),
|
|
55
|
+
new PositionInfo('11', {'-': new PositionMonomerInfo(5)}),
|
|
56
|
+
new PositionInfo('12', {'-': new PositionMonomerInfo(5)})
|
|
57
|
+
];
|
|
58
|
+
// check all positions are equal resAllDf1
|
|
59
|
+
for (let i = 0; i < positions.length; i++) {
|
|
60
|
+
expect(positions[i].name, resAllDf1[i].name);
|
|
61
|
+
for (const key in positions[i].freq) {
|
|
62
|
+
expect(positions[i].freq[key].count, resAllDf1[i].freq[key].count);
|
|
63
|
+
}
|
|
64
|
+
}
|
|
65
|
+
|
|
66
|
+
});
|
|
67
|
+
|
|
68
|
+
});
|
|
@@ -7,7 +7,7 @@ import {readDataframe} from './utils';
|
|
|
7
7
|
import {getEmbeddingColsNames, sequenceSpace} from '../utils/sequence-space';
|
|
8
8
|
import {drawTooltip, sequenceGetSimilarities} from '../utils/sequence-activity-cliffs';
|
|
9
9
|
import {getActivityCliffs} from '@datagrok-libraries/ml/src/viewers/activity-cliffs';
|
|
10
|
-
import {
|
|
10
|
+
import {encodeMonomers} from '../utils/utils';
|
|
11
11
|
|
|
12
12
|
|
|
13
13
|
category('activityCliffs', async () => {
|
|
@@ -53,6 +53,6 @@ category('activityCliffs', async () => {
|
|
|
53
53
|
|
|
54
54
|
const cliffsLink = (Array.from(scatterPlot.root.children) as Element[])
|
|
55
55
|
.filter((it) => it.className === 'ui-btn ui-btn-ok');
|
|
56
|
-
expect((cliffsLink[0] as HTMLElement).innerText, '
|
|
56
|
+
expect((cliffsLink[0] as HTMLElement).innerText, '2362 cliffs');
|
|
57
57
|
});
|
|
58
58
|
});
|
|
@@ -124,6 +124,7 @@ MWRSWY-CKHP
|
|
|
124
124
|
testActivityCliffsCsv = 'testActivityCliffsCsv',
|
|
125
125
|
testSpgi100 = 'testSpgi100',
|
|
126
126
|
testUnichemSources = 'testUnichemSources',
|
|
127
|
+
testDmvOffices = 'testDmvOffices',
|
|
127
128
|
}
|
|
128
129
|
|
|
129
130
|
const samples: { [key: string]: string } = {
|
|
@@ -143,6 +144,7 @@ MWRSWY-CKHP
|
|
|
143
144
|
'testCerealCsv': 'System:AppData/Bio/tests/testCereal.csv',
|
|
144
145
|
'testSpgi100': 'System:AppData/Bio/tests/testSpgi100.csv',
|
|
145
146
|
'testUnichemSources': 'System:AppData/Bio/tests/testUnichemSources.csv',
|
|
147
|
+
'testDmvOffices': 'System:AppData/Bio/tests/testDmvOffices.csv',
|
|
146
148
|
};
|
|
147
149
|
|
|
148
150
|
const _samplesDfs: { [key: string]: Promise<DG.DataFrame> } = {};
|
|
@@ -186,7 +188,7 @@ MWRSWY-CKHP
|
|
|
186
188
|
};
|
|
187
189
|
};
|
|
188
190
|
|
|
189
|
-
test('NegativeEmpty', async () => {await _testNeg(readCsv('csvDfEmpty', csvDfEmpty), 'col1'); });
|
|
191
|
+
test('NegativeEmpty', async () => { await _testNeg(readCsv('csvDfEmpty', csvDfEmpty), 'col1'); });
|
|
190
192
|
test('Negative1', async () => { await _testNeg(readCsv('csvDf1', csvDf1), 'col1'); });
|
|
191
193
|
test('Negative2', async () => { await _testNeg(readCsv('csvDf2', csvDf2), 'col1'); });
|
|
192
194
|
test('Negative3', async () => { await _testNeg(readCsv('csvDf3', csvDf3), 'col1'); });
|
|
@@ -347,6 +349,13 @@ MWRSWY-CKHP
|
|
|
347
349
|
test('samplesTestUnichemSourcesNegativeBaseIdUrl', async () => {
|
|
348
350
|
await _testNeg(readSamples(Samples.testUnichemSources), 'base_id_url');
|
|
349
351
|
});
|
|
352
|
+
|
|
353
|
+
test('samplesTestDmvOfficesNegativeOfficeName', async () => {
|
|
354
|
+
await _testNeg(readSamples(Samples.testDmvOffices), 'Office Name');
|
|
355
|
+
});
|
|
356
|
+
test('samplesTestDmvOfficesNegativeCity', async () => {
|
|
357
|
+
await _testNeg(readSamples(Samples.testDmvOffices), 'City');
|
|
358
|
+
});
|
|
350
359
|
});
|
|
351
360
|
|
|
352
361
|
export async function _testNeg(readDf: DfReaderFunc, colName: string) {
|
package/src/tests/msa-tests.ts
CHANGED
|
@@ -1,10 +1,10 @@
|
|
|
1
|
-
import
|
|
2
|
-
import
|
|
3
|
-
_testMSAIsCorrect,
|
|
4
|
-
_testTableIsNotEmpty,
|
|
5
|
-
} from './utils';
|
|
6
|
-
|
|
1
|
+
import * as grok from 'datagrok-api/grok';
|
|
2
|
+
import * as ui from 'datagrok-api/ui';
|
|
7
3
|
import * as DG from 'datagrok-api/dg';
|
|
4
|
+
|
|
5
|
+
import {category, expect, expectArray, test} from '@datagrok-libraries/utils/src/test';
|
|
6
|
+
|
|
7
|
+
import {runKalign} from '../utils/multiple-sequence-alignment';
|
|
8
8
|
//import * as grok from 'datagrok-api/grok';
|
|
9
9
|
|
|
10
10
|
export const _package = new DG.Package();
|
|
@@ -13,22 +13,47 @@ export const _package = new DG.Package();
|
|
|
13
13
|
category('MSA', async () => {
|
|
14
14
|
//table = await grok.data.files.openTable('Demo:Files/bio/peptides.csv');
|
|
15
15
|
const fromCsv = `seq
|
|
16
|
-
|
|
17
|
-
|
|
18
|
-
|
|
16
|
+
FWRWYVKHP
|
|
17
|
+
YNRWYVKHP
|
|
18
|
+
MWRSWYCKHP`;
|
|
19
19
|
const toCsv = `seq
|
|
20
|
-
|
|
21
|
-
|
|
22
|
-
|
|
23
|
-
|
|
24
|
-
const
|
|
25
|
-
|
|
26
|
-
|
|
27
|
-
|
|
28
|
-
|
|
20
|
+
FWR-WYVKHP
|
|
21
|
+
YNR-WYVKHP
|
|
22
|
+
MWRSWYCKHP`;
|
|
23
|
+
|
|
24
|
+
const longFromCsv = `seq
|
|
25
|
+
FWRWYVKHPFWRWYVKHPFWRWYVKHPFWRWYVKHPFWRWYVKHPFWRWYVKHPFWRWYVKHPFWRWYVKHP
|
|
26
|
+
YNRWYVKHPYNRWYVKHPYNRWYVKHPYNRWYVKHPYNRWYVKHPYNRWYVKHPYNRWYVKHPYNRWYVKHP
|
|
27
|
+
MWRSWYCKHPMWRSWYCKHPMWRSWYCKHPMWRSWYCKHPMWRSWYCKHPMWRSWYCKHPMWRSWYCKHPMWRSWYCKHP`;
|
|
28
|
+
|
|
29
|
+
const longToCsv = `seq
|
|
30
|
+
FWR-WYVKHPFWR-WYVKHPFWR-WYVKHPFWR-WYVKHPFWR-WYVKHPFWR-WYVKHPFWR-WYVKHPFWR-WYVKHP
|
|
31
|
+
YNR-WYVKHPYNR-WYVKHPYNR-WYVKHPYNR-WYVKHPYNR-WYVKHPYNR-WYVKHPYNR-WYVKHPYNR-WYVKHP
|
|
32
|
+
MWRSWYCKHPMWRSWYCKHPMWRSWYCKHPMWRSWYCKHPMWRSWYCKHPMWRSWYCKHPMWRSWYCKHPMWRSWYCKHP`;
|
|
33
|
+
|
|
34
|
+
// test('test_table.is_not_empty', async () => {
|
|
35
|
+
// await _testTableIsNotEmpty(table);
|
|
36
|
+
// });
|
|
37
|
+
|
|
38
|
+
test('isCorrect', async () => {
|
|
39
|
+
await _testMsaIsCorrect(fromCsv, toCsv);
|
|
29
40
|
});
|
|
30
41
|
|
|
31
|
-
test('
|
|
32
|
-
await
|
|
42
|
+
test('isCorrectLong', async () => {
|
|
43
|
+
await _testMsaIsCorrect(longFromCsv, longToCsv);
|
|
33
44
|
});
|
|
34
45
|
});
|
|
46
|
+
|
|
47
|
+
async function _testMsaIsCorrect(srcCsv: string, tgtCsv: string): Promise<void> {
|
|
48
|
+
const srcDf: DG.DataFrame = DG.DataFrame.fromCsv(srcCsv);
|
|
49
|
+
const tgtDf: DG.DataFrame = DG.DataFrame.fromCsv(tgtCsv);
|
|
50
|
+
|
|
51
|
+
const srcCol: DG.Column = srcDf.getCol('seq')!;
|
|
52
|
+
const semType: string = await grok.functions.call('Bio:detectMacromolecule', {col: srcCol});
|
|
53
|
+
if (semType)
|
|
54
|
+
srcCol.semType = semType;
|
|
55
|
+
|
|
56
|
+
const tgtCol: DG.Column = tgtDf.getCol('seq')!;
|
|
57
|
+
const msaCol: DG.Column = await runKalign(srcCol, true);
|
|
58
|
+
expectArray(msaCol.toList(), tgtCol.toList());
|
|
59
|
+
}
|
package/src/tests/utils.ts
CHANGED
|
@@ -31,15 +31,3 @@ export async function createTableView(tableName: string): Promise<DG.TableView>
|
|
|
31
31
|
export function _testTableIsNotEmpty(table: DG.DataFrame): void {
|
|
32
32
|
expect(table.columns.length > 0 && table.rowCount > 0, true);
|
|
33
33
|
}
|
|
34
|
-
|
|
35
|
-
|
|
36
|
-
/**
|
|
37
|
-
* Tests if MSA works and returns consistent result.
|
|
38
|
-
*
|
|
39
|
-
* @export
|
|
40
|
-
* @param {DG.Column} col Aligned sequences column.
|
|
41
|
-
*/
|
|
42
|
-
export async function _testMSAIsCorrect(col: DG.Column): Promise<void> {
|
|
43
|
-
const msaCol = await runKalign(col, true);
|
|
44
|
-
expect(msaCol.toList().every((v, i) => (v == col.get(i) || v == null)), true);
|
|
45
|
-
}
|
|
@@ -9,7 +9,7 @@ import * as ui from 'datagrok-api/ui';
|
|
|
9
9
|
|
|
10
10
|
export const lru = new DG.LruCache<any, any>();
|
|
11
11
|
const undefinedColor = 'rgb(100,100,100)';
|
|
12
|
-
const grayColor = '#808080'
|
|
12
|
+
const grayColor = '#808080';
|
|
13
13
|
|
|
14
14
|
function getPalleteByType(paletteType: string): SeqPalette {
|
|
15
15
|
switch (paletteType) {
|
|
@@ -68,9 +68,7 @@ function printLeftOrCentered(
|
|
|
68
68
|
separator: string = '', last: boolean = false): number {
|
|
69
69
|
g.textAlign = 'start';
|
|
70
70
|
const colorPart = s.substring(0);
|
|
71
|
-
let grayPart = separator;
|
|
72
|
-
if (last)
|
|
73
|
-
grayPart = '';
|
|
71
|
+
let grayPart = last ? '' : separator;
|
|
74
72
|
|
|
75
73
|
const textSize = g.measureText(colorPart + grayPart);
|
|
76
74
|
const indent = 5;
|
|
@@ -142,7 +140,6 @@ export class MacromoleculeSequenceCellRenderer extends DG.GridCellRenderer {
|
|
|
142
140
|
const cell = gridCell.cell;
|
|
143
141
|
const tag = gridCell.cell.column.getTag(DG.TAGS.UNITS);
|
|
144
142
|
if (tag === 'HELM') {
|
|
145
|
-
console.log(findMonomers(cell.value));
|
|
146
143
|
const monomers = findMonomers(cell.value);
|
|
147
144
|
if (monomers.size == 0) {
|
|
148
145
|
const host = ui.div([], {style: {width: `${w}px`, height: `${h}px`}});
|
|
@@ -173,17 +170,10 @@ export class MacromoleculeSequenceCellRenderer extends DG.GridCellRenderer {
|
|
|
173
170
|
let x1 = x;
|
|
174
171
|
const s: string = cell.value ?? '';
|
|
175
172
|
let subParts: string[] = WebLogo.splitterAsHelm(s);
|
|
176
|
-
let color = undefinedColor;
|
|
177
173
|
subParts.forEach((amino, index) => {
|
|
178
|
-
|
|
179
|
-
color = 'red';
|
|
180
|
-
} else {
|
|
181
|
-
color = grayColor;
|
|
182
|
-
}
|
|
174
|
+
let color = monomers.has(amino) ? 'red' : grayColor;
|
|
183
175
|
g.fillStyle = undefinedColor;
|
|
184
|
-
let last =
|
|
185
|
-
if (index === subParts.length - 1)
|
|
186
|
-
last = true;
|
|
176
|
+
let last = index === subParts.length - 1;
|
|
187
177
|
x1 = printLeftOrCentered(x1, y, w, h, g, amino, color, 0, true, 1.0, '/', last);
|
|
188
178
|
});
|
|
189
179
|
g.restore();
|
|
@@ -215,10 +205,7 @@ export class MacromoleculeSequenceCellRenderer extends DG.GridCellRenderer {
|
|
|
215
205
|
subParts.forEach((amino, index) => {
|
|
216
206
|
color = palette.get(amino);
|
|
217
207
|
g.fillStyle = undefinedColor;
|
|
218
|
-
let last =
|
|
219
|
-
if (index === subParts.length - 1)
|
|
220
|
-
last = true;
|
|
221
|
-
|
|
208
|
+
let last = index === subParts.length - 1;
|
|
222
209
|
x1 = printLeftOrCentered(x1, y, w, h, g, amino, color, 0, true, 1.0, separator, last);
|
|
223
210
|
});
|
|
224
211
|
|
|
@@ -239,16 +226,16 @@ export class AminoAcidsCellRenderer extends DG.GridCellRenderer {
|
|
|
239
226
|
get defaultWidth(): number {return 30;}
|
|
240
227
|
|
|
241
228
|
/**
|
|
242
|
-
|
|
243
|
-
|
|
244
|
-
|
|
245
|
-
|
|
246
|
-
|
|
247
|
-
|
|
248
|
-
|
|
249
|
-
|
|
250
|
-
|
|
251
|
-
|
|
229
|
+
* Cell renderer function.
|
|
230
|
+
*
|
|
231
|
+
* @param {CanvasRenderingContext2D} g Canvas rendering context.
|
|
232
|
+
* @param {number} x x coordinate on the canvas.
|
|
233
|
+
* @param {number} y y coordinate on the canvas.
|
|
234
|
+
* @param {number} w width of the cell.
|
|
235
|
+
* @param {number} h height of the cell.
|
|
236
|
+
* @param {DG.GridCell} gridCell Grid cell.
|
|
237
|
+
* @param {DG.GridCellStyle} cellStyle Cell style.
|
|
238
|
+
*/
|
|
252
239
|
render(
|
|
253
240
|
g: CanvasRenderingContext2D, x: number, y: number, w: number, h: number, gridCell: DG.GridCell,
|
|
254
241
|
cellStyle: DG.GridCellStyle): void {
|
|
@@ -318,7 +305,7 @@ export class AlignedSequenceDifferenceCellRenderer extends DG.GridCellRenderer {
|
|
|
318
305
|
|
|
319
306
|
const palette = getPalleteByType(gridCell.tableColumn!.tags[C.TAGS.ALPHABET]);
|
|
320
307
|
for (let i = 0; i < subParts1.length; i++) {
|
|
321
|
-
const amino1 = subParts1[i]
|
|
308
|
+
const amino1 = subParts1[i];
|
|
322
309
|
const amino2 = subParts2[i];
|
|
323
310
|
const color1 = palette.get(amino1);
|
|
324
311
|
const color2 = palette.get(amino2);
|
|
@@ -1,5 +1,10 @@
|
|
|
1
|
+
/* Do not change these import lines to match external modules in webpack configuration */
|
|
2
|
+
import * as grok from 'datagrok-api/grok';
|
|
3
|
+
import * as ui from 'datagrok-api/ui';
|
|
1
4
|
import * as DG from 'datagrok-api/dg';
|
|
2
5
|
|
|
6
|
+
import {FastaFileHandler} from '@datagrok-libraries/bio/src/utils/fasta-handler';
|
|
7
|
+
|
|
3
8
|
//@ts-ignore
|
|
4
9
|
import Aioli from '@biowasm/aioli';
|
|
5
10
|
|
|
@@ -15,16 +20,6 @@ function _stringsToFasta(sequences: string[]): string {
|
|
|
15
20
|
return sequences.reduce((a, v, i) => a + `>sample${i + 1}\n${v}\n`, '');
|
|
16
21
|
}
|
|
17
22
|
|
|
18
|
-
/**
|
|
19
|
-
* Extracts array of sequences from simple fasta string.
|
|
20
|
-
*
|
|
21
|
-
* @param {string} fasta Fasta-formatted string.
|
|
22
|
-
* @return {string[]} Output list of sequences.
|
|
23
|
-
*/
|
|
24
|
-
function _fastaToStrings(fasta: string): string[] {
|
|
25
|
-
return fasta.replace(/>sample\d+(\r\n|\r|\n)/g, '').split('\n');
|
|
26
|
-
}
|
|
27
|
-
|
|
28
23
|
/**
|
|
29
24
|
* Runs Aioli environment with kalign tool.
|
|
30
25
|
*
|
|
@@ -56,7 +51,8 @@ export async function runKalign(srcCol: DG.Column, isAligned = false, unUsedName
|
|
|
56
51
|
if (!buf)
|
|
57
52
|
throw new Error(`kalign output no result`);
|
|
58
53
|
|
|
59
|
-
const
|
|
54
|
+
const ffh = new FastaFileHandler(buf);
|
|
55
|
+
const aligned = ffh.sequencesArray; // array of sequences extracted from FASTA
|
|
60
56
|
const tgtCol = DG.Column.fromStrings(unUsedName, aligned);
|
|
61
57
|
|
|
62
58
|
// units
|
package/src/utils/utils.ts
CHANGED
|
@@ -14,7 +14,7 @@ export const HELM_CORE_FIELDS = ['symbol', 'molfile', 'rgroups', 'name'];
|
|
|
14
14
|
|
|
15
15
|
export function encodeMonomers(col: DG.Column): DG.Column | null {
|
|
16
16
|
let encodeSymbol = MONOMER_ENCODE_MIN;
|
|
17
|
-
const monomerSymbolDict:
|
|
17
|
+
const monomerSymbolDict: { [key: string]: number } = {};
|
|
18
18
|
const units = col.tags[DG.TAGS.UNITS];
|
|
19
19
|
const sep = col.getTag('separator');
|
|
20
20
|
const splitterFunc: SplitterFunc = WebLogo.getSplitter(units, sep);
|
|
@@ -22,9 +22,9 @@ export function encodeMonomers(col: DG.Column): DG.Column | null {
|
|
|
22
22
|
for (let i = 0; i < col.length; ++i) {
|
|
23
23
|
let encodedMonomerStr = '';
|
|
24
24
|
const monomers = splitterFunc(col.get(i));
|
|
25
|
-
monomers.forEach(m => {
|
|
26
|
-
if(!monomerSymbolDict[m]) {
|
|
27
|
-
if(encodeSymbol > MONOMER_ENCODE_MAX) {
|
|
25
|
+
monomers.forEach((m) => {
|
|
26
|
+
if (!monomerSymbolDict[m]) {
|
|
27
|
+
if (encodeSymbol > MONOMER_ENCODE_MAX) {
|
|
28
28
|
grok.shell.error(`Not enougth symbols to encode monomers`);
|
|
29
29
|
return null;
|
|
30
30
|
}
|
|
@@ -32,7 +32,7 @@ export function encodeMonomers(col: DG.Column): DG.Column | null {
|
|
|
32
32
|
encodeSymbol++;
|
|
33
33
|
}
|
|
34
34
|
encodedMonomerStr += String.fromCodePoint(monomerSymbolDict[m]);
|
|
35
|
-
})
|
|
35
|
+
});
|
|
36
36
|
encodedStringArray.push(encodedMonomerStr);
|
|
37
37
|
}
|
|
38
38
|
return DG.Column.fromStrings('encodedMolecules', encodedStringArray);
|
|
@@ -129,4 +129,4 @@ export function createJsonMonomerLibFromSdf(table: DG.DataFrame): any {
|
|
|
129
129
|
resultLib.push(monomer);
|
|
130
130
|
}
|
|
131
131
|
return resultLib;
|
|
132
|
-
}
|
|
132
|
+
}
|
|
@@ -9,6 +9,7 @@ import {getMacroMol} from '../utils/atomic-works';
|
|
|
9
9
|
*
|
|
10
10
|
* @export
|
|
11
11
|
* @param {DG.Cell} macroMolecule macromolecule cell.
|
|
12
|
+
* @param {any[]} monomersLibObject
|
|
12
13
|
* @return {Promise<DG.Widget>} Widget.
|
|
13
14
|
*/
|
|
14
15
|
export async function representationsWidget(macroMolecule: DG.Cell, monomersLibObject: any[]): Promise<DG.Widget> {
|