@datagrok/peptides 0.7.1 → 0.8.7
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.eslintrc.json +12 -2
- package/files/aligned_2.csv +1 -1
- package/package.json +21 -15
- package/setup.sh +15 -0
- package/src/describe.ts +45 -49
- package/src/model.ts +2 -1
- package/src/monomer-library.ts +187 -0
- package/src/package-test.ts +19 -0
- package/src/package.ts +79 -7
- package/src/peptides.ts +62 -21
- package/src/semantics.ts +5 -0
- package/src/tests/msa-tests.ts +27 -0
- package/src/tests/peptide-space-test.ts +77 -0
- package/src/tests/peptides-tests.ts +121 -0
- package/src/tests/test-data.ts +649 -0
- package/src/tests/utils.ts +126 -0
- package/src/utils/cell-renderer.ts +129 -26
- package/src/utils/chem-palette.ts +16 -15
- package/src/utils/molecular-measure.ts +3 -4
- package/src/utils/multiple-sequence-alignment.ts +88 -0
- package/src/utils/peptide-similarity-space.ts +39 -13
- package/src/utils/split-aligned.ts +6 -6
- package/src/viewers/logo-viewer.ts +10 -12
- package/src/viewers/sar-viewer.ts +16 -17
- package/src/viewers/stacked-barchart-viewer.ts +21 -26
- package/src/viewers/subst-viewer.ts +16 -13
- package/src/widgets/analyze-peptides.ts +6 -6
- package/src/widgets/manual-alignment.ts +5 -3
- package/src/widgets/peptide-molecule.ts +21 -13
- package/src/workers/dimensionality-reducer.ts +2 -1
- package/tsconfig.json +1 -1
- package/webpack.config.js +16 -2
package/.eslintrc.json
CHANGED
|
@@ -23,7 +23,17 @@
|
|
|
23
23
|
"error",
|
|
24
24
|
120
|
|
25
25
|
],
|
|
26
|
+
"require-jsdoc": "off",
|
|
26
27
|
"spaced-comment": "off",
|
|
27
|
-
"
|
|
28
|
+
"linebreak-style": "off",
|
|
29
|
+
"curly": [
|
|
30
|
+
"error",
|
|
31
|
+
"multi-or-nest"
|
|
32
|
+
],
|
|
33
|
+
"brace-style": [
|
|
34
|
+
"error",
|
|
35
|
+
"1tbs",
|
|
36
|
+
{ "allowSingleLine": true }
|
|
37
|
+
]
|
|
28
38
|
}
|
|
29
|
-
}
|
|
39
|
+
}
|
package/files/aligned_2.csv
CHANGED
|
@@ -10272,4 +10272,4 @@ ID,AlignedSequence,Measured,Value
|
|
|
10272
10272
|
1357,HOC18gEPEG2PEG2-k(Me3)-Aib-K(PEG2PEG2PgEC18OH)-Hyp-7(4OCF3Ph)W-K(Me)3-S5H(4)-mTyr-6OH2Nal 6OH2Nal-aMeK-S5H-3Pya-H-4diFPro-PEG2PEG2eKC16OH-ON(Me)2A,SGF (No protein present) % Remaining 24 hr,86.31581627936768
|
|
10273
10273
|
1359,HOC18gEPEG2PEG2-k(Me3)-Aib-K(PEG2PEG2PgEC18OH)-Hyp-7(4OCF3Ph)W-K(Me)3-S5H(4)-mTyr-6OH2Nal 6OH2Nal-aMeK-S5H-3Pya-homobAla-4diFPro-PEG2PEG2eKC16OH-ON(Me)2A,SGF (No protein present) % Remaining 24 hr,93.44441627936769
|
|
10274
10274
|
1360,HOC18gEPEG2PEG2-k(Me3)-Aib-K(PEG2PEG2PgEC18OH)-Hyp-7(4OCF3Ph)W-K(Me)3-S5H(4)-mTyr-6OH2Nal 6OH2Nal-aMeK-S5H-3Pya-D(NPyr)-4diFPro-PEG2PEG2eKC16OH-ON(Me)2A,SGF (No protein present) % Remaining 24 hr,88.34951627936769
|
|
10275
|
-
1361,HOC18gEPEG2PEG2-k(Me3)-Aib-K(PEG2PEG2PgEC18OH)-Hyp-7(4OCF3Ph)W-K(Me)3-S5H(4)-mTyr-6OH2Nal 6OH2Nal-aMeK-S5H-3Pya-3OHPhe-4diFPro-PEG2PEG2eKC16OH-ON(Me)2A,SGF (No protein present) % Remaining 24 hr,98.38061627936769
|
|
10275
|
+
1361,HOC18gEPEG2PEG2-k(Me3)-Aib-K(PEG2PEG2PgEC18OH)-Hyp-7(4OCF3Ph)W-K(Me)3-S5H(4)-mTyr-6OH2Nal 6OH2Nal-aMeK-S5H-3Pya-3OHPhe-4diFPro-PEG2PEG2eKC16OH-ON(Me)2A,SGF (No protein present) % Remaining 24 hr,98.38061627936769
|
package/package.json
CHANGED
|
@@ -1,31 +1,33 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@datagrok/peptides",
|
|
3
|
-
"version": "0.7.1",
|
|
3
|
+
"version": "0.8.7",
|
|
4
4
|
"description": "",
|
|
5
5
|
"dependencies": {
|
|
6
|
+
"@biowasm/aioli": ">=2.4.0",
|
|
7
|
+
"@datagrok-libraries/bio": ">=0.0.4",
|
|
8
|
+
"@datagrok-libraries/ml": ">=0.0.14",
|
|
9
|
+
"@datagrok-libraries/statistics": ">=0.1.5",
|
|
10
|
+
"@datagrok-libraries/utils": ">=0.0.22",
|
|
11
|
+
"@types/d3": "^7.0.0",
|
|
12
|
+
"@types/jquery": "^3.5.6",
|
|
6
13
|
"cash-dom": "latest",
|
|
7
14
|
"d3": "latest",
|
|
8
|
-
"datagrok-api": ">=0.
|
|
15
|
+
"datagrok-api": ">=0.115.0",
|
|
9
16
|
"dayjs": "latest",
|
|
17
|
+
"file-loader": "^6.2.0",
|
|
10
18
|
"jstat": "^1.9.5",
|
|
11
19
|
"logojs-react": "^2.1.1",
|
|
12
|
-
"rxjs": "^6.5.5"
|
|
13
|
-
"@datagrok-libraries/utils": ">=0.0.13",
|
|
14
|
-
"@datagrok-libraries/statistics": ">=0.1.5",
|
|
15
|
-
"@datagrok-libraries/ml": ">=0.0.1",
|
|
16
|
-
"@datagrok-libraries/bio": ">=0.0.4",
|
|
17
|
-
"@types/d3": "^7.0.0",
|
|
18
|
-
"@types/jquery": "^3.5.6"
|
|
20
|
+
"rxjs": "^6.5.5"
|
|
19
21
|
},
|
|
20
22
|
"devDependencies": {
|
|
21
|
-
"typescript": "^4.4.4",
|
|
22
|
-
"ts-loader": "^9.2.5",
|
|
23
|
-
"css-loader": "^5.2.4",
|
|
24
|
-
"style-loader": "^2.0.0",
|
|
25
23
|
"@typescript-eslint/eslint-plugin": "^4.29.1",
|
|
26
24
|
"@typescript-eslint/parser": "^4.29.1",
|
|
25
|
+
"css-loader": "^5.2.4",
|
|
27
26
|
"eslint": "^7.32.0",
|
|
28
27
|
"eslint-config-google": "^0.14.0",
|
|
28
|
+
"style-loader": "^2.0.0",
|
|
29
|
+
"ts-loader": "^9.2.5",
|
|
30
|
+
"typescript": "^4.4.4",
|
|
29
31
|
"webpack": "latest",
|
|
30
32
|
"webpack-cli": "latest"
|
|
31
33
|
},
|
|
@@ -55,6 +57,10 @@
|
|
|
55
57
|
"lint": "eslint \"./src/**/*.ts\"",
|
|
56
58
|
"lint-fix": "eslint \"./src/**/*.ts\" --fix"
|
|
57
59
|
},
|
|
58
|
-
"canEdit": [
|
|
59
|
-
|
|
60
|
+
"canEdit": [
|
|
61
|
+
"Developers"
|
|
62
|
+
],
|
|
63
|
+
"canView": [
|
|
64
|
+
"All users"
|
|
65
|
+
]
|
|
60
66
|
}
|
package/setup.sh
ADDED
|
@@ -0,0 +1,15 @@
|
|
|
1
|
+
npm unlink datagrok-api
|
|
2
|
+
npm unlink @datagrok-libraries/utils
|
|
3
|
+
npm unlink @datagrok-libraries/ml
|
|
4
|
+
cd ../../js-api
|
|
5
|
+
npm install
|
|
6
|
+
npm link
|
|
7
|
+
cd ../libraries/utils
|
|
8
|
+
npm install
|
|
9
|
+
npm link
|
|
10
|
+
cd ../../libraries/ml
|
|
11
|
+
npm install
|
|
12
|
+
npm link datagrok-api @datagrok-libraries/utils
|
|
13
|
+
cd ../../packages/Peptides
|
|
14
|
+
npm install
|
|
15
|
+
npm link datagrok-api @datagrok-libraries/utils @datagrok-libraries/ml
|
package/src/describe.ts
CHANGED
|
@@ -3,6 +3,7 @@ import * as DG from 'datagrok-api/dg';
|
|
|
3
3
|
import {splitAlignedPeptides} from './utils/split-aligned';
|
|
4
4
|
import {tTest} from '@datagrok-libraries/statistics/src/tests';
|
|
5
5
|
import {fdrcorrection} from '@datagrok-libraries/statistics/src/multiple-tests';
|
|
6
|
+
import {StringDictionary} from '@datagrok-libraries/utils/src/type-declarations';
|
|
6
7
|
import {ChemPalette} from './utils/chem-palette';
|
|
7
8
|
import {setAARRenderer} from './utils/cell-renderer';
|
|
8
9
|
|
|
@@ -60,41 +61,38 @@ function joinDataFrames(
|
|
|
60
61
|
splitSeqDf: DG.DataFrame,
|
|
61
62
|
activityColumn: string,
|
|
62
63
|
) {
|
|
63
|
-
if (df.col(activityColumnScaled))
|
|
64
|
-
|
|
65
|
-
|
|
64
|
+
if (df.col(activityColumnScaled))
|
|
65
|
+
(df.columns as DG.ColumnList).remove(activityColumnScaled);
|
|
66
|
+
|
|
66
67
|
|
|
67
68
|
//FIXME: this column usually duplicates, so remove it then
|
|
68
|
-
if (df.col(`${activityColumnScaled} (2)`))
|
|
69
|
-
df.columns.remove(`${activityColumnScaled} (2)`);
|
|
70
|
-
|
|
69
|
+
if (df.col(`${activityColumnScaled} (2)`))
|
|
70
|
+
(df.columns as DG.ColumnList).remove(`${activityColumnScaled} (2)`);
|
|
71
|
+
|
|
71
72
|
|
|
72
73
|
// append splitSeqDf columns to source table and make sure columns are not added more than once
|
|
73
74
|
const dfColsSet = new Set(df.columns.names());
|
|
74
|
-
if (!positionColumns.every((col: string) => dfColsSet.has(col)))
|
|
75
|
+
if (!positionColumns.every((col: string) => dfColsSet.has(col)))
|
|
75
76
|
df.join(splitSeqDf, [activityColumn], [activityColumn], df.columns.names(), positionColumns, 'inner', true);
|
|
76
|
-
}
|
|
77
77
|
}
|
|
78
78
|
|
|
79
79
|
function sortSourceGrid(sourceGrid: DG.Grid) {
|
|
80
80
|
if (sourceGrid) {
|
|
81
|
-
const colNames:
|
|
82
|
-
for (let i =
|
|
83
|
-
colNames.push(sourceGrid.columns.byIndex(i)
|
|
84
|
-
|
|
81
|
+
const colNames: DG.GridColumn[] = [];
|
|
82
|
+
for (let i = 1; i < sourceGrid.columns.length; i++)
|
|
83
|
+
colNames.push(sourceGrid.columns.byIndex(i)!);
|
|
84
|
+
|
|
85
85
|
colNames.sort((a, b)=>{
|
|
86
|
-
if (
|
|
87
|
-
if (
|
|
86
|
+
if (a.column!.semType == 'aminoAcids') {
|
|
87
|
+
if (b.column!.semType == 'aminoAcids')
|
|
88
88
|
return 0;
|
|
89
|
-
}
|
|
90
89
|
return -1;
|
|
91
90
|
}
|
|
92
|
-
if (
|
|
91
|
+
if (b.column!.semType == 'aminoAcids')
|
|
93
92
|
return 1;
|
|
94
|
-
}
|
|
95
93
|
return 0;
|
|
96
94
|
});
|
|
97
|
-
sourceGrid
|
|
95
|
+
sourceGrid.columns.setOrder(colNames.map((v) => v.name));
|
|
98
96
|
}
|
|
99
97
|
}
|
|
100
98
|
|
|
@@ -135,7 +133,7 @@ async function calculateStatistics(
|
|
|
135
133
|
activityColumnScaled: string,
|
|
136
134
|
peptidesCount: number,
|
|
137
135
|
splitSeqDf: DG.DataFrame,
|
|
138
|
-
groupMapping:
|
|
136
|
+
groupMapping: StringDictionary,
|
|
139
137
|
) {
|
|
140
138
|
matrixDf = matrixDf.groupBy([positionColName, aminoAcidResidue])
|
|
141
139
|
.add('count', activityColumnScaled, 'Count')
|
|
@@ -180,13 +178,13 @@ async function calculateStatistics(
|
|
|
180
178
|
pvalues[i] = pvalue;
|
|
181
179
|
}
|
|
182
180
|
|
|
183
|
-
if (true)
|
|
181
|
+
if (true)
|
|
184
182
|
pvalues = fdrcorrection(pvalues)[1];
|
|
185
|
-
}
|
|
186
183
|
|
|
187
|
-
|
|
184
|
+
|
|
185
|
+
for (let i = 0; i < pvalues.length; ++i)
|
|
188
186
|
pValCol.set(i, pvalues[i]);
|
|
189
|
-
|
|
187
|
+
|
|
190
188
|
|
|
191
189
|
return matrixDf.clone();
|
|
192
190
|
}
|
|
@@ -195,9 +193,9 @@ async function setCategoryOrder(
|
|
|
195
193
|
twoColorMode: boolean, statsDf: DG.DataFrame, aminoAcidResidue: string, matrixDf: DG.DataFrame,
|
|
196
194
|
) {
|
|
197
195
|
const sortArgument = twoColorMode ? 'Absolute Mean difference' : 'Mean difference';
|
|
198
|
-
if (twoColorMode)
|
|
196
|
+
if (twoColorMode)
|
|
199
197
|
await statsDf.columns.addNewCalculated('Absolute Mean difference', 'Abs(${Mean difference})');
|
|
200
|
-
|
|
198
|
+
|
|
201
199
|
const aarWeightsDf = statsDf.groupBy([aminoAcidResidue]).sum(sortArgument, 'weight').aggregate();
|
|
202
200
|
const aarList = aarWeightsDf.getCol(aminoAcidResidue).toList();
|
|
203
201
|
const getWeight = (aar: string) => aarWeightsDf
|
|
@@ -257,13 +255,12 @@ function createGrids(
|
|
|
257
255
|
|
|
258
256
|
if (!grouping) {
|
|
259
257
|
let tempCol = matrixDf.columns.byName(aminoAcidResidue);
|
|
260
|
-
if (tempCol)
|
|
258
|
+
if (tempCol)
|
|
261
259
|
setAARRenderer(tempCol, sarGrid);
|
|
262
|
-
|
|
260
|
+
|
|
263
261
|
tempCol = sequenceDf.columns.byName(aminoAcidResidue);
|
|
264
|
-
if (tempCol)
|
|
262
|
+
if (tempCol)
|
|
265
263
|
setAARRenderer(tempCol, sarGrid);
|
|
266
|
-
}
|
|
267
264
|
}
|
|
268
265
|
|
|
269
266
|
return [sarGrid, sarVGrid];
|
|
@@ -308,15 +305,15 @@ function setCellRendererFunc(
|
|
|
308
305
|
|
|
309
306
|
let coef;
|
|
310
307
|
const variant = args.cell.cell.value < 0;
|
|
311
|
-
if (pVal < 0.01)
|
|
308
|
+
if (pVal < 0.01)
|
|
312
309
|
coef = variant && twoColorMode ? '#FF7900' : '#299617';
|
|
313
|
-
|
|
310
|
+
else if (pVal < 0.05)
|
|
314
311
|
coef = variant && twoColorMode ? '#FFA500' : '#32CD32';
|
|
315
|
-
|
|
312
|
+
else if (pVal < 0.1)
|
|
316
313
|
coef = variant && twoColorMode ? '#FBCEB1' : '#98FF98';
|
|
317
|
-
|
|
314
|
+
else
|
|
318
315
|
coef = DG.Color.toHtml(DG.Color.lightLightGray);
|
|
319
|
-
|
|
316
|
+
|
|
320
317
|
|
|
321
318
|
const chooseMin = () => twoColorMode ? 0 : mdCol.min;
|
|
322
319
|
const chooseMax = () => twoColorMode ? Math.max(Math.abs(mdCol.min), mdCol.max) : mdCol.max;
|
|
@@ -380,11 +377,11 @@ function setTooltipFunc(
|
|
|
380
377
|
const textNum = statsDf.groupBy([col]).where(query).aggregate().get(col, 0);
|
|
381
378
|
let text = `${col === 'Count' ? textNum : textNum.toFixed(5)}`;
|
|
382
379
|
|
|
383
|
-
if (col === 'Count')
|
|
380
|
+
if (col === 'Count')
|
|
384
381
|
text += ` / ${peptidesCount}`;
|
|
385
|
-
|
|
382
|
+
else if (col === 'pValue')
|
|
386
383
|
text = parseFloat(text) !== 0 ? text : '<0.01';
|
|
387
|
-
|
|
384
|
+
|
|
388
385
|
|
|
389
386
|
tooltipMap[col === 'pValue' ? 'p-value' : col] = text;
|
|
390
387
|
}
|
|
@@ -403,9 +400,8 @@ function setTooltipFunc(
|
|
|
403
400
|
const currentGroup = groupDescription[cell.cell.value];
|
|
404
401
|
const divText = ui.divText('Amino Acids in this group: ' + currentGroup['aminoAcids'].join(', '));
|
|
405
402
|
ui.tooltip.show(ui.divV([ui.h3(currentGroup['description']), divText]), x, y);
|
|
406
|
-
} else
|
|
403
|
+
} else
|
|
407
404
|
cp.showTooltip(cell, x, y);
|
|
408
|
-
}
|
|
409
405
|
}
|
|
410
406
|
return true;
|
|
411
407
|
};
|
|
@@ -424,14 +420,13 @@ function postProcessGrids(
|
|
|
424
420
|
) {
|
|
425
421
|
sourceGrid.onCellPrepare((cell: DG.GridCell) => {
|
|
426
422
|
const currentRowIndex = cell.tableRowIndex;
|
|
427
|
-
if (currentRowIndex && invalidIndexes.includes(currentRowIndex) && !cell.isRowHeader)
|
|
423
|
+
if (currentRowIndex && invalidIndexes.includes(currentRowIndex) && !cell.isRowHeader)
|
|
428
424
|
cell.style.backColor = DG.Color.lightLightGray;
|
|
429
|
-
}
|
|
430
425
|
});
|
|
431
426
|
|
|
432
|
-
for (const col of matrixDf.columns.names())
|
|
427
|
+
for (const col of matrixDf.columns.names())
|
|
433
428
|
sarGrid.col(col)!.width = sarGrid.props.rowHeight;
|
|
434
|
-
|
|
429
|
+
|
|
435
430
|
|
|
436
431
|
if (grouping) {
|
|
437
432
|
sarGrid.col(aminoAcidResidue)!.name = 'Groups';
|
|
@@ -440,6 +435,8 @@ function postProcessGrids(
|
|
|
440
435
|
|
|
441
436
|
sarGrid.props.allowEdit = false;
|
|
442
437
|
sarVGrid.props.allowEdit = false;
|
|
438
|
+
|
|
439
|
+
sarVGrid.col('Mean difference')!.name = 'Diff';
|
|
443
440
|
}
|
|
444
441
|
|
|
445
442
|
export async function describe(
|
|
@@ -450,7 +447,7 @@ export async function describe(
|
|
|
450
447
|
twoColorMode: boolean,
|
|
451
448
|
initialBitset: DG.BitSet | null,
|
|
452
449
|
grouping: boolean,
|
|
453
|
-
): Promise<[DG.Grid, DG.Grid, DG.DataFrame,
|
|
450
|
+
): Promise<[DG.Grid, DG.Grid, DG.DataFrame, StringDictionary]> {
|
|
454
451
|
//Split the aligned sequence into separate AARs
|
|
455
452
|
let splitSeqDf: DG.DataFrame | undefined;
|
|
456
453
|
let invalidIndexes: number[];
|
|
@@ -469,9 +466,8 @@ export async function describe(
|
|
|
469
466
|
joinDataFrames(activityColumnScaled, df, positionColumns, splitSeqDf, activityColumn);
|
|
470
467
|
|
|
471
468
|
for (const col of df.columns) {
|
|
472
|
-
if (splitSeqDf.col(col.name) && col.name != activityColumn)
|
|
469
|
+
if (splitSeqDf.col(col.name) && col.name != activityColumn)
|
|
473
470
|
setAARRenderer(col, sourceGrid);
|
|
474
|
-
}
|
|
475
471
|
}
|
|
476
472
|
|
|
477
473
|
sortSourceGrid(sourceGrid);
|
|
@@ -489,15 +485,15 @@ export async function describe(
|
|
|
489
485
|
let matrixDf = splitSeqDf.unpivot([activityColumnScaled], positionColumns, positionColName, aminoAcidResidue);
|
|
490
486
|
|
|
491
487
|
//TODO: move to chem palette
|
|
492
|
-
let groupMapping:
|
|
488
|
+
let groupMapping: StringDictionary = {};
|
|
493
489
|
if (grouping) {
|
|
494
490
|
groupMapping = aarGroups;
|
|
495
491
|
const aarCol = matrixDf.getCol(aminoAcidResidue);
|
|
496
492
|
aarCol.init((index) => groupMapping[aarCol.get(index)[0]] ?? '-');
|
|
497
493
|
aarCol.compact();
|
|
498
|
-
} else
|
|
494
|
+
} else
|
|
499
495
|
Object.keys(aarGroups).forEach((value) => groupMapping[value] = value);
|
|
500
|
-
|
|
496
|
+
|
|
501
497
|
|
|
502
498
|
//statistics for specific AAR at a specific position
|
|
503
499
|
const statsDf = await calculateStatistics(
|
package/src/model.ts
CHANGED
|
@@ -2,6 +2,7 @@ import * as DG from 'datagrok-api/dg';
|
|
|
2
2
|
|
|
3
3
|
import {describe} from './describe';
|
|
4
4
|
import {Subject} from 'rxjs';
|
|
5
|
+
import {StringDictionary} from '@datagrok-libraries/utils/src/type-declarations';
|
|
5
6
|
|
|
6
7
|
/**
|
|
7
8
|
* Model class for SAR viewers that retrieves and stores data.
|
|
@@ -12,7 +13,7 @@ class SARViewerModel {
|
|
|
12
13
|
private viewerGrid: Subject<DG.Grid> = new Subject<DG.Grid>();
|
|
13
14
|
private viewerVGrid: Subject<DG.Grid> = new Subject<DG.Grid>();
|
|
14
15
|
private statsDf: Subject<DG.DataFrame> = new Subject<DG.DataFrame>();
|
|
15
|
-
private groupMapping: Subject<
|
|
16
|
+
private groupMapping: Subject<StringDictionary> = new Subject<StringDictionary>();
|
|
16
17
|
public viewerGrid$;
|
|
17
18
|
public viewerVGrid$;
|
|
18
19
|
public statsDf$;
|
package/src/monomer-library.ts
ADDED
|
@@ -0,0 +1,187 @@
|
|
|
1
|
+
import * as ui from 'datagrok-api/ui';
|
|
2
|
+
import * as DG from 'datagrok-api/dg';
|
|
3
|
+
import * as grok from 'datagrok-api/grok';
|
|
4
|
+
|
|
5
|
+
/** HELM associated sdf libraries with monomer processing*/
|
|
6
|
+
export class MonomerLibrary {
|
|
7
|
+
private monomerFields: string[] = ['molecule', 'MonomerType', 'MonomerNaturalAnalogCode', 'MonomerName', 'MonomerCode', 'MonomerCaps', 'BranchMonomer'];
|
|
8
|
+
private library: {
|
|
9
|
+
[name: string]: {
|
|
10
|
+
mol: string,
|
|
11
|
+
type: string,
|
|
12
|
+
analogueCode: string,
|
|
13
|
+
linkages: { [link: string]: { atomNumber: number, type: string } }
|
|
14
|
+
}
|
|
15
|
+
} = {};
|
|
16
|
+
private monomers: string[] = [];
|
|
17
|
+
|
|
18
|
+
constructor(sdf: string) {
|
|
19
|
+
//sdf = sdf.replaceAll('\n\[', '\[');
|
|
20
|
+
const sdfReader = new SDFReader();
|
|
21
|
+
const data = sdfReader.get_colls(sdf);
|
|
22
|
+
this.monomerFields.forEach((f) => {
|
|
23
|
+
if (!(f in data))
|
|
24
|
+
throw `Monomer library was not compiled: ${f} field is absent in provided file`;
|
|
25
|
+
|
|
26
|
+
if (data[f].length != data.molecule.length)
|
|
27
|
+
throw `Monomer library was not compiled: ${f} field is not presented for each monomer`;
|
|
28
|
+
});
|
|
29
|
+
|
|
30
|
+
for (let i = 0; i < data.molecule.length; i++) {
|
|
31
|
+
const linkData = this.getLinkData(data.molecule[i], data.MonomerCaps[i], data.MonomerName[i]);
|
|
32
|
+
const entry = {
|
|
33
|
+
mol: data.molecule[i],
|
|
34
|
+
type: 'Peptide',
|
|
35
|
+
code: data.MonomerCode[i],
|
|
36
|
+
analogueCode: data.MonomerNaturalAnalogCode[i],
|
|
37
|
+
linkages: linkData,
|
|
38
|
+
};
|
|
39
|
+
|
|
40
|
+
const name = data.MonomerCode[i] !== '.' ? data.MonomerCode[i] : data.MonomerName[i];
|
|
41
|
+
this.library[name] = entry;
|
|
42
|
+
this.monomers.push(data.MonomerName[i]);
|
|
43
|
+
}
|
|
44
|
+
}
|
|
45
|
+
|
|
46
|
+
/** getting full monomer information from monomer library*/
|
|
47
|
+
public getMonomerEntry(name: string) {
|
|
48
|
+
if (!this.monomers.includes(name))
|
|
49
|
+
throw `Monomer library do not contain ${name} monomer`;
|
|
50
|
+
|
|
51
|
+
|
|
52
|
+
return this.library[name];
|
|
53
|
+
}
|
|
54
|
+
|
|
55
|
+
/** getting mol as string for monomer*/
|
|
56
|
+
public getMonomerMol(name: string) {
|
|
57
|
+
if (!this.monomers.includes(name))
|
|
58
|
+
throw `Monomer library do not contain ${name} monomer`;
|
|
59
|
+
|
|
60
|
+
|
|
61
|
+
const entry = this.library[name];
|
|
62
|
+
let monomerMol = entry.mol.replace(/M RGP .+\n/, '');
|
|
63
|
+
|
|
64
|
+
//order matters
|
|
65
|
+
const links = Object.keys(entry.linkages);
|
|
66
|
+
for (let i = 0; i < links.length; i++)
|
|
67
|
+
monomerMol = monomerMol.replace('R#', entry.linkages[links[i]].type + ' ');
|
|
68
|
+
|
|
69
|
+
|
|
70
|
+
return monomerMol;
|
|
71
|
+
}
|
|
72
|
+
|
|
73
|
+
/** getting the list of the monomers available in library*/
|
|
74
|
+
get monomerNames() {
|
|
75
|
+
return this.monomers;
|
|
76
|
+
}
|
|
77
|
+
|
|
78
|
+
private getLinkData(mol: string, caps: string, name: string) {
|
|
79
|
+
const rawData = mol.match(/M RGP .+/);
|
|
80
|
+
if (rawData === null)
|
|
81
|
+
throw `Monomer library was not compiled: ${name} entry has no RGP`;
|
|
82
|
+
|
|
83
|
+
|
|
84
|
+
const types: { [code: string]: string } = {};
|
|
85
|
+
caps.split('\n')?.forEach((e) => {
|
|
86
|
+
types[e.match(/\d+/)![0]] = e.match(/(?<=\])\w+/)![0];
|
|
87
|
+
});
|
|
88
|
+
|
|
89
|
+
const data = rawData![0].replace('M RGP ', '').split(/\s+/);
|
|
90
|
+
const res: { [link: string]: { atomNumber: number, type: string } } = {};
|
|
91
|
+
for (let i = 0; i < parseInt(data[0]); i++) {
|
|
92
|
+
const code = parseInt(data[2 * i + 2]);
|
|
93
|
+
let type = '';
|
|
94
|
+
switch (code) {
|
|
95
|
+
case 1:
|
|
96
|
+
type = 'N-terminal';
|
|
97
|
+
break;
|
|
98
|
+
case 2:
|
|
99
|
+
type = 'C-terminal';
|
|
100
|
+
break;
|
|
101
|
+
case 3:
|
|
102
|
+
type = 'branch';
|
|
103
|
+
break;
|
|
104
|
+
default:
|
|
105
|
+
break;
|
|
106
|
+
}
|
|
107
|
+
res[type] = {atomNumber: parseInt(data[2 * i + 1]), type: types[code]};
|
|
108
|
+
}
|
|
109
|
+
|
|
110
|
+
return res;
|
|
111
|
+
}
|
|
112
|
+
}
|
|
113
|
+
|
|
114
|
+
//TODO: merge with Chem version
|
|
115
|
+
class SDFReader {
|
|
116
|
+
dataColls: { [_: string]: any };
|
|
117
|
+
|
|
118
|
+
constructor() {
|
|
119
|
+
this.dataColls = {'molecule': []};
|
|
120
|
+
}
|
|
121
|
+
|
|
122
|
+
get_colls(content: string) {
|
|
123
|
+
this.read(content);
|
|
124
|
+
return this.dataColls;
|
|
125
|
+
}
|
|
126
|
+
|
|
127
|
+
read(content: string) {
|
|
128
|
+
content = content.replaceAll('\r', ''); //equalize old and new sdf standards
|
|
129
|
+
let startIndex = content.indexOf('$$$$', 0);
|
|
130
|
+
this.parse(content, 0, startIndex, (name: string, val: any) => { // TODO: type
|
|
131
|
+
this.dataColls[name] = [];
|
|
132
|
+
this.dataColls[name].push(val);
|
|
133
|
+
});
|
|
134
|
+
startIndex += 5;
|
|
135
|
+
while (startIndex > -1 && startIndex < content.length)
|
|
136
|
+
startIndex = this.readNext(content, startIndex);
|
|
137
|
+
}
|
|
138
|
+
|
|
139
|
+
readNext(content: string, startIndex: number) {
|
|
140
|
+
const nextStartIndex = content.indexOf('$$$$', startIndex);
|
|
141
|
+
if (nextStartIndex === -1)
|
|
142
|
+
return -1;
|
|
143
|
+
else {
|
|
144
|
+
this.parse(content, startIndex, nextStartIndex,
|
|
145
|
+
(name: string, val: number) => this.dataColls[name].push(val));
|
|
146
|
+
}
|
|
147
|
+
|
|
148
|
+
if (nextStartIndex > -1)
|
|
149
|
+
return nextStartIndex + 5;
|
|
150
|
+
|
|
151
|
+
|
|
152
|
+
return nextStartIndex;
|
|
153
|
+
}
|
|
154
|
+
|
|
155
|
+
parse(content: string, start: number, end: number, handler: any) {
|
|
156
|
+
const molEnd = +content.indexOf('M END\n', start) + 7;
|
|
157
|
+
let localEnd = start;
|
|
158
|
+
this.dataColls['molecule'].push(content.substr(start, molEnd - start));
|
|
159
|
+
|
|
160
|
+
start = molEnd;
|
|
161
|
+
while (localEnd < end) {
|
|
162
|
+
start = content.indexOf('> <', localEnd);
|
|
163
|
+
if (start === -1)
|
|
164
|
+
return;
|
|
165
|
+
|
|
166
|
+
|
|
167
|
+
start += 3;
|
|
168
|
+
localEnd = content.indexOf('>\n', start);
|
|
169
|
+
if (localEnd === -1)
|
|
170
|
+
return;
|
|
171
|
+
|
|
172
|
+
|
|
173
|
+
const propertyName = content.substring(start, localEnd);
|
|
174
|
+
start = localEnd + 2;
|
|
175
|
+
|
|
176
|
+
localEnd = content.indexOf('\n', start);
|
|
177
|
+
if (localEnd === -1)
|
|
178
|
+
localEnd = end;
|
|
179
|
+
else if (content[localEnd + 1] != '\n')
|
|
180
|
+
localEnd = content.indexOf('\n', ++localEnd);
|
|
181
|
+
;
|
|
182
|
+
|
|
183
|
+
handler(propertyName, content.substring(start, localEnd));
|
|
184
|
+
localEnd += 2;
|
|
185
|
+
}
|
|
186
|
+
}
|
|
187
|
+
}
|
package/src/package-test.ts
ADDED
|
@@ -0,0 +1,19 @@
|
|
|
1
|
+
import * as DG from 'datagrok-api/dg';
|
|
2
|
+
|
|
3
|
+
import {runTests} from '@datagrok-libraries/utils/src/test';
|
|
4
|
+
|
|
5
|
+
import './tests/peptide-space-test';
|
|
6
|
+
import './tests/peptides-tests';
|
|
7
|
+
import './tests/msa-tests';
|
|
8
|
+
|
|
9
|
+
export const _package = new DG.Package();
|
|
10
|
+
|
|
11
|
+
//name: test
|
|
12
|
+
//input: string category {optional: true}
|
|
13
|
+
//input: string test {optional: true}
|
|
14
|
+
//output: dataframe result
|
|
15
|
+
//top-menu: Tools | Dev | JS API Tests
|
|
16
|
+
export async function test(category: string, test: string): Promise<DG.DataFrame> {
|
|
17
|
+
const data = await runTests({category, test});
|
|
18
|
+
return DG.DataFrame.fromObjects(data)!;
|
|
19
|
+
}
|