@datagrok/bio 2.22.10 → 2.22.12
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +6 -0
- package/dist/package-test.js +3 -3
- package/dist/package-test.js.map +1 -1
- package/dist/package.js +1 -1
- package/dist/package.js.map +1 -1
- package/dockerfiles/container.json +2 -2
- package/files/monomer-libraries/polytool-lib.json +227 -1
- package/package.json +3 -3
- package/src/package-api.ts +4 -0
- package/src/package.g.ts +37 -24
- package/src/package.ts +19 -11
- package/src/tests/renderers-test.ts +1 -1
- package/src/tests/viewers.ts +11 -16
- package/src/utils/monomer-lib/consts.ts +1 -1
- package/src/utils/monomer-lib/monomer-manager/monomer-manager.ts +48 -1
- package/src/utils/save-as-fasta.ts +25 -22
- package/test-console-output-1.log +1125 -3131
- package/test-record-1.mp4 +0 -0
|
@@ -57,6 +57,53 @@ export async function standardiseMonomers(monomers: Monomer[]) {
|
|
|
57
57
|
return fixedMonomers;
|
|
58
58
|
}
|
|
59
59
|
|
|
60
|
+
/**
 * Matches molecules in the dataframe with monomers in the library by canonical SMILES.
 *
 * Monomers of the requested polymer type are standardised, capped with `capSmiles`, and passed
 * through the Chem package function `convertMoleculeNotation`; the molecule column is converted the
 * same way, and a row matches a monomer when the two converted strings are exactly equal.
 * Monomers whose capped SMILES is empty or still contains an R-group placeholder (`[*:`) are ignored.
 * When several monomers share the same converted SMILES, the first one in library order wins.
 *
 * @param molDf - dataframe holding the molecule column; it is cloned, the input is not modified
 * @param molColName - name of the molecule column in `molDf`
 * @param monomerLib - monomer library to match against
 * @param polymerType - polymer type whose monomers are considered
 * @returns a clone of `molDf` with three added string columns (matched monomer symbol, matched
 *   monomer SMILES and matched monomer source); rows without a match are left unset in them
 * @throws Error when the Chem converter function is not found, when `molColName` is not a column
 *   of `molDf`, or when the converter returns a column of unexpected length
 */
export async function matchMoleculesWithMonomers(molDf: DG.DataFrame, molColName: string, monomerLib: IMonomerLib, polymerType: PolymerType = 'PEPTIDE'): Promise<DG.DataFrame> {
  const converterFunc = DG.Func.find({package: 'Chem', name: 'convertMoleculeNotation'})[0];
  if (!converterFunc)
    throw new Error('Function convertMoleculeNotation not found, please install Chem package');

  // Fail fast with a clear message instead of handing a null column to the converter.
  const molecules = molDf.col(molColName);
  if (!molecules)
    throw new Error(`Column '${molColName}' not found in the molecules dataframe`);

  // first: standardize monomers
  const monomers = monomerLib.getMonomerSymbolsByType(polymerType)
    .map((s) => monomerLib.getMonomer(polymerType, s)!)
    .filter((m) => m && (m.smiles || m.molfile));
  const fixedMonomers = await standardiseMonomers(monomers);
  // `original` keeps the standardised (uncapped) SMILES, which is what gets reported for a match.
  const cappedSmiles = fixedMonomers
    .map((m, i) => ({
      sym: m.symbol,
      smiles: capSmiles(m.smiles ?? '', m.rgroups ?? []),
      original: m.smiles,
      source: monomers[i]?.lib?.source,
    }))
    .filter((s) => !!s?.smiles && !s.smiles.includes('[*:'));

  // canonicalize all monomer smiles
  const monomerSmilesCol = DG.Column.fromList(DG.COLUMN_TYPE.STRING, 'MonomerSmiles', cappedSmiles.map((m) => m.smiles!));
  monomerSmilesCol.semType = DG.SEMTYPE.MOLECULE;
  const canonicalizedMonomersSmilesCol: DG.Column = await converterFunc.apply({molecule: monomerSmilesCol, targetNotation: DG.chem.Notation.Smiles});
  if (!canonicalizedMonomersSmilesCol || canonicalizedMonomersSmilesCol.length !== monomerSmilesCol.length)
    throw new Error('Error canonicalizing monomer smiles');

  // Index monomers by converted SMILES for O(1) lookup per molecule.
  // Only the first monomer for a given SMILES is stored, so the earliest library entry wins.
  const monomerByCanonicalSmiles = new Map<string, (typeof cappedSmiles)[number]>();
  canonicalizedMonomersSmilesCol.toList().forEach((s: string, i: number) => {
    if (s && !monomerByCanonicalSmiles.has(s))
      monomerByCanonicalSmiles.set(s, cappedSmiles[i]);
  });

  const canonicalizedMoleculesCol: DG.Column = await converterFunc.apply({molecule: molecules, targetNotation: DG.chem.Notation.Smiles});
  if (!canonicalizedMoleculesCol || canonicalizedMoleculesCol.length !== molecules.length)
    throw new Error('Error canonicalizing molecules');

  const canonicalizedMolecules = canonicalizedMoleculesCol.toList();

  const resultDf = molDf.clone();
  const matchingMonomerSmilesCol = resultDf.columns.addNewString(resultDf.columns.getUnusedName('Matched monomer smiles'));
  matchingMonomerSmilesCol.semType = DG.SEMTYPE.MOLECULE;
  const matchingMonomerSymbolCol = resultDf.columns.addNewString(resultDf.columns.getUnusedName('Matched monomer symbol'));
  matchingMonomerSymbolCol.semType = 'Monomer';
  const sourceLibCol = resultDf.columns.addNewString(resultDf.columns.getUnusedName('Matched monomer source'));
  resultDf.columns.setOrder([molColName, matchingMonomerSymbolCol.name, matchingMonomerSmilesCol.name, sourceLibCol.name]);

  for (let i = 0; i < canonicalizedMolecules.length; i++) {
    const mol = canonicalizedMolecules[i];
    if (!mol) continue; // nothing to match when the molecule has no converted SMILES
    const match = monomerByCanonicalSmiles.get(mol);
    if (!match) continue;
    matchingMonomerSmilesCol.set(i, match.original!, false);
    matchingMonomerSymbolCol.set(i, match.sym, false);
    sourceLibCol.set(i, match.source ?? '', false);
  }
  return resultDf;
}
|
|
106
|
+
|
|
60
107
|
/** Standardizes the monomer library
|
|
61
108
|
* warning: throws error if the library is not valid or has invalid monomers
|
|
62
109
|
*/
|
|
@@ -127,7 +174,7 @@ export function getMonomersDataFrame(monomers: Monomer[]) {
|
|
|
127
174
|
monomers[i].id,
|
|
128
175
|
JSON.stringify(monomers[i].meta ?? {}),
|
|
129
176
|
monomers[i].lib?.source ?? '',
|
|
130
|
-
]);
|
|
177
|
+
], false);
|
|
131
178
|
// something is wrong with setting dates, so setting it manually for now
|
|
132
179
|
try {
|
|
133
180
|
if (date)
|
|
@@ -15,23 +15,21 @@ const FASTA_LINE_WIDTH = 60;
|
|
|
15
15
|
export function saveAsFastaUI(): void {
|
|
16
16
|
// Use grid for column order adjusted by user
|
|
17
17
|
const grid: DG.Grid = grok.shell.tv.grid;
|
|
18
|
+
const dataFrame: DG.DataFrame = grid.dataFrame;
|
|
18
19
|
|
|
19
|
-
const idGColList: DG.
|
|
20
|
-
.map((colI: number) =>
|
|
21
|
-
.filter((
|
|
22
|
-
const defaultIdGCol: DG.
|
|
23
|
-
.find((
|
|
24
|
-
const idDefaultValue = defaultIdGCol ? [defaultIdGCol
|
|
20
|
+
const idGColList: DG.Column[] = wu.count(0).take(dataFrame.columns.length)
|
|
21
|
+
.map((colI: number) => dataFrame.columns.byIndex(colI)!)
|
|
22
|
+
.filter((col: DG.Column) => col.semType !== DG.SEMTYPE.MACROMOLECULE).toArray();
|
|
23
|
+
const defaultIdGCol: DG.Column | undefined = idGColList
|
|
24
|
+
.find((col: DG.Column) => col.name.toLowerCase().indexOf('id') !== -1);
|
|
25
|
+
const idDefaultValue = defaultIdGCol ? [defaultIdGCol] : [];
|
|
25
26
|
|
|
26
|
-
const idGColListInput = ui.input.
|
|
27
|
-
|
|
28
|
-
items: idGColList.map((gcol: DG.GridColumn) => gcol.name)
|
|
29
|
-
});
|
|
27
|
+
const idGColListInput = ui.input.columns('Seq id columns', {table: dataFrame, value: idDefaultValue,
|
|
28
|
+
filter: (col: DG.Column) => col.semType !== DG.SEMTYPE.MACROMOLECULE});
|
|
30
29
|
|
|
31
|
-
const seqGColList: DG.
|
|
32
|
-
.map((colI: number) =>
|
|
33
|
-
.filter((
|
|
34
|
-
const col: DG.Column | null = gc.column;
|
|
30
|
+
const seqGColList: DG.Column[] = wu.count(0).take(dataFrame.columns.length)/* range rom 0 to grid.columns.length */
|
|
31
|
+
.map((colI: number) => dataFrame.columns.byIndex(colI)!)
|
|
32
|
+
.filter((col: DG.Column) => {
|
|
35
33
|
if (col && col.semType === DG.SEMTYPE.MACROMOLECULE) {
|
|
36
34
|
const sh = _package.seqHelper.getSeqHandler(col);
|
|
37
35
|
return sh.isFasta();
|
|
@@ -39,10 +37,17 @@ export function saveAsFastaUI(): void {
|
|
|
39
37
|
return false;
|
|
40
38
|
}).toArray();
|
|
41
39
|
|
|
42
|
-
const seqDefaultValue = seqGColList.length > 0 ? seqGColList[0]
|
|
43
|
-
const seqColInput = ui.input.
|
|
44
|
-
|
|
45
|
-
|
|
40
|
+
const seqDefaultValue = seqGColList.length > 0 ? seqGColList[0] : null;
|
|
41
|
+
const seqColInput = ui.input.column('Seq column', {
|
|
42
|
+
table: dataFrame,
|
|
43
|
+
value: seqDefaultValue!,
|
|
44
|
+
filter: (col) => {
|
|
45
|
+
if (col && col.semType === DG.SEMTYPE.MACROMOLECULE) {
|
|
46
|
+
const sh = _package.seqHelper.getSeqHandler(col);
|
|
47
|
+
return sh.isFasta();
|
|
48
|
+
}
|
|
49
|
+
return false;
|
|
50
|
+
}
|
|
46
51
|
});
|
|
47
52
|
|
|
48
53
|
const lineWidthInput = ui.input.int('FASTA line width', {value: FASTA_LINE_WIDTH});
|
|
@@ -54,10 +59,8 @@ export function saveAsFastaUI(): void {
|
|
|
54
59
|
lineWidthInput,
|
|
55
60
|
]))
|
|
56
61
|
.onOK(() => {
|
|
57
|
-
const valueIdColList: DG.Column[] = idGColListInput.value
|
|
58
|
-
|
|
59
|
-
const valueSeqCol: DG.Column | null = seqColInput.value ?
|
|
60
|
-
grid.columns.byName(seqColInput.value as string)!.column : null;
|
|
62
|
+
const valueIdColList: DG.Column[] = idGColListInput.value ?? [];
|
|
63
|
+
const valueSeqCol: DG.Column | null = seqColInput.value ?? null;
|
|
61
64
|
const valueLineWidth = lineWidthInput.value ?? FASTA_LINE_WIDTH;
|
|
62
65
|
|
|
63
66
|
if (!valueSeqCol)
|