@datagrok/bio 2.18.2 → 2.18.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.eslintrc.json +1 -0
- package/CHANGELOG.md +9 -0
- package/dist/455.js +1 -1
- package/dist/455.js.map +1 -1
- package/dist/package-test.js +3 -3
- package/dist/package-test.js.map +1 -1
- package/dist/package.js +2 -2
- package/dist/package.js.map +1 -1
- package/package.json +2 -2
- package/src/analysis/sequence-search-base-viewer.ts +1 -1
- package/src/package.ts +2 -1
- package/src/tests/seq-handler-get-region-tests.ts +2 -1
- package/src/utils/monomer-lib/monomer-manager/duplicate-monomer-manager.ts +2 -0
- package/src/utils/monomer-lib/monomer-manager/monomer-manager.ts +33 -3
- package/src/utils/seq-helper/seq-handler.ts +10 -1
- package/src/utils/seq-helper/seq-helper.ts +1 -1
- package/src/widgets/composition-analysis-widget.ts +3 -1
- package/test-console-output-1.log +346 -348
- package/test-record-1.mp4 +0 -0
package/package.json
CHANGED
|
@@ -5,7 +5,7 @@
|
|
|
5
5
|
"name": "Leonid Stolbov",
|
|
6
6
|
"email": "lstolbov@datagrok.ai"
|
|
7
7
|
},
|
|
8
|
-
"version": "2.18.2",
|
|
8
|
+
"version": "2.18.4",
|
|
9
9
|
"description": "Bioinformatics support (import/export of sequences, conversion, visualization, analysis). [See more](https://github.com/datagrok-ai/public/blob/master/packages/Bio/README.md) for details.",
|
|
10
10
|
"repository": {
|
|
11
11
|
"type": "git",
|
|
@@ -44,7 +44,7 @@
|
|
|
44
44
|
],
|
|
45
45
|
"dependencies": {
|
|
46
46
|
"@biowasm/aioli": "^3.1.0",
|
|
47
|
-
"@datagrok-libraries/bio": "^5.
|
|
47
|
+
"@datagrok-libraries/bio": "^5.49.1",
|
|
48
48
|
"@datagrok-libraries/chem-meta": "^1.2.7",
|
|
49
49
|
"@datagrok-libraries/math": "^1.2.4",
|
|
50
50
|
"@datagrok-libraries/ml": "^6.7.6",
|
|
@@ -70,7 +70,7 @@ export class SequenceSearchBaseViewer extends DG.JsViewer {
|
|
|
70
70
|
this.render();
|
|
71
71
|
}
|
|
72
72
|
|
|
73
|
-
/** For tests */ public computeRequested: boolean;
|
|
73
|
+
/** For tests */ public computeRequested: boolean = false;
|
|
74
74
|
public renderPromise: Promise<void> = Promise.resolve();
|
|
75
75
|
|
|
76
76
|
protected render(computeData = true): void {
|
package/src/package.ts
CHANGED
|
@@ -979,7 +979,8 @@ export async function manageLibrariesApp(): Promise<DG.View> {
|
|
|
979
979
|
export async function manageLibrariesAppTreeBrowser(treeNode: DG.TreeViewGroup, browseView: DG.BrowseView) {
|
|
980
980
|
const libraries = (await (await MonomerLibManager.getInstance()).getFileManager()).getValidLibraryPaths();
|
|
981
981
|
libraries.forEach((libName) => {
|
|
982
|
-
const
|
|
982
|
+
const nodeName = libName.endsWith('.json') ? libName.substring(0, libName.length - 5) : libName;
|
|
983
|
+
const libNode = treeNode.item(nodeName);
|
|
983
984
|
// eslint-disable-next-line rxjs/no-ignored-subscription, rxjs/no-async-subscribe
|
|
984
985
|
libNode.onSelected.subscribe(async () => {
|
|
985
986
|
const monomerManager = await MonomerManager.getNewInstance();
|
|
@@ -1,3 +1,4 @@
|
|
|
1
|
+
/* eslint-disable max-lines-per-function */
|
|
1
2
|
import * as grok from 'datagrok-api/grok';
|
|
2
3
|
import * as DG from 'datagrok-api/dg';
|
|
3
4
|
|
|
@@ -68,7 +69,7 @@ PEPTIDE1{N.T.[dE].[Thr_PO3H2]}$$$$
|
|
|
68
69
|
PEPTIDE1{[Cys_SEt].T.[dK].[Thr_PO3H2]}$$$$
|
|
69
70
|
PEPTIDE1{[Cys_SEt].T.*.*}$$$$`,
|
|
70
71
|
units: NOTATION.HELM,
|
|
71
|
-
alphabet:
|
|
72
|
+
alphabet: ALPHABET.UN,
|
|
72
73
|
|
|
73
74
|
positionNames: {tag: null, start: '4', end: '7'}
|
|
74
75
|
}
|
|
@@ -97,6 +97,8 @@ export class DuplicateMonomerManager {
|
|
|
97
97
|
DuplicateMonomerManager._instance = new DuplicateMonomerManager();
|
|
98
98
|
await DuplicateMonomerManager._instance.refresh();
|
|
99
99
|
const libManager = await MonomerLibManager.getInstance();
|
|
100
|
+
// reason: subscription happens only once, and is needed throught the lifetime of the app
|
|
101
|
+
// eslint-disable-next-line rxjs/no-async-subscribe, rxjs/no-ignored-subscription
|
|
100
102
|
libManager.getMonomerLib().onChanged.subscribe(async () => await DuplicateMonomerManager._instance.refresh());
|
|
101
103
|
}
|
|
102
104
|
DuplicateMonomerManager._instance.refresh();
|
|
@@ -1028,6 +1028,7 @@ function getCorrectedMolBlock(molBlock: string) {
|
|
|
1028
1028
|
// 2. RGP field is present in the correct format
|
|
1029
1029
|
// 3. R group labels are written as R# and not just R
|
|
1030
1030
|
// 4. there is no ISO field in the molblock. if there is, it needs to be substituted with RGP field and thats it.
|
|
1031
|
+
// 5. make sure that R groups have no metadata in the atomblocks
|
|
1031
1032
|
|
|
1032
1033
|
const lines = molBlock.split('\n');
|
|
1033
1034
|
|
|
@@ -1055,7 +1056,7 @@ function getCorrectedMolBlock(molBlock: string) {
|
|
|
1055
1056
|
rgroupLineNumbers[atomI - molStartIdx] = rgroupNum;
|
|
1056
1057
|
}
|
|
1057
1058
|
|
|
1058
|
-
const rgroupLineNums = Object.
|
|
1059
|
+
const rgroupLineNums = Object.keys(rgroupLineNumbers);
|
|
1059
1060
|
// find and possibly add rgp field
|
|
1060
1061
|
|
|
1061
1062
|
const rgpLineIdx = lines.findIndex((line) => line.startsWith('M') && line.includes('RGP'));
|
|
@@ -1066,6 +1067,23 @@ function getCorrectedMolBlock(molBlock: string) {
|
|
|
1066
1067
|
const mEndIdx = lines.findIndex((line) => line.startsWith('M') && line.includes('END'));
|
|
1067
1068
|
lines.splice(mEndIdx, 0, rgpLine);
|
|
1068
1069
|
}
|
|
1070
|
+
|
|
1071
|
+
//make sure that R# lines do not have any metadata that can be interpreted as isotopes or anything else
|
|
1072
|
+
//for example, following line could be interpreted as isotope with mass 2 in some cases
|
|
1073
|
+
//" 3.9970 0.3462 0.0000 R# 0 0 0 0 0 1 0 0 0 0 2 0"
|
|
1074
|
+
const rGroupActualLines = rgroupLineNums.filter((rLine) => !!Number.parseInt(rLine)).map((atomLine) => Number.parseInt(atomLine) + molStartIdx);
|
|
1075
|
+
rGroupActualLines.forEach((lineIdx) => {
|
|
1076
|
+
const splitLine = lines[lineIdx].split(' ');
|
|
1077
|
+
const rIdx = splitLine.findIndex((s) => s === 'R#');
|
|
1078
|
+
if (rIdx === -1)
|
|
1079
|
+
return;
|
|
1080
|
+
for (let i = rIdx + 1; i < splitLine.length; i++) {
|
|
1081
|
+
if (!!splitLine[i] && splitLine[i].length == 1 && (Number.parseInt(splitLine[i]) ?? 0) > 0)
|
|
1082
|
+
splitLine[i] = '0';
|
|
1083
|
+
}
|
|
1084
|
+
lines[lineIdx] = splitLine.join(' ');
|
|
1085
|
+
});
|
|
1086
|
+
|
|
1069
1087
|
return lines.join('\n');
|
|
1070
1088
|
}
|
|
1071
1089
|
|
|
@@ -1087,12 +1105,24 @@ function monomerFromDfRow(dfRow: DG.Row): Monomer {
|
|
|
1087
1105
|
if (typeof metaJSON[key] === 'object')
|
|
1088
1106
|
metaJSON[key] = JSON.stringify(metaJSON[key]);
|
|
1089
1107
|
}
|
|
1108
|
+
const smiles = dfRow.get(MONOMER_DF_COLUMN_NAMES.MONOMER);
|
|
1109
|
+
if (!smiles)
|
|
1110
|
+
throw new Error('Monomer SMILES is empty');
|
|
1111
|
+
let molfile = '';
|
|
1112
|
+
|
|
1113
|
+
try {
|
|
1114
|
+
molfile = grok.chem.convert(smiles, DG.chem.Notation.Smiles, DG.chem.Notation.MolBlock);
|
|
1115
|
+
molfile = getCorrectedMolBlock(molfile);
|
|
1116
|
+
} catch (e) {
|
|
1117
|
+
grok.shell.error(`Error converting SMILES to molfile, \n ${smiles}`);
|
|
1118
|
+
console.error(e);
|
|
1119
|
+
}
|
|
1090
1120
|
|
|
1091
1121
|
return {
|
|
1092
1122
|
symbol: dfRow.get(MONOMER_DF_COLUMN_NAMES.SYMBOL),
|
|
1093
1123
|
name: dfRow.get(MONOMER_DF_COLUMN_NAMES.NAME),
|
|
1094
|
-
molfile:
|
|
1095
|
-
smiles:
|
|
1124
|
+
molfile: molfile,
|
|
1125
|
+
smiles: smiles,
|
|
1096
1126
|
polymerType: dfRow.get(MONOMER_DF_COLUMN_NAMES.POLYMER_TYPE),
|
|
1097
1127
|
monomerType: dfRow.get(MONOMER_DF_COLUMN_NAMES.MONOMER_TYPE),
|
|
1098
1128
|
naturalAnalog: dfRow.get(MONOMER_DF_COLUMN_NAMES.NATURAL_ANALOG),
|
|
@@ -1,3 +1,5 @@
|
|
|
1
|
+
/* eslint-disable max-lines */
|
|
2
|
+
/* eslint-disable @typescript-eslint/no-unused-vars */
|
|
1
3
|
import * as DG from 'datagrok-api/dg';
|
|
2
4
|
|
|
3
5
|
import wu from 'wu';
|
|
@@ -5,7 +7,7 @@ import wu from 'wu';
|
|
|
5
7
|
/* eslint-disable max-len */
|
|
6
8
|
import {ALIGNMENT, ALPHABET, candidateAlphabets, getSplitterWithSeparator, NOTATION, positionSeparator, splitterAsFasta, splitterAsHelm, TAGS} from '@datagrok-libraries/bio/src/utils/macromolecule/index';
|
|
7
9
|
import {INotationProvider, ISeqSplitted, SeqColStats, SplitterFunc,} from '@datagrok-libraries/bio/src/utils/macromolecule/types';
|
|
8
|
-
import {detectAlphabet, splitterAsFastaSimple, StringListSeqSplitted} from '@datagrok-libraries/bio/src/utils/macromolecule/utils';
|
|
10
|
+
import {detectAlphabet, detectHelmAlphabet, splitterAsFastaSimple, StringListSeqSplitted} from '@datagrok-libraries/bio/src/utils/macromolecule/utils';
|
|
9
11
|
import {mmDistanceFunctions, MmDistanceFunctionsNames} from '@datagrok-libraries/ml/src/macromolecule-distance-functions';
|
|
10
12
|
import {mmDistanceFunctionType} from '@datagrok-libraries/ml/src/macromolecule-distance-functions/types';
|
|
11
13
|
import {getMonomerLibHelper, IMonomerLibHelper} from '@datagrok-libraries/bio/src/monomer-works/monomer-utils';
|
|
@@ -123,6 +125,12 @@ export class SeqHandler implements ISeqHandler {
|
|
|
123
125
|
uh.column.setTag(TAGS.alphabetSize, alphabetSize.toString());
|
|
124
126
|
uh.column.setTag(TAGS.alphabetIsMultichar, alphabetIsMultichar ? 'true' : 'false');
|
|
125
127
|
}
|
|
128
|
+
} else if (units === NOTATION.HELM) {
|
|
129
|
+
let alphabet = uh.column.getTag(TAGS.alphabet);
|
|
130
|
+
if (alphabet === null) {
|
|
131
|
+
alphabet = detectHelmAlphabet(uh.stats.freq, candidateAlphabets, uh.defaultGapOriginal);
|
|
132
|
+
uh.column.setTag(TAGS.alphabet, alphabet);
|
|
133
|
+
}
|
|
126
134
|
}
|
|
127
135
|
}
|
|
128
136
|
|
|
@@ -539,6 +547,7 @@ export class SeqHandler implements ISeqHandler {
|
|
|
539
547
|
// convert the peptides list to a set for faster lookup
|
|
540
548
|
const peptidesSet = new Set(peptides);
|
|
541
549
|
// get splitter for given separator and check if all monomers are in the lib
|
|
550
|
+
// eslint-disable-next-line @typescript-eslint/no-unused-vars
|
|
542
551
|
const splitterFunc = getSplitterWithSeparator(this.separator!);
|
|
543
552
|
// iterate over the columns, split them and check if all monomers are in the lib
|
|
544
553
|
//TODO maybe add missing threshold so that if there are not too many missing monomers
|
|
@@ -87,7 +87,7 @@ export class SeqHelper implements ISeqHelper {
|
|
|
87
87
|
|
|
88
88
|
//#endregion From HelmToMolfileConverter
|
|
89
89
|
|
|
90
|
-
const molHlList = molfilesV3K.map((item: MolfileWithMap) => getMolHighlight(item.monomers.values(), monomerLib));
|
|
90
|
+
//const molHlList = molfilesV3K.map((item: MolfileWithMap) => getMolHighlight(item.monomers.values(), monomerLib));
|
|
91
91
|
|
|
92
92
|
const molCol = DG.Column.fromStrings(molColName, molList);
|
|
93
93
|
molCol.semType = DG.SEMTYPE.MOLECULE;
|
|
@@ -26,7 +26,9 @@ export function getCompositionAnalysisWidget(
|
|
|
26
26
|
const rowIdx = val.cell.rowIndex;
|
|
27
27
|
const seqSS = sh.getSplitted(rowIdx);
|
|
28
28
|
wu.count(0).take(seqSS.length).filter((posIdx) => !seqSS.isGap(posIdx)).forEach((posIdx) => {
|
|
29
|
-
|
|
29
|
+
let cm = seqSS.getCanonical(posIdx);
|
|
30
|
+
if (biotype === HelmTypes.NUCLEOTIDE && sh.isHelm() && cm[1] === '(' && cm[cm.length - 2] === ')')
|
|
31
|
+
cm = cm.substring(2, cm.length - 2);
|
|
30
32
|
const count = counts[cm] || 0;
|
|
31
33
|
counts[cm] = count + 1;
|
|
32
34
|
});
|