@datagrok-libraries/bio 5.36.1 → 5.37.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/CHANGELOG.md
CHANGED
|
@@ -1,10 +1,17 @@
|
|
|
1
1
|
# bio changelog
|
|
2
2
|
|
|
3
|
-
## 5.37.0 (
|
|
3
|
+
## 5.37.0 (2023-08-30)
|
|
4
|
+
|
|
5
|
+
### Features
|
|
6
|
+
|
|
7
|
+
* Added `calculateSimilarity` and `calculateIdentity` functions.
|
|
8
|
+
* Added `calculateScores` function.
|
|
9
|
+
|
|
10
|
+
## 5.36.1 (2023-08-18)
|
|
4
11
|
|
|
5
12
|
### Bug fixes
|
|
6
13
|
|
|
7
|
-
* Restore utils getSplitter method, it is required for MacromoleculeDifference column
|
|
14
|
+
* Restore utils `getSplitter` method, it is required for MacromoleculeDifference column.
|
|
8
15
|
|
|
9
16
|
## 5.36.0 (2023-08-10)
|
|
10
17
|
|
package/package.json
CHANGED
|
@@ -0,0 +1,24 @@
|
|
|
1
|
+
import { ISeqSplitted } from '../../utils/macromolecule/types';
|
|
2
|
+
import * as DG from 'datagrok-api/dg';
|
|
3
|
+
/** Sequence scoring methods accepted by `calculateScores`. The member values are the strings
 * "identity" and "similarity". */
export declare enum SCORE {
|
|
4
|
+
IDENTITY = "identity",
|
|
5
|
+
SIMILARITY = "similarity"
|
|
6
|
+
}
|
|
7
|
+
/** Performs transformations and sequence scoring: splits the aligned sequences of `col` into
 * position columns, splits `ref` with the splitter selected by the units and separator tags of
 * `col`, scores every row with the chosen method and adds the scores column to `table` under a
 * name that is not yet used there.
 * @param {DG.DataFrame} table Table to attach results to.
 * @param {DG.Column<string>} col Sequences column to score. Must have Macromolecule semantic type.
 * @param {string} ref Reference sequence to score against.
 * @param {SCORE} scoring Scoring method.
 * @returns {Promise<DG.Column<number>>} Promise of the scores column that was added to `table`.
 * @throws {Error} If `scoring` is neither `SCORE.IDENTITY` nor `SCORE.SIMILARITY`. */
|
|
13
|
+
export declare function calculateScores(table: DG.DataFrame, col: DG.Column<string>, ref: string, scoring: SCORE): Promise<DG.Column<number>>;
|
|
14
|
+
/** Calculates identity scores as fraction of matching monomers on the same position: for every
 * row, the number of reference positions whose monomer equals the monomer stored in the position
 * column with the same index, divided by the length of the reference.
 * @param {ISeqSplitted} reference Splitted reference sequence.
 * @param {DG.DataFrame} positionsDf Table which only contains position columns with semantic type Monomer.
 * @returns {DG.Column<number>} Float scores column named 'Identity', one value per row of `positionsDf`. */
|
|
18
|
+
export declare function calculateIdentity(reference: ISeqSplitted, positionsDf: DG.DataFrame): DG.Column<number>;
|
|
19
|
+
/** Calculates similarity scores as sum of monomer fingerprint similarities on the same position.
 * The computation is delegated to `sequenceChemSimilarity`, which receives all columns of
 * `positionsDf` and the reference.
 * @param {ISeqSplitted} reference Splitted reference sequence.
 * @param {DG.DataFrame} positionsDf Table which only contains position columns with semantic type Monomer.
 * @returns {Promise<DG.Column<number>>} Promise of the scores column. */
|
|
23
|
+
export declare function calculateSimilarity(reference: ISeqSplitted, positionsDf: DG.DataFrame): Promise<DG.Column<number>>;
|
|
24
|
+
//# sourceMappingURL=scoring.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"scoring.d.ts","sourceRoot":"","sources":["scoring.ts"],"names":[],"mappings":"AAEA,OAAO,EAAC,YAAY,EAAC,MAAM,iCAAiC,CAAC;AAC7D,OAAO,KAAK,EAAE,MAAM,iBAAiB,CAAC;AAKtC,oBAAY,KAAK;IACf,QAAQ,aAAa;IACrB,UAAU,eAAe;CAC1B;AAED;;;;;iDAKiD;AACjD,wBAAsB,eAAe,CAAC,KAAK,EAAE,EAAE,CAAC,SAAS,EAAE,GAAG,EAAE,EAAE,CAAC,MAAM,CAAC,MAAM,CAAC,EAAE,GAAG,EAAE,MAAM,EAAE,OAAO,EAAE,KAAK,GAAG,OAAO,CAAC,EAAE,CAAC,MAAM,CAAC,MAAM,CAAC,CAAC,CAY1I;AAED;;;iDAGiD;AACjD,wBAAgB,iBAAiB,CAAC,SAAS,EAAE,YAAY,EAAE,WAAW,EAAE,EAAE,CAAC,SAAS,GAAG,EAAE,CAAC,MAAM,CAAC,MAAM,CAAC,CA0BvG;AAED;;;iDAGiD;AACjD,wBAAsB,mBAAmB,CAAC,SAAS,EAAE,YAAY,EAAE,WAAW,EAAE,EAAE,CAAC,SAAS,GAAG,OAAO,CAAC,EAAE,CAAC,MAAM,CAAC,MAAM,CAAC,CAAC,CAIxH"}
|
|
@@ -0,0 +1,65 @@
|
|
|
1
|
+
import { sequenceChemSimilarity } from '../../monomer-works/monomer-utils';
|
|
2
|
+
import * as DG from 'datagrok-api/dg';
|
|
3
|
+
import { splitAlignedSequences } from '../splitter';
|
|
4
|
+
import { getSplitter } from './utils';
|
|
5
|
+
/** Sequence scoring methods; each key maps to the string value used to select the method. */
export var SCORE = {
  IDENTITY: "identity",
  SIMILARITY: "similarity",
};
|
|
10
|
+
/** Performs transformations and sequence scoring.
 *
 * Splits the aligned sequences of `col` into position columns, splits `ref` with the splitter
 * selected by the units and separator tags of `col`, scores every row with the chosen method and
 * adds the resulting column to `table` under a name that is not yet used there.
 * @param {DG.DataFrame} table Table to attach results to.
 * @param {DG.Column<string>} col Sequences column to score. Must have Macromolecule semantic type.
 * @param {string} ref Reference sequence to score against.
 * @param {SCORE} scoring Scoring method.
 * @returns {Promise<DG.Column<number>>} Promise of the scores column that was added to `table`.
 * @throws {Error} If `scoring` is neither `SCORE.IDENTITY` nor `SCORE.SIMILARITY`. */
export async function calculateScores(table, col, ref, scoring) {
  // Reject an unsupported method before any splitting or scoring work is done.
  if (scoring !== SCORE.IDENTITY && scoring !== SCORE.SIMILARITY)
    throw new Error(`In bio library: Unknown sequence scoring method: ${scoring}`);

  const splitSeqDf = splitAlignedSequences(col);
  const splitter = getSplitter(col.getTag(DG.TAGS.UNITS), col.getTag("separator" /* bioTAGS.separator */));
  const refSplitted = splitter(ref);

  const scoresCol = scoring === SCORE.IDENTITY ?
    calculateIdentity(refSplitted, splitSeqDf) :
    await calculateSimilarity(refSplitted, splitSeqDf);

  // The scoring functions use fixed column names, so make the name unique within the target table.
  scoresCol.name = table.columns.getUnusedName(scoresCol.name);
  table.columns.add(scoresCol);
  return scoresCol;
}
|
|
28
|
+
/** Calculates identity scores as fraction of matching monomers on the same position.
 *
 * For every row the score is the number of reference positions whose monomer equals the monomer in
 * the position column with the same index, divided by the reference length. Reference positions
 * that have no corresponding column in `positionsDf` count as mismatches, and an empty reference
 * yields a score of 0 for every row.
 * @param {ISeqSplitted} reference Splitted reference sequence.
 * @param {DG.DataFrame} positionsDf Table which only contains position columns with semantic type Monomer.
 * @returns {DG.Column<number>} Float scores column named 'Identity', one value per row of `positionsDf`. */
export function calculateIdentity(reference, positionsDf) {
  const refLength = reference.length;
  const rowCount = positionsDf.rowCount;
  // Only positions present in both the reference and the table can be compared; indexing a
  // position column that does not exist would throw.
  const comparedPositions = Math.min(refLength, positionsDf.columns.length);

  const positionCols = new Array(comparedPositions);
  const refCategoryIndexes = new Array(comparedPositions);
  for (let posIdx = 0; posIdx < comparedPositions; ++posIdx) {
    const posCol = positionsDf.columns.byIndex(posIdx);
    positionCols[posIdx] = posCol.getRawData();
    // Resolve the reference monomer to this column's category index once, so rows are compared
    // by integer index. The result is -1 when the monomer does not occur in the column at all.
    refCategoryIndexes[posIdx] = posCol.categories.indexOf(reference[posIdx] ?? '');
  }

  const identityScoresCol = DG.Column.float('Identity', rowCount);
  const identityScoresData = identityScoresCol.getRawData();
  for (let rowIndex = 0; rowIndex < rowCount; ++rowIndex) {
    let matches = 0;
    for (let posIdx = 0; posIdx < comparedPositions; ++posIdx) {
      if (positionCols[posIdx][rowIndex] === refCategoryIndexes[posIdx])
        ++matches;
    }
    // Guard against 0 / 0 producing NaN for an empty reference.
    identityScoresData[rowIndex] = refLength > 0 ? matches / refLength : 0;
  }
  return identityScoresCol;
}
|
|
56
|
+
/** Calculates similarity scores as sum of monomer fingerprint similarities on the same position.
 * All columns of `positionsDf` are handed, together with the reference, to `sequenceChemSimilarity`.
 * @param {ISeqSplitted} reference Splitted reference sequence.
 * @param {DG.DataFrame} positionsDf Table which only contains position columns with semantic type Monomer.
 * @returns {Promise<DG.Column<number>>} Promise of the scores column. */
export async function calculateSimilarity(reference, positionsDf) {
  return await sequenceChemSimilarity(positionsDf.columns.toList(), reference);
}
|
|
65
|
+
//# sourceMappingURL=scoring.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"scoring.js","sourceRoot":"","sources":["scoring.ts"],"names":[],"mappings":"AACA,OAAO,EAAC,sBAAsB,EAAC,MAAM,mCAAmC,CAAC;AAEzE,OAAO,KAAK,EAAE,MAAM,iBAAiB,CAAC;AACtC,OAAO,EAAE,qBAAqB,EAAE,MAAM,aAAa,CAAC;AACpD,OAAO,EAAE,WAAW,EAAE,MAAM,SAAS,CAAC;AAGtC,MAAM,CAAN,IAAY,KAGX;AAHD,WAAY,KAAK;IACf,8BAAqB,CAAA;IACrB,kCAAyB,CAAA;AAC3B,CAAC,EAHW,KAAK,KAAL,KAAK,QAGhB;AAED;;;;;iDAKiD;AACjD,MAAM,CAAC,KAAK,UAAU,eAAe,CAAC,KAAmB,EAAE,GAAsB,EAAE,GAAW,EAAE,OAAc;IAC5G,MAAM,UAAU,GAAG,qBAAqB,CAAC,GAAG,CAAC,CAAC;IAC9C,MAAM,QAAQ,GAAG,WAAW,CAAC,GAAG,CAAC,MAAM,CAAC,EAAE,CAAC,IAAI,CAAC,KAAK,CAAC,EAAE,GAAG,CAAC,MAAM,qCAAmB,CAAC,CAAC;IACvF,MAAM,WAAW,GAAG,QAAQ,CAAC,GAAG,CAAC,CAAC;IAElC,MAAM,SAAS,GAAG,OAAO,KAAK,KAAK,CAAC,QAAQ,CAAC,CAAC,CAAC,iBAAiB,CAAC,WAAW,EAAE,UAAU,CAAC,CAAC,CAAC;QACzF,OAAO,KAAK,KAAK,CAAC,UAAU,CAAC,CAAC,CAAC,MAAM,mBAAmB,CAAC,WAAW,EAAE,UAAU,CAAC,CAAC,CAAC,CAAC,IAAI,CAAC;IAC3F,IAAI,SAAS,KAAK,IAAI;QACpB,MAAM,IAAI,KAAK,CAAC,mDAAmD,OAAO,EAAE,CAAC,CAAC;IAChF,SAAS,CAAC,IAAI,GAAG,KAAK,CAAC,OAAO,CAAC,aAAa,CAAC,SAAS,CAAC,IAAI,CAAC,CAAC;IAC7D,KAAK,CAAC,OAAO,CAAC,GAAG,CAAC,SAAS,CAAC,CAAC;IAC7B,OAAO,SAAS,CAAC;AACnB,CAAC;AAED;;;iDAGiD;AACjD,MAAM,UAAU,iBAAiB,CAAC,SAAuB,EAAE,WAAyB;IAClF,MAAM,YAAY,GAAG,WAAW,CAAC,OAAO,CAAC,MAAM,CAAC;IAChD,MAAM,YAAY,GAAkB,IAAI,KAAK,CAAC,YAAY,CAAC,CAAC;IAC5D,MAAM,uBAAuB,GAAa,IAAI,KAAK,CAAC,YAAY,CAAC,CAAC;IAClE,MAAM,uBAAuB,GAAa,IAAI,KAAK,CAAC,YAAY,CAAC,CAAC;IAElE,KAAK,IAAI,MAAM,GAAG,CAAC,EAAE,MAAM,GAAG,YAAY,EAAE,EAAE,MAAM,EAAE;QACpD,MAAM,MAAM,GAAG,WAAW,CAAC,OAAO,CAAC,OAAO,CAAC,MAAM,CAAC,CAAC;QACnD,YAAY,CAAC,MAAM,CAAC,GAAG,MAAM,CAAC,UAAU,EAAiB,CAAC;QAC1D,uBAAuB,CAAC,MAAM,CAAC,GAAG,MAAM,CAAC,UAAU,CAAC,OAAO,CAAC,EAAE,CAAC,CAAC;QAChE,uBAAuB,CAAC,MAAM,CAAC,GAAG,MAAM,CAAC,UAAU,CAAC,OAAO,CAAC,SAAS,CAAC,MAAM,CAAC,IAAI,EAAE,CAAC,CAAC;KACtF;IAED,MAAM,iBAAiB,GAAG,EAAE,CAAC,MAAM,CAAC,KAAK,CAAC,UAAU,EAAE,WAAW,CAAC,QAAQ,CAAC,CAAC;IAC5E,MAAM,kBAAkB,GAAG,iBAAiB,CAAC,UAAU,EAAE,CAAC;IAC1D,KAAK,IAAI,QAAQ,GAAG,CAAC,EAAE,QAAQ,GAAG,WAAW,CAAC,QAAQ,EAAE,EAAE,QAAQ,EAAE;QA
ClE,kBAAkB,CAAC,QAAQ,CAAC,GAAG,CAAC,CAAC;QACjC,KAAK,IAAI,MAAM,GAAG,CAAC,EAAE,MAAM,GAAG,SAAS,CAAC,MAAM,EAAE,EAAE,MAAM,EAAE;YACxD,MAAM,aAAa,GAAG,YAAY,CAAC,MAAM,CAAC,CAAC,QAAQ,CAAC,CAAC;YACrD,IAAI,aAAa,KAAK,uBAAuB,CAAC,MAAM,CAAC;gBACnD,EAAE,kBAAkB,CAAC,QAAQ,CAAC,CAAC;SAClC;QACD,kBAAkB,CAAC,QAAQ,CAAC,IAAI,SAAS,CAAC,MAAM,CAAC;KAClD;IAED,OAAO,iBAAiB,CAAC;AAC3B,CAAC;AAED;;;iDAGiD;AACjD,MAAM,CAAC,KAAK,UAAU,mBAAmB,CAAC,SAAuB,EAAE,WAAyB;IAC1F,MAAM,cAAc,GAAG,WAAW,CAAC,OAAO,CAAC,MAAM,EAAyB,CAAC;IAC3E,MAAM,SAAS,GAAG,MAAM,sBAAsB,CAAC,cAAc,EAAE,SAAS,CAAC,CAAC;IAC1E,OAAO,SAAS,CAAC;AACnB,CAAC"}
|