@datagrok-libraries/bio 2.8.5 → 2.8.8
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +7 -7
- package/src/utils/fasta-handler.d.ts +28 -0
- package/src/utils/fasta-handler.d.ts.map +1 -0
- package/src/utils/fasta-handler.js +59 -0
- package/src/utils/notation-converter.d.ts +2 -36
- package/src/utils/notation-converter.d.ts.map +1 -1
- package/src/utils/notation-converter.js +10 -77
- package/src/utils/units-handler.d.ts +74 -0
- package/src/utils/units-handler.d.ts.map +1 -0
- package/src/utils/units-handler.js +141 -0
package/package.json
CHANGED
|
@@ -5,7 +5,7 @@
|
|
|
5
5
|
},
|
|
6
6
|
"beta": true,
|
|
7
7
|
"friendlyName": "Datagrok bio library",
|
|
8
|
-
"version": "2.8.5",
|
|
8
|
+
"version": "2.8.8",
|
|
9
9
|
"description": "",
|
|
10
10
|
"dependencies": {
|
|
11
11
|
"@datagrok-libraries/utils": "^1.0.0",
|
|
@@ -29,14 +29,14 @@
|
|
|
29
29
|
"link-api": "npm link datagrok-api",
|
|
30
30
|
"link-utils": "npm link @datagrok-libraries/utils",
|
|
31
31
|
"link-all": "npm link datagrok-api @datagrok-libraries/utils",
|
|
32
|
-
"debug-bio": "grok publish
|
|
33
|
-
"release-bio": "grok publish --
|
|
32
|
+
"debug-bio": "grok publish",
|
|
33
|
+
"release-bio": "grok publish --release",
|
|
34
34
|
"build-bio": "tsc",
|
|
35
35
|
"build": "tsc",
|
|
36
|
-
"debug-bio-public": "grok publish public
|
|
37
|
-
"release-bio-public": "grok publish public --
|
|
38
|
-
"debug-bio-local": "grok publish local
|
|
39
|
-
"release-bio-local": "grok publish local --
|
|
36
|
+
"debug-bio-public": "grok publish public",
|
|
37
|
+
"release-bio-public": "grok publish public --release",
|
|
38
|
+
"debug-bio-local": "grok publish local",
|
|
39
|
+
"release-bio-local": "grok publish local --release",
|
|
40
40
|
"lint": "eslint \"./src/**/*.ts\"",
|
|
41
41
|
"lint-fix": "eslint \"./src/**/*.ts\" --fix"
|
|
42
42
|
}
|
|
@@ -0,0 +1,28 @@
|
|
|
1
|
+
import * as DG from 'datagrok-api/dg';
|
|
2
|
+
/** Class for parsing FASTA files */
|
|
3
|
+
export declare class FastaFileHandler {
|
|
4
|
+
private _fileContent;
|
|
5
|
+
private _descriptionsArray;
|
|
6
|
+
private _sequencesArray;
|
|
7
|
+
get descriptionsArray(): string[];
|
|
8
|
+
get sequencesArray(): string[];
|
|
9
|
+
/**
|
|
10
|
+
* Helper method to parse a macromolecule from a FASTA file (string)
|
|
11
|
+
*
|
|
12
|
+
* @param {number} startOfSequence index of macromolecule substring beginning
|
|
13
|
+
* @param {number} endOfSequence index of macromolecule substring end
|
|
14
|
+
|
|
15
|
+
* @return {string} parsed macromolecule
|
|
16
|
+
*/
|
|
17
|
+
private parseMacromolecule;
|
|
18
|
+
/** Parse descriptions and sequences from a FASTA string */
|
|
19
|
+
private parseColumns;
|
|
20
|
+
/**
|
|
21
|
+
* File-handler method for import as FASTA
|
|
22
|
+
*
|
|
23
|
+
* @return {DG.DataFrame[]} dataframe with parsed FASTA content
|
|
24
|
+
*/
|
|
25
|
+
importFasta(): DG.DataFrame[];
|
|
26
|
+
constructor(fileContent: string);
|
|
27
|
+
}
|
|
28
|
+
//# sourceMappingURL=fasta-handler.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"fasta-handler.d.ts","sourceRoot":"","sources":["fasta-handler.ts"],"names":[],"mappings":"AAGA,OAAO,KAAK,EAAE,MAAM,iBAAiB,CAAC;AAItC,oCAAoC;AACpC,qBAAa,gBAAgB;IAC3B,OAAO,CAAC,YAAY,CAAS;IAC7B,OAAO,CAAC,kBAAkB,CAAgB;IAC1C,OAAO,CAAC,eAAe,CAAgB;IAGvC,IAAW,iBAAiB,IAAI,MAAM,EAAE,CAAoC;IAE5E,IAAW,cAAc,IAAI,MAAM,EAAE,CAAiC;IAEtE;;;;;;;OAOG;IACH,OAAO,CAAC,kBAAkB;IAS1B,2DAA2D;IAC3D,OAAO,CAAC,YAAY;IAiBpB;;;;OAIG;IACI,WAAW,IAAI,EAAE,CAAC,SAAS,EAAG;gBAczB,WAAW,EAAE,MAAM;CAIhC"}
|
|
@@ -0,0 +1,59 @@
|
|
|
1
|
+
import * as DG from 'datagrok-api/dg';
|
|
2
|
+
import { UnitsHandler } from './units-handler';
|
|
3
|
+
/** Class for parsing FASTA files */
|
|
4
|
+
export class FastaFileHandler {
|
|
5
|
+
constructor(fileContent) {
|
|
6
|
+
this._descriptionsArray = []; // parsed FASTA descriptions
|
|
7
|
+
this._sequencesArray = []; // parsed FASTA sequeces
|
|
8
|
+
this._fileContent = fileContent;
|
|
9
|
+
this.parseColumns();
|
|
10
|
+
}
|
|
11
|
+
// private _columnsParsed: boolean = false;
|
|
12
|
+
get descriptionsArray() { return this._descriptionsArray; }
|
|
13
|
+
get sequencesArray() { return this._sequencesArray; }
|
|
14
|
+
/**
|
|
15
|
+
* Helper method to parse a macromolecule from a FASTA file (string)
|
|
16
|
+
*
|
|
17
|
+
* @param {number} startOfSequence index of macromolecule substring beginning
|
|
18
|
+
* @param {number} endOfSequence index of macromolecule substring end
|
|
19
|
+
|
|
20
|
+
* @return {string} parsed macromolecule
|
|
21
|
+
*/
|
|
22
|
+
parseMacromolecule(startOfSequence, endOfSequence) {
|
|
23
|
+
const seq = this._fileContent.slice(startOfSequence, endOfSequence);
|
|
24
|
+
const seqArray = seq.split(/\s/);
|
|
25
|
+
return seqArray.join('');
|
|
26
|
+
}
|
|
27
|
+
/** Parse descriptions and sequences from a FASTA string */
|
|
28
|
+
parseColumns() {
|
|
29
|
+
const regex = /^>(.*)$/gm; // match 'description' lines starting with >
|
|
30
|
+
let startOfSequence = 0;
|
|
31
|
+
let match; // match.index is the beginning of the matched line
|
|
32
|
+
while (match = regex.exec(this._fileContent)) {
|
|
33
|
+
const description = this._fileContent.substring(match.index + 1, regex.lastIndex);
|
|
34
|
+
this._descriptionsArray.push(description);
|
|
35
|
+
if (startOfSequence !== 0)
|
|
36
|
+
this._sequencesArray.push(this.parseMacromolecule(startOfSequence, match.index));
|
|
37
|
+
startOfSequence = regex.lastIndex + 1;
|
|
38
|
+
}
|
|
39
|
+
this._sequencesArray.push(this.parseMacromolecule(startOfSequence, -1));
|
|
40
|
+
// this._columnsParsed = true;
|
|
41
|
+
}
|
|
42
|
+
/**
|
|
43
|
+
* File-handler method for import as FASTA
|
|
44
|
+
*
|
|
45
|
+
* @return {DG.DataFrame[]} dataframe with parsed FASTA content
|
|
46
|
+
*/
|
|
47
|
+
importFasta() {
|
|
48
|
+
const descriptionsArrayCol = DG.Column.fromStrings('description', this.descriptionsArray);
|
|
49
|
+
const sequenceCol = DG.Column.fromStrings('sequence', this.sequencesArray);
|
|
50
|
+
sequenceCol.semType = DG.SEMTYPE.MACROMOLECULE;
|
|
51
|
+
// here should go the code from units handler
|
|
52
|
+
UnitsHandler.setUnitsToFastaColumn(sequenceCol);
|
|
53
|
+
return [DG.DataFrame.fromColumns([
|
|
54
|
+
descriptionsArrayCol,
|
|
55
|
+
sequenceCol,
|
|
56
|
+
])];
|
|
57
|
+
}
|
|
58
|
+
}
|
|
59
|
+
//# sourceMappingURL=data:application/json;base64,eyJ2ZXJzaW9uIjozLCJmaWxlIjoiZmFzdGEtaGFuZGxlci5qcyIsInNvdXJjZVJvb3QiOiIiLCJzb3VyY2VzIjpbImZhc3RhLWhhbmRsZXIudHMiXSwibmFtZXMiOltdLCJtYXBwaW5ncyI6IkFBR0EsT0FBTyxLQUFLLEVBQUUsTUFBTSxpQkFBaUIsQ0FBQztBQUV0QyxPQUFPLEVBQUMsWUFBWSxFQUFDLE1BQU0saUJBQWlCLENBQUM7QUFFN0Msb0NBQW9DO0FBQ3BDLE1BQU0sT0FBTyxnQkFBZ0I7SUFnRTNCLFlBQVksV0FBbUI7UUE5RHZCLHVCQUFrQixHQUFhLEVBQUUsQ0FBQyxDQUFDLDRCQUE0QjtRQUMvRCxvQkFBZSxHQUFhLEVBQUUsQ0FBQyxDQUFDLHdCQUF3QjtRQThEOUQsSUFBSSxDQUFDLFlBQVksR0FBRyxXQUFXLENBQUM7UUFDaEMsSUFBSSxDQUFDLFlBQVksRUFBRSxDQUFDO0lBQ3RCLENBQUM7SUEvREQsMkNBQTJDO0lBRTNDLElBQVcsaUJBQWlCLEtBQWUsT0FBTyxJQUFJLENBQUMsa0JBQWtCLENBQUMsQ0FBQyxDQUFDO0lBRTVFLElBQVcsY0FBYyxLQUFlLE9BQU8sSUFBSSxDQUFDLGVBQWUsQ0FBQyxDQUFDLENBQUM7SUFFdEU7Ozs7Ozs7T0FPRztJQUNLLGtCQUFrQixDQUN4QixlQUF1QixFQUN2QixhQUFxQjtRQUVyQixNQUFNLEdBQUcsR0FBRyxJQUFJLENBQUMsWUFBWSxDQUFDLEtBQUssQ0FBQyxlQUFlLEVBQUUsYUFBYSxDQUFDLENBQUM7UUFDcEUsTUFBTSxRQUFRLEdBQUcsR0FBRyxDQUFDLEtBQUssQ0FBQyxJQUFJLENBQUMsQ0FBQztRQUNqQyxPQUFPLFFBQVEsQ0FBQyxJQUFJLENBQUMsRUFBRSxDQUFDLENBQUM7SUFDM0IsQ0FBQztJQUVELDJEQUEyRDtJQUNuRCxZQUFZO1FBQ2xCLE1BQU0sS0FBSyxHQUFHLFdBQVcsQ0FBQyxDQUFDLDRDQUE0QztRQUV2RSxJQUFJLGVBQWUsR0FBRyxDQUFDLENBQUM7UUFDeEIsSUFBSSxLQUFLLENBQUMsQ0FBQyxtREFBbUQ7UUFDOUQsT0FBTyxLQUFLLEdBQUcsS0FBSyxDQUFDLElBQUksQ0FBQyxJQUFJLENBQUMsWUFBWSxDQUFDLEVBQUU7WUFDNUMsTUFBTSxXQUFXLEdBQUcsSUFBSSxDQUFDLFlBQVksQ0FBQyxTQUFTLENBQUMsS0FBSyxDQUFDLEtBQUssR0FBRyxDQUFDLEVBQUUsS0FBSyxDQUFDLFNBQVMsQ0FBQyxDQUFDO1lBQ2xGLElBQUksQ0FBQyxrQkFBa0IsQ0FBQyxJQUFJLENBQUMsV0FBVyxDQUFDLENBQUM7WUFDMUMsSUFBSSxlQUFlLEtBQUssQ0FBQztnQkFDdkIsSUFBSSxDQUFDLGVBQWUsQ0FBQyxJQUFJLENBQUMsSUFBSSxDQUFDLGtCQUFrQixDQUFDLGVBQWUsRUFBRSxLQUFLLENBQUMsS0FBSyxDQUFDLENBQUMsQ0FBQztZQUNuRixlQUFlLEdBQUcsS0FBSyxDQUFDLFNBQVMsR0FBRyxDQUFDLENBQUM7U0FDdkM7UUFDRCxJQUFJLENBQUMsZUFBZSxDQUFDLElBQUksQ0FBQyxJQUFJLENBQUMsa0JBQWtCLENBQUMsZUFBZSxFQUFFLENBQUMsQ0FBQyxDQUFDLENBQUMsQ0FBQztRQUV4RSw4QkFBOEI7SUFDaEMsQ0FBQztJQUVEOzs7O09BSUc7SUFDSSxXQUFXO1FBQ2hCLE1BQU0sb0JBQW9CLE
dBQUcsRUFBRSxDQUFDLE1BQU0sQ0FBQyxXQUFXLENBQUMsYUFBYSxFQUFFLElBQUksQ0FBQyxpQkFBaUIsQ0FBQyxDQUFDO1FBQzFGLE1BQU0sV0FBVyxHQUFHLEVBQUUsQ0FBQyxNQUFNLENBQUMsV0FBVyxDQUFDLFVBQVUsRUFBRSxJQUFJLENBQUMsY0FBYyxDQUFDLENBQUM7UUFDM0UsV0FBVyxDQUFDLE9BQU8sR0FBRyxFQUFFLENBQUMsT0FBTyxDQUFDLGFBQWEsQ0FBQztRQUUvQyw2Q0FBNkM7UUFDN0MsWUFBWSxDQUFDLHFCQUFxQixDQUFDLFdBQVcsQ0FBQyxDQUFDO1FBRWhELE9BQU8sQ0FBQyxFQUFFLENBQUMsU0FBUyxDQUFDLFdBQVcsQ0FBQztnQkFDL0Isb0JBQW9CO2dCQUNwQixXQUFXO2FBQ1osQ0FBQyxDQUFDLENBQUM7SUFDTixDQUFDO0NBTUYiLCJzb3VyY2VzQ29udGVudCI6WyIvKiBEbyBub3QgY2hhbmdlIHRoZXNlIGltcG9ydCBsaW5lcyB0byBtYXRjaCBleHRlcm5hbCBtb2R1bGVzIGluIHdlYnBhY2sgY29uZmlndXJhdGlvbiAqL1xuaW1wb3J0ICogYXMgZ3JvayBmcm9tICdkYXRhZ3Jvay1hcGkvZ3Jvayc7XG5pbXBvcnQgKiBhcyB1aSBmcm9tICdkYXRhZ3Jvay1hcGkvdWknO1xuaW1wb3J0ICogYXMgREcgZnJvbSAnZGF0YWdyb2stYXBpL2RnJztcblxuaW1wb3J0IHtVbml0c0hhbmRsZXJ9IGZyb20gJy4vdW5pdHMtaGFuZGxlcic7XG5cbi8qKiBDbGFzcyBmb3IgcGFyc2luZyBGQVNUQSBmaWxlcyAqL1xuZXhwb3J0IGNsYXNzIEZhc3RhRmlsZUhhbmRsZXIge1xuICBwcml2YXRlIF9maWxlQ29udGVudDogc3RyaW5nO1xuICBwcml2YXRlIF9kZXNjcmlwdGlvbnNBcnJheTogc3RyaW5nW10gPSBbXTsgLy8gcGFyc2VkIEZBU1RBIGRlc2NyaXB0aW9uc1xuICBwcml2YXRlIF9zZXF1ZW5jZXNBcnJheTogc3RyaW5nW10gPSBbXTsgLy8gcGFyc2VkIEZBU1RBIHNlcXVlY2VzXG4gIC8vIHByaXZhdGUgX2NvbHVtbnNQYXJzZWQ6IGJvb2xlYW4gPSBmYWxzZTtcblxuICBwdWJsaWMgZ2V0IGRlc2NyaXB0aW9uc0FycmF5KCk6IHN0cmluZ1tdIHsgcmV0dXJuIHRoaXMuX2Rlc2NyaXB0aW9uc0FycmF5OyB9XG5cbiAgcHVibGljIGdldCBzZXF1ZW5jZXNBcnJheSgpOiBzdHJpbmdbXSB7IHJldHVybiB0aGlzLl9zZXF1ZW5jZXNBcnJheTsgfVxuXG4gIC8qKlxuICAgKiBIZWxwZXIgbWV0aG9kIHRvIHBhcnNlIGEgbWFjcm9tb2xlY3VsZSBmcm9tIGEgRkFTVEEgZmlsZSAoc3RyaW5nKVxuICAgKlxuICAgKiBAcGFyYW0ge251bWJlcn0gc3RhcnRPZlNlcXVlbmNlICBpbmRleCBvZiBtYWNyb21vbGVjdWxlIHN1YnN0cmluZyBiZWdpbm5pbmdcbiAgICogQHBhcmFtIHtudW1iZXJ9IGVuZE9mU2VxdWVuY2UgIGluZGV4IG9mIG1hY3JvbW9sZWN1bGUgc3Vic3RyaW5nIGVuZFxuXG4gICAqIEByZXR1cm4ge3N0cmluZ30gcGFyc2VkIG1hY3JvbW9sZWN1bGVcbiAgICovXG4gIHByaXZhdGUgcGFyc2VNYWNyb21vbGVjdWxlKFxuICAgIHN0YXJ0T2ZTZXF1ZW5jZTogbnVtYmVyLFxuICAgIGVuZE9mU2VxdWVuY2U6IG51bWJlclxuIC
ApOiBzdHJpbmcge1xuICAgIGNvbnN0IHNlcSA9IHRoaXMuX2ZpbGVDb250ZW50LnNsaWNlKHN0YXJ0T2ZTZXF1ZW5jZSwgZW5kT2ZTZXF1ZW5jZSk7XG4gICAgY29uc3Qgc2VxQXJyYXkgPSBzZXEuc3BsaXQoL1xccy8pO1xuICAgIHJldHVybiBzZXFBcnJheS5qb2luKCcnKTtcbiAgfVxuXG4gIC8qKiBQYXJzZSBkZXNjcmlwdGlvbnMgYW5kIHNlcXVlbmNlcyBmcm9tIGEgRkFTVEEgc3RyaW5nICovXG4gIHByaXZhdGUgcGFyc2VDb2x1bW5zKCkge1xuICAgIGNvbnN0IHJlZ2V4ID0gL14+KC4qKSQvZ207IC8vIG1hdGNoICdkZXNjcmlwdGlvbicgbGluZXMgc3RhcnRpbmcgd2l0aCA+XG5cbiAgICBsZXQgc3RhcnRPZlNlcXVlbmNlID0gMDtcbiAgICBsZXQgbWF0Y2g7IC8vIG1hdGNoLmluZGV4IGlzIHRoZSBiZWdpbm5pbmcgb2YgdGhlIG1hdGNoZWQgbGluZVxuICAgIHdoaWxlIChtYXRjaCA9IHJlZ2V4LmV4ZWModGhpcy5fZmlsZUNvbnRlbnQpKSB7XG4gICAgICBjb25zdCBkZXNjcmlwdGlvbiA9IHRoaXMuX2ZpbGVDb250ZW50LnN1YnN0cmluZyhtYXRjaC5pbmRleCArIDEsIHJlZ2V4Lmxhc3RJbmRleCk7XG4gICAgICB0aGlzLl9kZXNjcmlwdGlvbnNBcnJheS5wdXNoKGRlc2NyaXB0aW9uKTtcbiAgICAgIGlmIChzdGFydE9mU2VxdWVuY2UgIT09IDApXG4gICAgICAgIHRoaXMuX3NlcXVlbmNlc0FycmF5LnB1c2godGhpcy5wYXJzZU1hY3JvbW9sZWN1bGUoc3RhcnRPZlNlcXVlbmNlLCBtYXRjaC5pbmRleCkpO1xuICAgICAgc3RhcnRPZlNlcXVlbmNlID0gcmVnZXgubGFzdEluZGV4ICsgMTtcbiAgICB9XG4gICAgdGhpcy5fc2VxdWVuY2VzQXJyYXkucHVzaCh0aGlzLnBhcnNlTWFjcm9tb2xlY3VsZShzdGFydE9mU2VxdWVuY2UsIC0xKSk7XG5cbiAgICAvLyB0aGlzLl9jb2x1bW5zUGFyc2VkID0gdHJ1ZTtcbiAgfVxuXG4gIC8qKlxuICAgKiBGaWxlLWhhbmRsZXIgbWV0aG9kIGZvciBpbXBvcnQgYXMgRkFTVEFcbiAgICpcbiAgICogQHJldHVybiB7REcuRGF0YUZyYW1lW119IGRhdGFmcmFtZSB3aXRoIHBhcnNlZCBGQVNUQSBjb250ZW50XG4gICAqL1xuICBwdWJsaWMgaW1wb3J0RmFzdGEoKTogREcuRGF0YUZyYW1lIFtdIHtcbiAgICBjb25zdCBkZXNjcmlwdGlvbnNBcnJheUNvbCA9IERHLkNvbHVtbi5mcm9tU3RyaW5ncygnZGVzY3JpcHRpb24nLCB0aGlzLmRlc2NyaXB0aW9uc0FycmF5KTtcbiAgICBjb25zdCBzZXF1ZW5jZUNvbCA9IERHLkNvbHVtbi5mcm9tU3RyaW5ncygnc2VxdWVuY2UnLCB0aGlzLnNlcXVlbmNlc0FycmF5KTtcbiAgICBzZXF1ZW5jZUNvbC5zZW1UeXBlID0gREcuU0VNVFlQRS5NQUNST01PTEVDVUxFO1xuXG4gICAgLy8gaGVyZSBzaG91bGQgZ28gdGhlIGNvZGUgZnJvbSB1bml0cyBoYW5kbGVyXG4gICAgVW5pdHNIYW5kbGVyLnNldFVuaXRzVG9GYXN0YUNvbHVtbihzZXF1ZW5jZUNvbCk7XG5cbiAgICByZXR1cm4gW0RHLkRhdGFGcmFtZS5mcm9tQ29sdW1ucyhbXG4gICAgICBkZXNjcmlwdGlvbn
NBcnJheUNvbCxcbiAgICAgIHNlcXVlbmNlQ29sLFxuICAgIF0pXTtcbiAgfVxuXG4gIGNvbnN0cnVjdG9yKGZpbGVDb250ZW50OiBzdHJpbmcpIHtcbiAgICB0aGlzLl9maWxlQ29udGVudCA9IGZpbGVDb250ZW50O1xuICAgIHRoaXMucGFyc2VDb2x1bW5zKCk7XG4gIH1cbn1cbiJdfQ==
|
|
@@ -1,47 +1,13 @@
|
|
|
1
1
|
import * as DG from 'datagrok-api/dg';
|
|
2
2
|
import { SplitterFunc } from '../viewers/web-logo';
|
|
3
|
-
|
|
4
|
-
export declare const enum NOTATION {
|
|
5
|
-
FASTA = "FASTA",
|
|
6
|
-
SEPARATOR = "SEPARATOR",
|
|
7
|
-
HELM = "HELM"
|
|
8
|
-
}
|
|
3
|
+
import { UnitsHandler, NOTATION } from './units-handler';
|
|
9
4
|
/** Class for handling conversion of notation systems in Macromolecule columns */
|
|
10
|
-
export declare class NotationConverter {
|
|
11
|
-
private readonly _sourceColumn;
|
|
12
|
-
private _sourceUnits;
|
|
13
|
-
private _sourceNotation;
|
|
14
|
-
private _defaultGapSymbol;
|
|
15
|
-
private _defaultGapSymbolsDict;
|
|
5
|
+
export declare class NotationConverter extends UnitsHandler {
|
|
16
6
|
private _splitter;
|
|
17
7
|
protected get splitter(): SplitterFunc;
|
|
18
|
-
private get sourceUnits();
|
|
19
|
-
private get sourceColumn();
|
|
20
|
-
get sourceNotation(): NOTATION;
|
|
21
|
-
get defaultGapSymbol(): string;
|
|
22
|
-
get separator(): string;
|
|
23
|
-
isFasta(): boolean;
|
|
24
|
-
isSeparator(): boolean;
|
|
25
|
-
isHelm(): boolean;
|
|
26
8
|
toFasta(targetNotation: NOTATION): boolean;
|
|
27
9
|
toSeparator(targetNotation: NOTATION): boolean;
|
|
28
10
|
toHelm(targetNotation: NOTATION): boolean;
|
|
29
|
-
isRna(): boolean;
|
|
30
|
-
isDna(): boolean;
|
|
31
|
-
isPeptide(): boolean;
|
|
32
|
-
/** Associate notation types with the corresponding units */
|
|
33
|
-
/**
|
|
34
|
-
* @return {NOTATION} Notation associated with the units type
|
|
35
|
-
*/
|
|
36
|
-
private getSourceNotation;
|
|
37
|
-
/**
|
|
38
|
-
* Create a new empty column of the specified notation type and the same
|
|
39
|
-
* length as sourceColumn
|
|
40
|
-
*
|
|
41
|
-
* @param {NOTATION} targetNotation
|
|
42
|
-
* @return {DG.Column}
|
|
43
|
-
*/
|
|
44
|
-
private getNewColumn;
|
|
45
11
|
/**
|
|
46
12
|
* Convert a Macromolecule column from FASTA to SEPARATOR notation
|
|
47
13
|
*
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"notation-converter.d.ts","sourceRoot":"","sources":["notation-converter.ts"],"names":[],"mappings":"
|
|
1
|
+
{"version":3,"file":"notation-converter.d.ts","sourceRoot":"","sources":["notation-converter.ts"],"names":[],"mappings":"AAGA,OAAO,KAAK,EAAE,MAAM,iBAAiB,CAAC;AACtC,OAAO,EAAC,YAAY,EAAU,MAAM,qBAAqB,CAAC;AAC1D,OAAO,EAAC,YAAY,EAAE,QAAQ,EAAC,MAAM,iBAAiB,CAAC;AAEvD,iFAAiF;AACjF,qBAAa,iBAAkB,SAAQ,YAAY;IACjD,OAAO,CAAC,SAAS,CAA6B;IAC9C,SAAS,KAAK,QAAQ,IAAI,YAAY,CAIrC;IAEM,OAAO,CAAC,cAAc,EAAE,QAAQ,GAAG,OAAO;IAE1C,WAAW,CAAC,cAAc,EAAE,QAAQ,GAAG,OAAO;IAE9C,MAAM,CAAC,cAAc,EAAE,QAAQ,GAAG,OAAO;IAEhD;;;;;;OAMG;IACH,OAAO,CAAC,uBAAuB;IAmB/B;;;;;OAKG;IACH,OAAO,CAAC,eAAe;IAiBvB,OAAO,CAAC,mBAAmB;IAkB3B;;;;;;;OAOG;IACI,mBAAmB,CACxB,aAAa,EAAE,MAAM,EACrB,eAAe,GAAE,MAAM,GAAG,IAAW,GACnC,MAAM;IAOV;;;;;OAKG;IACH,OAAO,CAAC,aAAa;IAerB;;;;;OAKG;IACH,OAAO,CAAC,uBAAuB;IA4B/B;;;;;;;;OAQG;IACH,OAAO,CAAC,WAAW;IAiDnB,OAAO,CAAC,sBAAsB;IAK9B;;;;;OAKG;IACI,OAAO,CAAC,WAAW,EAAE,QAAQ,EAAE,YAAY,GAAE,MAAM,GAAG,IAAW,GAAG,EAAE,CAAC,MAAM;gBAmBjE,GAAG,EAAE,EAAE,CAAC,MAAM;CAGlC"}
|
|
@@ -1,87 +1,20 @@
|
|
|
1
|
-
import * as DG from 'datagrok-api/dg';
|
|
2
1
|
import { WebLogo } from '../viewers/web-logo';
|
|
2
|
+
import { UnitsHandler } from './units-handler';
|
|
3
3
|
/** Class for handling conversion of notation systems in Macromolecule columns */
|
|
4
|
-
export class NotationConverter {
|
|
4
|
+
export class NotationConverter extends UnitsHandler {
|
|
5
5
|
constructor(col) {
|
|
6
|
-
|
|
7
|
-
HELM: '*',
|
|
8
|
-
SEPARATOR: '',
|
|
9
|
-
FASTA: '-',
|
|
10
|
-
};
|
|
6
|
+
super(col);
|
|
11
7
|
this._splitter = null;
|
|
12
|
-
this._sourceColumn = col;
|
|
13
|
-
const units = this._sourceColumn.tags[DG.TAGS.UNITS];
|
|
14
|
-
if (units !== null)
|
|
15
|
-
this._sourceUnits = units;
|
|
16
|
-
else
|
|
17
|
-
throw new Error('Units are not specified in column');
|
|
18
|
-
this._sourceNotation = this.getSourceNotation();
|
|
19
|
-
this._defaultGapSymbol = (this.isFasta()) ? this._defaultGapSymbolsDict.FASTA :
|
|
20
|
-
(this.isHelm()) ? this._defaultGapSymbolsDict.HELM :
|
|
21
|
-
this._defaultGapSymbolsDict.SEPARATOR;
|
|
22
8
|
}
|
|
23
9
|
get splitter() {
|
|
24
10
|
if (this._splitter === null)
|
|
25
|
-
this._splitter = WebLogo.getSplitterForColumn(this.
|
|
11
|
+
this._splitter = WebLogo.getSplitterForColumn(this.column);
|
|
26
12
|
return this._splitter;
|
|
27
13
|
}
|
|
28
14
|
;
|
|
29
|
-
get sourceUnits() { return this._sourceUnits; }
|
|
30
|
-
get sourceColumn() { return this._sourceColumn; }
|
|
31
|
-
get sourceNotation() { return this._sourceNotation; }
|
|
32
|
-
get defaultGapSymbol() { return this._defaultGapSymbol; }
|
|
33
|
-
get separator() {
|
|
34
|
-
const separator = this.sourceColumn.getTag('separator');
|
|
35
|
-
if (separator !== null)
|
|
36
|
-
return separator;
|
|
37
|
-
else
|
|
38
|
-
throw new Error('Separator not set');
|
|
39
|
-
}
|
|
40
|
-
isFasta() { return this.sourceNotation === "FASTA" /* NOTATION.FASTA */; }
|
|
41
|
-
isSeparator() { return this.sourceNotation === "SEPARATOR" /* NOTATION.SEPARATOR */; }
|
|
42
|
-
isHelm() { return this.sourceNotation === "HELM" /* NOTATION.HELM */; }
|
|
43
15
|
toFasta(targetNotation) { return targetNotation === "FASTA" /* NOTATION.FASTA */; }
|
|
44
16
|
toSeparator(targetNotation) { return targetNotation === "SEPARATOR" /* NOTATION.SEPARATOR */; }
|
|
45
17
|
toHelm(targetNotation) { return targetNotation === "HELM" /* NOTATION.HELM */; }
|
|
46
|
-
isRna() { return this.sourceUnits.toLowerCase().endsWith('rna'); }
|
|
47
|
-
isDna() { return this.sourceUnits.toLowerCase().endsWith('dna'); }
|
|
48
|
-
isPeptide() { return this.sourceUnits.toLowerCase().endsWith('pt'); }
|
|
49
|
-
/** Associate notation types with the corresponding units */
|
|
50
|
-
/**
|
|
51
|
-
* @return {NOTATION} Notation associated with the units type
|
|
52
|
-
*/
|
|
53
|
-
getSourceNotation() {
|
|
54
|
-
if (this.sourceUnits.toLowerCase().startsWith('fasta'))
|
|
55
|
-
return "FASTA" /* NOTATION.FASTA */;
|
|
56
|
-
else if (this.sourceUnits.toLowerCase().startsWith('separator'))
|
|
57
|
-
return "SEPARATOR" /* NOTATION.SEPARATOR */;
|
|
58
|
-
else if (this.sourceUnits.toLowerCase().startsWith('helm'))
|
|
59
|
-
return "HELM" /* NOTATION.HELM */;
|
|
60
|
-
else
|
|
61
|
-
throw new Error('The column has units that do not correspond to any notation');
|
|
62
|
-
}
|
|
63
|
-
/**
|
|
64
|
-
* Create a new empty column of the specified notation type and the same
|
|
65
|
-
* length as sourceColumn
|
|
66
|
-
*
|
|
67
|
-
* @param {NOTATION} targetNotation
|
|
68
|
-
* @return {DG.Column}
|
|
69
|
-
*/
|
|
70
|
-
getNewColumn(targetNotation) {
|
|
71
|
-
const col = this.sourceColumn;
|
|
72
|
-
const len = col.length;
|
|
73
|
-
const name = targetNotation.toLowerCase() + '(' + col.name + ')';
|
|
74
|
-
const newColName = col.dataFrame.columns.getUnusedName(name);
|
|
75
|
-
// dummy code
|
|
76
|
-
const newColumn = DG.Column.fromList('string', newColName, new Array(len).fill(''));
|
|
77
|
-
newColumn.semType = DG.SEMTYPE.MACROMOLECULE;
|
|
78
|
-
newColumn.setTag(DG.TAGS.UNITS, this.sourceUnits.replace(this.sourceNotation.toLowerCase().toString(), targetNotation.toLowerCase().toString()));
|
|
79
|
-
// TODO: specify cell renderers for all cases
|
|
80
|
-
if (this.toFasta(targetNotation)) {
|
|
81
|
-
newColumn.setTag(DG.TAGS.CELL_RENDERER, 'Macromolecule');
|
|
82
|
-
}
|
|
83
|
-
return newColumn;
|
|
84
|
-
}
|
|
85
18
|
/**
|
|
86
19
|
* Convert a Macromolecule column from FASTA to SEPARATOR notation
|
|
87
20
|
*
|
|
@@ -95,7 +28,7 @@ export class NotationConverter {
|
|
|
95
28
|
const newColumn = this.getNewColumn("SEPARATOR" /* NOTATION.SEPARATOR */);
|
|
96
29
|
// assign the values to the newly created empty column
|
|
97
30
|
newColumn.init((idx) => {
|
|
98
|
-
const fastaPolymer = this.
|
|
31
|
+
const fastaPolymer = this.column.get(idx);
|
|
99
32
|
const fastaMonomersArray = this.splitter(fastaPolymer);
|
|
100
33
|
for (let i = 0; i < fastaMonomersArray.length; i++) {
|
|
101
34
|
if (fastaMonomersArray[i] === fastaGapSymbol)
|
|
@@ -163,7 +96,7 @@ export class NotationConverter {
|
|
|
163
96
|
const newColumn = this.getNewColumn("HELM" /* NOTATION.HELM */);
|
|
164
97
|
// assign the values to the empty column
|
|
165
98
|
newColumn.init((idx) => {
|
|
166
|
-
const sourcePolymer = this.
|
|
99
|
+
const sourcePolymer = this.column.get(idx);
|
|
167
100
|
return this.convertToHelmHelper(sourcePolymer, sourceGapSymbol, prefix, leftWrapper, rightWrapper, postfix);
|
|
168
101
|
});
|
|
169
102
|
return newColumn;
|
|
@@ -180,7 +113,7 @@ export class NotationConverter {
|
|
|
180
113
|
const newColumn = this.getNewColumn("FASTA" /* NOTATION.FASTA */);
|
|
181
114
|
// assign the values to the empty column
|
|
182
115
|
newColumn.init((idx) => {
|
|
183
|
-
const separatorPolymer = this.
|
|
116
|
+
const separatorPolymer = this.column.get(idx);
|
|
184
117
|
// items can be monomers or separators
|
|
185
118
|
const separatorItemsArray = this.splitter(separatorPolymer);
|
|
186
119
|
const fastaMonomersArray = [];
|
|
@@ -225,7 +158,7 @@ export class NotationConverter {
|
|
|
225
158
|
const newColumn = this.getNewColumn(tgtNotation);
|
|
226
159
|
// assign the values to the empty column
|
|
227
160
|
newColumn.init((idx) => {
|
|
228
|
-
const helmPolymer = this.
|
|
161
|
+
const helmPolymer = this.column.get(idx);
|
|
229
162
|
// we cannot use isDna() or isRna() because source helm columns can
|
|
230
163
|
// contain DNA, RNA and PT in different cells, so the corresponding
|
|
231
164
|
// tags cannot be set for the whole column
|
|
@@ -265,7 +198,7 @@ export class NotationConverter {
|
|
|
265
198
|
*/
|
|
266
199
|
convert(tgtNotation, tgtSeparator = null) {
|
|
267
200
|
// possible exceptions
|
|
268
|
-
if (this.
|
|
201
|
+
if (this.notation === tgtNotation)
|
|
269
202
|
throw new Error('tgt notation is invalid');
|
|
270
203
|
if (this.toSeparator(tgtNotation) && tgtSeparator === null)
|
|
271
204
|
throw new Error('tgt separator is not specified');
|
|
@@ -281,4 +214,4 @@ export class NotationConverter {
|
|
|
281
214
|
return this.convertHelm(tgtNotation, tgtSeparator);
|
|
282
215
|
}
|
|
283
216
|
}
|
|
284
|
-
//# sourceMappingURL=data:application/json;base64,{"version":3,"file":"notation-converter.js","sourceRoot":"","sources":["notation-converter.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,MAAM,iBAAiB,CAAC;AACtC,OAAO,EAAe,OAAO,EAAC,MAAM,qBAAqB,CAAC;AAS1D,iFAAiF;AACjF,MAAM,OAAO,iBAAiB;IAuU5B,YAAmB,GAAc;QAlUzB,2BAAsB,GAAG;YAC/B,IAAI,EAAE,GAAG;YACT,SAAS,EAAE,EAAE;YACb,KAAK,EAAE,GAAG;SACX,CAAC;QAEM,cAAS,GAAwB,IAAI,CAAC;QA6T5C,IAAI,CAAC,aAAa,GAAG,GAAG,CAAC;QACzB,MAAM,KAAK,GAAG,IAAI,CAAC,aAAa,CAAC,IAAI,CAAC,EAAE,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC;QACrD,IAAI,KAAK,KAAK,IAAI;YAChB,IAAI,CAAC,YAAY,GAAG,KAAK,CAAC;;YAE1B,MAAM,IAAI,KAAK,CAAC,mCAAmC,CAAC,CAAC;QACvD,IAAI,CAAC,eAAe,GAAG,IAAI,CAAC,iBAAiB,EAAE,CAAC;QAChD,IAAI,CAAC,iBAAiB,GAAG,CAAC,IAAI,CAAC,OAAO,EAAE,CAAC,CAAC,CAAC,CAAC,IAAI,CAAC,sBAAsB,CAAC,KAAK,CAAC,CAAC;YAC7E,CAAC,IAAI,CAAC,MAAM,EAAE,CAAC,CAAC,CAAC,CAAC,IAAI,CAAC,sBAAsB,CAAC,IAAI,CAAC,CAAC;gBAClD,IAAI,CAAC,sBAAsB,CAAC,SAAS,CAAC;IAC5C,CAAC;IAtUD,IAAc,QAAQ;QACpB,IAAI,IAAI,CAAC,SAAS,KAAK,IAAI;YACzB,IAAI,CAAC,SAAS,GAAG,OAAO,CAAC,oBAAoB,CAAC,IAAI,CAAC,aAAa,CAAC,CAAC;QACpE,OAAO,IAAI,CAAC,SAAS,CAAC;IACxB,CAAC;IAAA,CAAC;IAGF,IAAY,WAAW,KAAa,OAAO,IAAI,CAAC,YAAY,CAAC,CAAC,CAAC;IAE/D,IAAY,YAAY,KAAgB,OAAO,IAAI,CAAC,aAAa,CAAC,CAAC,CAAC;IAEpE,IAAW,cAAc,KAAe,OAAO,IAAI,CAAC,eAAe,CAAC,CAAC,CAAC;IAEtE,IAAW,gBAAgB,KAAa,OAAO,IAAI,CAAC,iBAAiB,CAAC,CAAC,CAAC;IAExE,IAAW,SAAS;QAClB,MAAM,SAAS,GAAG,IAAI,CAAC,YAAY,CAAC,MAAM,CAAC,WAAW,CAAC,CAAC;QACxD,IAAI,SAAS,KAAK,IAAI;YACpB,OAAO,SAAS,CAAC;;YAEjB,MAAM,IAAI,KAAK,CAAC,mBAAmB,CAAC,CAAC;IACzC,CAAC;IAEM,OAAO,KAAc,OAAO,IAAI,CAAC,cAAc,iCAAmB,CAAC,CAAC,CAAC;IAErE,WAAW,KAAc,OAAO,IAAI,CAAC,cAAc,yCAAuB,CAAC,CAAC,CAAC;IAE7E,MAAM,KAAc,OAAO,IAAI,CAAC,cAAc,+BAAkB,CAAC,CAAC,CAAC;IAEnE,OAAO,CAAC,cAAwB,IAAa,OAAO,cAAc,iCAAmB,CAAC,CAAC,CAAC;IAExF,WAAW,CAAC,cAAwB,IAAa,OAAO,cAAc,yCAAuB,CAAC,CAAC,CAAC;IAEhG,MAAM,CAAC,cAAwB,IAAa,OAAO,cAAc,+BAAkB,CAAC,CAAC,CAAC;IAEtF,KAAK,KAAc,OAAO,IAAI,CAAC,WAAW,CAAC,WAAW,EAAE,CAAC,QAAQ,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC;IAE3E,KAAK,KAAc,OAAO,
IAAI,CAAC,WAAW,CAAC,WAAW,EAAE,CAAC,QAAQ,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC;IAE3E,SAAS,KAAc,OAAO,IAAI,CAAC,WAAW,CAAC,WAAW,EAAE,CAAC,QAAQ,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;IAErF,4DAA4D;IAC5D;;OAEG;IACK,iBAAiB;QACvB,IAAI,IAAI,CAAC,WAAW,CAAC,WAAW,EAAE,CAAC,UAAU,CAAC,OAAO,CAAC;YACpD,oCAAsB;aACnB,IAAI,IAAI,CAAC,WAAW,CAAC,WAAW,EAAE,CAAC,UAAU,CAAC,WAAW,CAAC;YAC7D,4CAA0B;aACvB,IAAI,IAAI,CAAC,WAAW,CAAC,WAAW,EAAE,CAAC,UAAU,CAAC,MAAM,CAAC;YACxD,kCAAqB;;YAErB,MAAM,IAAI,KAAK,CAAC,6DAA6D,CAAC,CAAC;IACnF,CAAC;IAED;;;;;;OAMG;IACK,YAAY,CAAC,cAAwB;QAC3C,MAAM,GAAG,GAAG,IAAI,CAAC,YAAY,CAAC;QAC9B,MAAM,GAAG,GAAG,GAAG,CAAC,MAAM,CAAC;QACvB,MAAM,IAAI,GAAG,cAAc,CAAC,WAAW,EAAE,GAAG,GAAG,GAAG,GAAG,CAAC,IAAI,GAAG,GAAG,CAAC;QACjE,MAAM,UAAU,GAAG,GAAG,CAAC,SAAS,CAAC,OAAO,CAAC,aAAa,CAAC,IAAI,CAAC,CAAC;QAC7D,aAAa;QACb,MAAM,SAAS,GAAG,EAAE,CAAC,MAAM,CAAC,QAAQ,CAAC,QAAQ,EAAE,UAAU,EAAE,IAAI,KAAK,CAAC,GAAG,CAAC,CAAC,IAAI,CAAC,EAAE,CAAC,CAAC,CAAC;QACpF,SAAS,CAAC,OAAO,GAAG,EAAE,CAAC,OAAO,CAAC,aAAa,CAAC;QAC7C,SAAS,CAAC,MAAM,CACd,EAAE,CAAC,IAAI,CAAC,KAAK,EACb,IAAI,CAAC,WAAW,CAAC,OAAO,CACtB,IAAI,CAAC,cAAc,CAAC,WAAW,EAAE,CAAC,QAAQ,EAAE,EAC5C,cAAc,CAAC,WAAW,EAAE,CAAC,QAAQ,EAAE,CACxC,CACF,CAAC;QACF,6CAA6C;QAC7C,IAAI,IAAI,CAAC,OAAO,CAAC,cAAc,CAAC,EAAE;YAChC,SAAS,CAAC,MAAM,CACd,EAAE,CAAC,IAAI,CAAC,aAAa,EACrB,eAAe,CAAC,CAAC;SACpB;QACD,OAAO,SAAS,CAAC;IACnB,CAAC;IAED;;;;;;OAMG;IACK,uBAAuB,CAAC,SAAiB,EAAE,iBAAgC,IAAI;QACrF,IAAI,cAAc,KAAK,IAAI;YACzB,cAAc,GAAG,IAAI,CAAC,gBAAgB,CAAC;QAEzC,MAAM,SAAS,GAAG,IAAI,CAAC,YAAY,sCAAoB,CAAC;QACxD,sDAAsD;QACtD,SAAS,CAAC,IAAI,CAAC,CAAC,GAAW,EAAE,EAAE;YAC7B,MAAM,YAAY,GAAG,IAAI,CAAC,YAAY,CAAC,GAAG,CAAC,GAAG,CAAC,CAAC;YAChD,MAAM,kBAAkB,GAAG,IAAI,CAAC,QAAQ,CAAC,YAAY,CAAC,CAAC;YACvD,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,kBAAkB,CAAC,MAAM,EAAE,CAAC,EAAE,EAAE;gBAClD,IAAI,kBAAkB,CAAC,CAAC,CAAC,KAAK,cAAc;oBAC1C,kBAAkB,CAAC,CAAC,CAAC,GAAG,IAAI,CAAC,sBAAsB,CAAC,SAAS,CAAC;aACjE;YACD,OAAO,kBAAkB,CAAC,IAAI,CAAC,SAAS,CAAC,CAAC;QAC5C,CAAC,CAAC,CAAC;QACH,SAAS,CAAC,MAAM,CAAC,WAAW,EAAE,SAAS,CAAC,CAAC;QACzC,OAAO
,SAAS,CAAC;IACnB,CAAC;IAED;;;;;OAKG;IACK,eAAe;QACrB,MAAM,MAAM,GAAG,CAAC,IAAI,CAAC,KAAK,EAAE,CAAC,CAAC,CAAC,CAAC,OAAO,CAAC,CAAC;YACvC,CAAC,IAAI,CAAC,KAAK,EAAE,CAAC,CAAC,CAAC,CAAC,OAAO,CAAC,CAAC;gBACxB,CAAC,IAAI,CAAC,SAAS,EAAE,CAAC,CAAC,CAAC,CAAC,WAAW,CAAC,CAAC;oBAChC,SAAS,CAAC,CAAC,6CAA6C;QAE9D,IAAI,MAAM,KAAK,SAAS;YACtB,MAAM,IAAI,KAAK,CAAC,iCAAiC,CAAC,CAAC;QAErD,MAAM,OAAO,GAAG,MAAM,CAAC;QACvB,MAAM,WAAW,GAAG,CAAC,IAAI,CAAC,KAAK,EAAE,CAAC,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC;YACzC,CAAC,IAAI,CAAC,KAAK,EAAE,CAAC,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,0BAA0B;QACxD,MAAM,YAAY,GAAG,CAAC,IAAI,CAAC,KAAK,EAAE,IAAI,IAAI,CAAC,KAAK,EAAE,CAAC,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,0BAA0B;QAC3F,OAAO,CAAC,MAAM,EAAE,WAAW,EAAE,YAAY,EAAE,OAAO,CAAC,CAAC;IACtD,CAAC;IAED,mDAAmD;IAC3C,mBAAmB,CACzB,aAAqB,EACrB,eAAuB,EACvB,MAAc,EACd,WAAmB,EACnB,YAAoB,EACpB,OAAe;QAEf,MAAM,YAAY,GAAG,IAAI,CAAC,QAAQ,CAAC,aAAa,CAAC,CAAC;QAClD,MAAM,gBAAgB,GAAa,YAAY,CAAC,GAAG,CAAC,CAAC,EAAU,EAAE,EAAE;YACjE,IAAI,EAAE,KAAK,eAAe;gBACxB,OAAO,IAAI,CAAC,sBAAsB,CAAC,IAAI,CAAC;;gBAExC,OAAO,GAAG,WAAW,GAAG,EAAE,GAAG,YAAY,EAAE,CAAC;QAChD,CAAC,CAAC,CAAC;QACH,OAAO,GAAG,MAAM,GAAG,gBAAgB,CAAC,IAAI,CAAC,GAAG,CAAC,GAAG,OAAO,EAAE,CAAC;IAC5D,CAAC;IAED;;;;;;;OAOG;IACI,mBAAmB,CACxB,aAAqB,EACrB,kBAAiC,IAAI;QAErC,IAAI,eAAe,KAAK,IAAI;YAC1B,eAAe,GAAG,IAAI,CAAC,gBAAgB,CAAC;QAC1C,MAAM,CAAC,MAAM,EAAE,WAAW,EAAE,YAAY,EAAE,OAAO,CAAC,GAAG,IAAI,CAAC,eAAe,EAAE,CAAC;QAC5E,OAAO,IAAI,CAAC,mBAAmB,CAAC,aAAa,EAAE,eAAe,EAAE,MAAM,EAAE,WAAW,EAAE,YAAY,EAAE,OAAO,CAAC,CAAC;IAC9G,CAAC;IAED;;;;;OAKG;IACK,aAAa,CAAC,kBAAiC,IAAI;QACzD,IAAI,eAAe,KAAK,IAAI;YAC1B,eAAe,GAAG,IAAI,CAAC,gBAAgB,CAAC;QAE1C,MAAM,CAAC,MAAM,EAAE,WAAW,EAAE,YAAY,EAAE,OAAO,CAAC,GAAG,IAAI,CAAC,eAAe,EAAE,CAAC;QAE5E,MAAM,SAAS,GAAG,IAAI,CAAC,YAAY,4BAAe,CAAC;QACnD,wCAAwC;QACxC,SAAS,CAAC,IAAI,CAAC,CAAC,GAAW,EAAE,EAAE;YAC7B,MAAM,aAAa,GAAG,IAAI,CAAC,YAAY,CAAC,GAAG,CAAC,GAAG,CAAC,CAAC;YACjD,OAAO,IAAI,CAAC,mBAAmB,CAAC,aAAa,EAAE,eAAgB,EAAE,MAAM,EAAE,WAAW,EAAE,YAAY,EAAE,OAAO,CAAC,CAAC;QAC/G,CAA
C,CAAC,CAAC;QACH,OAAO,SAAS,CAAC;IACnB,CAAC;IAED;;;;;OAKG;IACK,uBAAuB,CAAC,iBAAgC,IAAI;QAClE,IAAI,cAAc,KAAK,IAAI;YACzB,cAAc,GAAG,IAAI,CAAC,sBAAsB,CAAC,KAAK,CAAC;QAErD,MAAM,SAAS,GAAG,IAAI,CAAC,YAAY,8BAAgB,CAAC;QACpD,wCAAwC;QACxC,SAAS,CAAC,IAAI,CAAC,CAAC,GAAW,EAAE,EAAE;YAC7B,MAAM,gBAAgB,GAAG,IAAI,CAAC,YAAY,CAAC,GAAG,CAAC,GAAG,CAAC,CAAC;YACpD,sCAAsC;YACtC,MAAM,mBAAmB,GAAG,IAAI,CAAC,QAAQ,CAAC,gBAAgB,CAAC,CAAC;YAC5D,MAAM,kBAAkB,GAAa,EAAE,CAAC;YACxC,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,mBAAmB,CAAC,MAAM,EAAE,CAAC,EAAE,EAAE;gBACnD,MAAM,IAAI,GAAG,mBAAmB,CAAC,CAAC,CAAC,CAAC;gBACpC,IAAI,IAAI,CAAC,MAAM,KAAK,CAAC,EAAE;oBACrB,kBAAkB,CAAC,IAAI,CAAC,cAAe,CAAC,CAAC;iBAC1C;qBAAM,IAAI,IAAI,CAAC,MAAM,GAAG,CAAC,EAAE;oBAC1B,wCAAwC;oBACxC,MAAM,OAAO,GAAG,GAAG,GAAG,IAAI,GAAG,GAAG,CAAC;oBACjC,kBAAkB,CAAC,IAAI,CAAC,OAAO,CAAC,CAAC;iBAClC;qBAAM;oBACL,kBAAkB,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;iBAC/B;aACF;YACD,OAAO,kBAAkB,CAAC,IAAI,CAAC,EAAE,CAAC,CAAC;QACrC,CAAC,CAAC,CAAC;QACH,OAAO,SAAS,CAAC;IACnB,CAAC;IAED;;;;;;;;OAQG;IACK,WAAW,CACjB,WAAmB,EACnB,eAAuB,EAAE,EACzB,eAA8B,IAAI;QAElC,mEAAmE;QACnE,wEAAwE;QACxE,IAAI,YAAY,KAAK,IAAI,EAAE;YACzB,YAAY,GAAG,CAAC,IAAI,CAAC,OAAO,CAAC,WAAuB,CAAC,CAAC,CAAC,CAAC;gBACtD,IAAI,CAAC,sBAAsB,CAAC,KAAK,CAAC,CAAC;gBACnC,IAAI,CAAC,sBAAsB,CAAC,SAAS,CAAC;SACzC;QAED,IAAI,IAAI,CAAC,WAAW,CAAC,WAAuB,CAAC,IAAI,YAAY,KAAK,EAAE;YAClE,YAAY,GAAG,IAAI,CAAC,SAAS,CAAC;QAEhC,MAAM,cAAc,GAAG,iBAAiB,CAAC;QACzC,MAAM,SAAS,GAAG,IAAI,CAAC,YAAY,CAAC,WAAuB,CAAC,CAAC;QAC7D,wCAAwC;QACxC,SAAS,CAAC,IAAI,CAAC,CAAC,GAAW,EAAE,EAAE;YAC7B,MAAM,WAAW,GAAG,IAAI,CAAC,YAAY,CAAC,GAAG,CAAC,GAAG,CAAC,CAAC;YAE/C,mEAAmE;YACnE,mEAAmE;YACnE,0CAA0C;YAC1C,MAAM,YAAY,GAAG,WAAW,CAAC,UAAU,CAAC,KAAK,CAAC,IAAI,WAAW,CAAC,UAAU,CAAC,KAAK,CAAC,CAAC;YAEpF,iCAAiC;YACjC,MAAM,cAAc,GAAG,IAAI,CAAC,QAAQ,CAAC,WAAW,CAAC,CAAC;YAClD,MAAM,gBAAgB,GAAa,EAAE,CAAC;YACtC,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,cAAc,CAAC,MAAM,EAAE,CAAC,EAAE,EAAE;gBAC9C,IAAI,IAAI,GAAG,cAAc,CAAC,CAAC,CAAC,CAAC;gBAC7B,IAAI,YAAY;oBACd,IAAI,GAAG,IAAI,CAAC,OAAO,CAAC,cAAc,
EAAE,EAAE,CAAC,CAAC;gBAC1C,IAAI,IAAI,KAAK,IAAI,CAAC,sBAAsB,CAAC,IAAI,EAAE;oBAC7C,gBAAgB,CAAC,IAAI,CAAC,YAAa,CAAC,CAAC;iBACtC;qBAAM,IAAI,IAAI,CAAC,OAAO,CAAC,WAAuB,CAAC,IAAI,IAAI,CAAC,MAAM,GAAG,CAAC,EAAE;oBACnE,2DAA2D;oBAC3D,MAAM,OAAO,GAAG,GAAG,GAAG,IAAI,GAAG,GAAG,CAAC;oBACjC,gBAAgB,CAAC,IAAI,CAAC,OAAO,CAAC,CAAC;iBAChC;qBAAM;oBACL,gBAAgB,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;iBAC7B;aACF;YACD,OAAO,gBAAgB,CAAC,IAAI,CAAC,YAAY,CAAC,CAAC;QAC7C,CAAC,CAAC,CAAC;QACH,OAAO,SAAS,CAAC;IACnB,CAAC;IAEO,sBAAsB;QAC5B,iDAAiD;QACjD,OAAO,IAAI,CAAC,YAAY,sCAAoB,CAAC;IAC/C,CAAC;IAED;;;;;OAKG;IACI,OAAO,CAAC,WAAqB,EAAE,eAA8B,IAAI;QACtE,sBAAsB;QACtB,IAAI,IAAI,CAAC,cAAc,KAAK,WAAW;YACrC,MAAM,IAAI,KAAK,CAAC,yBAAyB,CAAC,CAAC;QAC7C,IAAI,IAAI,CAAC,WAAW,CAAC,WAAW,CAAC,IAAI,YAAY,KAAK,IAAI;YACxD,MAAM,IAAI,KAAK,CAAC,gCAAgC,CAAC,CAAC;QAEpD,IAAI,IAAI,CAAC,OAAO,EAAE,IAAI,IAAI,CAAC,WAAW,CAAC,WAAW,CAAC,IAAI,YAAY,KAAK,IAAI;YAC1E,OAAO,IAAI,CAAC,uBAAuB,CAAC,YAAY,CAAC,CAAC;aAC/C,IAAI,CAAC,IAAI,CAAC,OAAO,EAAE,IAAI,IAAI,CAAC,WAAW,EAAE,CAAC,IAAI,IAAI,CAAC,MAAM,CAAC,WAAW,CAAC;YACzE,OAAO,IAAI,CAAC,aAAa,EAAE,CAAC;aACzB,IAAI,IAAI,CAAC,WAAW,EAAE,IAAI,IAAI,CAAC,OAAO,CAAC,WAAW,CAAC;YACtD,OAAO,IAAI,CAAC,uBAAuB,EAAE,CAAC;aACnC,IAAI,IAAI,CAAC,MAAM,EAAE,IAAI,IAAI,CAAC,OAAO,CAAC,WAAW,CAAC,EAAE,mBAAmB;YACtE,OAAO,IAAI,CAAC,WAAW,CAAC,WAAW,CAAC,CAAC;aAClC,iDAAiD;YACpD,OAAO,IAAI,CAAC,WAAW,CAAC,WAAW,EAAE,YAAa,CAAC,CAAC;IACxD,CAAC;CAcF","sourcesContent":["import * as DG from 'datagrok-api/dg';\nimport {SplitterFunc, WebLogo} from '../viewers/web-logo';\n\n/** enum type to simplify setting \"user-friendly\" notation if necessary */\nexport const enum NOTATION {\n  FASTA = 'FASTA',\n  SEPARATOR = 'SEPARATOR',\n  HELM = 'HELM'\n}\n\n/** Class for handling conversion of notation systems in Macromolecule columns */\nexport class NotationConverter {\n  private readonly _sourceColumn: DG.Column; // the column to be converted\n  private _sourceUnits: string; // units, of the form fasta:SEQ:NT, etc.\n  private _sourceNotation: NOTATION; // current 
notation (without :SEQ:NT, etc.)\n  private _defaultGapSymbol: string;\n  private _defaultGapSymbolsDict = {\n    HELM: '*',\n    SEPARATOR: '',\n    FASTA: '-',\n  };\n\n  private _splitter: SplitterFunc | null = null;\n  protected get splitter(): SplitterFunc {\n    if (this._splitter === null)\n      this._splitter = WebLogo.getSplitterForColumn(this._sourceColumn);\n    return this._splitter;\n  };\n\n\n  private get sourceUnits(): string { return this._sourceUnits; }\n\n  private get sourceColumn(): DG.Column { return this._sourceColumn; }\n\n  public get sourceNotation(): NOTATION { return this._sourceNotation; }\n\n  public get defaultGapSymbol(): string { return this._defaultGapSymbol; }\n\n  public get separator(): string {\n    const separator = this.sourceColumn.getTag('separator');\n    if (separator !== null)\n      return separator;\n    else\n      throw new Error('Separator not set');\n  }\n\n  public isFasta(): boolean { return this.sourceNotation === NOTATION.FASTA; }\n\n  public isSeparator(): boolean { return this.sourceNotation === NOTATION.SEPARATOR; }\n\n  public isHelm(): boolean { return this.sourceNotation === NOTATION.HELM; }\n\n  public toFasta(targetNotation: NOTATION): boolean { return targetNotation === NOTATION.FASTA; }\n\n  public toSeparator(targetNotation: NOTATION): boolean { return targetNotation === NOTATION.SEPARATOR; }\n\n  public toHelm(targetNotation: NOTATION): boolean { return targetNotation === NOTATION.HELM; }\n\n  public isRna(): boolean { return this.sourceUnits.toLowerCase().endsWith('rna'); }\n\n  public isDna(): boolean { return this.sourceUnits.toLowerCase().endsWith('dna'); }\n\n  public isPeptide(): boolean { return this.sourceUnits.toLowerCase().endsWith('pt'); }\n\n  /** Associate notation types with the corresponding units */\n  /**\n   * @return {NOTATION}     Notation associated with the units type\n   */\n  private getSourceNotation(): NOTATION {\n    if 
(this.sourceUnits.toLowerCase().startsWith('fasta'))\n      return NOTATION.FASTA;\n    else if (this.sourceUnits.toLowerCase().startsWith('separator'))\n      return NOTATION.SEPARATOR;\n    else if (this.sourceUnits.toLowerCase().startsWith('helm'))\n      return NOTATION.HELM;\n    else\n      throw new Error('The column has units that do not correspond to any notation');\n  }\n\n  /**\n   * Create a new empty column of the specified notation type and the same\n   * length as sourceColumn\n   *\n   * @param {NOTATION} targetNotation\n   * @return {DG.Column}\n   */\n  private getNewColumn(targetNotation: NOTATION): DG.Column {\n    const col = this.sourceColumn;\n    const len = col.length;\n    const name = targetNotation.toLowerCase() + '(' + col.name + ')';\n    const newColName = col.dataFrame.columns.getUnusedName(name);\n    // dummy code\n    const newColumn = DG.Column.fromList('string', newColName, new Array(len).fill(''));\n    newColumn.semType = DG.SEMTYPE.MACROMOLECULE;\n    newColumn.setTag(\n      DG.TAGS.UNITS,\n      this.sourceUnits.replace(\n        this.sourceNotation.toLowerCase().toString(),\n        targetNotation.toLowerCase().toString()\n      )\n    );\n    // TODO: specify cell renderers for all cases\n    if (this.toFasta(targetNotation)) {\n      newColumn.setTag(\n        DG.TAGS.CELL_RENDERER,\n        'Macromolecule');\n    }\n    return newColumn;\n  }\n\n  /**\n   * Convert a Macromolecule column from FASTA to SEPARATOR notation\n   *\n   * @param {string} separator  A specific separator to be used\n   * @param {string} fastaGapSymbol  Gap symbol in FASTA, '-' by default\n   * @return {DG.Column}        A new column in SEPARATOR notation\n   */\n  private convertFastaToSeparator(separator: string, fastaGapSymbol: string | null = null): DG.Column {\n    if (fastaGapSymbol === null)\n      fastaGapSymbol = this.defaultGapSymbol;\n\n    const newColumn = this.getNewColumn(NOTATION.SEPARATOR);\n    // assign the values to the newly 
created empty column\n    newColumn.init((idx: number) => {\n      const fastaPolymer = this.sourceColumn.get(idx);\n      const fastaMonomersArray = this.splitter(fastaPolymer);\n      for (let i = 0; i < fastaMonomersArray.length; i++) {\n        if (fastaMonomersArray[i] === fastaGapSymbol)\n          fastaMonomersArray[i] = this._defaultGapSymbolsDict.SEPARATOR;\n      }\n      return fastaMonomersArray.join(separator);\n    });\n    newColumn.setTag('separator', separator);\n    return newColumn;\n  }\n\n  /**\n   * Get the wrapper strings for HELM, depending on the type of the\n   * macromolecule (peptide, DNA, RNA)\n   *\n   * @return {string[]} Array of wrappers\n   */\n  private getHelmWrappers(): string[] {\n    const prefix = (this.isDna()) ? 'DNA1{' :\n      (this.isRna()) ? 'RNA1{' :\n        (this.isPeptide()) ? 'PEPTIDE1{' :\n          'Unknown'; // this case should be handled as exceptional\n\n    if (prefix === 'Unknown')\n      throw new Error('Neither peptide, nor nucleotide');\n\n    const postfix = '}$$$';\n    const leftWrapper = (this.isDna()) ? 'D(' :\n      (this.isRna()) ? 'R(' : ''; // no wrapper for peptides\n    const rightWrapper = (this.isDna() || this.isRna()) ? 
')P' : ''; // no wrapper for peptides\n    return [prefix, leftWrapper, rightWrapper, postfix];\n  }\n\n  // A helper function for converting strings to HELM\n  private convertToHelmHelper(\n    sourcePolymer: string,\n    sourceGapSymbol: string,\n    prefix: string,\n    leftWrapper: string,\n    rightWrapper: string,\n    postfix: string\n  ): string {\n    const monomerArray = this.splitter(sourcePolymer);\n    const monomerHelmArray: string[] = monomerArray.map((mm: string) => {\n      if (mm === sourceGapSymbol)\n        return this._defaultGapSymbolsDict.HELM;\n      else\n        return `${leftWrapper}${mm}${rightWrapper}`;\n    });\n    return `${prefix}${monomerHelmArray.join('.')}${postfix}`;\n  }\n\n  /**\n   * Convert a string with SEPARATOR/FASTA notation to HELM\n   *\n   * @param {string} sourcePolymer  A string to be converted\n   * @param {string | null} sourceGapSymbol  An optional gap symbol, set to\n   * default values ('-' for FASTA and '' for SEPARATOR) unless specified\n   * @return {string}  The target HELM string\n   */\n  public convertStringToHelm(\n    sourcePolymer: string,\n    sourceGapSymbol: string | null = null\n  ) : string {\n    if (sourceGapSymbol === null)\n      sourceGapSymbol = this.defaultGapSymbol;\n    const [prefix, leftWrapper, rightWrapper, postfix] = this.getHelmWrappers();\n    return this.convertToHelmHelper(sourcePolymer, sourceGapSymbol, prefix, leftWrapper, rightWrapper, postfix);\n  }\n\n  /**\n   * Convert a column to HELM\n   *\n   * @param {string | null} sourceGapSymbol\n   * @return {DG.Column}\n   */\n  private convertToHelm(sourceGapSymbol: string | null = null): DG.Column {\n    if (sourceGapSymbol === null)\n      sourceGapSymbol = this.defaultGapSymbol;\n\n    const [prefix, leftWrapper, rightWrapper, postfix] = this.getHelmWrappers();\n\n    const newColumn = this.getNewColumn(NOTATION.HELM);\n    // assign the values to the empty column\n    newColumn.init((idx: number) => {\n      const 
sourcePolymer = this.sourceColumn.get(idx);\n      return this.convertToHelmHelper(sourcePolymer, sourceGapSymbol!, prefix, leftWrapper, rightWrapper, postfix);\n    });\n    return newColumn;\n  }\n\n  /**\n   * Convert SEPARATOR column to FASTA notation\n   *\n   * @param {string | null} fastaGapSymbol Optional gap symbol for FASTA\n   * @return {DG.Column}  Converted column\n   */\n  private convertSeparatorToFasta(fastaGapSymbol: string | null = null): DG.Column {\n    if (fastaGapSymbol === null)\n      fastaGapSymbol = this._defaultGapSymbolsDict.FASTA;\n\n    const newColumn = this.getNewColumn(NOTATION.FASTA);\n    // assign the values to the empty column\n    newColumn.init((idx: number) => {\n      const separatorPolymer = this.sourceColumn.get(idx);\n      // items can be monomers or separators\n      const separatorItemsArray = this.splitter(separatorPolymer);\n      const fastaMonomersArray: string[] = [];\n      for (let i = 0; i < separatorItemsArray.length; i++) {\n        const item = separatorItemsArray[i];\n        if (item.length === 0) {\n          fastaMonomersArray.push(fastaGapSymbol!);\n        } else if (item.length > 1) {\n          // the case of a multi-character monomer\n          const monomer = '[' + item + ']';\n          fastaMonomersArray.push(monomer);\n        } else {\n          fastaMonomersArray.push(item);\n        }\n      }\n      return fastaMonomersArray.join('');\n    });\n    return newColumn;\n  }\n\n  /**\n   *  Convert HELM column to FASTA/SEPARATOR\n   *\n   * @param {string} tgtNotation    Target notation: FASTA or SEPARATOR\n   * @param {string} tgtSeparator   Optional target separator (for HELM ->\n   * @param {string | null} tgtGapSymbol   Optional target gap symbol\n   * SEPARATOR)\n   * @return {DG.Column} Converted column\n   */\n  private convertHelm(\n    tgtNotation: string,\n    tgtSeparator: string = '',\n    tgtGapSymbol: string | null = null\n  ): DG.Column {\n    // This function must not contain 
calls of isDna() and isRna(), for\n    // source helm columns may contain RNA, DNA and PT across different rows\n    if (tgtGapSymbol === null) {\n      tgtGapSymbol = (this.toFasta(tgtNotation as NOTATION)) ?\n        this._defaultGapSymbolsDict.FASTA :\n        this._defaultGapSymbolsDict.SEPARATOR;\n    }\n\n    if (this.toSeparator(tgtNotation as NOTATION) && tgtSeparator === '')\n      tgtSeparator = this.separator;\n\n    const helmWrappersRe = /(R\\(|D\\(|\\)|P)/g;\n    const newColumn = this.getNewColumn(tgtNotation as NOTATION);\n    // assign the values to the empty column\n    newColumn.init((idx: number) => {\n      const helmPolymer = this.sourceColumn.get(idx);\n\n      // we cannot use isDna() or isRna() because source helm columns can\n      // contain DNA, RNA and PT in different cells, so the corresponding\n      // tags cannot be set for the whole column\n      const isNucleotide = helmPolymer.startsWith('DNA') || helmPolymer.startsWith('RNA');\n\n      // items can be monomers or helms\n      const helmItemsArray = this.splitter(helmPolymer);\n      const tgtMonomersArray: string[] = [];\n      for (let i = 0; i < helmItemsArray.length; i++) {\n        let item = helmItemsArray[i];\n        if (isNucleotide)\n          item = item.replace(helmWrappersRe, '');\n        if (item === this._defaultGapSymbolsDict.HELM) {\n          tgtMonomersArray.push(tgtGapSymbol!);\n        } else if (this.toFasta(tgtNotation as NOTATION) && item.length > 1) {\n          // the case of a multi-character monomer converted to FASTA\n          const monomer = '[' + item + ']';\n          tgtMonomersArray.push(monomer);\n        } else {\n          tgtMonomersArray.push(item);\n        }\n      }\n      return tgtMonomersArray.join(tgtSeparator);\n    });\n    return newColumn;\n  }\n\n  private convertHelmToSeparator(): DG.Column {\n    // TODO: implementatioreturn this.getNewColumn();\n    return this.getNewColumn(NOTATION.SEPARATOR);\n  }\n\n  /** Dispatcher 
method for notation conversion\n   *\n   * @param {NOTATION} tgtNotation   Notation we want to convert to\n   * @param {string | null} tgtSeparator   Possible separator\n   * @return {DG.Column}                Converted column\n   */\n  public convert(tgtNotation: NOTATION, tgtSeparator: string | null = null): DG.Column {\n    // possible exceptions\n    if (this.sourceNotation === tgtNotation)\n      throw new Error('tgt notation is invalid');\n    if (this.toSeparator(tgtNotation) && tgtSeparator === null)\n      throw new Error('tgt separator is not specified');\n\n    if (this.isFasta() && this.toSeparator(tgtNotation) && tgtSeparator !== null)\n      return this.convertFastaToSeparator(tgtSeparator);\n    else if ((this.isFasta() || this.isSeparator()) && this.toHelm(tgtNotation))\n      return this.convertToHelm();\n    else if (this.isSeparator() && this.toFasta(tgtNotation))\n      return this.convertSeparatorToFasta();\n    else if (this.isHelm() && this.toFasta(tgtNotation)) // the case of HELM\n      return this.convertHelm(tgtNotation);\n    else // this.isHelm() && this.toSeparator(tgtNotation)\n      return this.convertHelm(tgtNotation, tgtSeparator!);\n  }\n\n  public constructor(col: DG.Column) {\n    this._sourceColumn = col;\n    const units = this._sourceColumn.tags[DG.TAGS.UNITS];\n    if (units !== null)\n      this._sourceUnits = units;\n    else\n      throw new Error('Units are not specified in column');\n    this._sourceNotation = this.getSourceNotation();\n    this._defaultGapSymbol = (this.isFasta()) ? this._defaultGapSymbolsDict.FASTA :\n      (this.isHelm()) ? this._defaultGapSymbolsDict.HELM :\n        this._defaultGapSymbolsDict.SEPARATOR;\n  }\n}\n"]}
|
|
217
|
+
//# sourceMappingURL=data:application/json;base64,{"version":3,"file":"notation-converter.js","sourceRoot":"","sources":["notation-converter.ts"],"names":[],"mappings":"AAIA,OAAO,EAAe,OAAO,EAAC,MAAM,qBAAqB,CAAC;AAC1D,OAAO,EAAC,YAAY,EAAW,MAAM,iBAAiB,CAAC;AAEvD,iFAAiF;AACjF,MAAM,OAAO,iBAAkB,SAAQ,YAAY;IAkPjD,YAAmB,GAAc;QAC/B,KAAK,CAAC,GAAG,CAAC,CAAC;QAlPL,cAAS,GAAwB,IAAI,CAAC;IAmP9C,CAAC;IAlPD,IAAc,QAAQ;QACpB,IAAI,IAAI,CAAC,SAAS,KAAK,IAAI;YACzB,IAAI,CAAC,SAAS,GAAG,OAAO,CAAC,oBAAoB,CAAC,IAAI,CAAC,MAAM,CAAC,CAAC;QAC7D,OAAO,IAAI,CAAC,SAAS,CAAC;IACxB,CAAC;IAAA,CAAC;IAEK,OAAO,CAAC,cAAwB,IAAa,OAAO,cAAc,iCAAmB,CAAC,CAAC,CAAC;IAExF,WAAW,CAAC,cAAwB,IAAa,OAAO,cAAc,yCAAuB,CAAC,CAAC,CAAC;IAEhG,MAAM,CAAC,cAAwB,IAAa,OAAO,cAAc,+BAAkB,CAAC,CAAC,CAAC;IAE7F;;;;;;OAMG;IACK,uBAAuB,CAAC,SAAiB,EAAE,iBAAgC,IAAI;QACrF,IAAI,cAAc,KAAK,IAAI;YACzB,cAAc,GAAG,IAAI,CAAC,gBAAgB,CAAC;QAEzC,MAAM,SAAS,GAAG,IAAI,CAAC,YAAY,sCAAoB,CAAC;QACxD,sDAAsD;QACtD,SAAS,CAAC,IAAI,CAAC,CAAC,GAAW,EAAE,EAAE;YAC7B,MAAM,YAAY,GAAG,IAAI,CAAC,MAAM,CAAC,GAAG,CAAC,GAAG,CAAC,CAAC;YAC1C,MAAM,kBAAkB,GAAG,IAAI,CAAC,QAAQ,CAAC,YAAY,CAAC,CAAC;YACvD,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,kBAAkB,CAAC,MAAM,EAAE,CAAC,EAAE,EAAE;gBAClD,IAAI,kBAAkB,CAAC,CAAC,CAAC,KAAK,cAAc;oBAC1C,kBAAkB,CAAC,CAAC,CAAC,GAAG,IAAI,CAAC,sBAAsB,CAAC,SAAS,CAAC;aACjE;YACD,OAAO,kBAAkB,CAAC,IAAI,CAAC,SAAS,CAAC,CAAC;QAC5C,CAAC,CAAC,CAAC;QACH,SAAS,CAAC,MAAM,CAAC,WAAW,EAAE,SAAS,CAAC,CAAC;QACzC,OAAO,SAAS,CAAC;IACnB,CAAC;IAED;;;;;OAKG;IACK,eAAe;QACrB,MAAM,MAAM,GAAG,CAAC,IAAI,CAAC,KAAK,EAAE,CAAC,CAAC,CAAC,CAAC,OAAO,CAAC,CAAC;YACvC,CAAC,IAAI,CAAC,KAAK,EAAE,CAAC,CAAC,CAAC,CAAC,OAAO,CAAC,CAAC;gBACxB,CAAC,IAAI,CAAC,SAAS,EAAE,CAAC,CAAC,CAAC,CAAC,WAAW,CAAC,CAAC;oBAChC,SAAS,CAAC,CAAC,6CAA6C;QAE9D,IAAI,MAAM,KAAK,SAAS;YACtB,MAAM,IAAI,KAAK,CAAC,iCAAiC,CAAC,CAAC;QAErD,MAAM,OAAO,GAAG,MAAM,CAAC;QACvB,MAAM,WAAW,GAAG,CAAC,IAAI,CAAC,KAAK,EAAE,CAAC,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC;YACzC,CAAC,IAAI,CAAC,KAAK,EAAE,CAAC,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,0BAA0B;QACxD,MAAM,YAAY,GA
AG,CAAC,IAAI,CAAC,KAAK,EAAE,IAAI,IAAI,CAAC,KAAK,EAAE,CAAC,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,0BAA0B;QAC3F,OAAO,CAAC,MAAM,EAAE,WAAW,EAAE,YAAY,EAAE,OAAO,CAAC,CAAC;IACtD,CAAC;IAED,mDAAmD;IAC3C,mBAAmB,CACzB,aAAqB,EACrB,eAAuB,EACvB,MAAc,EACd,WAAmB,EACnB,YAAoB,EACpB,OAAe;QAEf,MAAM,YAAY,GAAG,IAAI,CAAC,QAAQ,CAAC,aAAa,CAAC,CAAC;QAClD,MAAM,gBAAgB,GAAa,YAAY,CAAC,GAAG,CAAC,CAAC,EAAU,EAAE,EAAE;YACjE,IAAI,EAAE,KAAK,eAAe;gBACxB,OAAO,IAAI,CAAC,sBAAsB,CAAC,IAAI,CAAC;;gBAExC,OAAO,GAAG,WAAW,GAAG,EAAE,GAAG,YAAY,EAAE,CAAC;QAChD,CAAC,CAAC,CAAC;QACH,OAAO,GAAG,MAAM,GAAG,gBAAgB,CAAC,IAAI,CAAC,GAAG,CAAC,GAAG,OAAO,EAAE,CAAC;IAC5D,CAAC;IAED;;;;;;;OAOG;IACI,mBAAmB,CACxB,aAAqB,EACrB,kBAAiC,IAAI;QAErC,IAAI,eAAe,KAAK,IAAI;YAC1B,eAAe,GAAG,IAAI,CAAC,gBAAgB,CAAC;QAC1C,MAAM,CAAC,MAAM,EAAE,WAAW,EAAE,YAAY,EAAE,OAAO,CAAC,GAAG,IAAI,CAAC,eAAe,EAAE,CAAC;QAC5E,OAAO,IAAI,CAAC,mBAAmB,CAAC,aAAa,EAAE,eAAe,EAAE,MAAM,EAAE,WAAW,EAAE,YAAY,EAAE,OAAO,CAAC,CAAC;IAC9G,CAAC;IAED;;;;;OAKG;IACK,aAAa,CAAC,kBAAiC,IAAI;QACzD,IAAI,eAAe,KAAK,IAAI;YAC1B,eAAe,GAAG,IAAI,CAAC,gBAAgB,CAAC;QAE1C,MAAM,CAAC,MAAM,EAAE,WAAW,EAAE,YAAY,EAAE,OAAO,CAAC,GAAG,IAAI,CAAC,eAAe,EAAE,CAAC;QAE5E,MAAM,SAAS,GAAG,IAAI,CAAC,YAAY,4BAAe,CAAC;QACnD,wCAAwC;QACxC,SAAS,CAAC,IAAI,CAAC,CAAC,GAAW,EAAE,EAAE;YAC7B,MAAM,aAAa,GAAG,IAAI,CAAC,MAAM,CAAC,GAAG,CAAC,GAAG,CAAC,CAAC;YAC3C,OAAO,IAAI,CAAC,mBAAmB,CAAC,aAAa,EAAE,eAAgB,EAAE,MAAM,EAAE,WAAW,EAAE,YAAY,EAAE,OAAO,CAAC,CAAC;QAC/G,CAAC,CAAC,CAAC;QACH,OAAO,SAAS,CAAC;IACnB,CAAC;IAED;;;;;OAKG;IACK,uBAAuB,CAAC,iBAAgC,IAAI;QAClE,IAAI,cAAc,KAAK,IAAI;YACzB,cAAc,GAAG,IAAI,CAAC,sBAAsB,CAAC,KAAK,CAAC;QAErD,MAAM,SAAS,GAAG,IAAI,CAAC,YAAY,8BAAgB,CAAC;QACpD,wCAAwC;QACxC,SAAS,CAAC,IAAI,CAAC,CAAC,GAAW,EAAE,EAAE;YAC7B,MAAM,gBAAgB,GAAG,IAAI,CAAC,MAAM,CAAC,GAAG,CAAC,GAAG,CAAC,CAAC;YAC9C,sCAAsC;YACtC,MAAM,mBAAmB,GAAG,IAAI,CAAC,QAAQ,CAAC,gBAAgB,CAAC,CAAC;YAC5D,MAAM,kBAAkB,GAAa,EAAE,CAAC;YACxC,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,mBAAmB,CAAC,MAAM,EAAE,CAAC,EAAE,EAAE;gBACnD,MAAM,IAAI,GAAG,mBAAmB,CAAC,CAAC,CAAC,CAAC;gBACpC,I
AAI,IAAI,CAAC,MAAM,KAAK,CAAC,EAAE;oBACrB,kBAAkB,CAAC,IAAI,CAAC,cAAe,CAAC,CAAC;iBAC1C;qBAAM,IAAI,IAAI,CAAC,MAAM,GAAG,CAAC,EAAE;oBAC1B,wCAAwC;oBACxC,MAAM,OAAO,GAAG,GAAG,GAAG,IAAI,GAAG,GAAG,CAAC;oBACjC,kBAAkB,CAAC,IAAI,CAAC,OAAO,CAAC,CAAC;iBAClC;qBAAM;oBACL,kBAAkB,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;iBAC/B;aACF;YACD,OAAO,kBAAkB,CAAC,IAAI,CAAC,EAAE,CAAC,CAAC;QACrC,CAAC,CAAC,CAAC;QACH,OAAO,SAAS,CAAC;IACnB,CAAC;IAED;;;;;;;;OAQG;IACK,WAAW,CACjB,WAAmB,EACnB,eAAuB,EAAE,EACzB,eAA8B,IAAI;QAElC,mEAAmE;QACnE,wEAAwE;QACxE,IAAI,YAAY,KAAK,IAAI,EAAE;YACzB,YAAY,GAAG,CAAC,IAAI,CAAC,OAAO,CAAC,WAAuB,CAAC,CAAC,CAAC,CAAC;gBACtD,IAAI,CAAC,sBAAsB,CAAC,KAAK,CAAC,CAAC;gBACnC,IAAI,CAAC,sBAAsB,CAAC,SAAS,CAAC;SACzC;QAED,IAAI,IAAI,CAAC,WAAW,CAAC,WAAuB,CAAC,IAAI,YAAY,KAAK,EAAE;YAClE,YAAY,GAAG,IAAI,CAAC,SAAS,CAAC;QAEhC,MAAM,cAAc,GAAG,iBAAiB,CAAC;QACzC,MAAM,SAAS,GAAG,IAAI,CAAC,YAAY,CAAC,WAAuB,CAAC,CAAC;QAC7D,wCAAwC;QACxC,SAAS,CAAC,IAAI,CAAC,CAAC,GAAW,EAAE,EAAE;YAC7B,MAAM,WAAW,GAAG,IAAI,CAAC,MAAM,CAAC,GAAG,CAAC,GAAG,CAAC,CAAC;YAEzC,mEAAmE;YACnE,mEAAmE;YACnE,0CAA0C;YAC1C,MAAM,YAAY,GAAG,WAAW,CAAC,UAAU,CAAC,KAAK,CAAC,IAAI,WAAW,CAAC,UAAU,CAAC,KAAK,CAAC,CAAC;YAEpF,iCAAiC;YACjC,MAAM,cAAc,GAAG,IAAI,CAAC,QAAQ,CAAC,WAAW,CAAC,CAAC;YAClD,MAAM,gBAAgB,GAAa,EAAE,CAAC;YACtC,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,cAAc,CAAC,MAAM,EAAE,CAAC,EAAE,EAAE;gBAC9C,IAAI,IAAI,GAAG,cAAc,CAAC,CAAC,CAAC,CAAC;gBAC7B,IAAI,YAAY;oBACd,IAAI,GAAG,IAAI,CAAC,OAAO,CAAC,cAAc,EAAE,EAAE,CAAC,CAAC;gBAC1C,IAAI,IAAI,KAAK,IAAI,CAAC,sBAAsB,CAAC,IAAI,EAAE;oBAC7C,gBAAgB,CAAC,IAAI,CAAC,YAAa,CAAC,CAAC;iBACtC;qBAAM,IAAI,IAAI,CAAC,OAAO,CAAC,WAAuB,CAAC,IAAI,IAAI,CAAC,MAAM,GAAG,CAAC,EAAE;oBACnE,2DAA2D;oBAC3D,MAAM,OAAO,GAAG,GAAG,GAAG,IAAI,GAAG,GAAG,CAAC;oBACjC,gBAAgB,CAAC,IAAI,CAAC,OAAO,CAAC,CAAC;iBAChC;qBAAM;oBACL,gBAAgB,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;iBAC7B;aACF;YACD,OAAO,gBAAgB,CAAC,IAAI,CAAC,YAAY,CAAC,CAAC;QAC7C,CAAC,CAAC,CAAC;QACH,OAAO,SAAS,CAAC;IACnB,CAAC;IAEO,sBAAsB;QAC5B,iDAAiD;QACjD,OAAO,IAAI,CAAC,YAAY,sCAAoB,CAAC;IAC/C,CAAC;IAED;;;;;OAKG;IACI,OAAO,CAAC
,WAAqB,EAAE,eAA8B,IAAI;QACtE,sBAAsB;QACtB,IAAI,IAAI,CAAC,QAAQ,KAAK,WAAW;YAC/B,MAAM,IAAI,KAAK,CAAC,yBAAyB,CAAC,CAAC;QAC7C,IAAI,IAAI,CAAC,WAAW,CAAC,WAAW,CAAC,IAAI,YAAY,KAAK,IAAI;YACxD,MAAM,IAAI,KAAK,CAAC,gCAAgC,CAAC,CAAC;QAEpD,IAAI,IAAI,CAAC,OAAO,EAAE,IAAI,IAAI,CAAC,WAAW,CAAC,WAAW,CAAC,IAAI,YAAY,KAAK,IAAI;YAC1E,OAAO,IAAI,CAAC,uBAAuB,CAAC,YAAY,CAAC,CAAC;aAC/C,IAAI,CAAC,IAAI,CAAC,OAAO,EAAE,IAAI,IAAI,CAAC,WAAW,EAAE,CAAC,IAAI,IAAI,CAAC,MAAM,CAAC,WAAW,CAAC;YACzE,OAAO,IAAI,CAAC,aAAa,EAAE,CAAC;aACzB,IAAI,IAAI,CAAC,WAAW,EAAE,IAAI,IAAI,CAAC,OAAO,CAAC,WAAW,CAAC;YACtD,OAAO,IAAI,CAAC,uBAAuB,EAAE,CAAC;aACnC,IAAI,IAAI,CAAC,MAAM,EAAE,IAAI,IAAI,CAAC,OAAO,CAAC,WAAW,CAAC,EAAE,mBAAmB;YACtE,OAAO,IAAI,CAAC,WAAW,CAAC,WAAW,CAAC,CAAC;aAClC,iDAAiD;YACpD,OAAO,IAAI,CAAC,WAAW,CAAC,WAAW,EAAE,YAAa,CAAC,CAAC;IACxD,CAAC;CAKF","sourcesContent":["/* Do not change these import lines to match external modules in webpack configuration */\nimport * as grok from 'datagrok-api/grok';\nimport * as ui from 'datagrok-api/ui';\nimport * as DG from 'datagrok-api/dg';\nimport {SplitterFunc, WebLogo} from '../viewers/web-logo';\nimport {UnitsHandler, NOTATION} from './units-handler';\n\n/** Class for handling conversion of notation systems in Macromolecule columns */\nexport class NotationConverter extends UnitsHandler {\n  private _splitter: SplitterFunc | null = null;\n  protected get splitter(): SplitterFunc {\n    if (this._splitter === null)\n      this._splitter = WebLogo.getSplitterForColumn(this.column);\n    return this._splitter;\n  };\n\n  public toFasta(targetNotation: NOTATION): boolean { return targetNotation === NOTATION.FASTA; }\n\n  public toSeparator(targetNotation: NOTATION): boolean { return targetNotation === NOTATION.SEPARATOR; }\n\n  public toHelm(targetNotation: NOTATION): boolean { return targetNotation === NOTATION.HELM; }\n\n  /**\n   * Convert a Macromolecule column from FASTA to SEPARATOR notation\n   *\n   * @param {string} separator  A specific separator to be used\n   * @param 
{string} fastaGapSymbol  Gap symbol in FASTA, '-' by default\n   * @return {DG.Column}        A new column in SEPARATOR notation\n   */\n  private convertFastaToSeparator(separator: string, fastaGapSymbol: string | null = null): DG.Column {\n    if (fastaGapSymbol === null)\n      fastaGapSymbol = this.defaultGapSymbol;\n\n    const newColumn = this.getNewColumn(NOTATION.SEPARATOR);\n    // assign the values to the newly created empty column\n    newColumn.init((idx: number) => {\n      const fastaPolymer = this.column.get(idx);\n      const fastaMonomersArray = this.splitter(fastaPolymer);\n      for (let i = 0; i < fastaMonomersArray.length; i++) {\n        if (fastaMonomersArray[i] === fastaGapSymbol)\n          fastaMonomersArray[i] = this._defaultGapSymbolsDict.SEPARATOR;\n      }\n      return fastaMonomersArray.join(separator);\n    });\n    newColumn.setTag('separator', separator);\n    return newColumn;\n  }\n\n  /**\n   * Get the wrapper strings for HELM, depending on the type of the\n   * macromolecule (peptide, DNA, RNA)\n   *\n   * @return {string[]} Array of wrappers\n   */\n  private getHelmWrappers(): string[] {\n    const prefix = (this.isDna()) ? 'DNA1{' :\n      (this.isRna()) ? 'RNA1{' :\n        (this.isPeptide()) ? 'PEPTIDE1{' :\n          'Unknown'; // this case should be handled as exceptional\n\n    if (prefix === 'Unknown')\n      throw new Error('Neither peptide, nor nucleotide');\n\n    const postfix = '}$$$';\n    const leftWrapper = (this.isDna()) ? 'D(' :\n      (this.isRna()) ? 'R(' : ''; // no wrapper for peptides\n    const rightWrapper = (this.isDna() || this.isRna()) ? 
')P' : ''; // no wrapper for peptides\n    return [prefix, leftWrapper, rightWrapper, postfix];\n  }\n\n  // A helper function for converting strings to HELM\n  private convertToHelmHelper(\n    sourcePolymer: string,\n    sourceGapSymbol: string,\n    prefix: string,\n    leftWrapper: string,\n    rightWrapper: string,\n    postfix: string\n  ): string {\n    const monomerArray = this.splitter(sourcePolymer);\n    const monomerHelmArray: string[] = monomerArray.map((mm: string) => {\n      if (mm === sourceGapSymbol)\n        return this._defaultGapSymbolsDict.HELM;\n      else\n        return `${leftWrapper}${mm}${rightWrapper}`;\n    });\n    return `${prefix}${monomerHelmArray.join('.')}${postfix}`;\n  }\n\n  /**\n   * Convert a string with SEPARATOR/FASTA notation to HELM\n   *\n   * @param {string} sourcePolymer  A string to be converted\n   * @param {string | null} sourceGapSymbol  An optional gap symbol, set to\n   * default values ('-' for FASTA and '' for SEPARATOR) unless specified\n   * @return {string}  The target HELM string\n   */\n  public convertStringToHelm(\n    sourcePolymer: string,\n    sourceGapSymbol: string | null = null\n  ) : string {\n    if (sourceGapSymbol === null)\n      sourceGapSymbol = this.defaultGapSymbol;\n    const [prefix, leftWrapper, rightWrapper, postfix] = this.getHelmWrappers();\n    return this.convertToHelmHelper(sourcePolymer, sourceGapSymbol, prefix, leftWrapper, rightWrapper, postfix);\n  }\n\n  /**\n   * Convert a column to HELM\n   *\n   * @param {string | null} sourceGapSymbol\n   * @return {DG.Column}\n   */\n  private convertToHelm(sourceGapSymbol: string | null = null): DG.Column {\n    if (sourceGapSymbol === null)\n      sourceGapSymbol = this.defaultGapSymbol;\n\n    const [prefix, leftWrapper, rightWrapper, postfix] = this.getHelmWrappers();\n\n    const newColumn = this.getNewColumn(NOTATION.HELM);\n    // assign the values to the empty column\n    newColumn.init((idx: number) => {\n      const 
sourcePolymer = this.column.get(idx);\n      return this.convertToHelmHelper(sourcePolymer, sourceGapSymbol!, prefix, leftWrapper, rightWrapper, postfix);\n    });\n    return newColumn;\n  }\n\n  /**\n   * Convert SEPARATOR column to FASTA notation\n   *\n   * @param {string | null} fastaGapSymbol Optional gap symbol for FASTA\n   * @return {DG.Column}  Converted column\n   */\n  private convertSeparatorToFasta(fastaGapSymbol: string | null = null): DG.Column {\n    if (fastaGapSymbol === null)\n      fastaGapSymbol = this._defaultGapSymbolsDict.FASTA;\n\n    const newColumn = this.getNewColumn(NOTATION.FASTA);\n    // assign the values to the empty column\n    newColumn.init((idx: number) => {\n      const separatorPolymer = this.column.get(idx);\n      // items can be monomers or separators\n      const separatorItemsArray = this.splitter(separatorPolymer);\n      const fastaMonomersArray: string[] = [];\n      for (let i = 0; i < separatorItemsArray.length; i++) {\n        const item = separatorItemsArray[i];\n        if (item.length === 0) {\n          fastaMonomersArray.push(fastaGapSymbol!);\n        } else if (item.length > 1) {\n          // the case of a multi-character monomer\n          const monomer = '[' + item + ']';\n          fastaMonomersArray.push(monomer);\n        } else {\n          fastaMonomersArray.push(item);\n        }\n      }\n      return fastaMonomersArray.join('');\n    });\n    return newColumn;\n  }\n\n  /**\n   *  Convert HELM column to FASTA/SEPARATOR\n   *\n   * @param {string} tgtNotation    Target notation: FASTA or SEPARATOR\n   * @param {string} tgtSeparator   Optional target separator (for HELM ->\n   * @param {string | null} tgtGapSymbol   Optional target gap symbol\n   * SEPARATOR)\n   * @return {DG.Column} Converted column\n   */\n  private convertHelm(\n    tgtNotation: string,\n    tgtSeparator: string = '',\n    tgtGapSymbol: string | null = null\n  ): DG.Column {\n    // This function must not contain calls of 
isDna() and isRna(), for\n    // source helm columns may contain RNA, DNA and PT across different rows\n    if (tgtGapSymbol === null) {\n      tgtGapSymbol = (this.toFasta(tgtNotation as NOTATION)) ?\n        this._defaultGapSymbolsDict.FASTA :\n        this._defaultGapSymbolsDict.SEPARATOR;\n    }\n\n    if (this.toSeparator(tgtNotation as NOTATION) && tgtSeparator === '')\n      tgtSeparator = this.separator;\n\n    const helmWrappersRe = /(R\\(|D\\(|\\)|P)/g;\n    const newColumn = this.getNewColumn(tgtNotation as NOTATION);\n    // assign the values to the empty column\n    newColumn.init((idx: number) => {\n      const helmPolymer = this.column.get(idx);\n\n      // we cannot use isDna() or isRna() because source helm columns can\n      // contain DNA, RNA and PT in different cells, so the corresponding\n      // tags cannot be set for the whole column\n      const isNucleotide = helmPolymer.startsWith('DNA') || helmPolymer.startsWith('RNA');\n\n      // items can be monomers or helms\n      const helmItemsArray = this.splitter(helmPolymer);\n      const tgtMonomersArray: string[] = [];\n      for (let i = 0; i < helmItemsArray.length; i++) {\n        let item = helmItemsArray[i];\n        if (isNucleotide)\n          item = item.replace(helmWrappersRe, '');\n        if (item === this._defaultGapSymbolsDict.HELM) {\n          tgtMonomersArray.push(tgtGapSymbol!);\n        } else if (this.toFasta(tgtNotation as NOTATION) && item.length > 1) {\n          // the case of a multi-character monomer converted to FASTA\n          const monomer = '[' + item + ']';\n          tgtMonomersArray.push(monomer);\n        } else {\n          tgtMonomersArray.push(item);\n        }\n      }\n      return tgtMonomersArray.join(tgtSeparator);\n    });\n    return newColumn;\n  }\n\n  private convertHelmToSeparator(): DG.Column {\n    // TODO: implementatioreturn this.getNewColumn();\n    return this.getNewColumn(NOTATION.SEPARATOR);\n  }\n\n  /** Dispatcher method for notation 
conversion\n   *\n   * @param {NOTATION} tgtNotation   Notation we want to convert to\n   * @param {string | null} tgtSeparator   Possible separator\n   * @return {DG.Column}                Converted column\n   */\n  public convert(tgtNotation: NOTATION, tgtSeparator: string | null = null): DG.Column {\n    // possible exceptions\n    if (this.notation === tgtNotation)\n      throw new Error('tgt notation is invalid');\n    if (this.toSeparator(tgtNotation) && tgtSeparator === null)\n      throw new Error('tgt separator is not specified');\n\n    if (this.isFasta() && this.toSeparator(tgtNotation) && tgtSeparator !== null)\n      return this.convertFastaToSeparator(tgtSeparator);\n    else if ((this.isFasta() || this.isSeparator()) && this.toHelm(tgtNotation))\n      return this.convertToHelm();\n    else if (this.isSeparator() && this.toFasta(tgtNotation))\n      return this.convertSeparatorToFasta();\n    else if (this.isHelm() && this.toFasta(tgtNotation)) // the case of HELM\n      return this.convertHelm(tgtNotation);\n    else // this.isHelm() && this.toSeparator(tgtNotation)\n      return this.convertHelm(tgtNotation, tgtSeparator!);\n  }\n\n  public constructor(col: DG.Column) {\n    super(col);\n  }\n}\n"]}
|
|
@@ -0,0 +1,74 @@
|
|
|
1
|
+
import * as DG from 'datagrok-api/dg';
|
|
2
|
+
/**
 * Notation systems a Macromolecule column can be written in.
 * Each member's value is its own upper-case name, which makes it convenient
 * to set a "user-friendly" notation where necessary.
 */
|
|
3
|
+
export declare const enum NOTATION {
|
|
4
|
+
/** Returned by UnitsHandler.getNotation() when the units start with 'fasta' (case-insensitive). */
FASTA = "FASTA",
|
|
5
|
+
/** Returned by UnitsHandler.getNotation() when the units start with 'separator' (case-insensitive). */
SEPARATOR = "SEPARATOR",
|
|
6
|
+
/** Returned by UnitsHandler.getNotation() when the units start with 'helm' (case-insensitive). */
HELM = "HELM"
|
|
7
|
+
}
|
|
8
|
+
/**
 * Class for handling notation units in Macromolecule columns.
 *
 * The constructor reads the column's units tag, derives the notation from the
 * units prefix and picks the default gap symbol for that notation.
 */
|
|
9
|
+
export declare class UnitsHandler {
|
|
10
|
+
/** The column passed to the constructor. */
protected readonly _column: DG.Column;
|
|
11
|
+
/** Value of the column's units tag, read once in the constructor. */
protected _units: string;
|
|
12
|
+
/** Notation derived from the units prefix by getNotation(). */
protected _notation: NOTATION;
|
|
13
|
+
/** Default gap symbol for this column's own notation. */
protected _defaultGapSymbol: string;
|
|
14
|
+
/** Default gap symbol of every notation: '*' for HELM, '' for SEPARATOR, '-' for FASTA. */
protected _defaultGapSymbolsDict: {
|
|
15
|
+
HELM: string;
|
|
16
|
+
SEPARATOR: string;
|
|
17
|
+
FASTA: string;
|
|
18
|
+
};
|
|
19
|
+
/** The 20 single-letter codes used as the peptide candidate alphabet. */
static readonly PeptideFastaAlphabet: Set<string>;
|
|
20
|
+
/** The DNA candidate alphabet: 'A', 'C', 'G', 'T'. */
static readonly DnaFastaAlphabet: Set<string>;
|
|
21
|
+
/** The RNA candidate alphabet: 'A', 'C', 'G', 'U'. */
static readonly RnaFastaAlphabet: Set<string>;
|
|
22
|
+
/**
 * Set the units tag of col to `fasta:<SEQ|SEQ.MSA>:<PT|DNA|RNA|UN>`.
 * 'SEQ.MSA' is used when the collected statistics report sequences of the
 * same length; 'UN' is used when no candidate alphabet is similar enough.
 * Throws an Error if the semantic type of col is not MACROMOLECULE.
 */
static setUnitsToFastaColumn(col: DG.Column): void;
|
|
23
|
+
/** The units string read from the column in the constructor. */
protected get units(): string;
|
|
24
|
+
/** The column passed to the constructor. */
protected get column(): DG.Column;
|
|
25
|
+
/** The notation derived from the units. */
get notation(): NOTATION;
|
|
26
|
+
/** The default gap symbol for the column's notation. */
get defaultGapSymbol(): string;
|
|
27
|
+
/** The column's 'separator' tag; throws Error('Separator not set') when the tag is null. */
get separator(): string;
|
|
28
|
+
/** True when the notation is FASTA. */
isFasta(): boolean;
|
|
29
|
+
/** True when the notation is SEPARATOR. */
isSeparator(): boolean;
|
|
30
|
+
/** True when the notation is HELM. */
isHelm(): boolean;
|
|
31
|
+
/** True when the units end with 'rna' (case-insensitive). */
isRna(): boolean;
|
|
32
|
+
/** True when the units end with 'dna' (case-insensitive). */
isDna(): boolean;
|
|
33
|
+
/** True when the units end with 'pt' (case-insensitive). */
isPeptide(): boolean;
|
|
34
|
+
/** Associate notation types with the corresponding units */
|
|
35
|
+
/**
|
|
36
|
+
* @return {NOTATION} Notation associated with the units type
* @throws {Error} if the units start with none of 'fasta', 'separator' or
* 'helm' (compared case-insensitively)
|
|
37
|
+
*/
|
|
38
|
+
protected getNotation(): NOTATION;
|
|
39
|
+
/**
|
|
40
|
+
* Create a new empty column of the specified notation type and the same
|
|
41
|
+
* length as column
* The new column is named `<notation in lower case>(<source column name>)`,
* made unique within the source column's data frame, has the MACROMOLECULE
* semantic type and gets the 'Macromolecule' cell renderer tag.
|
|
42
|
+
*
|
|
43
|
+
* @param {NOTATION} targetNotation
|
|
44
|
+
* @return {DG.Column}
|
|
45
|
+
*/
|
|
46
|
+
protected getNewColumn(targetNotation: NOTATION): DG.Column;
|
|
47
|
+
/**
|
|
48
|
+
* Create a new empty column using templateCol as a template
* (the new column keeps the notation of templateCol)
|
|
49
|
+
*
|
|
50
|
+
* @param {DG.Column} templateCol the properties and units of this column are used as a
|
|
51
|
+
* template to build the new one
|
|
52
|
+
* @return {DG.Column}
|
|
53
|
+
*/
|
|
54
|
+
static getNewColumn(templateCol: DG.Column): DG.Column;
|
|
55
|
+
/**
|
|
56
|
+
* A helper function checking the validity of the 'units' string
* The string is valid when, compared case-insensitively, it starts with
* 'fasta', 'separator' or 'helm' and ends with 'rna', 'dna' or 'pt'.
|
|
57
|
+
*
|
|
58
|
+
* @param {string} units the string to be validated
|
|
59
|
+
* @return {boolean}
|
|
60
|
+
*/
|
|
61
|
+
static unitsStringIsValid(units: string): boolean;
|
|
62
|
+
/**
|
|
63
|
+
* Construct a new column of semantic type MACROMOLECULE from the list of
|
|
64
|
+
* specified parameters
* The uniqueness of the name is not checked here.
|
|
65
|
+
*
|
|
66
|
+
* @param {number} len the length of the new column
|
|
67
|
+
* @param {string} name the name of the new column
|
|
68
|
+
* @param {string} units the units of the new column
|
|
69
|
+
* @return {DG.Column}
* @throws {Error} if units does not pass unitsStringIsValid()
|
|
70
|
+
*/
|
|
71
|
+
static getNewColumnFromParams(len: number, name: string, units: string): DG.Column;
|
|
72
|
+
/**
 * @param col column to handle; an Error is thrown when its units tag is not
 * set or corresponds to no notation
 */
constructor(col: DG.Column);
|
|
73
|
+
}
|
|
74
|
+
//# sourceMappingURL=units-handler.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"units-handler.d.ts","sourceRoot":"","sources":["units-handler.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,MAAM,iBAAiB,CAAC;AAMtC,0EAA0E;AAC1E,0BAAkB,QAAQ;IACxB,KAAK,UAAU;IACf,SAAS,cAAc;IACvB,IAAI,SAAS;CACd;AAED,iEAAiE;AACjE,qBAAa,YAAY;IACvB,SAAS,CAAC,QAAQ,CAAC,OAAO,EAAE,EAAE,CAAC,MAAM,CAAC;IACtC,SAAS,CAAC,MAAM,EAAE,MAAM,CAAC;IACzB,SAAS,CAAC,SAAS,EAAE,QAAQ,CAAC;IAC9B,SAAS,CAAC,iBAAiB,EAAE,MAAM,CAAC;IACpC,SAAS,CAAC,sBAAsB;;;;MAI9B;IAEF,gBAAuB,oBAAoB,cAGxC;IACH,gBAAuB,gBAAgB,cAAiC;IACxE,gBAAuB,gBAAgB,cAAiC;WAE1D,qBAAqB,CAAC,GAAG,EAAE,EAAE,CAAC,MAAM;IAsBlD,SAAS,KAAK,KAAK,IAAI,MAAM,CAAwB;IAErD,SAAS,KAAK,MAAM,IAAI,EAAE,CAAC,MAAM,CAAyB;IAE1D,IAAW,QAAQ,IAAI,QAAQ,CAA2B;IAE1D,IAAW,gBAAgB,IAAI,MAAM,CAAmC;IAExE,IAAW,SAAS,IAAI,MAAM,CAM7B;IAEM,OAAO,IAAI,OAAO;IAElB,WAAW,IAAI,OAAO;IAEtB,MAAM,IAAI,OAAO;IAEjB,KAAK,IAAI,OAAO;IAEhB,KAAK,IAAI,OAAO;IAEhB,SAAS,IAAI,OAAO;IAE3B,4DAA4D;IAC5D;;OAEG;IACH,SAAS,CAAC,WAAW,IAAI,QAAQ;IAWjC;;;;;;OAMG;IACH,SAAS,CAAC,YAAY,CAAC,cAAc,EAAE,QAAQ,GAAG,EAAE,CAAC,MAAM;IAmB3D;;;;;;OAMG;WACW,YAAY,CAAC,WAAW,EAAE,EAAE,CAAC,MAAM,GAAG,EAAE,CAAC,MAAM;IAM7D;;;;;OAKG;WACW,kBAAkB,CAAC,KAAK,EAAE,MAAM,GAAG,OAAO;IAUxD;;;;;;;;OAQG;WACW,sBAAsB,CAClC,GAAG,EAAE,MAAM,EACX,IAAI,EAAE,MAAM,EACZ,KAAK,EAAE,MAAM,GACZ,EAAE,CAAC,MAAM;gBAYO,GAAG,EAAE,EAAE,CAAC,MAAM;CAYlC"}
|
|
@@ -0,0 +1,141 @@
|
|
|
1
|
+
import * as DG from 'datagrok-api/dg';
|
|
2
|
+
import { WebLogo } from '../viewers/web-logo';
|
|
3
|
+
/** Class for handling notation units in Macromolecule columns */
|
|
4
|
+
export class UnitsHandler {
|
|
5
|
+
constructor(col) {
|
|
6
|
+
this._defaultGapSymbolsDict = {
|
|
7
|
+
HELM: '*',
|
|
8
|
+
SEPARATOR: '',
|
|
9
|
+
FASTA: '-',
|
|
10
|
+
};
|
|
11
|
+
this._column = col;
|
|
12
|
+
const units = this._column.tags[DG.TAGS.UNITS];
|
|
13
|
+
if (units !== null)
|
|
14
|
+
this._units = units;
|
|
15
|
+
else
|
|
16
|
+
throw new Error('Units are not specified in column');
|
|
17
|
+
this._notation = this.getNotation();
|
|
18
|
+
this._defaultGapSymbol = (this.isFasta()) ? this._defaultGapSymbolsDict.FASTA :
|
|
19
|
+
(this.isHelm()) ? this._defaultGapSymbolsDict.HELM :
|
|
20
|
+
this._defaultGapSymbolsDict.SEPARATOR;
|
|
21
|
+
}
|
|
22
|
+
static setUnitsToFastaColumn(col) {
|
|
23
|
+
if (col.semType !== DG.SEMTYPE.MACROMOLECULE)
|
|
24
|
+
throw new Error('Fasta column must be MACROMOLECULE');
|
|
25
|
+
const stats = WebLogo.getStats(col, 5, WebLogo.splitterAsFasta);
|
|
26
|
+
const seqType = stats.sameLength ? 'SEQ.MSA' : 'SEQ';
|
|
27
|
+
const alphabetCandidates = [
|
|
28
|
+
['PT', UnitsHandler.PeptideFastaAlphabet],
|
|
29
|
+
['DNA', UnitsHandler.DnaFastaAlphabet],
|
|
30
|
+
['RNA', UnitsHandler.RnaFastaAlphabet],
|
|
31
|
+
];
|
|
32
|
+
// Calculate likelihoods for alphabet_candidates
|
|
33
|
+
const alphabetCandidatesSim = alphabetCandidates.map((c) => WebLogo.getAlphabetSimilarity(stats.freq, c[1]));
|
|
34
|
+
const maxCos = Math.max(...alphabetCandidatesSim);
|
|
35
|
+
const alphabet = maxCos > 0.65 ? alphabetCandidates[alphabetCandidatesSim.indexOf(maxCos)][0] : 'UN';
|
|
36
|
+
const units = `fasta:${seqType}:${alphabet}`;
|
|
37
|
+
col.setTag(DG.TAGS.UNITS, units);
|
|
38
|
+
}
|
|
39
|
+
get units() { return this._units; }
|
|
40
|
+
get column() { return this._column; }
|
|
41
|
+
get notation() { return this._notation; }
|
|
42
|
+
get defaultGapSymbol() { return this._defaultGapSymbol; }
|
|
43
|
+
get separator() {
|
|
44
|
+
const separator = this.column.getTag('separator');
|
|
45
|
+
if (separator !== null)
|
|
46
|
+
return separator;
|
|
47
|
+
else
|
|
48
|
+
throw new Error('Separator not set');
|
|
49
|
+
}
|
|
50
|
+
isFasta() { return this.notation === "FASTA" /* NOTATION.FASTA */; }
|
|
51
|
+
isSeparator() { return this.notation === "SEPARATOR" /* NOTATION.SEPARATOR */; }
|
|
52
|
+
isHelm() { return this.notation === "HELM" /* NOTATION.HELM */; }
|
|
53
|
+
isRna() { return this.units.toLowerCase().endsWith('rna'); }
|
|
54
|
+
isDna() { return this.units.toLowerCase().endsWith('dna'); }
|
|
55
|
+
isPeptide() { return this.units.toLowerCase().endsWith('pt'); }
|
|
56
|
+
/** Associate notation types with the corresponding units */
|
|
57
|
+
/**
|
|
58
|
+
* @return {NOTATION} Notation associated with the units type
|
|
59
|
+
*/
|
|
60
|
+
getNotation() {
|
|
61
|
+
if (this.units.toLowerCase().startsWith('fasta'))
|
|
62
|
+
return "FASTA" /* NOTATION.FASTA */;
|
|
63
|
+
else if (this.units.toLowerCase().startsWith('separator'))
|
|
64
|
+
return "SEPARATOR" /* NOTATION.SEPARATOR */;
|
|
65
|
+
else if (this.units.toLowerCase().startsWith('helm'))
|
|
66
|
+
return "HELM" /* NOTATION.HELM */;
|
|
67
|
+
else
|
|
68
|
+
throw new Error('The column has units that do not correspond to any notation');
|
|
69
|
+
}
|
|
70
|
+
/**
|
|
71
|
+
* Create a new empty column of the specified notation type and the same
|
|
72
|
+
* length as column
|
|
73
|
+
*
|
|
74
|
+
* @param {NOTATION} targetNotation
|
|
75
|
+
* @return {DG.Column}
|
|
76
|
+
*/
|
|
77
|
+
getNewColumn(targetNotation) {
|
|
78
|
+
const col = this.column;
|
|
79
|
+
const len = col.length;
|
|
80
|
+
const name = targetNotation.toLowerCase() + '(' + col.name + ')';
|
|
81
|
+
const newColName = col.dataFrame.columns.getUnusedName(name);
|
|
82
|
+
const newColumn = DG.Column.fromList('string', newColName, new Array(len).fill(''));
|
|
83
|
+
newColumn.semType = DG.SEMTYPE.MACROMOLECULE;
|
|
84
|
+
newColumn.setTag(DG.TAGS.UNITS, this.units.replace(this.notation.toLowerCase().toString(), targetNotation.toLowerCase().toString()));
|
|
85
|
+
newColumn.setTag(DG.TAGS.CELL_RENDERER, 'Macromolecule');
|
|
86
|
+
return newColumn;
|
|
87
|
+
}
|
|
88
|
+
/**
|
|
89
|
+
* Create a new empty column using templateCol as a template
|
|
90
|
+
*
|
|
91
|
+
* @param {DG.Column} templateCol the properties and units of this column are used as a
|
|
92
|
+
* template to build the new one
|
|
93
|
+
* @return {DG.Column}
|
|
94
|
+
*/
|
|
95
|
+
static getNewColumn(templateCol) {
|
|
96
|
+
const col = new UnitsHandler(templateCol);
|
|
97
|
+
const targetNotation = col.notation;
|
|
98
|
+
return col.getNewColumn(targetNotation);
|
|
99
|
+
}
|
|
100
|
+
/**
|
|
101
|
+
* A helper function checking the validity of the 'units' string
|
|
102
|
+
*
|
|
103
|
+
* @param {string} units the string to be validated
|
|
104
|
+
* @return {boolean}
|
|
105
|
+
*/
|
|
106
|
+
static unitsStringIsValid(units) {
|
|
107
|
+
units = units.toLowerCase();
|
|
108
|
+
const prefixes = ["FASTA" /* NOTATION.FASTA */, "SEPARATOR" /* NOTATION.SEPARATOR */, "HELM" /* NOTATION.HELM */];
|
|
109
|
+
const postfixes = ['rna', 'dna', 'pt'];
|
|
110
|
+
const prefixCriterion = prefixes.some((p) => units.startsWith(p.toLowerCase()));
|
|
111
|
+
const postfixCriterion = postfixes.some((p) => units.endsWith(p)); // already lowercase;
|
|
112
|
+
return prefixCriterion && postfixCriterion;
|
|
113
|
+
}
|
|
114
|
+
/**
|
|
115
|
+
* Construct a new column of semantic type MACROMOLECULE from the list of
|
|
116
|
+
* specified parameters
|
|
117
|
+
*
|
|
118
|
+
* @param {number} len the length of the new column
|
|
119
|
+
* @param {string} name the name of the new column
|
|
120
|
+
* @param {string} units the units of the new column
|
|
121
|
+
* @return {DG.Column}
|
|
122
|
+
*/
|
|
123
|
+
static getNewColumnFromParams(len, name, units) {
|
|
124
|
+
// WARNING: in this implementation is is impossible to verify the uniqueness
|
|
125
|
+
// of the new column's name
|
|
126
|
+
// TODO: verify the validity of units parameter
|
|
127
|
+
if (!UnitsHandler.unitsStringIsValid(units))
|
|
128
|
+
throw new Error('Invalid format of \'units\' parameter');
|
|
129
|
+
const newColumn = DG.Column.fromList('string', name, new Array(len).fill(''));
|
|
130
|
+
newColumn.semType = DG.SEMTYPE.MACROMOLECULE;
|
|
131
|
+
newColumn.setTag(DG.TAGS.UNITS, units);
|
|
132
|
+
return newColumn;
|
|
133
|
+
}
|
|
134
|
+
}
|
|
135
|
+
UnitsHandler.PeptideFastaAlphabet = new Set([
|
|
136
|
+
'G', 'L', 'Y', 'S', 'E', 'Q', 'D', 'N', 'F', 'A',
|
|
137
|
+
'K', 'R', 'H', 'C', 'V', 'P', 'W', 'I', 'M', 'T',
|
|
138
|
+
]);
|
|
139
|
+
UnitsHandler.DnaFastaAlphabet = new Set(['A', 'C', 'G', 'T']);
|
|
140
|
+
UnitsHandler.RnaFastaAlphabet = new Set(['A', 'C', 'G', 'U']);
|
|
141
|
+
//# sourceMappingURL=data:application/json;base64,{"version":3,"file":"units-handler.js","sourceRoot":"","sources":["units-handler.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,MAAM,iBAAiB,CAAC;AAItC,OAAO,EAAC,OAAO,EAAc,MAAM,qBAAqB,CAAC;AASzD,iEAAiE;AACjE,MAAM,OAAO,YAAY;IAmKvB,YAAmB,GAAc;QA9JvB,2BAAsB,GAAG;YACjC,IAAI,EAAE,GAAG;YACT,SAAS,EAAE,EAAE;YACb,KAAK,EAAE,GAAG;SACX,CAAC;QA2JA,IAAI,CAAC,OAAO,GAAG,GAAG,CAAC;QACnB,MAAM,KAAK,GAAG,IAAI,CAAC,OAAO,CAAC,IAAI,CAAC,EAAE,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC;QAC/C,IAAI,KAAK,KAAK,IAAI;YAChB,IAAI,CAAC,MAAM,GAAG,KAAK,CAAC;;YAEpB,MAAM,IAAI,KAAK,CAAC,mCAAmC,CAAC,CAAC;QACvD,IAAI,CAAC,SAAS,GAAG,IAAI,CAAC,WAAW,EAAE,CAAC;QACpC,IAAI,CAAC,iBAAiB,GAAG,CAAC,IAAI,CAAC,OAAO,EAAE,CAAC,CAAC,CAAC,CAAC,IAAI,CAAC,sBAAsB,CAAC,KAAK,CAAC,CAAC;YAC7E,CAAC,IAAI,CAAC,MAAM,EAAE,CAAC,CAAC,CAAC,CAAC,IAAI,CAAC,sBAAsB,CAAC,IAAI,CAAC,CAAC;gBAClD,IAAI,CAAC,sBAAsB,CAAC,SAAS,CAAC;IAC5C,CAAC;IA5JM,MAAM,CAAC,qBAAqB,CAAC,GAAc;QAChD,IAAI,GAAG,CAAC,OAAO,KAAK,EAAE,CAAC,OAAO,CAAC,aAAa;YAC1C,MAAM,IAAI,KAAK,CAAC,oCAAoC,CAAC,CAAC;QAExD,MAAM,KAAK,GAAgB,OAAO,CAAC,QAAQ,CAAC,GAAG,EAAE,CAAC,EAAE,OAAO,CAAC,eAAe,CAAC,CAAC;QAC7E,MAAM,OAAO,GAAG,KAAK,CAAC,UAAU,CAAC,CAAC,CAAC,SAAS,CAAC,CAAC,CAAC,KAAK,CAAC;QAErD,MAAM,kBAAkB,GAA4B;YAClD,CAAC,IAAI,EAAE,YAAY,CAAC,oBAAoB,CAAC;YACzC,CAAC,KAAK,EAAE,YAAY,CAAC,gBAAgB,CAAC;YACtC,CAAC,KAAK,EAAE,YAAY,CAAC,gBAAgB,CAAC;SACvC,CAAC;QAEF,gDAAgD;QAChD,MAAM,qBAAqB,GAAa,kBAAkB,CAAC,GAAG,CAC5D,CAAC,CAAC,EAAE,EAAE,CAAC,OAAO,CAAC,qBAAqB,CAAC,KAAK,CAAC,IAAI,EAAE,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC;QAC1D,MAAM,MAAM,GAAG,IAAI,CAAC,GAAG,CAAC,GAAG,qBAAqB,CAAC,CAAC;QAClD,MAAM,QAAQ,GAAG,MAAM,GAAG,IAAI,CAAC,CAAC,CAAC,kBAAkB,CAAC,qBAAqB,CAAC,OAAO,CAAC,MAAM,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,IAAI,CAAC;QACrG,MAAM,KAAK,GAAW,SAAS,OAAO,IAAI,QAAQ,EAAE,CAAC;QACrD,GAAG,CAAC,MAAM,CAAC,EAAE,CAAC,IAAI,CAAC,KAAK,EAAE,KAAK,CAAC,CAAC;IACnC,CAAC;IAED,IAAc,KAAK,KAAa,OAAO,IAAI,CAAC,MAAM,CAAC,CAAC,CAAC;IAErD,IAAc,MAAM,KAAgB,OAAO,IAAI,CAAC,OAAO,CAAC,CAAC,CAAC;IAE1D,IAAW,QAAQ,KAAe
,OAAO,IAAI,CAAC,SAAS,CAAC,CAAC,CAAC;IAE1D,IAAW,gBAAgB,KAAa,OAAO,IAAI,CAAC,iBAAiB,CAAC,CAAC,CAAC;IAExE,IAAW,SAAS;QAClB,MAAM,SAAS,GAAG,IAAI,CAAC,MAAM,CAAC,MAAM,CAAC,WAAW,CAAC,CAAC;QAClD,IAAI,SAAS,KAAK,IAAI;YACpB,OAAO,SAAS,CAAC;;YAEjB,MAAM,IAAI,KAAK,CAAC,mBAAmB,CAAC,CAAC;IACzC,CAAC;IAEM,OAAO,KAAc,OAAO,IAAI,CAAC,QAAQ,iCAAmB,CAAC,CAAC,CAAC;IAE/D,WAAW,KAAc,OAAO,IAAI,CAAC,QAAQ,yCAAuB,CAAC,CAAC,CAAC;IAEvE,MAAM,KAAc,OAAO,IAAI,CAAC,QAAQ,+BAAkB,CAAC,CAAC,CAAC;IAE7D,KAAK,KAAc,OAAO,IAAI,CAAC,KAAK,CAAC,WAAW,EAAE,CAAC,QAAQ,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC;IAErE,KAAK,KAAc,OAAO,IAAI,CAAC,KAAK,CAAC,WAAW,EAAE,CAAC,QAAQ,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC;IAErE,SAAS,KAAc,OAAO,IAAI,CAAC,KAAK,CAAC,WAAW,EAAE,CAAC,QAAQ,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;IAE/E,4DAA4D;IAC5D;;OAEG;IACO,WAAW;QACnB,IAAI,IAAI,CAAC,KAAK,CAAC,WAAW,EAAE,CAAC,UAAU,CAAC,OAAO,CAAC;YAC9C,oCAAsB;aACnB,IAAI,IAAI,CAAC,KAAK,CAAC,WAAW,EAAE,CAAC,UAAU,CAAC,WAAW,CAAC;YACvD,4CAA0B;aACvB,IAAI,IAAI,CAAC,KAAK,CAAC,WAAW,EAAE,CAAC,UAAU,CAAC,MAAM,CAAC;YAClD,kCAAqB;;YAErB,MAAM,IAAI,KAAK,CAAC,6DAA6D,CAAC,CAAC;IACnF,CAAC;IAED;;;;;;OAMG;IACO,YAAY,CAAC,cAAwB;QAC7C,MAAM,GAAG,GAAG,IAAI,CAAC,MAAM,CAAC;QACxB,MAAM,GAAG,GAAG,GAAG,CAAC,MAAM,CAAC;QACvB,MAAM,IAAI,GAAG,cAAc,CAAC,WAAW,EAAE,GAAG,GAAG,GAAG,GAAG,CAAC,IAAI,GAAG,GAAG,CAAC;QACjE,MAAM,UAAU,GAAG,GAAG,CAAC,SAAS,CAAC,OAAO,CAAC,aAAa,CAAC,IAAI,CAAC,CAAC;QAC7D,MAAM,SAAS,GAAG,EAAE,CAAC,MAAM,CAAC,QAAQ,CAAC,QAAQ,EAAE,UAAU,EAAE,IAAI,KAAK,CAAC,GAAG,CAAC,CAAC,IAAI,CAAC,EAAE,CAAC,CAAC,CAAC;QACpF,SAAS,CAAC,OAAO,GAAG,EAAE,CAAC,OAAO,CAAC,aAAa,CAAC;QAC7C,SAAS,CAAC,MAAM,CACd,EAAE,CAAC,IAAI,CAAC,KAAK,EACb,IAAI,CAAC,KAAK,CAAC,OAAO,CAChB,IAAI,CAAC,QAAQ,CAAC,WAAW,EAAE,CAAC,QAAQ,EAAE,EACtC,cAAc,CAAC,WAAW,EAAE,CAAC,QAAQ,EAAE,CACxC,CACF,CAAC;QACF,SAAS,CAAC,MAAM,CAAC,EAAE,CAAC,IAAI,CAAC,aAAa,EAAE,eAAe,CAAC,CAAC;QAEzD,OAAO,SAAS,CAAC;IACnB,CAAC;IAED;;;;;;OAMG;IACI,MAAM,CAAC,YAAY,CAAC,WAAsB;QAC/C,MAAM,GAAG,GAAiB,IAAI,YAAY,CAAC,WAAW,CAAC,CAAC;QACxD,MAAM,cAAc,GAAG,GAAG,CAAC,QAAQ,CAAC;QACpC,OAAO,GAAG,CAAC,YAAY,CAAC,cAAc,CAAC,CAAC;IAC1
C,CAAC;IAED;;;;;OAKG;IACI,MAAM,CAAC,kBAAkB,CAAC,KAAa;QAC5C,KAAK,GAAG,KAAK,CAAC,WAAW,EAAE,CAAC;QAC5B,MAAM,QAAQ,GAAG,gGAAmD,CAAC;QACrE,MAAM,SAAS,GAAG,CAAC,KAAK,EAAE,KAAK,EAAE,IAAI,CAAC,CAAC;QAEvC,MAAM,eAAe,GAAG,QAAQ,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,KAAK,CAAC,UAAU,CAAC,CAAC,CAAC,WAAW,EAAE,CAAC,CAAC,CAAC;QAChF,MAAM,gBAAgB,GAAG,SAAS,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,KAAK,CAAC,QAAQ,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,qBAAqB;QACxF,OAAO,eAAe,IAAI,gBAAgB,CAAC;IAC7C,CAAC;IAED;;;;;;;;OAQG;IACI,MAAM,CAAC,sBAAsB,CAClC,GAAW,EACX,IAAY,EACZ,KAAa;QAEb,4EAA4E;QAC5E,2BAA2B;QAC3B,+CAA+C;QAC/C,IAAI,CAAC,YAAY,CAAC,kBAAkB,CAAC,KAAK,CAAC;YACzC,MAAM,IAAI,KAAK,CAAC,uCAAuC,CAAC,CAAC;QAC3D,MAAM,SAAS,GAAG,EAAE,CAAC,MAAM,CAAC,QAAQ,CAAC,QAAQ,EAAE,IAAI,EAAE,IAAI,KAAK,CAAC,GAAG,CAAC,CAAC,IAAI,CAAC,EAAE,CAAC,CAAC,CAAC;QAC9E,SAAS,CAAC,OAAO,GAAG,EAAE,CAAC,OAAO,CAAC,aAAa,CAAC;QAC7C,SAAS,CAAC,MAAM,CAAC,EAAE,CAAC,IAAI,CAAC,KAAK,EAAE,KAAK,CAAC,CAAC;QACvC,OAAO,SAAS,CAAC;IACnB,CAAC;;AAtJsB,iCAAoB,GAAG,IAAI,GAAG,CAAC;IACpD,GAAG,EAAE,GAAG,EAAE,GAAG,EAAE,GAAG,EAAE,GAAG,EAAE,GAAG,EAAE,GAAG,EAAE,GAAG,EAAE,GAAG,EAAE,GAAG;IAChD,GAAG,EAAE,GAAG,EAAE,GAAG,EAAE,GAAG,EAAE,GAAG,EAAE,GAAG,EAAE,GAAG,EAAE,GAAG,EAAE,GAAG,EAAE,GAAG;CACjD,CAAC,CAAC;AACoB,6BAAgB,GAAG,IAAI,GAAG,CAAC,CAAC,GAAG,EAAE,GAAG,EAAE,GAAG,EAAE,GAAG,CAAC,CAAC,CAAC;AACjD,6BAAgB,GAAG,IAAI,GAAG,CAAC,CAAC,GAAG,EAAE,GAAG,EAAE,GAAG,EAAE,GAAG,CAAC,CAAC,CAAC","sourcesContent":["import * as DG from 'datagrok-api/dg';\nimport * as ui from 'datagrok-api/ui';\nimport * as grok from 'datagrok-api/grok';\n\nimport {WebLogo, SeqColStats} from '../viewers/web-logo';\n\n/** enum type to simplify setting \"user-friendly\" notation if necessary */\nexport const enum NOTATION {\n  FASTA = 'FASTA',\n  SEPARATOR = 'SEPARATOR',\n  HELM = 'HELM'\n}\n\n/** Class for handling notation units in Macromolecule columns */\nexport class UnitsHandler {\n  protected readonly _column: DG.Column; // the column to be converted\n  protected _units: string; // units, of the form 
fasta:SEQ:NT, etc.\n  protected _notation: NOTATION; // current notation (without :SEQ:NT, etc.)\n  protected _defaultGapSymbol: string;\n  protected _defaultGapSymbolsDict = {\n    HELM: '*',\n    SEPARATOR: '',\n    FASTA: '-',\n  };\n\n  public static readonly PeptideFastaAlphabet = new Set([\n    'G', 'L', 'Y', 'S', 'E', 'Q', 'D', 'N', 'F', 'A',\n    'K', 'R', 'H', 'C', 'V', 'P', 'W', 'I', 'M', 'T',\n  ]);\n  public static readonly DnaFastaAlphabet = new Set(['A', 'C', 'G', 'T']);\n  public static readonly RnaFastaAlphabet = new Set(['A', 'C', 'G', 'U']);\n\n  public static setUnitsToFastaColumn(col: DG.Column) {\n    if (col.semType !== DG.SEMTYPE.MACROMOLECULE)\n      throw new Error('Fasta column must be MACROMOLECULE');\n\n    const stats: SeqColStats = WebLogo.getStats(col, 5, WebLogo.splitterAsFasta);\n    const seqType = stats.sameLength ? 'SEQ.MSA' : 'SEQ';\n\n    const alphabetCandidates: [string, Set<string>][] = [\n      ['PT', UnitsHandler.PeptideFastaAlphabet],\n      ['DNA', UnitsHandler.DnaFastaAlphabet],\n      ['RNA', UnitsHandler.RnaFastaAlphabet],\n    ];\n\n    // Calculate likelihoods for alphabet_candidates\n    const alphabetCandidatesSim: number[] = alphabetCandidates.map(\n      (c) => WebLogo.getAlphabetSimilarity(stats.freq, c[1]));\n    const maxCos = Math.max(...alphabetCandidatesSim);\n    const alphabet = maxCos > 0.65 ? 
alphabetCandidates[alphabetCandidatesSim.indexOf(maxCos)][0] : 'UN';\n    const units: string = `fasta:${seqType}:${alphabet}`;\n    col.setTag(DG.TAGS.UNITS, units);\n  }\n\n  protected get units(): string { return this._units; }\n\n  protected get column(): DG.Column { return this._column; }\n\n  public get notation(): NOTATION { return this._notation; }\n\n  public get defaultGapSymbol(): string { return this._defaultGapSymbol; }\n\n  public get separator(): string {\n    const separator = this.column.getTag('separator');\n    if (separator !== null)\n      return separator;\n    else\n      throw new Error('Separator not set');\n  }\n\n  public isFasta(): boolean { return this.notation === NOTATION.FASTA; }\n\n  public isSeparator(): boolean { return this.notation === NOTATION.SEPARATOR; }\n\n  public isHelm(): boolean { return this.notation === NOTATION.HELM; }\n\n  public isRna(): boolean { return this.units.toLowerCase().endsWith('rna'); }\n\n  public isDna(): boolean { return this.units.toLowerCase().endsWith('dna'); }\n\n  public isPeptide(): boolean { return this.units.toLowerCase().endsWith('pt'); }\n\n  /** Associate notation types with the corresponding units */\n  /**\n   * @return {NOTATION}     Notation associated with the units type\n   */\n  protected getNotation(): NOTATION {\n    if (this.units.toLowerCase().startsWith('fasta'))\n      return NOTATION.FASTA;\n    else if (this.units.toLowerCase().startsWith('separator'))\n      return NOTATION.SEPARATOR;\n    else if (this.units.toLowerCase().startsWith('helm'))\n      return NOTATION.HELM;\n    else\n      throw new Error('The column has units that do not correspond to any notation');\n  }\n\n  /**\n   * Create a new empty column of the specified notation type and the same\n   * length as column\n   *\n   * @param {NOTATION} targetNotation\n   * @return {DG.Column}\n   */\n  protected getNewColumn(targetNotation: NOTATION): DG.Column {\n    const col = this.column;\n    const len = 
col.length;\n    const name = targetNotation.toLowerCase() + '(' + col.name + ')';\n    const newColName = col.dataFrame.columns.getUnusedName(name);\n    const newColumn = DG.Column.fromList('string', newColName, new Array(len).fill(''));\n    newColumn.semType = DG.SEMTYPE.MACROMOLECULE;\n    newColumn.setTag(\n      DG.TAGS.UNITS,\n      this.units.replace(\n        this.notation.toLowerCase().toString(),\n        targetNotation.toLowerCase().toString()\n      )\n    );\n    newColumn.setTag(DG.TAGS.CELL_RENDERER, 'Macromolecule');\n\n    return newColumn;\n  }\n\n  /**\n   * Create a new empty column using templateCol as a template\n   *\n   * @param {DG.Column} templateCol  the properties and units of this column are used as a\n   * template to build the new one\n   * @return {DG.Column}\n   */\n  public static getNewColumn(templateCol: DG.Column): DG.Column {\n    const col: UnitsHandler = new UnitsHandler(templateCol);\n    const targetNotation = col.notation;\n    return col.getNewColumn(targetNotation);\n  }\n\n  /**\n   * A helper function checking the validity of the 'units' string\n   *\n   * @param {string} units  the string to be validated\n   * @return {boolean}\n   */\n  public static unitsStringIsValid(units: string): boolean {\n    units = units.toLowerCase();\n    const prefixes = [NOTATION.FASTA, NOTATION.SEPARATOR, NOTATION.HELM];\n    const postfixes = ['rna', 'dna', 'pt'];\n\n    const prefixCriterion = prefixes.some((p) => units.startsWith(p.toLowerCase()));\n    const postfixCriterion = postfixes.some((p) => units.endsWith(p)); // already lowercase;\n    return prefixCriterion && postfixCriterion;\n  }\n\n  /**\n   * Construct a new column of semantic type MACROMOLECULE from the list of\n   * specified parameters\n   *\n   * @param {number}    len  the length of the new column\n   * @param {string}    name  the name of the new column\n   * @param {string}    units  the units of the new column\n   * @return {DG.Column}\n   */\n  public 
static getNewColumnFromParams(\n    len: number,\n    name: string,\n    units: string\n  ): DG.Column {\n    // WARNING: in this implementation is is impossible to verify the uniqueness\n    // of the new column's name\n    // TODO: verify the validity of units parameter\n    if (!UnitsHandler.unitsStringIsValid(units))\n      throw new Error('Invalid format of \\'units\\' parameter');\n    const newColumn = DG.Column.fromList('string', name, new Array(len).fill(''));\n    newColumn.semType = DG.SEMTYPE.MACROMOLECULE;\n    newColumn.setTag(DG.TAGS.UNITS, units);\n    return newColumn;\n  }\n\n  public constructor(col: DG.Column) {\n    this._column = col;\n    const units = this._column.tags[DG.TAGS.UNITS];\n    if (units !== null)\n      this._units = units;\n    else\n      throw new Error('Units are not specified in column');\n    this._notation = this.getNotation();\n    this._defaultGapSymbol = (this.isFasta()) ? this._defaultGapSymbolsDict.FASTA :\n      (this.isHelm()) ? this._defaultGapSymbolsDict.HELM :\n        this._defaultGapSymbolsDict.SEPARATOR;\n  }\n}\n"]}
|