@datagrok-libraries/bio 5.9.14 → 5.10.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/index.d.ts +2 -2
- package/index.d.ts.map +1 -1
- package/index.js +3 -3
- package/index.ts +3 -1
- package/package.json +2 -2
- package/src/utils/macromolecule.d.ts +2 -0
- package/src/utils/macromolecule.d.ts.map +1 -1
- package/src/utils/macromolecule.js +14 -1
- package/src/utils/notation-converter.d.ts.map +1 -1
- package/src/utils/notation-converter.js +10 -6
- package/src/utils/units-handler.d.ts +1 -1
- package/src/utils/units-handler.d.ts.map +1 -1
- package/src/utils/units-handler.js +26 -16
- package/src/viewers/phylocanvas-gl-viewer.js +1 -1
package/index.d.ts
CHANGED
|
@@ -8,7 +8,7 @@ import { IMonomerLib, Monomer, NodeType, isLeaf, NodeCuttedType } from './src/ty
|
|
|
8
8
|
import { UnknownSeqPalette, UnknownSeqPalettes } from './src/unknown';
|
|
9
9
|
import { DrawStyle, printLeftOrCentered } from './src/utils/cell-renderer';
|
|
10
10
|
import { FastaFileHandler } from './src/utils/fasta-handler';
|
|
11
|
-
import { getSplitter, splitterAsFasta, getSplitterForColumn, SplitterFunc, monomerToShort, splitterAsHelm, getStats, pickUpPalette, getPaletteByType, getAlphabetSimilarity, ALPHABET, NOTATION, TAGS, ALIGNMENT } from './src/utils/macromolecule';
|
|
11
|
+
import { getSplitter, splitterAsFasta, getSplitterForColumn, SplitterFunc, monomerToShort, splitterAsHelm, getStats, pickUpPalette, getPaletteByType, getAlphabet, getAlphabetSimilarity, ALPHABET, NOTATION, TAGS, ALIGNMENT } from './src/utils/macromolecule';
|
|
12
12
|
import { INewickHelper } from './src/utils/newick-helper';
|
|
13
13
|
import { NotationConverter } from './src/utils/notation-converter';
|
|
14
14
|
import { splitAlignedSequences } from './src/utils/splitter';
|
|
@@ -20,5 +20,5 @@ import { IVdRegionsViewer } from './src/viewers/vd-regions-viewer';
|
|
|
20
20
|
import { PositionHeight, PositionInfo, PositionMonomerInfo, WebLogoViewer } from './src/viewers/web-logo-viewer';
|
|
21
21
|
import { MonomerLib } from './src/monomer-works/monomer-lib';
|
|
22
22
|
import { readLibrary } from './src/monomer-works/monomer-utils';
|
|
23
|
-
export { ALIGNMENT, ALPHABET, NOTATION, TAGS, NotationConverter, SplitterFunc, getStats, getAlphabetSimilarity, getSplitter, splitterAsFasta, splitterAsHelm, getSplitterForColumn, monomerToShort, splitAlignedSequences, SeqPalette, SeqPaletteBase, Aminoacids, AminoacidsPalettes, Nucleotides, NucleotidesPalettes, UnknownSeqPalettes, UnknownSeqPalette, pickUpPalette, getPaletteByType, PositionHeight, PositionInfo, PositionMonomerInfo, WebLogoViewer, UnitsHandler, DrawStyle, printLeftOrCentered, FastaFileHandler, VdRegionType, VdRegion, IVdRegionsViewer, NodeType, isLeaf, NodeCuttedType, PhylocanvasTreeNode, NodeStyleType, StylesType, IPhylocanvasGlViewer, TreeTypesNames, PhylocanvasGlServiceBase, CanvasCallback, PhylocanvasGlTask, getPhylocanvasGlService, PhylocanvasGL, Shapes, TreeTypes, Utils, Newick, INewickHelper, ITreeHelper, getTreeHelper, Monomer, IMonomerLib, MonomerWorks, MonomerLib, readLibrary };
|
|
23
|
+
export { ALIGNMENT, ALPHABET, NOTATION, TAGS, NotationConverter, SplitterFunc, getStats, getAlphabet, getAlphabetSimilarity, getSplitter, splitterAsFasta, splitterAsHelm, getSplitterForColumn, monomerToShort, splitAlignedSequences, SeqPalette, SeqPaletteBase, Aminoacids, AminoacidsPalettes, Nucleotides, NucleotidesPalettes, UnknownSeqPalettes, UnknownSeqPalette, pickUpPalette, getPaletteByType, PositionHeight, PositionInfo, PositionMonomerInfo, WebLogoViewer, UnitsHandler, DrawStyle, printLeftOrCentered, FastaFileHandler, VdRegionType, VdRegion, IVdRegionsViewer, NodeType, isLeaf, NodeCuttedType, PhylocanvasTreeNode, NodeStyleType, StylesType, IPhylocanvasGlViewer, TreeTypesNames, PhylocanvasGlServiceBase, CanvasCallback, PhylocanvasGlTask, getPhylocanvasGlService, PhylocanvasGL, Shapes, TreeTypes, Utils, Newick, INewickHelper, ITreeHelper, getTreeHelper, Monomer, IMonomerLib, MonomerWorks, MonomerLib, readLibrary };
|
|
24
24
|
//# sourceMappingURL=index.d.ts.map
|
package/index.d.ts.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"index.d.ts","sourceRoot":"","sources":["index.ts"],"names":[],"mappings":"AACA,OAAO,EAAC,mBAAmB,EAAE,MAAM,EAAE,KAAK,EAAE,MAAM,EAAE,SAAS,EAAC,MAAM,6BAA6B,CAAC;AAElG,OAAO,EAAC,aAAa,EAAC,MAAM,6BAA6B,CAAC;AAE1D,OAAO,EAAC,UAAU,EAAE,kBAAkB,EAAC,MAAM,kBAAkB,CAAC;AAChE,OAAO,EAAC,YAAY,EAAC,MAAM,mCAAmC,CAAC;AAC/D,OAAO,EAAC,WAAW,EAAE,mBAAmB,EAAC,MAAM,mBAAmB,CAAC;AACnE,OAAO,EAAC,UAAU,EAAE,cAAc,EAAC,MAAM,oBAAoB,CAAC;AAC9D,OAAO,EAAC,WAAW,EAAE,OAAO,EAAE,QAAQ,EAAE,MAAM,EAAE,cAAc,EAAC,MAAM,aAAa,CAAC;AACnF,OAAO,EAAC,iBAAiB,EAAE,kBAAkB,EAAC,MAAM,eAAe,CAAC;AACpE,OAAO,EAAC,SAAS,EAAE,mBAAmB,EAAC,MAAM,2BAA2B,CAAC;AACzE,OAAO,EAAC,gBAAgB,EAAC,MAAM,2BAA2B,CAAC;AAC3D,OAAO,EACL,WAAW,EACX,eAAe,EACf,oBAAoB,EACpB,YAAY,EACZ,cAAc,EACd,cAAc,EACd,QAAQ,EACR,aAAa,EACb,gBAAgB,EAChB,qBAAqB,EACrB,QAAQ,EACR,QAAQ,EACR,IAAI,EACJ,SAAS,EACV,MAAM,2BAA2B,CAAC;AACnC,OAAO,EAAC,aAAa,EAAC,MAAM,2BAA2B,CAAC;AACxD,OAAO,EAAC,iBAAiB,EAAC,MAAM,gCAAgC,CAAC;AACjE,OAAO,EAAC,qBAAqB,EAAC,MAAM,sBAAsB,CAAC;AAC3D,OAAO,EAAC,aAAa,EAAE,WAAW,EAAC,MAAM,yBAAyB,CAAC;AACnE,OAAO,EAAC,YAAY,EAAC,MAAM,2BAA2B,CAAC;AACvD,OAAO,EAAC,QAAQ,EAAE,YAAY,EAAC,MAAM,kBAAkB,CAAC;AACxD,OAAO,EACL,cAAc,EACd,uBAAuB,EACvB,oBAAoB,EACpB,aAAa,EACb,wBAAwB,EACxB,iBAAiB,EACjB,UAAU,EACV,cAAc,EACf,MAAM,qCAAqC,CAAC;AAC7C,OAAO,EAAC,gBAAgB,EAAC,MAAM,iCAAiC,CAAC;AACjE,OAAO,EAAC,cAAc,EAAE,YAAY,EAAE,mBAAmB,EAAE,aAAa,EAAC,MAAM,+BAA+B,CAAC;AAC/G,OAAO,EAAC,UAAU,EAAC,MAAM,iCAAiC,CAAC;AAC3D,OAAO,EAAC,WAAW,EAAC,MAAM,mCAAmC,CAAC;AAE9D,OAAO,EACL,SAAS,EACT,QAAQ,EACR,QAAQ,EACR,IAAI,EACJ,iBAAiB,EACjB,YAAY,EACZ,QAAQ,EACR,qBAAqB,EACrB,WAAW,EACX,eAAe,EACf,cAAc,EACd,oBAAoB,EACpB,cAAc,EACd,qBAAqB,EACrB,UAAU,EACV,cAAc,EACd,UAAU,EACV,kBAAkB,EAClB,WAAW,EACX,mBAAmB,EACnB,kBAAkB,EAClB,iBAAiB,EACjB,aAAa,EACb,gBAAgB,EAChB,cAAc,EACd,YAAY,EACZ,mBAAmB,EACnB,aAAa,EACb,YAAY,EACZ,SAAS,EACT,mBAAmB,EACnB,gBAAgB,EAChB,YAAY,EACZ,QAAQ,EACR,gBAAgB,EAEhB,QAAQ,EAAE,MAAM,EAAE,cAAc,EAChC,mBAAmB,EACnB,aAAa,EAAE,UAAU,EAEzB,oBAAoB,EACpB,cAAc,EACd,wBAAwB,EACxB,cAAc,EACd,iBAAiB,EACjB,uBAAuB,EAEvB
,aAAa,EACb,MAAM,EAAE,SAAS,EACjB,KAAK,EACL,MAAM,EACN,aAAa,EACb,WAAW,EACX,aAAa,EAGb,OAAO,EACP,WAAW,EACX,YAAY,EACZ,UAAU,EACV,WAAW,EACZ,CAAC"}
|
|
1
|
+
{"version":3,"file":"index.d.ts","sourceRoot":"","sources":["index.ts"],"names":[],"mappings":"AACA,OAAO,EAAC,mBAAmB,EAAE,MAAM,EAAE,KAAK,EAAE,MAAM,EAAE,SAAS,EAAC,MAAM,6BAA6B,CAAC;AAElG,OAAO,EAAC,aAAa,EAAC,MAAM,6BAA6B,CAAC;AAE1D,OAAO,EAAC,UAAU,EAAE,kBAAkB,EAAC,MAAM,kBAAkB,CAAC;AAChE,OAAO,EAAC,YAAY,EAAC,MAAM,mCAAmC,CAAC;AAC/D,OAAO,EAAC,WAAW,EAAE,mBAAmB,EAAC,MAAM,mBAAmB,CAAC;AACnE,OAAO,EAAC,UAAU,EAAE,cAAc,EAAC,MAAM,oBAAoB,CAAC;AAC9D,OAAO,EAAC,WAAW,EAAE,OAAO,EAAE,QAAQ,EAAE,MAAM,EAAE,cAAc,EAAC,MAAM,aAAa,CAAC;AACnF,OAAO,EAAC,iBAAiB,EAAE,kBAAkB,EAAC,MAAM,eAAe,CAAC;AACpE,OAAO,EAAC,SAAS,EAAE,mBAAmB,EAAC,MAAM,2BAA2B,CAAC;AACzE,OAAO,EAAC,gBAAgB,EAAC,MAAM,2BAA2B,CAAC;AAC3D,OAAO,EACL,WAAW,EACX,eAAe,EACf,oBAAoB,EACpB,YAAY,EACZ,cAAc,EACd,cAAc,EACd,QAAQ,EACR,aAAa,EACb,gBAAgB,EAChB,WAAW,EACX,qBAAqB,EACrB,QAAQ,EACR,QAAQ,EACR,IAAI,EACJ,SAAS,EACV,MAAM,2BAA2B,CAAC;AACnC,OAAO,EAAC,aAAa,EAAC,MAAM,2BAA2B,CAAC;AACxD,OAAO,EAAC,iBAAiB,EAAC,MAAM,gCAAgC,CAAC;AACjE,OAAO,EAAC,qBAAqB,EAAC,MAAM,sBAAsB,CAAC;AAC3D,OAAO,EAAC,aAAa,EAAE,WAAW,EAAC,MAAM,yBAAyB,CAAC;AACnE,OAAO,EAAC,YAAY,EAAC,MAAM,2BAA2B,CAAC;AACvD,OAAO,EAAC,QAAQ,EAAE,YAAY,EAAC,MAAM,kBAAkB,CAAC;AACxD,OAAO,EACL,cAAc,EACd,uBAAuB,EACvB,oBAAoB,EACpB,aAAa,EACb,wBAAwB,EACxB,iBAAiB,EACjB,UAAU,EACV,cAAc,EACf,MAAM,qCAAqC,CAAC;AAC7C,OAAO,EAAC,gBAAgB,EAAC,MAAM,iCAAiC,CAAC;AACjE,OAAO,EAAC,cAAc,EAAE,YAAY,EAAE,mBAAmB,EAAE,aAAa,EAAC,MAAM,+BAA+B,CAAC;AAC/G,OAAO,EAAC,UAAU,EAAC,MAAM,iCAAiC,CAAC;AAC3D,OAAO,EAAC,WAAW,EAAC,MAAM,mCAAmC,CAAC;AAE9D,OAAO,EACL,SAAS,EACT,QAAQ,EACR,QAAQ,EACR,IAAI,EACJ,iBAAiB,EACjB,YAAY,EACZ,QAAQ,EACR,WAAW,EACX,qBAAqB,EACrB,WAAW,EACX,eAAe,EACf,cAAc,EACd,oBAAoB,EACpB,cAAc,EACd,qBAAqB,EACrB,UAAU,EACV,cAAc,EACd,UAAU,EACV,kBAAkB,EAClB,WAAW,EACX,mBAAmB,EACnB,kBAAkB,EAClB,iBAAiB,EACjB,aAAa,EACb,gBAAgB,EAChB,cAAc,EACd,YAAY,EACZ,mBAAmB,EACnB,aAAa,EACb,YAAY,EACZ,SAAS,EACT,mBAAmB,EACnB,gBAAgB,EAChB,YAAY,EACZ,QAAQ,EACR,gBAAgB,EAEhB,QAAQ,EAAE,MAAM,EAAE,cAAc,EAChC,mBAAmB,EACnB,aAAa,EAAE,UAAU,EAEzB,oBAAoB,EACpB,cAAc,EACd,wBAAwB,EACxB,cAAc,EACd,iBAAi
B,EACjB,uBAAuB,EAEvB,aAAa,EACb,MAAM,EAAE,SAAS,EACjB,KAAK,EACL,MAAM,EACN,aAAa,EACb,WAAW,EACX,aAAa,EAGb,OAAO,EACP,WAAW,EACX,YAAY,EACZ,UAAU,EACV,WAAW,EACZ,CAAC"}
|
package/index.js
CHANGED
|
@@ -10,7 +10,7 @@ import { isLeaf } from './src/types';
|
|
|
10
10
|
import { UnknownSeqPalette, UnknownSeqPalettes } from './src/unknown';
|
|
11
11
|
import { DrawStyle, printLeftOrCentered } from './src/utils/cell-renderer';
|
|
12
12
|
import { FastaFileHandler } from './src/utils/fasta-handler';
|
|
13
|
-
import { getSplitter, splitterAsFasta, getSplitterForColumn, monomerToShort, splitterAsHelm, getStats, pickUpPalette, getPaletteByType, getAlphabetSimilarity } from './src/utils/macromolecule';
|
|
13
|
+
import { getSplitter, splitterAsFasta, getSplitterForColumn, monomerToShort, splitterAsHelm, getStats, pickUpPalette, getPaletteByType, getAlphabet, getAlphabetSimilarity, } from './src/utils/macromolecule';
|
|
14
14
|
import { NotationConverter } from './src/utils/notation-converter';
|
|
15
15
|
import { splitAlignedSequences } from './src/utils/splitter';
|
|
16
16
|
import { getTreeHelper } from './src/utils/tree-helper';
|
|
@@ -20,5 +20,5 @@ import { getPhylocanvasGlService, PhylocanvasGlServiceBase, TreeTypesNames } fro
|
|
|
20
20
|
import { PositionHeight, PositionInfo, PositionMonomerInfo, WebLogoViewer } from './src/viewers/web-logo-viewer';
|
|
21
21
|
import { MonomerLib } from './src/monomer-works/monomer-lib';
|
|
22
22
|
import { readLibrary } from './src/monomer-works/monomer-utils';
|
|
23
|
-
export { NotationConverter, getStats, getAlphabetSimilarity, getSplitter, splitterAsFasta, splitterAsHelm, getSplitterForColumn, monomerToShort, splitAlignedSequences, SeqPaletteBase, Aminoacids, AminoacidsPalettes, Nucleotides, NucleotidesPalettes, UnknownSeqPalettes, UnknownSeqPalette, pickUpPalette, getPaletteByType, PositionHeight, PositionInfo, PositionMonomerInfo, WebLogoViewer, UnitsHandler, DrawStyle, printLeftOrCentered, FastaFileHandler, VdRegionType, VdRegion, isLeaf, TreeTypesNames, PhylocanvasGlServiceBase, getPhylocanvasGlService, PhylocanvasGL, Shapes, TreeTypes, Utils, Newick, getTreeHelper, MonomerWorks, MonomerLib, readLibrary };
|
|
24
|
-
//# sourceMappingURL=data:application/json;base64,
|
|
23
|
+
export { NotationConverter, getStats, getAlphabet, getAlphabetSimilarity, getSplitter, splitterAsFasta, splitterAsHelm, getSplitterForColumn, monomerToShort, splitAlignedSequences, SeqPaletteBase, Aminoacids, AminoacidsPalettes, Nucleotides, NucleotidesPalettes, UnknownSeqPalettes, UnknownSeqPalette, pickUpPalette, getPaletteByType, PositionHeight, PositionInfo, PositionMonomerInfo, WebLogoViewer, UnitsHandler, DrawStyle, printLeftOrCentered, FastaFileHandler, VdRegionType, VdRegion, isLeaf, TreeTypesNames, PhylocanvasGlServiceBase, getPhylocanvasGlService, PhylocanvasGL, Shapes, TreeTypes, Utils, Newick, getTreeHelper, MonomerWorks, MonomerLib, readLibrary };
|
|
24
|
+
//# sourceMappingURL=data:application/json;base64,eyJ2ZXJzaW9uIjozLCJmaWxlIjoiaW5kZXguanMiLCJzb3VyY2VSb290IjoiIiwic291cmNlcyI6WyJpbmRleC50cyJdLCJuYW1lcyI6W10sIm1hcHBpbmdzIjoiQUFBQSxZQUFZO0FBQ1osT0FBTyxFQUFzQixNQUFNLEVBQUUsS0FBSyxFQUFFLE1BQU0sRUFBRSxTQUFTLEVBQUMsTUFBTSw2QkFBNkIsQ0FBQztBQUNsRyxZQUFZO0FBQ1osT0FBTyxFQUFDLGFBQWEsRUFBQyxNQUFNLDZCQUE2QixDQUFDO0FBRTFELE9BQU8sRUFBQyxVQUFVLEVBQUUsa0JBQWtCLEVBQUMsTUFBTSxrQkFBa0IsQ0FBQztBQUNoRSxPQUFPLEVBQUMsWUFBWSxFQUFDLE1BQU0sbUNBQW1DLENBQUM7QUFDL0QsT0FBTyxFQUFDLFdBQVcsRUFBRSxtQkFBbUIsRUFBQyxNQUFNLG1CQUFtQixDQUFDO0FBQ25FLE9BQU8sRUFBYSxjQUFjLEVBQUMsTUFBTSxvQkFBb0IsQ0FBQztBQUM5RCxPQUFPLEVBQWlDLE1BQU0sRUFBaUIsTUFBTSxhQUFhLENBQUM7QUFDbkYsT0FBTyxFQUFDLGlCQUFpQixFQUFFLGtCQUFrQixFQUFDLE1BQU0sZUFBZSxDQUFDO0FBQ3BFLE9BQU8sRUFBQyxTQUFTLEVBQUUsbUJBQW1CLEVBQUMsTUFBTSwyQkFBMkIsQ0FBQztBQUN6RSxPQUFPLEVBQUMsZ0JBQWdCLEVBQUMsTUFBTSwyQkFBMkIsQ0FBQztBQUMzRCxPQUFPLEVBQ0wsV0FBVyxFQUNYLGVBQWUsRUFDZixvQkFBb0IsRUFFcEIsY0FBYyxFQUNkLGNBQWMsRUFDZCxRQUFRLEVBQ1IsYUFBYSxFQUNiLGdCQUFnQixFQUNoQixXQUFXLEVBQ1gscUJBQXFCLEdBS3RCLE1BQU0sMkJBQTJCLENBQUM7QUFFbkMsT0FBTyxFQUFDLGlCQUFpQixFQUFDLE1BQU0sZ0NBQWdDLENBQUM7QUFDakUsT0FBTyxFQUFDLHFCQUFxQixFQUFDLE1BQU0sc0JBQXNCLENBQUM7QUFDM0QsT0FBTyxFQUFDLGFBQWEsRUFBYyxNQUFNLHlCQUF5QixDQUFDO0FBQ25FLE9BQU8sRUFBQyxZQUFZLEVBQUMsTUFBTSwyQkFBMkIsQ0FBQztBQUN2RCxPQUFPLEVBQUMsUUFBUSxFQUFFLFlBQVksRUFBQyxNQUFNLGtCQUFrQixDQUFDO0FBQ3hELE9BQU8sRUFFTCx1QkFBdUIsRUFHdkIsd0JBQXdCLEVBR3hCLGNBQWMsRUFDZixNQUFNLHFDQUFxQyxDQUFDO0FBRTdDLE9BQU8sRUFBQyxjQUFjLEVBQUUsWUFBWSxFQUFFLG1CQUFtQixFQUFFLGFBQWEsRUFBQyxNQUFNLCtCQUErQixDQUFDO0FBQy9HLE9BQU8sRUFBQyxVQUFVLEVBQUMsTUFBTSxpQ0FBaUMsQ0FBQztBQUMzRCxPQUFPLEVBQUMsV0FBVyxFQUFDLE1BQU0sbUNBQW1DLENBQUM7QUFFOUQsT0FBTyxFQUtMLGlCQUFpQixFQUVqQixRQUFRLEVBQ1IsV0FBVyxFQUNYLHFCQUFxQixFQUNyQixXQUFXLEVBQ1gsZUFBZSxFQUNmLGNBQWMsRUFDZCxvQkFBb0IsRUFDcEIsY0FBYyxFQUNkLHFCQUFxQixFQUVyQixjQUFjLEVBQ2QsVUFBVSxFQUNWLGtCQUFrQixFQUNsQixXQUFXLEVBQ1gsbUJBQW1CLEVBQ25CLGtCQUFrQixFQUNsQixpQkFBaUIsRUFDakIsYUFBYSxFQUNiLGdCQUFnQixFQUNoQixjQUFjLE
VBQ2QsWUFBWSxFQUNaLG1CQUFtQixFQUNuQixhQUFhLEVBQ2IsWUFBWSxFQUNaLFNBQVMsRUFDVCxtQkFBbUIsRUFDbkIsZ0JBQWdCLEVBQ2hCLFlBQVksRUFDWixRQUFRLEVBR0UsTUFBTSxFQUtoQixjQUFjLEVBQ2Qsd0JBQXdCLEVBR3hCLHVCQUF1QixFQUV2QixhQUFhLEVBQ2IsTUFBTSxFQUFFLFNBQVMsRUFDakIsS0FBSyxFQUNMLE1BQU0sRUFHTixhQUFhLEVBS2IsWUFBWSxFQUNaLFVBQVUsRUFDVixXQUFXLEVBQ1osQ0FBQyIsInNvdXJjZXNDb250ZW50IjpbIi8vQHRzLWlnbm9yZVxuaW1wb3J0IHtQaHlsb2NhbnZhc1RyZWVOb2RlLCBOZXdpY2ssIFV0aWxzLCBTaGFwZXMsIFRyZWVUeXBlc30gZnJvbSAnQHBoeWxvY2FudmFzL3BoeWxvY2FudmFzLmdsJztcbi8vQHRzLWlnbm9yZVxuaW1wb3J0IHtQaHlsb2NhbnZhc0dMfSBmcm9tICdAcGh5bG9jYW52YXMvcGh5bG9jYW52YXMuZ2wnO1xuXG5pbXBvcnQge0FtaW5vYWNpZHMsIEFtaW5vYWNpZHNQYWxldHRlc30gZnJvbSAnLi9zcmMvYW1pbm9hY2lkcyc7XG5pbXBvcnQge01vbm9tZXJXb3Jrc30gZnJvbSAnLi9zcmMvbW9ub21lci13b3Jrcy9tb25vbWVyLXdvcmtzJztcbmltcG9ydCB7TnVjbGVvdGlkZXMsIE51Y2xlb3RpZGVzUGFsZXR0ZXN9IGZyb20gJy4vc3JjL251Y2xlb3RpZGVzJztcbmltcG9ydCB7U2VxUGFsZXR0ZSwgU2VxUGFsZXR0ZUJhc2V9IGZyb20gJy4vc3JjL3NlcS1wYWxldHRlcyc7XG5pbXBvcnQge0lNb25vbWVyTGliLCBNb25vbWVyLCBOb2RlVHlwZSwgaXNMZWFmLCBOb2RlQ3V0dGVkVHlwZX0gZnJvbSAnLi9zcmMvdHlwZXMnO1xuaW1wb3J0IHtVbmtub3duU2VxUGFsZXR0ZSwgVW5rbm93blNlcVBhbGV0dGVzfSBmcm9tICcuL3NyYy91bmtub3duJztcbmltcG9ydCB7RHJhd1N0eWxlLCBwcmludExlZnRPckNlbnRlcmVkfSBmcm9tICcuL3NyYy91dGlscy9jZWxsLXJlbmRlcmVyJztcbmltcG9ydCB7RmFzdGFGaWxlSGFuZGxlcn0gZnJvbSAnLi9zcmMvdXRpbHMvZmFzdGEtaGFuZGxlcic7XG5pbXBvcnQge1xuICBnZXRTcGxpdHRlcixcbiAgc3BsaXR0ZXJBc0Zhc3RhLFxuICBnZXRTcGxpdHRlckZvckNvbHVtbixcbiAgU3BsaXR0ZXJGdW5jLFxuICBtb25vbWVyVG9TaG9ydCxcbiAgc3BsaXR0ZXJBc0hlbG0sXG4gIGdldFN0YXRzLFxuICBwaWNrVXBQYWxldHRlLFxuICBnZXRQYWxldHRlQnlUeXBlLFxuICBnZXRBbHBoYWJldCxcbiAgZ2V0QWxwaGFiZXRTaW1pbGFyaXR5LFxuICBBTFBIQUJFVCxcbiAgTk9UQVRJT04sXG4gIFRBR1MsXG4gIEFMSUdOTUVOVCxcbn0gZnJvbSAnLi9zcmMvdXRpbHMvbWFjcm9tb2xlY3VsZSc7XG5pbXBvcnQge0lOZXdpY2tIZWxwZXJ9IGZyb20gJy4vc3JjL3V0aWxzL25ld2ljay1oZWxwZXInO1xuaW1wb3J0IHtOb3RhdGlvbkNvbnZlcnRlcn0gZnJvbSAnLi9zcmMvdXRpbHMvbm90YXRpb24tY29udmVydGVyJztcbmltcG9ydCB7c3BsaXRBbGlnbmVkU2VxdWVuY2VzfSBmcm9tICcuL3NyYy91dGlscy9zcGxpdHRlci
c7XG5pbXBvcnQge2dldFRyZWVIZWxwZXIsIElUcmVlSGVscGVyfSBmcm9tICcuL3NyYy91dGlscy90cmVlLWhlbHBlcic7XG5pbXBvcnQge1VuaXRzSGFuZGxlcn0gZnJvbSAnLi9zcmMvdXRpbHMvdW5pdHMtaGFuZGxlcic7XG5pbXBvcnQge1ZkUmVnaW9uLCBWZFJlZ2lvblR5cGV9IGZyb20gJy4vc3JjL3ZkLXJlZ2lvbnMnO1xuaW1wb3J0IHtcbiAgQ2FudmFzQ2FsbGJhY2ssXG4gIGdldFBoeWxvY2FudmFzR2xTZXJ2aWNlLFxuICBJUGh5bG9jYW52YXNHbFZpZXdlcixcbiAgTm9kZVN0eWxlVHlwZSxcbiAgUGh5bG9jYW52YXNHbFNlcnZpY2VCYXNlLFxuICBQaHlsb2NhbnZhc0dsVGFzayxcbiAgU3R5bGVzVHlwZSxcbiAgVHJlZVR5cGVzTmFtZXNcbn0gZnJvbSAnLi9zcmMvdmlld2Vycy9waHlsb2NhbnZhcy1nbC12aWV3ZXInO1xuaW1wb3J0IHtJVmRSZWdpb25zVmlld2VyfSBmcm9tICcuL3NyYy92aWV3ZXJzL3ZkLXJlZ2lvbnMtdmlld2VyJztcbmltcG9ydCB7UG9zaXRpb25IZWlnaHQsIFBvc2l0aW9uSW5mbywgUG9zaXRpb25Nb25vbWVySW5mbywgV2ViTG9nb1ZpZXdlcn0gZnJvbSAnLi9zcmMvdmlld2Vycy93ZWItbG9nby12aWV3ZXInO1xuaW1wb3J0IHtNb25vbWVyTGlifSBmcm9tICcuL3NyYy9tb25vbWVyLXdvcmtzL21vbm9tZXItbGliJztcbmltcG9ydCB7cmVhZExpYnJhcnl9IGZyb20gJy4vc3JjL21vbm9tZXItd29ya3MvbW9ub21lci11dGlscyc7XG5cbmV4cG9ydCB7XG4gIEFMSUdOTUVOVCxcbiAgQUxQSEFCRVQsXG4gIE5PVEFUSU9OLFxuICBUQUdTLFxuICBOb3RhdGlvbkNvbnZlcnRlcixcbiAgU3BsaXR0ZXJGdW5jLFxuICBnZXRTdGF0cyxcbiAgZ2V0QWxwaGFiZXQsXG4gIGdldEFscGhhYmV0U2ltaWxhcml0eSxcbiAgZ2V0U3BsaXR0ZXIsXG4gIHNwbGl0dGVyQXNGYXN0YSxcbiAgc3BsaXR0ZXJBc0hlbG0sXG4gIGdldFNwbGl0dGVyRm9yQ29sdW1uLFxuICBtb25vbWVyVG9TaG9ydCxcbiAgc3BsaXRBbGlnbmVkU2VxdWVuY2VzLFxuICBTZXFQYWxldHRlLFxuICBTZXFQYWxldHRlQmFzZSxcbiAgQW1pbm9hY2lkcyxcbiAgQW1pbm9hY2lkc1BhbGV0dGVzLFxuICBOdWNsZW90aWRlcyxcbiAgTnVjbGVvdGlkZXNQYWxldHRlcyxcbiAgVW5rbm93blNlcVBhbGV0dGVzLFxuICBVbmtub3duU2VxUGFsZXR0ZSxcbiAgcGlja1VwUGFsZXR0ZSxcbiAgZ2V0UGFsZXR0ZUJ5VHlwZSxcbiAgUG9zaXRpb25IZWlnaHQsXG4gIFBvc2l0aW9uSW5mbyxcbiAgUG9zaXRpb25Nb25vbWVySW5mbyxcbiAgV2ViTG9nb1ZpZXdlcixcbiAgVW5pdHNIYW5kbGVyLFxuICBEcmF3U3R5bGUsXG4gIHByaW50TGVmdE9yQ2VudGVyZWQsXG4gIEZhc3RhRmlsZUhhbmRsZXIsXG4gIFZkUmVnaW9uVHlwZSxcbiAgVmRSZWdpb24sXG4gIElWZFJlZ2lvbnNWaWV3ZXIsXG5cbiAgTm9kZVR5cGUsIGlzTGVhZiwgTm9kZUN1dHRlZFR5cGUsXG4gIFBoeWxvY2FudmFzVHJlZU5vZGUsXG4gIE5vZGVTdHlsZVR5cGUsIFN0eWxlc1R5cGUsXG5cbi
AgSVBoeWxvY2FudmFzR2xWaWV3ZXIsXG4gIFRyZWVUeXBlc05hbWVzLFxuICBQaHlsb2NhbnZhc0dsU2VydmljZUJhc2UsXG4gIENhbnZhc0NhbGxiYWNrLFxuICBQaHlsb2NhbnZhc0dsVGFzayxcbiAgZ2V0UGh5bG9jYW52YXNHbFNlcnZpY2UsXG5cbiAgUGh5bG9jYW52YXNHTCxcbiAgU2hhcGVzLCBUcmVlVHlwZXMsXG4gIFV0aWxzLFxuICBOZXdpY2ssXG4gIElOZXdpY2tIZWxwZXIsXG4gIElUcmVlSGVscGVyLFxuICBnZXRUcmVlSGVscGVyLFxuXG4gIC8vTW9ub21lciBsaWIgYW5kIGZlYXR1cmVzXG4gIE1vbm9tZXIsXG4gIElNb25vbWVyTGliLFxuICBNb25vbWVyV29ya3MsXG4gIE1vbm9tZXJMaWIsXG4gIHJlYWRMaWJyYXJ5XG59O1xuIl19
|
package/index.ts
CHANGED
|
@@ -21,11 +21,12 @@ import {
|
|
|
21
21
|
getStats,
|
|
22
22
|
pickUpPalette,
|
|
23
23
|
getPaletteByType,
|
|
24
|
+
getAlphabet,
|
|
24
25
|
getAlphabetSimilarity,
|
|
25
26
|
ALPHABET,
|
|
26
27
|
NOTATION,
|
|
27
28
|
TAGS,
|
|
28
|
-
ALIGNMENT
|
|
29
|
+
ALIGNMENT,
|
|
29
30
|
} from './src/utils/macromolecule';
|
|
30
31
|
import {INewickHelper} from './src/utils/newick-helper';
|
|
31
32
|
import {NotationConverter} from './src/utils/notation-converter';
|
|
@@ -56,6 +57,7 @@ export {
|
|
|
56
57
|
NotationConverter,
|
|
57
58
|
SplitterFunc,
|
|
58
59
|
getStats,
|
|
60
|
+
getAlphabet,
|
|
59
61
|
getAlphabetSimilarity,
|
|
60
62
|
getSplitter,
|
|
61
63
|
splitterAsFasta,
|
package/package.json
CHANGED
|
@@ -4,7 +4,7 @@
|
|
|
4
4
|
"access": "public"
|
|
5
5
|
},
|
|
6
6
|
"friendlyName": "Datagrok bio library",
|
|
7
|
-
"version": "5.9.14",
|
|
7
|
+
"version": "5.10.0",
|
|
8
8
|
"description": "",
|
|
9
9
|
"dependencies": {
|
|
10
10
|
"@datagrok-libraries/utils": "^1.10.1",
|
|
@@ -24,7 +24,7 @@
|
|
|
24
24
|
"eslint": "latest",
|
|
25
25
|
"eslint-config-google": "latest",
|
|
26
26
|
"ts-loader": "^9.2.6",
|
|
27
|
-
"typescript": "^4.
|
|
27
|
+
"typescript": "^4.6.3"
|
|
28
28
|
},
|
|
29
29
|
"scripts": {
|
|
30
30
|
"link": "npm link",
|
|
@@ -70,6 +70,8 @@ export declare function getSplitter(units: string, separator: string, limit?: nu
|
|
|
70
70
|
export declare function getSplitterForColumn(col: DG.Column): SplitterFunc;
|
|
71
71
|
/** Convert long monomer names to short ones */
|
|
72
72
|
export declare function monomerToShort(amino: string, maxLengthOfMonomer: number): string;
|
|
73
|
+
/** */
|
|
74
|
+
export declare function getAlphabet(alphabet: ALPHABET): Set<string>;
|
|
73
75
|
/** Calculate similarity in current sequence and alphabet.
|
|
74
76
|
* @param {MonomerFreqs} freq
|
|
75
77
|
* @param {Set<string>} alphabet
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"macromolecule.d.ts","sourceRoot":"","sources":["macromolecule.ts"],"names":[],"mappings":"AAEA,OAAO,KAAK,EAAE,MAAM,iBAAiB,CAAC;AAKtC,OAAO,EAAC,UAAU,EAAC,MAAM,iBAAiB,CAAC;
|
|
1
|
+
{"version":3,"file":"macromolecule.d.ts","sourceRoot":"","sources":["macromolecule.ts"],"names":[],"mappings":"AAEA,OAAO,KAAK,EAAE,MAAM,iBAAiB,CAAC;AAKtC,OAAO,EAAC,UAAU,EAAC,MAAM,iBAAiB,CAAC;AAO3C,0EAA0E;AAC1E,0BAAkB,QAAQ;IACxB,KAAK,UAAU;IACf,SAAS,cAAc;IACvB,IAAI,SAAS;CACd;AAED,0BAAkB,SAAS;IACzB,OAAO,YAAY;IACnB,GAAG,QAAQ;CACZ;AAED,0BAAkB,QAAQ;IACxB,GAAG,QAAQ;IACX,GAAG,QAAQ;IACX,EAAE,OAAO;IACT,EAAE,OAAO;CACV;AAED,0BAAkB,IAAI;IACpB,OAAO,YAAY;IACnB,QAAQ,aAAa;IACrB,YAAY,kBAAkB;IAC9B,mBAAmB,yBAAyB;IAC5C,SAAS,cAAc;CACxB;AAED,oBAAY,WAAW,GAAG;IAAE,IAAI,EAAE,YAAY,CAAC;IAAC,UAAU,EAAE,OAAO,CAAA;CAAE,CAAA;AACrE,oBAAY,YAAY,GAAG,CAAC,GAAG,EAAE,MAAM,KAAK,MAAM,EAAE,CAAC;AACrD,oBAAY,YAAY,GAAG;IAAE,CAAC,CAAC,EAAE,MAAM,GAAG,MAAM,CAAA;CAAE,CAAC;AAEnD;;;;;GAKG;AACH,wBAAgB,QAAQ,CAAC,MAAM,EAAE,EAAE,CAAC,MAAM,EAAE,SAAS,EAAE,MAAM,EAAE,QAAQ,EAAE,YAAY,GAAG,WAAW,CAsBlG;AAED,eAAO,MAAM,SAAS,EAAE,MAA8B,CAAC;AAEvD;;;GAGG;AACH,wBAAgB,eAAe,CAAC,GAAG,EAAE,GAAG,GAAG,MAAM,EAAE,CAYlD;AAED;;;;GAIG;AACH,wBAAgB,wBAAwB,CAAC,SAAS,EAAE,MAAM,EAAE,KAAK,GAAE,MAAM,GAAG,SAAqB,GAAG,YAAY,CAI/G;AAKD;;;;GAIG;AACH,wBAAgB,cAAc,CAAC,GAAG,EAAE,GAAG,GAAG,MAAM,EAAE,CAgBjD;AAED;;;;;GAKG;AACH,wBAAgB,WAAW,CAAC,KAAK,EAAE,MAAM,EAAE,SAAS,EAAE,MAAM,EAAE,KAAK,GAAE,MAAM,GAAG,SAAqB,GAAG,YAAY,CAWjH;AAED;;;GAGG;AACH,wBAAgB,oBAAoB,CAAC,GAAG,EAAE,EAAE,CAAC,MAAM,GAAG,YAAY,CAOjE;AAID,+CAA+C;AAC/C,wBAAgB,cAAc,CAAC,KAAK,EAAE,MAAM,EAAE,kBAAkB,EAAE,MAAM,GAAG,MAAM,CAKhF;AAED,MAAM;AACN,wBAAgB,WAAW,CAAC,QAAQ,EAAE,QAAQ,GAAG,GAAG,CAAC,MAAM,CAAC,CAW3D;AAED;;;;;GAKG;AACH,wBAAgB,qBAAqB,CAAC,IAAI,EAAE,YAAY,EAAE,QAAQ,EAAE,GAAG,CAAC,MAAM,CAAC,EAAE,SAAS,GAAE,MAAY,GAAG,MAAM,CAchH;AAED,wBAAgB,cAAc,CAAC,KAAK,EAAE,WAAW,GAAG,MAAM,CAazD;AAED;;;;GAIG;AACH,wBAAgB,aAAa,CAAC,MAAM,EAAE,EAAE,CAAC,MAAM,EAAE,SAAS,GAAE,MAAU,GAAG,UAAU,CAYlF;AAED,wBAAgB,gBAAgB,CAAC,WAAW,EAAE,MAAM,GAAG,UAAU,CAYhE;AAED,wBAAgB,YAAY,CAAC,EAAE,EAAE,EAAE,CAAC,SAAS,GAAG,EAAE,CAAC,MAAM,GAAG,IAAI,CAS/D"}
|
|
@@ -121,6 +121,19 @@ export function monomerToShort(amino, maxLengthOfMonomer) {
|
|
|
121
121
|
const shortAmino = (_b = shortAminoMatch === null || shortAminoMatch === void 0 ? void 0 : shortAminoMatch[0]) !== null && _b !== void 0 ? _b : ' ';
|
|
122
122
|
return !needAddDots ? shortAmino : shortAmino.substring(0, maxLengthOfMonomer) + '…';
|
|
123
123
|
}
|
|
124
|
+
/** */
|
|
125
|
+
export function getAlphabet(alphabet) {
|
|
126
|
+
switch (alphabet) {
|
|
127
|
+
case "DNA" /* ALPHABET.DNA */:
|
|
128
|
+
return UnitsHandler.DnaFastaAlphabet;
|
|
129
|
+
case "RNA" /* ALPHABET.RNA */:
|
|
130
|
+
return UnitsHandler.RnaFastaAlphabet;
|
|
131
|
+
case "PT" /* ALPHABET.PT */:
|
|
132
|
+
return UnitsHandler.PeptideFastaAlphabet;
|
|
133
|
+
default:
|
|
134
|
+
throw new Error(`Unsupported alphabet '${alphabet}'.`);
|
|
135
|
+
}
|
|
136
|
+
}
|
|
124
137
|
/** Calculate similarity in current sequence and alphabet.
|
|
125
138
|
* @param {MonomerFreqs} freq
|
|
126
139
|
* @param {Set<string>} alphabet
|
|
@@ -195,4 +208,4 @@ export function pickUpSeqCol(df) {
|
|
|
195
208
|
resCol = semTypeColList[0];
|
|
196
209
|
return resCol;
|
|
197
210
|
}
|
|
198
|
-
//# sourceMappingURL=data:application/json;base64,{"version":3,"file":"macromolecule.js","sourceRoot":"","sources":["macromolecule.ts"],"names":[],"mappings":"AAEA,OAAO,KAAK,EAAE,MAAM,iBAAiB,CAAC;AACtC,OAAO,EAAE,MAAM,IAAI,CAAC;AAEpB,OAAO,EAAC,MAAM,EAAC,MAAM,iDAAiD,CAAC;AACvE,OAAO,EAAC,YAAY,EAAE,gBAAgB,EAAC,MAAM,iDAAiD,CAAC;AAE/F,OAAO,EAAa,kBAAkB,EAAC,MAAM,eAAe,CAAC;AAC7D,OAAO,EAAc,mBAAmB,EAAC,MAAM,gBAAgB,CAAC;AAChE,OAAO,EAAC,kBAAkB,EAAC,MAAM,YAAY,CAAC;AAC9C,OAAO,EAAC,YAAY,EAAC,MAAM,wBAAwB,CAAC;AA2BnD,CAAC;AAMF;;;;;GAKG;AACH,MAAM,UAAU,QAAQ,CAAC,MAAiB,EAAE,SAAiB,EAAE,QAAsB;IACnF,MAAM,IAAI,GAA4B,EAAE,CAAC;IACzC,IAAI,UAAU,GAAG,IAAI,CAAC;IACtB,IAAI,WAAW,GAAG,IAAI,CAAC;IAEvB,KAAK,MAAM,GAAG,IAAI,MAAM,CAAC,UAAU,EAAE;QACnC,MAAM,IAAI,GAAG,QAAQ,CAAC,GAAG,CAAC,CAAC;QAE3B,IAAI,WAAW,IAAI,IAAI;YACrB,WAAW,GAAG,IAAI,CAAC,MAAM,CAAC;aACvB,IAAI,IAAI,CAAC,MAAM,KAAK,WAAW;YAClC,UAAU,GAAG,KAAK,CAAC;QAErB,IAAI,IAAI,CAAC,MAAM,IAAI,SAAS,EAAE;YAC5B,KAAK,MAAM,CAAC,IAAI,IAAI,EAAE;gBACpB,IAAI,CAAC,CAAC,CAAC,IAAI,IAAI,CAAC;oBACd,IAAI,CAAC,CAAC,CAAC,GAAG,CAAC,CAAC;gBACd,IAAI,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC;aACd;SACF;KACF;IACD,OAAO,EAAC,IAAI,EAAE,IAAI,EAAE,UAAU,EAAE,UAAU,EAAC,CAAC;AAC9C,CAAC;AAED,MAAM,CAAC,MAAM,SAAS,GAAW,qBAAqB,CAAC;AAEvD;;;GAGG;AACH,MAAM,UAAU,eAAe,CAAC,GAAQ;IACtC,OAAO,EAAE,CAAmB,GAAG,CAAC,QAAQ,EAAE,CAAC,QAAQ,CAAC,SAAS,CAAC,CAAC;SAC5D,GAAG,CAAC,CAAC,EAAoB,EAAE,EAAE;QAC5B,IAAI,IAAY,CAAC;QACjB,MAAM,CAAC,GAAW,EAAE,CAAC,CAAC,CAAC,CAAC;QACxB,IAAI,CAAC,CAAC,MAAM,GAAG,CAAC,EAAE;YAChB,IAAI,GAAG,EAAE,CAAC,CAAC,CAAC,CAAC;SACd;aAAM;YACL,IAAI,GAAG,CAAC,CAAC;SACV;QACD,OAAO,IAAI,CAAC;IACd,CAAC,CAAC,CAAC,OAAO,EAAE,CAAC;AACjB,CAAC;AAED;;;;GAIG;AACH,MAAM,UAAU,wBAAwB,CAAC,SAAiB,EAAE,QAA4B,SAAS;IAC/F,OAAO,CAAC,GAAW,EAAE,EAAE;QACrB,OAAO,GAAG,CAAC,KAAK,CAAC,SAAS,EAAE,KAAK,CAAC,CAAC;IACrC,CAAC,CAAC;AACJ,CAAC;AAED,MAAM,MAAM,GAAW,iCAAiC,CAAC;AACzD,MAAM,SAAS,GAAW,gBAAgB,CAAC;AAG3C;;;;GAIG;AACH,MAAM,UAAU,cAAc,CAAC,GAAQ;IACrC,MAAM,CAAC,SAAS,GAAG,CAAC,CAAC;IACrB,MAAM,EAAE,GAA2B,MAAM,CAAC,IAAI,CAAC,GAAG,CAAC,QAAQ
,EAAE,CAAC,CAAC;IAC/D,MAAM,KAAK,GAAkB,EAAE,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,IAAI,CAAC;IAE/C,MAAM,aAAa,GAAG,CAAC,EAAU,EAAU,EAAE;QAC3C,SAAS,CAAC,SAAS,GAAG,CAAC,CAAC;QACxB,MAAM,IAAI,GAAG,SAAS,CAAC,IAAI,CAAC,EAAE,CAAC,CAAC;QAChC,IAAI,IAAI,IAAI,IAAI,CAAC,MAAM,IAAI,CAAC;YAC1B,OAAO,IAAI,CAAC,CAAC,CAAC,CAAC;;YAEf,OAAO,EAAE,CAAC;IACd,CAAC,CAAC;IAEF,MAAM,MAAM,GAAa,KAAK,CAAC,CAAC,CAAC,KAAK,CAAC,KAAK,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC,EAAE,CAAC;IACvD,OAAO,MAAM,CAAC,GAAG,CAAC,aAAa,CAAC,CAAC;AACnC,CAAC;AAED;;;;;GAKG;AACH,MAAM,UAAU,WAAW,CAAC,KAAa,EAAE,SAAiB,EAAE,QAA4B,SAAS;IACjG,IAAI,KAAK,CAAC,WAAW,EAAE,CAAC,UAAU,8BAAgB;QAChD,OAAO,eAAe,CAAC;SACpB,IAAI,KAAK,CAAC,WAAW,EAAE,CAAC,UAAU,sCAAoB;QACzD,OAAO,wBAAwB,CAAC,SAAS,EAAE,KAAK,CAAC,CAAC;SAC/C,IAAI,KAAK,CAAC,WAAW,EAAE,CAAC,UAAU,4BAAe;QACpD,OAAO,cAAc,CAAC;;QAEtB,MAAM,IAAI,KAAK,CAAC,oBAAoB,KAAK,IAAI,CAAC,CAAC;IAEjD,0BAA0B;AAC5B,CAAC;AAED;;;GAGG;AACH,MAAM,UAAU,oBAAoB,CAAC,GAAc;IACjD,IAAI,GAAG,CAAC,OAAO,KAAK,EAAE,CAAC,OAAO,CAAC,aAAa;QAC1C,MAAM,IAAI,KAAK,CAAC,6BAA6B,EAAE,CAAC,OAAO,CAAC,aAAa,SAAS,CAAC,CAAC;IAElF,MAAM,KAAK,GAAG,GAAG,CAAC,MAAM,CAAC,EAAE,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC;IACxC,MAAM,SAAS,GAAG,GAAG,CAAC,MAAM,kCAAgB,CAAC;IAC7C,OAAO,WAAW,CAAC,KAAK,EAAE,SAAS,CAAC,CAAC;AACvC,CAAC;AAED,MAAM,iBAAiB,GAAW,QAAQ,CAAC;AAE3C,+CAA+C;AAC/C,MAAM,UAAU,cAAc,CAAC,KAAa,EAAE,kBAA0B;;IACtE,MAAM,eAAe,GAA4B,KAAK,CAAC,KAAK,CAAC,iBAAiB,CAAC,CAAC;IAChF,MAAM,WAAW,GAAY,KAAK,CAAC,MAAM,GAAG,kBAAkB,IAAI,CAAC,MAAA,eAAe,aAAf,eAAe,uBAAf,eAAe,CAAE,MAAM,mCAAI,CAAC,CAAC,GAAG,CAAC,CAAC;IACrG,MAAM,UAAU,GAAG,MAAA,eAAe,aAAf,eAAe,uBAAf,eAAe,CAAG,CAAC,CAAC,mCAAI,GAAG,CAAC;IAC/C,OAAO,CAAC,WAAW,CAAC,CAAC,CAAC,UAAU,CAAC,CAAC,CAAC,UAAU,CAAC,SAAS,CAAC,CAAC,EAAE,kBAAkB,CAAC,GAAG,GAAG,CAAC;AACvF,CAAC;AAED;;;;;GAKG;AACH,MAAM,UAAU,qBAAqB,CAAC,IAAkB,EAAE,QAAqB,EAAE,YAAoB,GAAG;IACtG,MAAM,IAAI,GAAG,IAAI,GAAG,CAAS,CAAC,GAAG,IAAI,GAAG,CAAC,MAAM,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC,EAAE,GAAG,QAAQ,CAAC,CAAC,CAAC;IAC3E,IAAI,CAAC,MAAM,CAAC,SAAS,CAAC,CAAC;IAEvB,MAAM,KAAK,GAAa,EAAE,CAAC;I
AC3B,MAAM,SAAS,GAAa,EAAE,CAAC;IAC/B,KAAK,MAAM,CAAC,IAAI,IAAI,EAAE;QACpB,KAAK,CAAC,IAAI,CAAC,CAAC,IAAI,IAAI,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC;QACpC,SAAS,CAAC,IAAI,CAAC,QAAQ,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC;KACzC;IACD,0FAA0F;IAC1F,MAAM,KAAK,GAAW,IAAI,MAAM,CAAC,KAAK,CAAC,CAAC;IACxC,MAAM,SAAS,GAAW,IAAI,MAAM,CAAC,SAAS,CAAC,CAAC;IAChD,OAAO,gBAAgB,CAAC,KAAK,EAAE,SAAS,CAAC,GAAG,CAAC,YAAY,CAAC,KAAK,CAAC,GAAG,YAAY,CAAC,SAAS,CAAC,CAAC,CAAC;AAC9F,CAAC;AAED,MAAM,UAAU,cAAc,CAAC,KAAkB;IAC/C,MAAM,kBAAkB,GAA4B;QAClD,yBAAc,YAAY,CAAC,oBAAoB,CAAC;QAChD,2BAAe,YAAY,CAAC,gBAAgB,CAAC;QAC7C,2BAAe,YAAY,CAAC,gBAAgB,CAAC;KAC9C,CAAC;IAEF,gDAAgD;IAChD,MAAM,qBAAqB,GAAa,kBAAkB,CAAC,GAAG,CAC5D,CAAC,CAAC,EAAE,EAAE,CAAC,qBAAqB,CAAC,KAAK,CAAC,IAAI,EAAE,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC;IAClD,MAAM,MAAM,GAAG,IAAI,CAAC,GAAG,CAAC,GAAG,qBAAqB,CAAC,CAAC;IAClD,MAAM,QAAQ,GAAG,MAAM,GAAG,IAAI,CAAC,CAAC,CAAC,kBAAkB,CAAC,qBAAqB,CAAC,OAAO,CAAC,MAAM,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,IAAI,CAAC;IACrG,OAAO,QAAQ,CAAC;AAClB,CAAC;AAED;;;;GAIG;AACH,MAAM,UAAU,aAAa,CAAC,MAAiB,EAAE,YAAoB,CAAC;IACpE,IAAI,QAAgB,CAAC;IACrB,IAAI,MAAM,CAAC,OAAO,IAAI,EAAE,CAAC,OAAO,CAAC,aAAa,EAAE;QAC9C,MAAM,EAAE,GAAiB,IAAI,YAAY,CAAC,MAAM,CAAC,CAAC;QAClD,QAAQ,GAAG,EAAE,CAAC,QAAQ,CAAC;KACxB;SAAM;QACL,MAAM,KAAK,GAAgB,QAAQ,CAAC,MAAM,EAAE,SAAS,EAAE,eAAe,CAAC,CAAC;QACxE,QAAQ,GAAG,cAAc,CAAC,KAAK,CAAC,CAAC;KAClC;IAED,MAAM,GAAG,GAAG,gBAAgB,CAAC,QAAQ,CAAC,CAAC;IACvC,OAAO,GAAG,CAAC;AACb,CAAC;AAED,MAAM,UAAU,gBAAgB,CAAC,WAAmB;IAClD,QAAQ,WAAW,EAAE;QACrB,KAAK,IAAI;YACP,OAAO,kBAAkB,CAAC,UAAU,CAAC;QACvC,KAAK,IAAI,CAAC;QACV,KAAK,KAAK,CAAC;QACX,KAAK,KAAK;YACR,OAAO,mBAAmB,CAAC,YAAY,CAAC;QACxC,QAAQ;QACV;YACE,OAAO,kBAAkB,CAAC,KAAK,CAAC;KACjC;AACH,CAAC;AAED,MAAM,UAAU,YAAY,CAAC,EAAgB;;IAC3C,MAAM,cAAc,GAAG,EAAE,CAAC,OAAO,CAAC,YAAY,CAAC,EAAE,CAAC,OAAO,CAAC,aAAa,CAAC,CAAC;IACzE,IAAI,MAAM,GAAqB,MAAA,cAAc,CAAC,IAAI,CAAC,CAAC,GAAG,EAAE,EAAE;QACzD,MAAM,KAAK,GAAG,GAAG,CAAC,MAAM,CAAC,EAAE,CAAC,IAAI,CA
AC,KAAK,CAAC,CAAC;QACxC,OAAO,KAAK,CAAC,CAAC,CAAC,KAAK,CAAC,OAAO,CAAC,KAAK,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC,CAAC,KAAK,CAAC;IACrD,CAAC,CAAC,mCAAI,IAAI,CAAC;IACX,IAAI,CAAC,MAAM,IAAI,cAAc,CAAC,MAAM,GAAG,CAAC;QACtC,MAAM,GAAG,cAAc,CAAC,CAAC,CAAC,CAAC;IAC7B,OAAO,MAAM,CAAC;AAChB,CAAC","sourcesContent":["import * as grok from 'datagrok-api/grok';\nimport * as ui from 'datagrok-api/ui';\nimport * as DG from 'datagrok-api/dg';\nimport wu from 'wu';\n\nimport {Vector} from '@datagrok-libraries/utils/src/type-declarations';\nimport {vectorLength, vectorDotProduct} from '@datagrok-libraries/utils/src/vector-operations';\nimport {SeqPalette} from '../seq-palettes';\nimport {Aminoacids, AminoacidsPalettes} from '../aminoacids';\nimport {Nucleotides, NucleotidesPalettes} from '../nucleotides';\nimport {UnknownSeqPalettes} from '../unknown';\nimport {UnitsHandler} from '../utils/units-handler';\n\n/** enum type to simplify setting \"user-friendly\" notation if necessary */\nexport const enum NOTATION {\n  FASTA = 'fasta',\n  SEPARATOR = 'separator',\n  HELM = 'helm',\n}\n\nexport const enum ALIGNMENT {\n  SEQ_MSA = 'SEQ.MSA',\n  SEQ = 'SEQ',\n}\n\nexport const enum ALPHABET {\n  DNA = 'DNA',\n  RNA = 'RNA',\n  PT = 'PT',\n  UN = 'UN',\n}\n\nexport const enum TAGS {\n  aligned = 'aligned',\n  alphabet = 'alphabet',\n  alphabetSize = '.alphabetSize',\n  alphabetIsMultichar = '.alphabetIsMultichar',\n  separator = 'separator',\n};\n\nexport type SeqColStats = { freq: MonomerFreqs, sameLength: boolean }\nexport type SplitterFunc = (seq: string) => string[];\nexport type MonomerFreqs = { [m: string]: number };\n\n/** Stats of sequences with specified splitter func, returns { freq, sameLength }.\n * @param {DG.Column} seqCol\n * @param {number} minLength\n * @param {SplitterFunc} splitter\n * @return { SeqColStats }, sameLength: boolean } stats of column sequences\n */\nexport function getStats(seqCol: DG.Column, minLength: number, splitter: SplitterFunc): SeqColStats {\n  const freq: { [m: 
string]: number } = {};\n  let sameLength = true;\n  let firstLength = null;\n\n  for (const seq of seqCol.categories) {\n    const mSeq = splitter(seq);\n\n    if (firstLength == null)\n      firstLength = mSeq.length;\n    else if (mSeq.length !== firstLength)\n      sameLength = false;\n\n    if (mSeq.length >= minLength) {\n      for (const m of mSeq) {\n        if (!(m in freq))\n          freq[m] = 0;\n        freq[m] += 1;\n      }\n    }\n  }\n  return {freq: freq, sameLength: sameLength};\n}\n\nexport const monomerRe: RegExp = /\\[(\\w+)\\]|(\\w)|(-)/g;\n\n/** Split sequence for single character monomers, square brackets multichar monomer names or gap symbol.\n * @param {any} seq object with sequence\n * @return {string[]} array of monomers\n */\nexport function splitterAsFasta(seq: any): string[] {\n  return wu<RegExpMatchArray>(seq.toString().matchAll(monomerRe))\n    .map((ma: RegExpMatchArray) => {\n      let mRes: string;\n      const m: string = ma[0];\n      if (m.length > 1) {\n        mRes = ma[1];\n      } else {\n        mRes = m;\n      }\n      return mRes;\n    }).toArray();\n}\n\n/** Gets method to split sequence by separator\n * @param {string} separator\n * @param limit\n * @return {SplitterFunc}\n */\nexport function getSplitterWithSeparator(separator: string, limit: number | undefined = undefined): SplitterFunc {\n  return (seq: string) => {\n    return seq.split(separator, limit);\n  };\n}\n\nconst helmRe: RegExp = /(PEPTIDE1|DNA1|RNA1)\\{([^}]+)}/g;\nconst helmPp1Re: RegExp = /\\[([^\\[\\]]+)]/g;\n\n\n/** Splits Helm string to monomers, but does not replace monomer names to other notation (e.g. 
for RNA).\n * Only for linear polymers, does not split RNA for ribose and phosphate monomers.\n * @param {string} seq Source string of HELM notation\n * @return {string[]}\n */\nexport function splitterAsHelm(seq: any): string[] {\n  helmRe.lastIndex = 0;\n  const ea: RegExpExecArray | null = helmRe.exec(seq.toString());\n  const inSeq: string | null = ea ? ea[2] : null;\n\n  const mmPostProcess = (mm: string): string => {\n    helmPp1Re.lastIndex = 0;\n    const pp1M = helmPp1Re.exec(mm);\n    if (pp1M && pp1M.length >= 2)\n      return pp1M[1];\n    else\n      return mm;\n  };\n\n  const mmList: string[] = inSeq ? inSeq.split('.') : [];\n  return mmList.map(mmPostProcess);\n}\n\n/** Get splitter method to split sequences to monomers\n * @param {string} units\n * @param {string} separator\n * @param limit\n * @return {SplitterFunc}\n */\nexport function getSplitter(units: string, separator: string, limit: number | undefined = undefined): SplitterFunc {\n  if (units.toLowerCase().startsWith(NOTATION.FASTA))\n    return splitterAsFasta;\n  else if (units.toLowerCase().startsWith(NOTATION.SEPARATOR))\n    return getSplitterWithSeparator(separator, limit);\n  else if (units.toLowerCase().startsWith(NOTATION.HELM))\n    return splitterAsHelm;\n  else\n    throw new Error(`Unexpected units ${units} .`);\n\n  // TODO: Splitter for HELM\n}\n\n/** Generate splitter function for sequence column\n * @param {DG.Column} col\n * @return {SplitterFunc} Splitter function\n */\nexport function getSplitterForColumn(col: DG.Column): SplitterFunc {\n  if (col.semType !== DG.SEMTYPE.MACROMOLECULE)\n    throw new Error(`Get splitter for semType \"${DG.SEMTYPE.MACROMOLECULE}\" only.`);\n\n  const units = col.getTag(DG.TAGS.UNITS);\n  const separator = col.getTag(TAGS.separator);\n  return getSplitter(units, separator);\n}\n\nconst longMonomerPartRe: RegExp = /(\\w+)/g;\n\n/** Convert long monomer names to short ones */\nexport function monomerToShort(amino: string, maxLengthOfMonomer: 
number): string {\n  const shortAminoMatch: RegExpMatchArray | null = amino.match(longMonomerPartRe);\n  const needAddDots: boolean = amino.length > maxLengthOfMonomer || (shortAminoMatch?.length ?? 0) > 1;\n  const shortAmino = shortAminoMatch?.[0] ?? ' ';\n  return !needAddDots ? shortAmino : shortAmino.substring(0, maxLengthOfMonomer) + '…';\n}\n\n/** Calculate similarity in current sequence and alphabet.\n * @param {MonomerFreqs} freq\n * @param {Set<string>} alphabet\n * @param {string} gapSymbol\n * @return {number} Cosine similarity\n */\nexport function getAlphabetSimilarity(freq: MonomerFreqs, alphabet: Set<string>, gapSymbol: string = '-'): number {\n  const keys = new Set<string>([...new Set(Object.keys(freq)), ...alphabet]);\n  keys.delete(gapSymbol);\n\n  const freqA: number[] = [];\n  const alphabetA: number[] = [];\n  for (const m of keys) {\n    freqA.push(m in freq ? freq[m] : 0);\n    alphabetA.push(alphabet.has(m) ? 1 : 0);\n  }\n  /* There were a few ideas: chi-squared, pearson correlation (variance?), scalar product */\n  const freqV: Vector = new Vector(freqA);\n  const alphabetV: Vector = new Vector(alphabetA);\n  return vectorDotProduct(freqV, alphabetV) / (vectorLength(freqV) * vectorLength(alphabetV));\n}\n\nexport function detectAlphabet(stats: SeqColStats): string {\n  const alphabetCandidates: [string, Set<string>][] = [\n    [ALPHABET.PT, UnitsHandler.PeptideFastaAlphabet],\n    [ALPHABET.DNA, UnitsHandler.DnaFastaAlphabet],\n    [ALPHABET.RNA, UnitsHandler.RnaFastaAlphabet],\n  ];\n\n  // Calculate likelihoods for alphabet_candidates\n  const alphabetCandidatesSim: number[] = alphabetCandidates.map(\n    (c) => getAlphabetSimilarity(stats.freq, c[1]));\n  const maxCos = Math.max(...alphabetCandidatesSim);\n  const alphabet = maxCos > 0.65 ? 
alphabetCandidates[alphabetCandidatesSim.indexOf(maxCos)][0] : 'UN';\n  return alphabet;\n}\n\n/** Selects a suitable palette based on column data\n * @param {DG.Column} seqCol Column to look for a palette\n * @param {number}  minLength minimum length of sequence to detect palette (empty strings are allowed)\n * @return {SeqPalette} Palette corresponding to the alphabet of the sequences in the column\n */\nexport function pickUpPalette(seqCol: DG.Column, minLength: number = 5): SeqPalette {\n  let alphabet: string;\n  if (seqCol.semType == DG.SEMTYPE.MACROMOLECULE) {\n    const uh: UnitsHandler = new UnitsHandler(seqCol);\n    alphabet = uh.alphabet;\n  } else {\n    const stats: SeqColStats = getStats(seqCol, minLength, splitterAsFasta);\n    alphabet = detectAlphabet(stats);\n  }\n\n  const res = getPaletteByType(alphabet);\n  return res;\n}\n\nexport function getPaletteByType(paletteType: string): SeqPalette {\n  switch (paletteType) {\n  case 'PT':\n    return AminoacidsPalettes.GrokGroups;\n  case 'NT':\n  case 'DNA':\n  case 'RNA':\n    return NucleotidesPalettes.Chromatogram;\n    // other\n  default:\n    return UnknownSeqPalettes.Color;\n  }\n}\n\nexport function pickUpSeqCol(df: DG.DataFrame): DG.Column | null {\n  const semTypeColList = df.columns.bySemTypeAll(DG.SEMTYPE.MACROMOLECULE);\n  let resCol: DG.Column | null = semTypeColList.find((col) => {\n    const units = col.getTag(DG.TAGS.UNITS);\n    return units ? units.indexOf('MSA') !== -1 : false;\n  }) ?? null;\n  if (!resCol && semTypeColList.length > 0)\n    resCol = semTypeColList[0];\n  return resCol;\n}\n"]}
|
|
211
|
+
//# sourceMappingURL=data:application/json;base64,{"version":3,"file":"macromolecule.js","sourceRoot":"","sources":["macromolecule.ts"],"names":[],"mappings":"AAEA,OAAO,KAAK,EAAE,MAAM,iBAAiB,CAAC;AACtC,OAAO,EAAE,MAAM,IAAI,CAAC;AAEpB,OAAO,EAAC,MAAM,EAAC,MAAM,iDAAiD,CAAC;AACvE,OAAO,EAAC,YAAY,EAAE,gBAAgB,EAAC,MAAM,iDAAiD,CAAC;AAE/F,OAAO,EAAa,kBAAkB,EAAC,MAAM,eAAe,CAAC;AAC7D,OAAO,EAAc,mBAAmB,EAAC,MAAM,gBAAgB,CAAC;AAChE,OAAO,EAAC,kBAAkB,EAAC,MAAM,YAAY,CAAC;AAC9C,OAAO,EAAC,YAAY,EAAC,MAAM,wBAAwB,CAAC;AA4BnD,CAAC;AAMF;;;;;GAKG;AACH,MAAM,UAAU,QAAQ,CAAC,MAAiB,EAAE,SAAiB,EAAE,QAAsB;IACnF,MAAM,IAAI,GAA4B,EAAE,CAAC;IACzC,IAAI,UAAU,GAAG,IAAI,CAAC;IACtB,IAAI,WAAW,GAAG,IAAI,CAAC;IAEvB,KAAK,MAAM,GAAG,IAAI,MAAM,CAAC,UAAU,EAAE;QACnC,MAAM,IAAI,GAAG,QAAQ,CAAC,GAAG,CAAC,CAAC;QAE3B,IAAI,WAAW,IAAI,IAAI;YACrB,WAAW,GAAG,IAAI,CAAC,MAAM,CAAC;aACvB,IAAI,IAAI,CAAC,MAAM,KAAK,WAAW;YAClC,UAAU,GAAG,KAAK,CAAC;QAErB,IAAI,IAAI,CAAC,MAAM,IAAI,SAAS,EAAE;YAC5B,KAAK,MAAM,CAAC,IAAI,IAAI,EAAE;gBACpB,IAAI,CAAC,CAAC,CAAC,IAAI,IAAI,CAAC;oBACd,IAAI,CAAC,CAAC,CAAC,GAAG,CAAC,CAAC;gBACd,IAAI,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC;aACd;SACF;KACF;IACD,OAAO,EAAC,IAAI,EAAE,IAAI,EAAE,UAAU,EAAE,UAAU,EAAC,CAAC;AAC9C,CAAC;AAED,MAAM,CAAC,MAAM,SAAS,GAAW,qBAAqB,CAAC;AAEvD;;;GAGG;AACH,MAAM,UAAU,eAAe,CAAC,GAAQ;IACtC,OAAO,EAAE,CAAmB,GAAG,CAAC,QAAQ,EAAE,CAAC,QAAQ,CAAC,SAAS,CAAC,CAAC;SAC5D,GAAG,CAAC,CAAC,EAAoB,EAAE,EAAE;QAC5B,IAAI,IAAY,CAAC;QACjB,MAAM,CAAC,GAAW,EAAE,CAAC,CAAC,CAAC,CAAC;QACxB,IAAI,CAAC,CAAC,MAAM,GAAG,CAAC,EAAE;YAChB,IAAI,GAAG,EAAE,CAAC,CAAC,CAAC,CAAC;SACd;aAAM;YACL,IAAI,GAAG,CAAC,CAAC;SACV;QACD,OAAO,IAAI,CAAC;IACd,CAAC,CAAC,CAAC,OAAO,EAAE,CAAC;AACjB,CAAC;AAED;;;;GAIG;AACH,MAAM,UAAU,wBAAwB,CAAC,SAAiB,EAAE,QAA4B,SAAS;IAC/F,OAAO,CAAC,GAAW,EAAE,EAAE;QACrB,OAAO,GAAG,CAAC,KAAK,CAAC,SAAS,EAAE,KAAK,CAAC,CAAC;IACrC,CAAC,CAAC;AACJ,CAAC;AAED,MAAM,MAAM,GAAW,iCAAiC,CAAC;AACzD,MAAM,SAAS,GAAW,gBAAgB,CAAC;AAE3C;;;;GAIG;AACH,MAAM,UAAU,cAAc,CAAC,GAAQ;IACrC,MAAM,CAAC,SAAS,GAAG,CAAC,CAAC;IACrB,MAAM,EAAE,GAA2B,MAAM,CAAC,IAAI,CAAC,GAAG,CAAC,QAAQ
,EAAE,CAAC,CAAC;IAC/D,MAAM,KAAK,GAAkB,EAAE,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,IAAI,CAAC;IAE/C,MAAM,aAAa,GAAG,CAAC,EAAU,EAAU,EAAE;QAC3C,SAAS,CAAC,SAAS,GAAG,CAAC,CAAC;QACxB,MAAM,IAAI,GAAG,SAAS,CAAC,IAAI,CAAC,EAAE,CAAC,CAAC;QAChC,IAAI,IAAI,IAAI,IAAI,CAAC,MAAM,IAAI,CAAC;YAC1B,OAAO,IAAI,CAAC,CAAC,CAAC,CAAC;;YAEf,OAAO,EAAE,CAAC;IACd,CAAC,CAAC;IAEF,MAAM,MAAM,GAAa,KAAK,CAAC,CAAC,CAAC,KAAK,CAAC,KAAK,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC,EAAE,CAAC;IACvD,OAAO,MAAM,CAAC,GAAG,CAAC,aAAa,CAAC,CAAC;AACnC,CAAC;AAED;;;;;GAKG;AACH,MAAM,UAAU,WAAW,CAAC,KAAa,EAAE,SAAiB,EAAE,QAA4B,SAAS;IACjG,IAAI,KAAK,CAAC,WAAW,EAAE,CAAC,UAAU,8BAAgB;QAChD,OAAO,eAAe,CAAC;SACpB,IAAI,KAAK,CAAC,WAAW,EAAE,CAAC,UAAU,sCAAoB;QACzD,OAAO,wBAAwB,CAAC,SAAS,EAAE,KAAK,CAAC,CAAC;SAC/C,IAAI,KAAK,CAAC,WAAW,EAAE,CAAC,UAAU,4BAAe;QACpD,OAAO,cAAc,CAAC;;QAEtB,MAAM,IAAI,KAAK,CAAC,oBAAoB,KAAK,IAAI,CAAC,CAAC;IAEjD,0BAA0B;AAC5B,CAAC;AAED;;;GAGG;AACH,MAAM,UAAU,oBAAoB,CAAC,GAAc;IACjD,IAAI,GAAG,CAAC,OAAO,KAAK,EAAE,CAAC,OAAO,CAAC,aAAa;QAC1C,MAAM,IAAI,KAAK,CAAC,6BAA6B,EAAE,CAAC,OAAO,CAAC,aAAa,SAAS,CAAC,CAAC;IAElF,MAAM,KAAK,GAAG,GAAG,CAAC,MAAM,CAAC,EAAE,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC;IACxC,MAAM,SAAS,GAAG,GAAG,CAAC,MAAM,kCAAgB,CAAC;IAC7C,OAAO,WAAW,CAAC,KAAK,EAAE,SAAS,CAAC,CAAC;AACvC,CAAC;AAED,MAAM,iBAAiB,GAAW,QAAQ,CAAC;AAE3C,+CAA+C;AAC/C,MAAM,UAAU,cAAc,CAAC,KAAa,EAAE,kBAA0B;;IACtE,MAAM,eAAe,GAA4B,KAAK,CAAC,KAAK,CAAC,iBAAiB,CAAC,CAAC;IAChF,MAAM,WAAW,GAAY,KAAK,CAAC,MAAM,GAAG,kBAAkB,IAAI,CAAC,MAAA,eAAe,aAAf,eAAe,uBAAf,eAAe,CAAE,MAAM,mCAAI,CAAC,CAAC,GAAG,CAAC,CAAC;IACrG,MAAM,UAAU,GAAG,MAAA,eAAe,aAAf,eAAe,uBAAf,eAAe,CAAG,CAAC,CAAC,mCAAI,GAAG,CAAC;IAC/C,OAAO,CAAC,WAAW,CAAC,CAAC,CAAC,UAAU,CAAC,CAAC,CAAC,UAAU,CAAC,SAAS,CAAC,CAAC,EAAE,kBAAkB,CAAC,GAAG,GAAG,CAAC;AACvF,CAAC;AAED,MAAM;AACN,MAAM,UAAU,WAAW,CAAC,QAAkB;IAC5C,QAAQ,QAAQ,EAAE;QAClB;YACE,OAAO,YAAY,CAAC,gBAAgB,CAAC;QACvC;YACE,OAAO,YAAY,CAAC,gBAAgB,CAAC;QACvC;YACE,OAAO,YAAY,CAAC,oBAAoB,CAAC;QAC3C;YACE,MAAM,IAAI,KAAK,CAAC,yBAAyB,QAAQ,IAAI,CAAC,CAAC;KACxD;AACH,CAAC;AAED;;;;;GAKG;AACH,MAAM
,UAAU,qBAAqB,CAAC,IAAkB,EAAE,QAAqB,EAAE,YAAoB,GAAG;IACtG,MAAM,IAAI,GAAG,IAAI,GAAG,CAAS,CAAC,GAAG,IAAI,GAAG,CAAC,MAAM,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC,EAAE,GAAG,QAAQ,CAAC,CAAC,CAAC;IAC3E,IAAI,CAAC,MAAM,CAAC,SAAS,CAAC,CAAC;IAEvB,MAAM,KAAK,GAAa,EAAE,CAAC;IAC3B,MAAM,SAAS,GAAa,EAAE,CAAC;IAC/B,KAAK,MAAM,CAAC,IAAI,IAAI,EAAE;QACpB,KAAK,CAAC,IAAI,CAAC,CAAC,IAAI,IAAI,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC;QACpC,SAAS,CAAC,IAAI,CAAC,QAAQ,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC;KACzC;IACD,0FAA0F;IAC1F,MAAM,KAAK,GAAW,IAAI,MAAM,CAAC,KAAK,CAAC,CAAC;IACxC,MAAM,SAAS,GAAW,IAAI,MAAM,CAAC,SAAS,CAAC,CAAC;IAChD,OAAO,gBAAgB,CAAC,KAAK,EAAE,SAAS,CAAC,GAAG,CAAC,YAAY,CAAC,KAAK,CAAC,GAAG,YAAY,CAAC,SAAS,CAAC,CAAC,CAAC;AAC9F,CAAC;AAED,MAAM,UAAU,cAAc,CAAC,KAAkB;IAC/C,MAAM,kBAAkB,GAA4B;QAClD,yBAAc,YAAY,CAAC,oBAAoB,CAAC;QAChD,2BAAe,YAAY,CAAC,gBAAgB,CAAC;QAC7C,2BAAe,YAAY,CAAC,gBAAgB,CAAC;KAC9C,CAAC;IAEF,gDAAgD;IAChD,MAAM,qBAAqB,GAAa,kBAAkB,CAAC,GAAG,CAC5D,CAAC,CAAC,EAAE,EAAE,CAAC,qBAAqB,CAAC,KAAK,CAAC,IAAI,EAAE,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC;IAClD,MAAM,MAAM,GAAG,IAAI,CAAC,GAAG,CAAC,GAAG,qBAAqB,CAAC,CAAC;IAClD,MAAM,QAAQ,GAAG,MAAM,GAAG,IAAI,CAAC,CAAC,CAAC,kBAAkB,CAAC,qBAAqB,CAAC,OAAO,CAAC,MAAM,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,IAAI,CAAC;IACrG,OAAO,QAAQ,CAAC;AAClB,CAAC;AAED;;;;GAIG;AACH,MAAM,UAAU,aAAa,CAAC,MAAiB,EAAE,YAAoB,CAAC;IACpE,IAAI,QAAgB,CAAC;IACrB,IAAI,MAAM,CAAC,OAAO,IAAI,EAAE,CAAC,OAAO,CAAC,aAAa,EAAE;QAC9C,MAAM,EAAE,GAAiB,IAAI,YAAY,CAAC,MAAM,CAAC,CAAC;QAClD,QAAQ,GAAG,EAAE,CAAC,QAAQ,CAAC;KACxB;SAAM;QACL,MAAM,KAAK,GAAgB,QAAQ,CAAC,MAAM,EAAE,SAAS,EAAE,eAAe,CAAC,CAAC;QACxE,QAAQ,GAAG,cAAc,CAAC,KAAK,CAAC,CAAC;KAClC;IAED,MAAM,GAAG,GAAG,gBAAgB,CAAC,QAAQ,CAAC,CAAC;IACvC,OAAO,GAAG,CAAC;AACb,CAAC;AAED,MAAM,UAAU,gBAAgB,CAAC,WAAmB;IAClD,QAAQ,WAAW,EAAE;QACrB,KAAK,IAAI;YACP,OAAO,kBAAkB,CAAC,UAAU,CAAC;QACvC,KAAK,IAAI,CAAC;QACV,KAAK,KAAK,CAAC;QACX,KAAK,KAAK;YACR,OAAO,mBAAmB,CAAC,YAAY,CAAC;QACxC,QAAQ;QACV;YACE,OAAO,kBAAkB,CAAC,KAAK,CAAC;KACjC;AAC
H,CAAC;AAED,MAAM,UAAU,YAAY,CAAC,EAAgB;;IAC3C,MAAM,cAAc,GAAG,EAAE,CAAC,OAAO,CAAC,YAAY,CAAC,EAAE,CAAC,OAAO,CAAC,aAAa,CAAC,CAAC;IACzE,IAAI,MAAM,GAAqB,MAAA,cAAc,CAAC,IAAI,CAAC,CAAC,GAAG,EAAE,EAAE;QACzD,MAAM,KAAK,GAAG,GAAG,CAAC,MAAM,CAAC,EAAE,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC;QACxC,OAAO,KAAK,CAAC,CAAC,CAAC,KAAK,CAAC,OAAO,CAAC,KAAK,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC,CAAC,KAAK,CAAC;IACrD,CAAC,CAAC,mCAAI,IAAI,CAAC;IACX,IAAI,CAAC,MAAM,IAAI,cAAc,CAAC,MAAM,GAAG,CAAC;QACtC,MAAM,GAAG,cAAc,CAAC,CAAC,CAAC,CAAC;IAC7B,OAAO,MAAM,CAAC;AAChB,CAAC","sourcesContent":["import * as grok from 'datagrok-api/grok';\nimport * as ui from 'datagrok-api/ui';\nimport * as DG from 'datagrok-api/dg';\nimport wu from 'wu';\n\nimport {Vector} from '@datagrok-libraries/utils/src/type-declarations';\nimport {vectorLength, vectorDotProduct} from '@datagrok-libraries/utils/src/vector-operations';\nimport {SeqPalette} from '../seq-palettes';\nimport {Aminoacids, AminoacidsPalettes} from '../aminoacids';\nimport {Nucleotides, NucleotidesPalettes} from '../nucleotides';\nimport {UnknownSeqPalettes} from '../unknown';\nimport {UnitsHandler} from '../utils/units-handler';\n\n\n/** enum type to simplify setting \"user-friendly\" notation if necessary */\nexport const enum NOTATION {\n  FASTA = 'fasta',\n  SEPARATOR = 'separator',\n  HELM = 'helm',\n}\n\nexport const enum ALIGNMENT {\n  SEQ_MSA = 'SEQ.MSA',\n  SEQ = 'SEQ',\n}\n\nexport const enum ALPHABET {\n  DNA = 'DNA',\n  RNA = 'RNA',\n  PT = 'PT',\n  UN = 'UN',\n}\n\nexport const enum TAGS {\n  aligned = 'aligned',\n  alphabet = 'alphabet',\n  alphabetSize = '.alphabetSize',\n  alphabetIsMultichar = '.alphabetIsMultichar',\n  separator = 'separator',\n};\n\nexport type SeqColStats = { freq: MonomerFreqs, sameLength: boolean }\nexport type SplitterFunc = (seq: string) => string[];\nexport type MonomerFreqs = { [m: string]: number };\n\n/** Stats of sequences with specified splitter func, returns { freq, sameLength }.\n * @param {DG.Column} seqCol\n * @param {number} 
minLength\n * @param {SplitterFunc} splitter\n * @return { SeqColStats }, sameLength: boolean } stats of column sequences\n */\nexport function getStats(seqCol: DG.Column, minLength: number, splitter: SplitterFunc): SeqColStats {\n  const freq: { [m: string]: number } = {};\n  let sameLength = true;\n  let firstLength = null;\n\n  for (const seq of seqCol.categories) {\n    const mSeq = splitter(seq);\n\n    if (firstLength == null)\n      firstLength = mSeq.length;\n    else if (mSeq.length !== firstLength)\n      sameLength = false;\n\n    if (mSeq.length >= minLength) {\n      for (const m of mSeq) {\n        if (!(m in freq))\n          freq[m] = 0;\n        freq[m] += 1;\n      }\n    }\n  }\n  return {freq: freq, sameLength: sameLength};\n}\n\nexport const monomerRe: RegExp = /\\[(\\w+)\\]|(\\w)|(-)/g;\n\n/** Split sequence for single character monomers, square brackets multichar monomer names or gap symbol.\n * @param {any} seq object with sequence\n * @return {string[]} array of monomers\n */\nexport function splitterAsFasta(seq: any): string[] {\n  return wu<RegExpMatchArray>(seq.toString().matchAll(monomerRe))\n    .map((ma: RegExpMatchArray) => {\n      let mRes: string;\n      const m: string = ma[0];\n      if (m.length > 1) {\n        mRes = ma[1];\n      } else {\n        mRes = m;\n      }\n      return mRes;\n    }).toArray();\n}\n\n/** Gets method to split sequence by separator\n * @param {string} separator\n * @param limit\n * @return {SplitterFunc}\n */\nexport function getSplitterWithSeparator(separator: string, limit: number | undefined = undefined): SplitterFunc {\n  return (seq: string) => {\n    return seq.split(separator, limit);\n  };\n}\n\nconst helmRe: RegExp = /(PEPTIDE1|DNA1|RNA1)\\{([^}]+)}/g;\nconst helmPp1Re: RegExp = /\\[([^\\[\\]]+)]/g;\n\n/** Splits Helm string to monomers, but does not replace monomer names to other notation (e.g. 
for RNA).\n * Only for linear polymers, does not split RNA for ribose and phosphate monomers.\n * @param {string} seq Source string of HELM notation\n * @return {string[]}\n */\nexport function splitterAsHelm(seq: any): string[] {\n  helmRe.lastIndex = 0;\n  const ea: RegExpExecArray | null = helmRe.exec(seq.toString());\n  const inSeq: string | null = ea ? ea[2] : null;\n\n  const mmPostProcess = (mm: string): string => {\n    helmPp1Re.lastIndex = 0;\n    const pp1M = helmPp1Re.exec(mm);\n    if (pp1M && pp1M.length >= 2)\n      return pp1M[1];\n    else\n      return mm;\n  };\n\n  const mmList: string[] = inSeq ? inSeq.split('.') : [];\n  return mmList.map(mmPostProcess);\n}\n\n/** Get splitter method to split sequences to monomers\n * @param {string} units\n * @param {string} separator\n * @param limit\n * @return {SplitterFunc}\n */\nexport function getSplitter(units: string, separator: string, limit: number | undefined = undefined): SplitterFunc {\n  if (units.toLowerCase().startsWith(NOTATION.FASTA))\n    return splitterAsFasta;\n  else if (units.toLowerCase().startsWith(NOTATION.SEPARATOR))\n    return getSplitterWithSeparator(separator, limit);\n  else if (units.toLowerCase().startsWith(NOTATION.HELM))\n    return splitterAsHelm;\n  else\n    throw new Error(`Unexpected units ${units} .`);\n\n  // TODO: Splitter for HELM\n}\n\n/** Generate splitter function for sequence column\n * @param {DG.Column} col\n * @return {SplitterFunc} Splitter function\n */\nexport function getSplitterForColumn(col: DG.Column): SplitterFunc {\n  if (col.semType !== DG.SEMTYPE.MACROMOLECULE)\n    throw new Error(`Get splitter for semType \"${DG.SEMTYPE.MACROMOLECULE}\" only.`);\n\n  const units = col.getTag(DG.TAGS.UNITS);\n  const separator = col.getTag(TAGS.separator);\n  return getSplitter(units, separator);\n}\n\nconst longMonomerPartRe: RegExp = /(\\w+)/g;\n\n/** Convert long monomer names to short ones */\nexport function monomerToShort(amino: string, maxLengthOfMonomer: 
number): string {\n  const shortAminoMatch: RegExpMatchArray | null = amino.match(longMonomerPartRe);\n  const needAddDots: boolean = amino.length > maxLengthOfMonomer || (shortAminoMatch?.length ?? 0) > 1;\n  const shortAmino = shortAminoMatch?.[0] ?? ' ';\n  return !needAddDots ? shortAmino : shortAmino.substring(0, maxLengthOfMonomer) + '…';\n}\n\n/** */\nexport function getAlphabet(alphabet: ALPHABET): Set<string> {\n  switch (alphabet) {\n  case ALPHABET.DNA:\n    return UnitsHandler.DnaFastaAlphabet;\n  case ALPHABET.RNA:\n    return UnitsHandler.RnaFastaAlphabet;\n  case ALPHABET.PT:\n    return UnitsHandler.PeptideFastaAlphabet;\n  default:\n    throw new Error(`Unsupported alphabet '${alphabet}'.`);\n  }\n}\n\n/** Calculate similarity in current sequence and alphabet.\n * @param {MonomerFreqs} freq\n * @param {Set<string>} alphabet\n * @param {string} gapSymbol\n * @return {number} Cosine similarity\n */\nexport function getAlphabetSimilarity(freq: MonomerFreqs, alphabet: Set<string>, gapSymbol: string = '-'): number {\n  const keys = new Set<string>([...new Set(Object.keys(freq)), ...alphabet]);\n  keys.delete(gapSymbol);\n\n  const freqA: number[] = [];\n  const alphabetA: number[] = [];\n  for (const m of keys) {\n    freqA.push(m in freq ? freq[m] : 0);\n    alphabetA.push(alphabet.has(m) ? 
1 : 0);\n  }\n  /* There were a few ideas: chi-squared, pearson correlation (variance?), scalar product */\n  const freqV: Vector = new Vector(freqA);\n  const alphabetV: Vector = new Vector(alphabetA);\n  return vectorDotProduct(freqV, alphabetV) / (vectorLength(freqV) * vectorLength(alphabetV));\n}\n\nexport function detectAlphabet(stats: SeqColStats): string {\n  const alphabetCandidates: [string, Set<string>][] = [\n    [ALPHABET.PT, UnitsHandler.PeptideFastaAlphabet],\n    [ALPHABET.DNA, UnitsHandler.DnaFastaAlphabet],\n    [ALPHABET.RNA, UnitsHandler.RnaFastaAlphabet],\n  ];\n\n  // Calculate likelihoods for alphabet_candidates\n  const alphabetCandidatesSim: number[] = alphabetCandidates.map(\n    (c) => getAlphabetSimilarity(stats.freq, c[1]));\n  const maxCos = Math.max(...alphabetCandidatesSim);\n  const alphabet = maxCos > 0.65 ? alphabetCandidates[alphabetCandidatesSim.indexOf(maxCos)][0] : 'UN';\n  return alphabet;\n}\n\n/** Selects a suitable palette based on column data\n * @param {DG.Column} seqCol Column to look for a palette\n * @param {number}  minLength minimum length of sequence to detect palette (empty strings are allowed)\n * @return {SeqPalette} Palette corresponding to the alphabet of the sequences in the column\n */\nexport function pickUpPalette(seqCol: DG.Column, minLength: number = 5): SeqPalette {\n  let alphabet: string;\n  if (seqCol.semType == DG.SEMTYPE.MACROMOLECULE) {\n    const uh: UnitsHandler = new UnitsHandler(seqCol);\n    alphabet = uh.alphabet;\n  } else {\n    const stats: SeqColStats = getStats(seqCol, minLength, splitterAsFasta);\n    alphabet = detectAlphabet(stats);\n  }\n\n  const res = getPaletteByType(alphabet);\n  return res;\n}\n\nexport function getPaletteByType(paletteType: string): SeqPalette {\n  switch (paletteType) {\n  case 'PT':\n    return AminoacidsPalettes.GrokGroups;\n  case 'NT':\n  case 'DNA':\n  case 'RNA':\n    return NucleotidesPalettes.Chromatogram;\n    // other\n  default:\n    return 
UnknownSeqPalettes.Color;\n  }\n}\n\nexport function pickUpSeqCol(df: DG.DataFrame): DG.Column | null {\n  const semTypeColList = df.columns.bySemTypeAll(DG.SEMTYPE.MACROMOLECULE);\n  let resCol: DG.Column | null = semTypeColList.find((col) => {\n    const units = col.getTag(DG.TAGS.UNITS);\n    return units ? units.indexOf('MSA') !== -1 : false;\n  }) ?? null;\n  if (!resCol && semTypeColList.length > 0)\n    resCol = semTypeColList[0];\n  return resCol;\n}\n"]}
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"notation-converter.d.ts","sourceRoot":"","sources":["notation-converter.ts"],"names":[],"mappings":"AAGA,OAAO,KAAK,EAAE,MAAM,iBAAiB,CAAC;AAEtC,OAAO,EAAC,YAAY,EAAC,MAAM,iBAAiB,CAAC;AAC7C,OAAO,EAAiC,QAAQ,EAAe,YAAY,EAAO,MAAM,iBAAiB,CAAC;AAE1G,iFAAiF;AACjF,qBAAa,iBAAkB,SAAQ,YAAY;IACjD,OAAO,CAAC,SAAS,CAA6B;IAE9C,SAAS,KAAK,QAAQ,IAAI,YAAY,CAIrC;IAEM,OAAO,CAAC,cAAc,EAAE,QAAQ,GAAG,OAAO;IAE1C,WAAW,CAAC,cAAc,EAAE,QAAQ,GAAG,OAAO;IAE9C,MAAM,CAAC,cAAc,EAAE,QAAQ,GAAG,OAAO;IAEhD;;;;;;OAMG;IACH,OAAO,CAAC,uBAAuB;IAoB/B;;;;;OAKG;IACH,OAAO,CAAC,eAAe;IAiBvB,OAAO,CAAC,mBAAmB;IAkB3B;;;;;;;OAOG;IACI,mBAAmB,CACxB,aAAa,EAAE,MAAM,EACrB,eAAe,GAAE,MAAM,GAAG,IAAW,GACpC,MAAM;IAOT;;;;;OAKG;IACH,OAAO,CAAC,aAAa;IAgBrB;;;;;OAKG;IACH,OAAO,CAAC,uBAAuB;IA6B/B;;;;;;;;OAQG;IACH,OAAO,CAAC,WAAW;
|
|
1
|
+
{"version":3,"file":"notation-converter.d.ts","sourceRoot":"","sources":["notation-converter.ts"],"names":[],"mappings":"AAGA,OAAO,KAAK,EAAE,MAAM,iBAAiB,CAAC;AAEtC,OAAO,EAAC,YAAY,EAAC,MAAM,iBAAiB,CAAC;AAC7C,OAAO,EAAiC,QAAQ,EAAe,YAAY,EAAO,MAAM,iBAAiB,CAAC;AAE1G,iFAAiF;AACjF,qBAAa,iBAAkB,SAAQ,YAAY;IACjD,OAAO,CAAC,SAAS,CAA6B;IAE9C,SAAS,KAAK,QAAQ,IAAI,YAAY,CAIrC;IAEM,OAAO,CAAC,cAAc,EAAE,QAAQ,GAAG,OAAO;IAE1C,WAAW,CAAC,cAAc,EAAE,QAAQ,GAAG,OAAO;IAE9C,MAAM,CAAC,cAAc,EAAE,QAAQ,GAAG,OAAO;IAEhD;;;;;;OAMG;IACH,OAAO,CAAC,uBAAuB;IAoB/B;;;;;OAKG;IACH,OAAO,CAAC,eAAe;IAiBvB,OAAO,CAAC,mBAAmB;IAkB3B;;;;;;;OAOG;IACI,mBAAmB,CACxB,aAAa,EAAE,MAAM,EACrB,eAAe,GAAE,MAAM,GAAG,IAAW,GACpC,MAAM;IAOT;;;;;OAKG;IACH,OAAO,CAAC,aAAa;IAgBrB;;;;;OAKG;IACH,OAAO,CAAC,uBAAuB;IA6B/B;;;;;;;;OAQG;IACH,OAAO,CAAC,WAAW;IAqDnB,OAAO,CAAC,sBAAsB;IAK9B;;;;;OAKG;IACI,OAAO,CAAC,WAAW,EAAE,QAAQ,EAAE,YAAY,GAAE,MAAM,GAAG,IAAW,GAAG,EAAE,CAAC,MAAM;gBAsBjE,GAAG,EAAE,EAAE,CAAC,MAAM;CAGlC"}
|
|
@@ -147,16 +147,17 @@ export class NotationConverter extends UnitsHandler {
|
|
|
147
147
|
* SEPARATOR)
|
|
148
148
|
* @return {DG.Column} Converted column
|
|
149
149
|
*/
|
|
150
|
-
convertHelm(tgtNotation, tgtSeparator
|
|
150
|
+
convertHelm(tgtNotation, tgtSeparator, tgtGapSymbol) {
|
|
151
151
|
// This function must not contain calls of isDna() and isRna(), for
|
|
152
152
|
// source helm columns may contain RNA, DNA and PT across different rows
|
|
153
|
-
if (tgtGapSymbol
|
|
153
|
+
if (!tgtGapSymbol) {
|
|
154
154
|
tgtGapSymbol = (this.toFasta(tgtNotation)) ?
|
|
155
155
|
UnitsHandler._defaultGapSymbolsDict.FASTA :
|
|
156
156
|
UnitsHandler._defaultGapSymbolsDict.SEPARATOR;
|
|
157
157
|
}
|
|
158
|
-
if (
|
|
159
|
-
tgtSeparator = this.separator;
|
|
158
|
+
if (!tgtSeparator) {
|
|
159
|
+
tgtSeparator = (this.toFasta(tgtNotation)) ? '' : this.separator;
|
|
160
|
+
}
|
|
160
161
|
const helmWrappersRe = /(R\(|D\(|\)|P)/g;
|
|
161
162
|
const newColumn = this.getNewColumn(tgtNotation);
|
|
162
163
|
// assign the values to the empty column
|
|
@@ -218,8 +219,11 @@ export class NotationConverter extends UnitsHandler {
|
|
|
218
219
|
return this.convertSeparatorToFasta();
|
|
219
220
|
else if (this.isHelm() && this.toFasta(tgtNotation)) // the case of HELM
|
|
220
221
|
return this.convertHelm(tgtNotation);
|
|
221
|
-
else
|
|
222
|
+
else if (this.isHelm() && this.toSeparator(tgtNotation))
|
|
222
223
|
return this.convertHelm(tgtNotation, tgtSeparator);
|
|
224
|
+
else
|
|
225
|
+
throw new Error('Not supported conversion ' +
|
|
226
|
+
`from source notation '${this.notation}' to target notation '${tgtNotation}'.`);
|
|
223
227
|
}
|
|
224
228
|
}
|
|
225
|
-
//# sourceMappingURL=data:application/json;base64,{"version":3,"file":"notation-converter.js","sourceRoot":"","sources":["notation-converter.ts"],"names":[],"mappings":"AAGA,OAAO,KAAK,EAAE,MAAM,iBAAiB,CAAC;AAEtC,OAAO,EAAC,YAAY,EAAC,MAAM,iBAAiB,CAAC;AAC7C,OAAO,EAAC,oBAAoB,EAAE,QAAQ,EAA4C,MAAM,iBAAiB,CAAC;AAE1G,iFAAiF;AACjF,MAAM,OAAO,iBAAkB,SAAQ,YAAY;IA6PjD,YAAmB,GAAc;QAC/B,KAAK,CAAC,GAAG,CAAC,CAAC;QA7PL,cAAS,GAAwB,IAAI,CAAC;IA8P9C,CAAC;IA5PD,IAAc,QAAQ;QACpB,IAAI,IAAI,CAAC,SAAS,KAAK,IAAI;YACzB,IAAI,CAAC,SAAS,GAAG,oBAAoB,CAAC,IAAI,CAAC,MAAM,CAAC,CAAC;QACrD,OAAO,IAAI,CAAC,SAAS,CAAC;IACxB,CAAC;IAEM,OAAO,CAAC,cAAwB,IAAa,OAAO,cAAc,iCAAmB,CAAC,CAAC,CAAC;IAExF,WAAW,CAAC,cAAwB,IAAa,OAAO,cAAc,yCAAuB,CAAC,CAAC,CAAC;IAEhG,MAAM,CAAC,cAAwB,IAAa,OAAO,cAAc,+BAAkB,CAAC,CAAC,CAAC;IAE7F;;;;;;OAMG;IACK,uBAAuB,CAAC,SAAiB,EAAE,iBAAgC,IAAI;QACrF,IAAI,cAAc,KAAK,IAAI;YACzB,cAAc,GAAG,IAAI,CAAC,gBAAgB,CAAC;QAEzC,MAAM,SAAS,GAAG,IAAI,CAAC,YAAY,sCAAoB,CAAC;QACxD,sDAAsD;QACtD,SAAS,CAAC,IAAI,CAAC,CAAC,GAAW,EAAE,EAAE;YAC7B,MAAM,YAAY,GAAG,IAAI,CAAC,MAAM,CAAC,GAAG,CAAC,GAAG,CAAC,CAAC;YAC1C,MAAM,kBAAkB,GAAG,IAAI,CAAC,QAAQ,CAAC,YAAY,CAAC,CAAC;YACvD,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,kBAAkB,CAAC,MAAM,EAAE,CAAC,EAAE,EAAE;gBAClD,IAAI,kBAAkB,CAAC,CAAC,CAAC,KAAK,cAAc;oBAC1C,kBAAkB,CAAC,CAAC,CAAC,GAAG,YAAY,CAAC,sBAAsB,CAAC,SAAS,CAAC;aACzE;YACD,OAAO,kBAAkB,CAAC,IAAI,CAAC,SAAS,CAAC,CAAC;QAC5C,CAAC,CAAC,CAAC;QACH,SAAS,CAAC,MAAM,CAAC,EAAE,CAAC,IAAI,CAAC,KAAK,uCAAqB,CAAC;QACpD,SAAS,CAAC,MAAM,mCAAiB,SAAS,CAAC,CAAC;QAC5C,OAAO,SAAS,CAAC;IACnB,CAAC;IAED;;;;;OAKG;IACK,eAAe;QACrB,MAAM,MAAM,GAAG,CAAC,IAAI,CAAC,KAAK,EAAE,CAAC,CAAC,CAAC,CAAC,OAAO,CAAC,CAAC;YACvC,CAAC,IAAI,CAAC,KAAK,EAAE,CAAC,CAAC,CAAC,CAAC,OAAO,CAAC,CAAC;gBACxB,CAAC,IAAI,CAAC,SAAS,EAAE,CAAC,CAAC,CAAC,CAAC,WAAW,CAAC,CAAC;oBAChC,SAAS,CAAC,CAAC,6CAA6C;QAE9D,IAAI,MAAM,KAAK,SAAS;YACtB,MAAM,IAAI,KAAK,CAAC,iCAAiC,CAAC,CAAC;QAErD,MAAM,OAAO,GAAG,MAAM,CAAC;QACvB,MAAM,WAAW,GAAG,CAAC,IAAI,CAAC,KAAK,EAAE,CAAC,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC;YACzC,CAAC,IAAI,CAAC,KAAK
,EAAE,CAAC,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,0BAA0B;QACxD,MAAM,YAAY,GAAG,CAAC,IAAI,CAAC,KAAK,EAAE,IAAI,IAAI,CAAC,KAAK,EAAE,CAAC,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,0BAA0B;QAC3F,OAAO,CAAC,MAAM,EAAE,WAAW,EAAE,YAAY,EAAE,OAAO,CAAC,CAAC;IACtD,CAAC;IAED,mDAAmD;IAC3C,mBAAmB,CACzB,aAAqB,EACrB,eAAuB,EACvB,MAAc,EACd,WAAmB,EACnB,YAAoB,EACpB,OAAe;QAEf,MAAM,YAAY,GAAG,IAAI,CAAC,QAAQ,CAAC,aAAa,CAAC,CAAC;QAClD,MAAM,gBAAgB,GAAa,YAAY,CAAC,GAAG,CAAC,CAAC,EAAU,EAAE,EAAE;YACjE,IAAI,EAAE,KAAK,eAAe;gBACxB,OAAO,YAAY,CAAC,sBAAsB,CAAC,IAAI,CAAC;;gBAEhD,OAAO,GAAG,WAAW,GAAG,EAAE,GAAG,YAAY,EAAE,CAAC;QAChD,CAAC,CAAC,CAAC;QACH,OAAO,GAAG,MAAM,GAAG,gBAAgB,CAAC,IAAI,CAAC,GAAG,CAAC,GAAG,OAAO,EAAE,CAAC;IAC5D,CAAC;IAED;;;;;;;OAOG;IACI,mBAAmB,CACxB,aAAqB,EACrB,kBAAiC,IAAI;QAErC,IAAI,eAAe,KAAK,IAAI;YAC1B,eAAe,GAAG,IAAI,CAAC,gBAAgB,CAAC;QAC1C,MAAM,CAAC,MAAM,EAAE,WAAW,EAAE,YAAY,EAAE,OAAO,CAAC,GAAG,IAAI,CAAC,eAAe,EAAE,CAAC;QAC5E,OAAO,IAAI,CAAC,mBAAmB,CAAC,aAAa,EAAE,eAAe,EAAE,MAAM,EAAE,WAAW,EAAE,YAAY,EAAE,OAAO,CAAC,CAAC;IAC9G,CAAC;IAED;;;;;OAKG;IACK,aAAa,CAAC,kBAAiC,IAAI;QACzD,IAAI,eAAe,KAAK,IAAI;YAC1B,eAAe,GAAG,IAAI,CAAC,gBAAgB,CAAC;QAE1C,MAAM,CAAC,MAAM,EAAE,WAAW,EAAE,YAAY,EAAE,OAAO,CAAC,GAAG,IAAI,CAAC,eAAe,EAAE,CAAC;QAE5E,MAAM,SAAS,GAAG,IAAI,CAAC,YAAY,4BAAe,CAAC;QACnD,wCAAwC;QACxC,SAAS,CAAC,IAAI,CAAC,CAAC,GAAW,EAAE,EAAE;YAC7B,MAAM,aAAa,GAAG,IAAI,CAAC,MAAM,CAAC,GAAG,CAAC,GAAG,CAAC,CAAC;YAC3C,OAAO,IAAI,CAAC,mBAAmB,CAAC,aAAa,EAAE,eAAgB,EAAE,MAAM,EAAE,WAAW,EAAE,YAAY,EAAE,OAAO,CAAC,CAAC;QAC/G,CAAC,CAAC,CAAC;QACH,SAAS,CAAC,MAAM,CAAC,EAAE,CAAC,IAAI,CAAC,KAAK,6BAAgB,CAAC;QAC/C,OAAO,SAAS,CAAC;IACnB,CAAC;IAED;;;;;OAKG;IACK,uBAAuB,CAAC,iBAAgC,IAAI;QAClE,IAAI,cAAc,KAAK,IAAI;YACzB,cAAc,GAAG,YAAY,CAAC,sBAAsB,CAAC,KAAK,CAAC;QAE7D,MAAM,SAAS,GAAG,IAAI,CAAC,YAAY,8BAAgB,CAAC;QACpD,wCAAwC;QACxC,SAAS,CAAC,IAAI,CAAC,CAAC,GAAW,EAAE,EAAE;YAC7B,MAAM,gBAAgB,GAAG,IAAI,CAAC,MAAM,CAAC,GAAG,CAAC,GAAG,CAAC,CAAC;YAC9C,sCAAsC;YACtC,MAAM,mBAAmB,GAAG,IAAI,CAAC,QAAQ,CAAC,gBAAgB,CAAC,CAAC;YAC5D,MAAM,kBAAkB,GAAa,E
AAE,CAAC;YACxC,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,mBAAmB,CAAC,MAAM,EAAE,CAAC,EAAE,EAAE;gBACnD,MAAM,IAAI,GAAG,mBAAmB,CAAC,CAAC,CAAC,CAAC;gBACpC,IAAI,IAAI,CAAC,MAAM,KAAK,CAAC,EAAE;oBACrB,kBAAkB,CAAC,IAAI,CAAC,cAAe,CAAC,CAAC;iBAC1C;qBAAM,IAAI,IAAI,CAAC,MAAM,GAAG,CAAC,EAAE;oBAC1B,wCAAwC;oBACxC,MAAM,OAAO,GAAG,GAAG,GAAG,IAAI,GAAG,GAAG,CAAC;oBACjC,kBAAkB,CAAC,IAAI,CAAC,OAAO,CAAC,CAAC;iBAClC;qBAAM;oBACL,kBAAkB,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;iBAC/B;aACF;YACD,OAAO,kBAAkB,CAAC,IAAI,CAAC,EAAE,CAAC,CAAC;QACrC,CAAC,CAAC,CAAC;QACH,SAAS,CAAC,MAAM,CAAC,EAAE,CAAC,IAAI,CAAC,KAAK,+BAAiB,CAAC;QAChD,OAAO,SAAS,CAAC;IACnB,CAAC;IAED;;;;;;;;OAQG;IACK,WAAW,CACjB,WAAmB,EACnB,eAAuB,EAAE,EACzB,eAA8B,IAAI;QAElC,mEAAmE;QACnE,wEAAwE;QACxE,IAAI,YAAY,KAAK,IAAI,EAAE;YACzB,YAAY,GAAG,CAAC,IAAI,CAAC,OAAO,CAAC,WAAuB,CAAC,CAAC,CAAC,CAAC;gBACtD,YAAY,CAAC,sBAAsB,CAAC,KAAK,CAAC,CAAC;gBAC3C,YAAY,CAAC,sBAAsB,CAAC,SAAS,CAAC;SACjD;QAED,IAAI,IAAI,CAAC,WAAW,CAAC,WAAuB,CAAC,IAAI,YAAY,KAAK,EAAE;YAClE,YAAY,GAAG,IAAI,CAAC,SAAS,CAAC;QAEhC,MAAM,cAAc,GAAG,iBAAiB,CAAC;QACzC,MAAM,SAAS,GAAG,IAAI,CAAC,YAAY,CAAC,WAAuB,CAAC,CAAC;QAC7D,wCAAwC;QACxC,SAAS,CAAC,IAAI,CAAC,CAAC,GAAW,EAAE,EAAE;YAC7B,MAAM,WAAW,GAAG,IAAI,CAAC,MAAM,CAAC,GAAG,CAAC,GAAG,CAAC,CAAC;YAEzC,mEAAmE;YACnE,mEAAmE;YACnE,0CAA0C;YAC1C,MAAM,YAAY,GAAG,WAAW,CAAC,UAAU,CAAC,KAAK,CAAC,IAAI,WAAW,CAAC,UAAU,CAAC,KAAK,CAAC,CAAC;YAEpF,iCAAiC;YACjC,MAAM,cAAc,GAAG,IAAI,CAAC,QAAQ,CAAC,WAAW,CAAC,CAAC;YAClD,MAAM,gBAAgB,GAAa,EAAE,CAAC;YACtC,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,cAAc,CAAC,MAAM,EAAE,CAAC,EAAE,EAAE;gBAC9C,IAAI,IAAI,GAAG,cAAc,CAAC,CAAC,CAAC,CAAC;gBAC7B,IAAI,YAAY;oBACd,IAAI,GAAG,IAAI,CAAC,OAAO,CAAC,cAAc,EAAE,EAAE,CAAC,CAAC;gBAC1C,IAAI,IAAI,KAAK,YAAY,CAAC,sBAAsB,CAAC,IAAI,EAAE;oBACrD,gBAAgB,CAAC,IAAI,CAAC,YAAa,CAAC,CAAC;iBACtC;qBAAM,IAAI,IAAI,CAAC,OAAO,CAAC,WAAuB,CAAC,IAAI,IAAI,CAAC,MAAM,GAAG,CAAC,EAAE;oBACnE,2DAA2D;oBAC3D,MAAM,OAAO,GAAG,GAAG,GAAG,IAAI,GAAG,GAAG,CAAC;oBACjC,gBAAgB,CAAC,IAAI,CAAC,OAAO,CAAC,CAAC;iBAChC;qBAAM;oBACL,gBAAgB,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;i
BAC7B;aACF;YACD,OAAO,gBAAgB,CAAC,IAAI,CAAC,YAAY,CAAC,CAAC;QAC7C,CAAC,CAAC,CAAC;QAEH,iFAAiF;QACjF,MAAM,QAAQ,GAAiB,oBAAoB,CAAC,SAAS,CAAC,CAAC;QAC/D,MAAM,KAAK,GAAgB,QAAQ,CAAC,SAAS,EAAE,CAAC,EAAE,QAAQ,CAAC,CAAC;QAC5D,MAAM,OAAO,GAAG,KAAK,CAAC,UAAU,CAAC,CAAC,CAAC,SAAS,CAAC,CAAC,CAAC,KAAK,CAAC;QACrD,SAAS,CAAC,MAAM,+BAAe,OAAO,CAAC,CAAC;QAExC,OAAO,SAAS,CAAC;IACnB,CAAC;IAEO,sBAAsB;QAC5B,iDAAiD;QACjD,OAAO,IAAI,CAAC,YAAY,sCAAoB,CAAC;IAC/C,CAAC;IAED;;;;;OAKG;IACI,OAAO,CAAC,WAAqB,EAAE,eAA8B,IAAI;QACtE,sBAAsB;QACtB,IAAI,IAAI,CAAC,QAAQ,KAAK,WAAW;YAC/B,MAAM,IAAI,KAAK,CAAC,yBAAyB,CAAC,CAAC;QAC7C,IAAI,IAAI,CAAC,WAAW,CAAC,WAAW,CAAC,IAAI,YAAY,KAAK,IAAI;YACxD,MAAM,IAAI,KAAK,CAAC,gCAAgC,CAAC,CAAC;QAEpD,IAAI,IAAI,CAAC,OAAO,EAAE,IAAI,IAAI,CAAC,WAAW,CAAC,WAAW,CAAC,IAAI,YAAY,KAAK,IAAI;YAC1E,OAAO,IAAI,CAAC,uBAAuB,CAAC,YAAY,CAAC,CAAC;aAC/C,IAAI,CAAC,IAAI,CAAC,OAAO,EAAE,IAAI,IAAI,CAAC,WAAW,EAAE,CAAC,IAAI,IAAI,CAAC,MAAM,CAAC,WAAW,CAAC;YACzE,OAAO,IAAI,CAAC,aAAa,EAAE,CAAC;aACzB,IAAI,IAAI,CAAC,WAAW,EAAE,IAAI,IAAI,CAAC,OAAO,CAAC,WAAW,CAAC;YACtD,OAAO,IAAI,CAAC,uBAAuB,EAAE,CAAC;aACnC,IAAI,IAAI,CAAC,MAAM,EAAE,IAAI,IAAI,CAAC,OAAO,CAAC,WAAW,CAAC,EAAE,mBAAmB;YACtE,OAAO,IAAI,CAAC,WAAW,CAAC,WAAW,CAAC,CAAC;aAClC,iDAAiD;YACpD,OAAO,IAAI,CAAC,WAAW,CAAC,WAAW,EAAE,YAAa,CAAC,CAAC;IACxD,CAAC;CAKF","sourcesContent":["/* Do not change these import lines to match external modules in webpack configuration */\nimport * as grok from 'datagrok-api/grok';\nimport * as ui from 'datagrok-api/ui';\nimport * as DG from 'datagrok-api/dg';\n\nimport {UnitsHandler} from './units-handler';\nimport {getSplitterForColumn, getStats, NOTATION, SeqColStats, SplitterFunc, TAGS} from './macromolecule';\n\n/** Class for handling conversion of notation systems in Macromolecule columns */\nexport class NotationConverter extends UnitsHandler {\n  private _splitter: SplitterFunc | null = null;\n\n  protected get splitter(): SplitterFunc {\n    if (this._splitter === null)\n      this._splitter = getSplitterForColumn(this.column);\n    return 
this._splitter;\n  }\n\n  public toFasta(targetNotation: NOTATION): boolean { return targetNotation === NOTATION.FASTA; }\n\n  public toSeparator(targetNotation: NOTATION): boolean { return targetNotation === NOTATION.SEPARATOR; }\n\n  public toHelm(targetNotation: NOTATION): boolean { return targetNotation === NOTATION.HELM; }\n\n  /**\n   * Convert a Macromolecule column from FASTA to SEPARATOR notation\n   *\n   * @param {string} separator  A specific separator to be used\n   * @param {string} fastaGapSymbol  Gap symbol in FASTA, '-' by default\n   * @return {DG.Column}        A new column in SEPARATOR notation\n   */\n  private convertFastaToSeparator(separator: string, fastaGapSymbol: string | null = null): DG.Column {\n    if (fastaGapSymbol === null)\n      fastaGapSymbol = this.defaultGapSymbol;\n\n    const newColumn = this.getNewColumn(NOTATION.SEPARATOR);\n    // assign the values to the newly created empty column\n    newColumn.init((idx: number) => {\n      const fastaPolymer = this.column.get(idx);\n      const fastaMonomersArray = this.splitter(fastaPolymer);\n      for (let i = 0; i < fastaMonomersArray.length; i++) {\n        if (fastaMonomersArray[i] === fastaGapSymbol)\n          fastaMonomersArray[i] = UnitsHandler._defaultGapSymbolsDict.SEPARATOR;\n      }\n      return fastaMonomersArray.join(separator);\n    });\n    newColumn.setTag(DG.TAGS.UNITS, NOTATION.SEPARATOR);\n    newColumn.setTag(TAGS.separator, separator);\n    return newColumn;\n  }\n\n  /**\n   * Get the wrapper strings for HELM, depending on the type of the\n   * macromolecule (peptide, DNA, RNA)\n   *\n   * @return {string[]} Array of wrappers\n   */\n  private getHelmWrappers(): string[] {\n    const prefix = (this.isDna()) ? 'DNA1{' :\n      (this.isRna()) ? 'RNA1{' :\n        (this.isPeptide()) ? 
'PEPTIDE1{' :\n          'Unknown'; // this case should be handled as exceptional\n\n    if (prefix === 'Unknown')\n      throw new Error('Neither peptide, nor nucleotide');\n\n    const postfix = '}$$$';\n    const leftWrapper = (this.isDna()) ? 'D(' :\n      (this.isRna()) ? 'R(' : ''; // no wrapper for peptides\n    const rightWrapper = (this.isDna() || this.isRna()) ? ')P' : ''; // no wrapper for peptides\n    return [prefix, leftWrapper, rightWrapper, postfix];\n  }\n\n  // A helper function for converting strings to HELM\n  private convertToHelmHelper(\n    sourcePolymer: string,\n    sourceGapSymbol: string,\n    prefix: string,\n    leftWrapper: string,\n    rightWrapper: string,\n    postfix: string\n  ): string {\n    const monomerArray = this.splitter(sourcePolymer);\n    const monomerHelmArray: string[] = monomerArray.map((mm: string) => {\n      if (mm === sourceGapSymbol)\n        return UnitsHandler._defaultGapSymbolsDict.HELM;\n      else\n        return `${leftWrapper}${mm}${rightWrapper}`;\n    });\n    return `${prefix}${monomerHelmArray.join('.')}${postfix}`;\n  }\n\n  /**\n   * Convert a string with SEPARATOR/FASTA notation to HELM\n   *\n   * @param {string} sourcePolymer  A string to be converted\n   * @param {string | null} sourceGapSymbol  An optional gap symbol, set to\n   * default values ('-' for FASTA and '' for SEPARATOR) unless specified\n   * @return {string}  The target HELM string\n   */\n  public convertStringToHelm(\n    sourcePolymer: string,\n    sourceGapSymbol: string | null = null\n  ): string {\n    if (sourceGapSymbol === null)\n      sourceGapSymbol = this.defaultGapSymbol;\n    const [prefix, leftWrapper, rightWrapper, postfix] = this.getHelmWrappers();\n    return this.convertToHelmHelper(sourcePolymer, sourceGapSymbol, prefix, leftWrapper, rightWrapper, postfix);\n  }\n\n  /**\n   * Convert a column to HELM\n   *\n   * @param {string | null} sourceGapSymbol\n   * @return {DG.Column}\n   */\n  private 
convertToHelm(sourceGapSymbol: string | null = null): DG.Column {\n    if (sourceGapSymbol === null)\n      sourceGapSymbol = this.defaultGapSymbol;\n\n    const [prefix, leftWrapper, rightWrapper, postfix] = this.getHelmWrappers();\n\n    const newColumn = this.getNewColumn(NOTATION.HELM);\n    // assign the values to the empty column\n    newColumn.init((idx: number) => {\n      const sourcePolymer = this.column.get(idx);\n      return this.convertToHelmHelper(sourcePolymer, sourceGapSymbol!, prefix, leftWrapper, rightWrapper, postfix);\n    });\n    newColumn.setTag(DG.TAGS.UNITS, NOTATION.HELM);\n    return newColumn;\n  }\n\n  /**\n   * Convert SEPARATOR column to FASTA notation\n   *\n   * @param {string | null} fastaGapSymbol Optional gap symbol for FASTA\n   * @return {DG.Column}  Converted column\n   */\n  private convertSeparatorToFasta(fastaGapSymbol: string | null = null): DG.Column {\n    if (fastaGapSymbol === null)\n      fastaGapSymbol = UnitsHandler._defaultGapSymbolsDict.FASTA;\n\n    const newColumn = this.getNewColumn(NOTATION.FASTA);\n    // assign the values to the empty column\n    newColumn.init((idx: number) => {\n      const separatorPolymer = this.column.get(idx);\n      // items can be monomers or separators\n      const separatorItemsArray = this.splitter(separatorPolymer);\n      const fastaMonomersArray: string[] = [];\n      for (let i = 0; i < separatorItemsArray.length; i++) {\n        const item = separatorItemsArray[i];\n        if (item.length === 0) {\n          fastaMonomersArray.push(fastaGapSymbol!);\n        } else if (item.length > 1) {\n          // the case of a multi-character monomer\n          const monomer = '[' + item + ']';\n          fastaMonomersArray.push(monomer);\n        } else {\n          fastaMonomersArray.push(item);\n        }\n      }\n      return fastaMonomersArray.join('');\n    });\n    newColumn.setTag(DG.TAGS.UNITS, NOTATION.FASTA);\n    return newColumn;\n  }\n\n  /**\n   *  Convert HELM column 
to FASTA/SEPARATOR\n   *\n   * @param {string} tgtNotation    Target notation: FASTA or SEPARATOR\n   * @param {string} tgtSeparator   Optional target separator (for HELM ->\n   * @param {string | null} tgtGapSymbol   Optional target gap symbol\n   * SEPARATOR)\n   * @return {DG.Column} Converted column\n   */\n  private convertHelm(\n    tgtNotation: string,\n    tgtSeparator: string = '',\n    tgtGapSymbol: string | null = null\n  ): DG.Column {\n    // This function must not contain calls of isDna() and isRna(), for\n    // source helm columns may contain RNA, DNA and PT across different rows\n    if (tgtGapSymbol === null) {\n      tgtGapSymbol = (this.toFasta(tgtNotation as NOTATION)) ?\n        UnitsHandler._defaultGapSymbolsDict.FASTA :\n        UnitsHandler._defaultGapSymbolsDict.SEPARATOR;\n    }\n\n    if (this.toSeparator(tgtNotation as NOTATION) && tgtSeparator === '')\n      tgtSeparator = this.separator;\n\n    const helmWrappersRe = /(R\\(|D\\(|\\)|P)/g;\n    const newColumn = this.getNewColumn(tgtNotation as NOTATION);\n    // assign the values to the empty column\n    newColumn.init((idx: number) => {\n      const helmPolymer = this.column.get(idx);\n\n      // we cannot use isDna() or isRna() because source helm columns can\n      // contain DNA, RNA and PT in different cells, so the corresponding\n      // tags cannot be set for the whole column\n      const isNucleotide = helmPolymer.startsWith('DNA') || helmPolymer.startsWith('RNA');\n\n      // items can be monomers or helms\n      const helmItemsArray = this.splitter(helmPolymer);\n      const tgtMonomersArray: string[] = [];\n      for (let i = 0; i < helmItemsArray.length; i++) {\n        let item = helmItemsArray[i];\n        if (isNucleotide)\n          item = item.replace(helmWrappersRe, '');\n        if (item === UnitsHandler._defaultGapSymbolsDict.HELM) {\n          tgtMonomersArray.push(tgtGapSymbol!);\n        } else if (this.toFasta(tgtNotation as NOTATION) && item.length > 1) {\n   
       // the case of a multi-character monomer converted to FASTA\n          const monomer = '[' + item + ']';\n          tgtMonomersArray.push(monomer);\n        } else {\n          tgtMonomersArray.push(item);\n        }\n      }\n      return tgtMonomersArray.join(tgtSeparator);\n    });\n\n    // TAGS.aligned is mandatory for columns of NOTATION.FASTA and NOTATION.SEPARATOR\n    const splitter: SplitterFunc = getSplitterForColumn(newColumn);\n    const stats: SeqColStats = getStats(newColumn, 5, splitter);\n    const aligned = stats.sameLength ? 'SEQ.MSA' : 'SEQ';\n    newColumn.setTag(TAGS.aligned, aligned);\n\n    return newColumn;\n  }\n\n  private convertHelmToSeparator(): DG.Column {\n    // TODO: implementatioreturn this.getNewColumn();\n    return this.getNewColumn(NOTATION.SEPARATOR);\n  }\n\n  /** Dispatcher method for notation conversion\n   *\n   * @param {NOTATION} tgtNotation   Notation we want to convert to\n   * @param {string | null} tgtSeparator   Possible separator\n   * @return {DG.Column}                Converted column\n   */\n  public convert(tgtNotation: NOTATION, tgtSeparator: string | null = null): DG.Column {\n    // possible exceptions\n    if (this.notation === tgtNotation)\n      throw new Error('tgt notation is invalid');\n    if (this.toSeparator(tgtNotation) && tgtSeparator === null)\n      throw new Error('tgt separator is not specified');\n\n    if (this.isFasta() && this.toSeparator(tgtNotation) && tgtSeparator !== null)\n      return this.convertFastaToSeparator(tgtSeparator);\n    else if ((this.isFasta() || this.isSeparator()) && this.toHelm(tgtNotation))\n      return this.convertToHelm();\n    else if (this.isSeparator() && this.toFasta(tgtNotation))\n      return this.convertSeparatorToFasta();\n    else if (this.isHelm() && this.toFasta(tgtNotation)) // the case of HELM\n      return this.convertHelm(tgtNotation);\n    else // this.isHelm() && this.toSeparator(tgtNotation)\n      return this.convertHelm(tgtNotation, 
tgtSeparator!);\n  }\n\n  public constructor(col: DG.Column) {\n    super(col);\n  }\n}\n"]}
|
|
229
|
+
//# sourceMappingURL=data:application/json;base64,{"version":3,"file":"notation-converter.js","sourceRoot":"","sources":["notation-converter.ts"],"names":[],"mappings":"AAGA,OAAO,KAAK,EAAE,MAAM,iBAAiB,CAAC;AAEtC,OAAO,EAAC,YAAY,EAAC,MAAM,iBAAiB,CAAC;AAC7C,OAAO,EAAC,oBAAoB,EAAE,QAAQ,EAA4C,MAAM,iBAAiB,CAAC;AAE1G,iFAAiF;AACjF,MAAM,OAAO,iBAAkB,SAAQ,YAAY;IA6PjD,YAAmB,GAAc;QAC/B,KAAK,CAAC,GAAG,CAAC,CAAC;QA7PL,cAAS,GAAwB,IAAI,CAAC;IA8P9C,CAAC;IA5PD,IAAc,QAAQ;QACpB,IAAI,IAAI,CAAC,SAAS,KAAK,IAAI;YACzB,IAAI,CAAC,SAAS,GAAG,oBAAoB,CAAC,IAAI,CAAC,MAAM,CAAC,CAAC;QACrD,OAAO,IAAI,CAAC,SAAS,CAAC;IACxB,CAAC;IAEM,OAAO,CAAC,cAAwB,IAAa,OAAO,cAAc,iCAAmB,CAAC,CAAC,CAAC;IAExF,WAAW,CAAC,cAAwB,IAAa,OAAO,cAAc,yCAAuB,CAAC,CAAC,CAAC;IAEhG,MAAM,CAAC,cAAwB,IAAa,OAAO,cAAc,+BAAkB,CAAC,CAAC,CAAC;IAE7F;;;;;;OAMG;IACK,uBAAuB,CAAC,SAAiB,EAAE,iBAAgC,IAAI;QACrF,IAAI,cAAc,KAAK,IAAI;YACzB,cAAc,GAAG,IAAI,CAAC,gBAAgB,CAAC;QAEzC,MAAM,SAAS,GAAG,IAAI,CAAC,YAAY,sCAAoB,CAAC;QACxD,sDAAsD;QACtD,SAAS,CAAC,IAAI,CAAC,CAAC,GAAW,EAAE,EAAE;YAC7B,MAAM,YAAY,GAAG,IAAI,CAAC,MAAM,CAAC,GAAG,CAAC,GAAG,CAAC,CAAC;YAC1C,MAAM,kBAAkB,GAAG,IAAI,CAAC,QAAQ,CAAC,YAAY,CAAC,CAAC;YACvD,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,kBAAkB,CAAC,MAAM,EAAE,CAAC,EAAE,EAAE;gBAClD,IAAI,kBAAkB,CAAC,CAAC,CAAC,KAAK,cAAc;oBAC1C,kBAAkB,CAAC,CAAC,CAAC,GAAG,YAAY,CAAC,sBAAsB,CAAC,SAAS,CAAC;aACzE;YACD,OAAO,kBAAkB,CAAC,IAAI,CAAC,SAAS,CAAC,CAAC;QAC5C,CAAC,CAAC,CAAC;QACH,SAAS,CAAC,MAAM,CAAC,EAAE,CAAC,IAAI,CAAC,KAAK,uCAAqB,CAAC;QACpD,SAAS,CAAC,MAAM,mCAAiB,SAAS,CAAC,CAAC;QAC5C,OAAO,SAAS,CAAC;IACnB,CAAC;IAED;;;;;OAKG;IACK,eAAe;QACrB,MAAM,MAAM,GAAG,CAAC,IAAI,CAAC,KAAK,EAAE,CAAC,CAAC,CAAC,CAAC,OAAO,CAAC,CAAC;YACvC,CAAC,IAAI,CAAC,KAAK,EAAE,CAAC,CAAC,CAAC,CAAC,OAAO,CAAC,CAAC;gBACxB,CAAC,IAAI,CAAC,SAAS,EAAE,CAAC,CAAC,CAAC,CAAC,WAAW,CAAC,CAAC;oBAChC,SAAS,CAAC,CAAC,6CAA6C;QAE9D,IAAI,MAAM,KAAK,SAAS;YACtB,MAAM,IAAI,KAAK,CAAC,iCAAiC,CAAC,CAAC;QAErD,MAAM,OAAO,GAAG,MAAM,CAAC;QACvB,MAAM,WAAW,GAAG,CAAC,IAAI,CAAC,KAAK,EAAE,CAAC,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC;YACzC,CAAC,IAAI,CAAC,KAAK
,EAAE,CAAC,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,0BAA0B;QACxD,MAAM,YAAY,GAAG,CAAC,IAAI,CAAC,KAAK,EAAE,IAAI,IAAI,CAAC,KAAK,EAAE,CAAC,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,0BAA0B;QAC3F,OAAO,CAAC,MAAM,EAAE,WAAW,EAAE,YAAY,EAAE,OAAO,CAAC,CAAC;IACtD,CAAC;IAED,mDAAmD;IAC3C,mBAAmB,CACzB,aAAqB,EACrB,eAAuB,EACvB,MAAc,EACd,WAAmB,EACnB,YAAoB,EACpB,OAAe;QAEf,MAAM,YAAY,GAAG,IAAI,CAAC,QAAQ,CAAC,aAAa,CAAC,CAAC;QAClD,MAAM,gBAAgB,GAAa,YAAY,CAAC,GAAG,CAAC,CAAC,EAAU,EAAE,EAAE;YACjE,IAAI,EAAE,KAAK,eAAe;gBACxB,OAAO,YAAY,CAAC,sBAAsB,CAAC,IAAI,CAAC;;gBAEhD,OAAO,GAAG,WAAW,GAAG,EAAE,GAAG,YAAY,EAAE,CAAC;QAChD,CAAC,CAAC,CAAC;QACH,OAAO,GAAG,MAAM,GAAG,gBAAgB,CAAC,IAAI,CAAC,GAAG,CAAC,GAAG,OAAO,EAAE,CAAC;IAC5D,CAAC;IAED;;;;;;;OAOG;IACI,mBAAmB,CACxB,aAAqB,EACrB,kBAAiC,IAAI;QAErC,IAAI,eAAe,KAAK,IAAI;YAC1B,eAAe,GAAG,IAAI,CAAC,gBAAgB,CAAC;QAC1C,MAAM,CAAC,MAAM,EAAE,WAAW,EAAE,YAAY,EAAE,OAAO,CAAC,GAAG,IAAI,CAAC,eAAe,EAAE,CAAC;QAC5E,OAAO,IAAI,CAAC,mBAAmB,CAAC,aAAa,EAAE,eAAe,EAAE,MAAM,EAAE,WAAW,EAAE,YAAY,EAAE,OAAO,CAAC,CAAC;IAC9G,CAAC;IAED;;;;;OAKG;IACK,aAAa,CAAC,kBAAiC,IAAI;QACzD,IAAI,eAAe,KAAK,IAAI;YAC1B,eAAe,GAAG,IAAI,CAAC,gBAAgB,CAAC;QAE1C,MAAM,CAAC,MAAM,EAAE,WAAW,EAAE,YAAY,EAAE,OAAO,CAAC,GAAG,IAAI,CAAC,eAAe,EAAE,CAAC;QAE5E,MAAM,SAAS,GAAG,IAAI,CAAC,YAAY,4BAAe,CAAC;QACnD,wCAAwC;QACxC,SAAS,CAAC,IAAI,CAAC,CAAC,GAAW,EAAE,EAAE;YAC7B,MAAM,aAAa,GAAG,IAAI,CAAC,MAAM,CAAC,GAAG,CAAC,GAAG,CAAC,CAAC;YAC3C,OAAO,IAAI,CAAC,mBAAmB,CAAC,aAAa,EAAE,eAAgB,EAAE,MAAM,EAAE,WAAW,EAAE,YAAY,EAAE,OAAO,CAAC,CAAC;QAC/G,CAAC,CAAC,CAAC;QACH,SAAS,CAAC,MAAM,CAAC,EAAE,CAAC,IAAI,CAAC,KAAK,6BAAgB,CAAC;QAC/C,OAAO,SAAS,CAAC;IACnB,CAAC;IAED;;;;;OAKG;IACK,uBAAuB,CAAC,iBAAgC,IAAI;QAClE,IAAI,cAAc,KAAK,IAAI;YACzB,cAAc,GAAG,YAAY,CAAC,sBAAsB,CAAC,KAAK,CAAC;QAE7D,MAAM,SAAS,GAAG,IAAI,CAAC,YAAY,8BAAgB,CAAC;QACpD,wCAAwC;QACxC,SAAS,CAAC,IAAI,CAAC,CAAC,GAAW,EAAE,EAAE;YAC7B,MAAM,gBAAgB,GAAG,IAAI,CAAC,MAAM,CAAC,GAAG,CAAC,GAAG,CAAC,CAAC;YAC9C,sCAAsC;YACtC,MAAM,mBAAmB,GAAG,IAAI,CAAC,QAAQ,CAAC,gBAAgB,CAAC,CAAC;YAC5D,MAAM,kBAAkB,GAAa,E
AAE,CAAC;YACxC,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,mBAAmB,CAAC,MAAM,EAAE,CAAC,EAAE,EAAE;gBACnD,MAAM,IAAI,GAAG,mBAAmB,CAAC,CAAC,CAAC,CAAC;gBACpC,IAAI,IAAI,CAAC,MAAM,KAAK,CAAC,EAAE;oBACrB,kBAAkB,CAAC,IAAI,CAAC,cAAe,CAAC,CAAC;iBAC1C;qBAAM,IAAI,IAAI,CAAC,MAAM,GAAG,CAAC,EAAE;oBAC1B,wCAAwC;oBACxC,MAAM,OAAO,GAAG,GAAG,GAAG,IAAI,GAAG,GAAG,CAAC;oBACjC,kBAAkB,CAAC,IAAI,CAAC,OAAO,CAAC,CAAC;iBAClC;qBAAM;oBACL,kBAAkB,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;iBAC/B;aACF;YACD,OAAO,kBAAkB,CAAC,IAAI,CAAC,EAAE,CAAC,CAAC;QACrC,CAAC,CAAC,CAAC;QACH,SAAS,CAAC,MAAM,CAAC,EAAE,CAAC,IAAI,CAAC,KAAK,+BAAiB,CAAC;QAChD,OAAO,SAAS,CAAC;IACnB,CAAC;IAED;;;;;;;;OAQG;IACK,WAAW,CAAC,WAAmB,EAAE,YAAqB,EAAE,YAAqB;QACnF,mEAAmE;QACnE,wEAAwE;QACxE,IAAI,CAAC,YAAY,EAAE;YACjB,YAAY,GAAG,CAAC,IAAI,CAAC,OAAO,CAAC,WAAuB,CAAC,CAAC,CAAC,CAAC;gBACtD,YAAY,CAAC,sBAAsB,CAAC,KAAK,CAAC,CAAC;gBAC3C,YAAY,CAAC,sBAAsB,CAAC,SAAS,CAAC;SACjD;QAED,IAAI,CAAC,YAAY,EAAE;YACjB,YAAY,GAAG,CAAC,IAAI,CAAC,OAAO,CAAC,WAAuB,CAAC,CAAC,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,IAAI,CAAC,SAAS,CAAC;SAC9E;QAED,MAAM,cAAc,GAAG,iBAAiB,CAAC;QACzC,MAAM,SAAS,GAAG,IAAI,CAAC,YAAY,CAAC,WAAuB,CAAC,CAAC;QAC7D,wCAAwC;QACxC,SAAS,CAAC,IAAI,CAAC,CAAC,GAAW,EAAE,EAAE;YAC7B,MAAM,WAAW,GAAG,IAAI,CAAC,MAAM,CAAC,GAAG,CAAC,GAAG,CAAC,CAAC;YAEzC,mEAAmE;YACnE,mEAAmE;YACnE,0CAA0C;YAC1C,MAAM,YAAY,GAAG,WAAW,CAAC,UAAU,CAAC,KAAK,CAAC,IAAI,WAAW,CAAC,UAAU,CAAC,KAAK,CAAC,CAAC;YAEpF,iCAAiC;YACjC,MAAM,cAAc,GAAG,IAAI,CAAC,QAAQ,CAAC,WAAW,CAAC,CAAC;YAClD,MAAM,gBAAgB,GAAa,EAAE,CAAC;YACtC,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,cAAc,CAAC,MAAM,EAAE,CAAC,EAAE,EAAE;gBAC9C,IAAI,IAAI,GAAG,cAAc,CAAC,CAAC,CAAC,CAAC;gBAC7B,IAAI,YAAY;oBACd,IAAI,GAAG,IAAI,CAAC,OAAO,CAAC,cAAc,EAAE,EAAE,CAAC,CAAC;gBAC1C,IAAI,IAAI,KAAK,YAAY,CAAC,sBAAsB,CAAC,IAAI,EAAE;oBACrD,gBAAgB,CAAC,IAAI,CAAC,YAAa,CAAC,CAAC;iBACtC;qBAAM,IAAI,IAAI,CAAC,OAAO,CAAC,WAAuB,CAAC,IAAI,IAAI,CAAC,MAAM,GAAG,CAAC,EAAE;oBACnE,2DAA2D;oBAC3D,MAAM,OAAO,GAAG,GAAG,GAAG,IAAI,GAAG,GAAG,CAAC;oBACjC,gBAAgB,CAAC,IAAI,CAAC,OAAO,CAAC,CAAC;iBAChC;qBAAM;oBACL,gBAAgB,CAAC
,IAAI,CAAC,IAAI,CAAC,CAAC;iBAC7B;aACF;YACD,OAAO,gBAAgB,CAAC,IAAI,CAAC,YAAY,CAAC,CAAC;QAC7C,CAAC,CAAC,CAAC;QAEH,iFAAiF;QACjF,MAAM,QAAQ,GAAiB,oBAAoB,CAAC,SAAS,CAAC,CAAC;QAC/D,MAAM,KAAK,GAAgB,QAAQ,CAAC,SAAS,EAAE,CAAC,EAAE,QAAQ,CAAC,CAAC;QAC5D,MAAM,OAAO,GAAG,KAAK,CAAC,UAAU,CAAC,CAAC,CAAC,SAAS,CAAC,CAAC,CAAC,KAAK,CAAC;QACrD,SAAS,CAAC,MAAM,+BAAe,OAAO,CAAC,CAAC;QAExC,OAAO,SAAS,CAAC;IACnB,CAAC;IAEO,sBAAsB;QAC5B,iDAAiD;QACjD,OAAO,IAAI,CAAC,YAAY,sCAAoB,CAAC;IAC/C,CAAC;IAED;;;;;OAKG;IACI,OAAO,CAAC,WAAqB,EAAE,eAA8B,IAAI;QACtE,sBAAsB;QACtB,IAAI,IAAI,CAAC,QAAQ,KAAK,WAAW;YAC/B,MAAM,IAAI,KAAK,CAAC,yBAAyB,CAAC,CAAC;QAC7C,IAAI,IAAI,CAAC,WAAW,CAAC,WAAW,CAAC,IAAI,YAAY,KAAK,IAAI;YACxD,MAAM,IAAI,KAAK,CAAC,gCAAgC,CAAC,CAAC;QAEpD,IAAI,IAAI,CAAC,OAAO,EAAE,IAAI,IAAI,CAAC,WAAW,CAAC,WAAW,CAAC,IAAI,YAAY,KAAK,IAAI;YAC1E,OAAO,IAAI,CAAC,uBAAuB,CAAC,YAAY,CAAC,CAAC;aAC/C,IAAI,CAAC,IAAI,CAAC,OAAO,EAAE,IAAI,IAAI,CAAC,WAAW,EAAE,CAAC,IAAI,IAAI,CAAC,MAAM,CAAC,WAAW,CAAC;YACzE,OAAO,IAAI,CAAC,aAAa,EAAE,CAAC;aACzB,IAAI,IAAI,CAAC,WAAW,EAAE,IAAI,IAAI,CAAC,OAAO,CAAC,WAAW,CAAC;YACtD,OAAO,IAAI,CAAC,uBAAuB,EAAE,CAAC;aACnC,IAAI,IAAI,CAAC,MAAM,EAAE,IAAI,IAAI,CAAC,OAAO,CAAC,WAAW,CAAC,EAAE,mBAAmB;YACtE,OAAO,IAAI,CAAC,WAAW,CAAC,WAAW,CAAC,CAAC;aAClC,IAAI,IAAI,CAAC,MAAM,EAAE,IAAI,IAAI,CAAC,WAAW,CAAC,WAAW,CAAC;YACrD,OAAO,IAAI,CAAC,WAAW,CAAC,WAAW,EAAE,YAAa,CAAC,CAAC;;YAEpD,MAAM,IAAI,KAAK,CAAC,2BAA2B;gBACzC,yBAAyB,IAAI,CAAC,QAAQ,yBAAyB,WAAW,IAAI,CAAC,CAAC;IACtF,CAAC;CAKF","sourcesContent":["/* Do not change these import lines to match external modules in webpack configuration */\nimport * as grok from 'datagrok-api/grok';\nimport * as ui from 'datagrok-api/ui';\nimport * as DG from 'datagrok-api/dg';\n\nimport {UnitsHandler} from './units-handler';\nimport {getSplitterForColumn, getStats, NOTATION, SeqColStats, SplitterFunc, TAGS} from './macromolecule';\n\n/** Class for handling conversion of notation systems in Macromolecule columns */\nexport class NotationConverter extends UnitsHandler {\n  private _splitter: 
SplitterFunc | null = null;\n\n  protected get splitter(): SplitterFunc {\n    if (this._splitter === null)\n      this._splitter = getSplitterForColumn(this.column);\n    return this._splitter;\n  }\n\n  public toFasta(targetNotation: NOTATION): boolean { return targetNotation === NOTATION.FASTA; }\n\n  public toSeparator(targetNotation: NOTATION): boolean { return targetNotation === NOTATION.SEPARATOR; }\n\n  public toHelm(targetNotation: NOTATION): boolean { return targetNotation === NOTATION.HELM; }\n\n  /**\n   * Convert a Macromolecule column from FASTA to SEPARATOR notation\n   *\n   * @param {string} separator  A specific separator to be used\n   * @param {string} fastaGapSymbol  Gap symbol in FASTA, '-' by default\n   * @return {DG.Column}        A new column in SEPARATOR notation\n   */\n  private convertFastaToSeparator(separator: string, fastaGapSymbol: string | null = null): DG.Column {\n    if (fastaGapSymbol === null)\n      fastaGapSymbol = this.defaultGapSymbol;\n\n    const newColumn = this.getNewColumn(NOTATION.SEPARATOR);\n    // assign the values to the newly created empty column\n    newColumn.init((idx: number) => {\n      const fastaPolymer = this.column.get(idx);\n      const fastaMonomersArray = this.splitter(fastaPolymer);\n      for (let i = 0; i < fastaMonomersArray.length; i++) {\n        if (fastaMonomersArray[i] === fastaGapSymbol)\n          fastaMonomersArray[i] = UnitsHandler._defaultGapSymbolsDict.SEPARATOR;\n      }\n      return fastaMonomersArray.join(separator);\n    });\n    newColumn.setTag(DG.TAGS.UNITS, NOTATION.SEPARATOR);\n    newColumn.setTag(TAGS.separator, separator);\n    return newColumn;\n  }\n\n  /**\n   * Get the wrapper strings for HELM, depending on the type of the\n   * macromolecule (peptide, DNA, RNA)\n   *\n   * @return {string[]} Array of wrappers\n   */\n  private getHelmWrappers(): string[] {\n    const prefix = (this.isDna()) ? 'DNA1{' :\n      (this.isRna()) ? 'RNA1{' :\n        (this.isPeptide()) ? 
'PEPTIDE1{' :\n          'Unknown'; // this case should be handled as exceptional\n\n    if (prefix === 'Unknown')\n      throw new Error('Neither peptide, nor nucleotide');\n\n    const postfix = '}$$$';\n    const leftWrapper = (this.isDna()) ? 'D(' :\n      (this.isRna()) ? 'R(' : ''; // no wrapper for peptides\n    const rightWrapper = (this.isDna() || this.isRna()) ? ')P' : ''; // no wrapper for peptides\n    return [prefix, leftWrapper, rightWrapper, postfix];\n  }\n\n  // A helper function for converting strings to HELM\n  private convertToHelmHelper(\n    sourcePolymer: string,\n    sourceGapSymbol: string,\n    prefix: string,\n    leftWrapper: string,\n    rightWrapper: string,\n    postfix: string\n  ): string {\n    const monomerArray = this.splitter(sourcePolymer);\n    const monomerHelmArray: string[] = monomerArray.map((mm: string) => {\n      if (mm === sourceGapSymbol)\n        return UnitsHandler._defaultGapSymbolsDict.HELM;\n      else\n        return `${leftWrapper}${mm}${rightWrapper}`;\n    });\n    return `${prefix}${monomerHelmArray.join('.')}${postfix}`;\n  }\n\n  /**\n   * Convert a string with SEPARATOR/FASTA notation to HELM\n   *\n   * @param {string} sourcePolymer  A string to be converted\n   * @param {string | null} sourceGapSymbol  An optional gap symbol, set to\n   * default values ('-' for FASTA and '' for SEPARATOR) unless specified\n   * @return {string}  The target HELM string\n   */\n  public convertStringToHelm(\n    sourcePolymer: string,\n    sourceGapSymbol: string | null = null\n  ): string {\n    if (sourceGapSymbol === null)\n      sourceGapSymbol = this.defaultGapSymbol;\n    const [prefix, leftWrapper, rightWrapper, postfix] = this.getHelmWrappers();\n    return this.convertToHelmHelper(sourcePolymer, sourceGapSymbol, prefix, leftWrapper, rightWrapper, postfix);\n  }\n\n  /**\n   * Convert a column to HELM\n   *\n   * @param {string | null} sourceGapSymbol\n   * @return {DG.Column}\n   */\n  private 
convertToHelm(sourceGapSymbol: string | null = null): DG.Column {\n    if (sourceGapSymbol === null)\n      sourceGapSymbol = this.defaultGapSymbol;\n\n    const [prefix, leftWrapper, rightWrapper, postfix] = this.getHelmWrappers();\n\n    const newColumn = this.getNewColumn(NOTATION.HELM);\n    // assign the values to the empty column\n    newColumn.init((idx: number) => {\n      const sourcePolymer = this.column.get(idx);\n      return this.convertToHelmHelper(sourcePolymer, sourceGapSymbol!, prefix, leftWrapper, rightWrapper, postfix);\n    });\n    newColumn.setTag(DG.TAGS.UNITS, NOTATION.HELM);\n    return newColumn;\n  }\n\n  /**\n   * Convert SEPARATOR column to FASTA notation\n   *\n   * @param {string | null} fastaGapSymbol Optional gap symbol for FASTA\n   * @return {DG.Column}  Converted column\n   */\n  private convertSeparatorToFasta(fastaGapSymbol: string | null = null): DG.Column {\n    if (fastaGapSymbol === null)\n      fastaGapSymbol = UnitsHandler._defaultGapSymbolsDict.FASTA;\n\n    const newColumn = this.getNewColumn(NOTATION.FASTA);\n    // assign the values to the empty column\n    newColumn.init((idx: number) => {\n      const separatorPolymer = this.column.get(idx);\n      // items can be monomers or separators\n      const separatorItemsArray = this.splitter(separatorPolymer);\n      const fastaMonomersArray: string[] = [];\n      for (let i = 0; i < separatorItemsArray.length; i++) {\n        const item = separatorItemsArray[i];\n        if (item.length === 0) {\n          fastaMonomersArray.push(fastaGapSymbol!);\n        } else if (item.length > 1) {\n          // the case of a multi-character monomer\n          const monomer = '[' + item + ']';\n          fastaMonomersArray.push(monomer);\n        } else {\n          fastaMonomersArray.push(item);\n        }\n      }\n      return fastaMonomersArray.join('');\n    });\n    newColumn.setTag(DG.TAGS.UNITS, NOTATION.FASTA);\n    return newColumn;\n  }\n\n  /**\n   *  Convert HELM column 
to FASTA/SEPARATOR\n   *\n   * @param {string} tgtNotation    Target notation: FASTA or SEPARATOR\n   * @param {string} tgtSeparator   Optional target separator (for HELM ->\n   * @param {string | null} tgtGapSymbol   Optional target gap symbol\n   * SEPARATOR)\n   * @return {DG.Column} Converted column\n   */\n  private convertHelm(tgtNotation: string, tgtSeparator?: string, tgtGapSymbol?: string): DG.Column {\n    // This function must not contain calls of isDna() and isRna(), for\n    // source helm columns may contain RNA, DNA and PT across different rows\n    if (!tgtGapSymbol) {\n      tgtGapSymbol = (this.toFasta(tgtNotation as NOTATION)) ?\n        UnitsHandler._defaultGapSymbolsDict.FASTA :\n        UnitsHandler._defaultGapSymbolsDict.SEPARATOR;\n    }\n\n    if (!tgtSeparator) {\n      tgtSeparator = (this.toFasta(tgtNotation as NOTATION)) ? '' : this.separator;\n    }\n\n    const helmWrappersRe = /(R\\(|D\\(|\\)|P)/g;\n    const newColumn = this.getNewColumn(tgtNotation as NOTATION);\n    // assign the values to the empty column\n    newColumn.init((idx: number) => {\n      const helmPolymer = this.column.get(idx);\n\n      // we cannot use isDna() or isRna() because source helm columns can\n      // contain DNA, RNA and PT in different cells, so the corresponding\n      // tags cannot be set for the whole column\n      const isNucleotide = helmPolymer.startsWith('DNA') || helmPolymer.startsWith('RNA');\n\n      // items can be monomers or helms\n      const helmItemsArray = this.splitter(helmPolymer);\n      const tgtMonomersArray: string[] = [];\n      for (let i = 0; i < helmItemsArray.length; i++) {\n        let item = helmItemsArray[i];\n        if (isNucleotide)\n          item = item.replace(helmWrappersRe, '');\n        if (item === UnitsHandler._defaultGapSymbolsDict.HELM) {\n          tgtMonomersArray.push(tgtGapSymbol!);\n        } else if (this.toFasta(tgtNotation as NOTATION) && item.length > 1) {\n          // the case of a multi-character 
monomer converted to FASTA\n          const monomer = '[' + item + ']';\n          tgtMonomersArray.push(monomer);\n        } else {\n          tgtMonomersArray.push(item);\n        }\n      }\n      return tgtMonomersArray.join(tgtSeparator);\n    });\n\n    // TAGS.aligned is mandatory for columns of NOTATION.FASTA and NOTATION.SEPARATOR\n    const splitter: SplitterFunc = getSplitterForColumn(newColumn);\n    const stats: SeqColStats = getStats(newColumn, 5, splitter);\n    const aligned = stats.sameLength ? 'SEQ.MSA' : 'SEQ';\n    newColumn.setTag(TAGS.aligned, aligned);\n\n    return newColumn;\n  }\n\n  private convertHelmToSeparator(): DG.Column {\n    // TODO: implementatioreturn this.getNewColumn();\n    return this.getNewColumn(NOTATION.SEPARATOR);\n  }\n\n  /** Dispatcher method for notation conversion\n   *\n   * @param {NOTATION} tgtNotation   Notation we want to convert to\n   * @param {string | null} tgtSeparator   Possible separator\n   * @return {DG.Column}                Converted column\n   */\n  public convert(tgtNotation: NOTATION, tgtSeparator: string | null = null): DG.Column {\n    // possible exceptions\n    if (this.notation === tgtNotation)\n      throw new Error('tgt notation is invalid');\n    if (this.toSeparator(tgtNotation) && tgtSeparator === null)\n      throw new Error('tgt separator is not specified');\n\n    if (this.isFasta() && this.toSeparator(tgtNotation) && tgtSeparator !== null)\n      return this.convertFastaToSeparator(tgtSeparator);\n    else if ((this.isFasta() || this.isSeparator()) && this.toHelm(tgtNotation))\n      return this.convertToHelm();\n    else if (this.isSeparator() && this.toFasta(tgtNotation))\n      return this.convertSeparatorToFasta();\n    else if (this.isHelm() && this.toFasta(tgtNotation)) // the case of HELM\n      return this.convertHelm(tgtNotation);\n    else if (this.isHelm() && this.toSeparator(tgtNotation))\n      return this.convertHelm(tgtNotation, tgtSeparator!);\n    else\n      throw 
new Error('Not supported conversion ' +\n        `from source notation '${this.notation}' to target notation '${tgtNotation}'.`);\n  }\n\n  public constructor(col: DG.Column) {\n    super(col);\n  }\n}\n"]}
|
|
@@ -19,7 +19,7 @@ export declare class UnitsHandler {
|
|
|
19
19
|
protected get column(): DG.Column;
|
|
20
20
|
get notation(): NOTATION;
|
|
21
21
|
get defaultGapSymbol(): string;
|
|
22
|
-
get separator(): string;
|
|
22
|
+
get separator(): string | undefined;
|
|
23
23
|
get aligned(): string;
|
|
24
24
|
/** Alphabet name (upper case) */
|
|
25
25
|
get alphabet(): string;
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"units-handler.d.ts","sourceRoot":"","sources":["units-handler.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,MAAM,iBAAiB,CAAC;
|
|
1
|
+
{"version":3,"file":"units-handler.d.ts","sourceRoot":"","sources":["units-handler.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,MAAM,iBAAiB,CAAC;AAEtC,OAAO,EAKL,QAAQ,EAKT,MAAM,iBAAiB,CAAC;AAEzB,iEAAiE;AACjE,qBAAa,YAAY;IACvB,SAAS,CAAC,QAAQ,CAAC,OAAO,EAAE,EAAE,CAAC,MAAM,CAAC;IACtC,SAAS,CAAC,MAAM,EAAE,MAAM,CAAC;IACzB,SAAS,CAAC,SAAS,EAAE,QAAQ,CAAC;IAC9B,SAAS,CAAC,iBAAiB,EAAE,MAAM,CAAC;IACpC,SAAS,CAAC,MAAM,CAAC,QAAQ,CAAC,sBAAsB;;;;MAI9C;IAEF,gBAAuB,oBAAoB,cAGxC;IACH,gBAAuB,gBAAgB,cAAyC;IAChF,gBAAuB,gBAAgB,cAAyC;WAElE,qBAAqB,CAAC,GAAG,EAAE,EAAE,CAAC,MAAM;IAalD,SAAS,KAAK,KAAK,IAAI,MAAM,CAAwB;IAErD,SAAS,KAAK,MAAM,IAAI,EAAE,CAAC,MAAM,CAAyB;IAE1D,IAAW,QAAQ,IAAI,QAAQ,CAA2B;IAE1D,IAAW,gBAAgB,IAAI,MAAM,CAAmC;IAExE,IAAW,SAAS,IAAI,MAAM,GAAG,SAAS,CAKzC;IAED,IAAW,OAAO,IAAI,MAAM,CAQ3B;IAED,iCAAiC;IACjC,IAAW,QAAQ,IAAI,MAAM,CAQ5B;IAEM,eAAe,IAAI,MAAM;IA6BzB,sBAAsB,IAAI,OAAO;IAOjC,OAAO,IAAI,OAAO;IAElB,WAAW,IAAI,OAAO;IAEtB,MAAM,IAAI,OAAO;IAEjB,KAAK,IAAI,OAAO;IAEhB,KAAK,IAAI,OAAO;IAEhB,SAAS,IAAI,OAAO;IAEpB,KAAK,IAAI,OAAO;IAEvB,4DAA4D;IAC5D;;OAEG;IACH,SAAS,CAAC,WAAW,IAAI,QAAQ;IAWjC;;;;;;OAMG;IACH,SAAS,CAAC,YAAY,CAAC,cAAc,EAAE,QAAQ,GAAG,EAAE,CAAC,MAAM;IAkC3D;;;;;;OAMG;WACW,YAAY,CAAC,WAAW,EAAE,EAAE,CAAC,MAAM,GAAG,EAAE,CAAC,MAAM;IAM7D;;;;;OAKG;WACW,kBAAkB,CAAC,KAAK,EAAE,MAAM,GAAG,OAAO;IASxD;;;;;;;;OAQG;WACW,sBAAsB,CAClC,GAAG,EAAE,MAAM,EACX,IAAI,EAAE,MAAM,EACZ,KAAK,EAAE,MAAM,GACZ,EAAE,CAAC,MAAM;gBAYO,GAAG,EAAE,EAAE,CAAC,MAAM;CAoClC"}
|
|
@@ -1,5 +1,5 @@
|
|
|
1
1
|
import * as DG from 'datagrok-api/dg';
|
|
2
|
-
import { getStats, splitterAsFasta
|
|
2
|
+
import { detectAlphabet, getSplitterForColumn, getStats, splitterAsFasta } from './macromolecule';
|
|
3
3
|
/** Class for handling notation units in Macromolecule columns */
|
|
4
4
|
export class UnitsHandler {
|
|
5
5
|
constructor(col) {
|
|
@@ -18,14 +18,14 @@ export class UnitsHandler {
|
|
|
18
18
|
throw new Error(`For column '${this.column.name}' of notation '${this.notation}' ` +
|
|
19
19
|
`tag '${"aligned" /* TAGS.aligned */}' is mandatory.`);
|
|
20
20
|
}
|
|
21
|
-
if (!this.column.tags.has(
|
|
22
|
-
|
|
23
|
-
|
|
24
|
-
|
|
25
|
-
|
|
26
|
-
|
|
27
|
-
|
|
28
|
-
}
|
|
21
|
+
// if (!this.column.tags.has(TAGS.alphabetSize)) {
|
|
22
|
+
// if (this.isHelm())
|
|
23
|
+
// throw new Error(`For column '${this.column.name}' of notation '${this.notation}' ` +
|
|
24
|
+
// `tag '${TAGS.alphabetSize}' is mandatory.`);
|
|
25
|
+
// else if (['UN'].includes(this.alphabet))
|
|
26
|
+
// throw new Error(`For column '${this.column.name}' of alphabet '${this.alphabet}' ` +
|
|
27
|
+
// `tag '${TAGS.alphabetSize}' is mandatory.`);
|
|
28
|
+
// }
|
|
29
29
|
if (!this.column.tags.has(".alphabetIsMultichar" /* TAGS.alphabetIsMultichar */)) {
|
|
30
30
|
if (this.isHelm())
|
|
31
31
|
throw new Error(`For column '${this.column.name}' of notation '${this.notation}' ` +
|
|
@@ -50,11 +50,11 @@ export class UnitsHandler {
|
|
|
50
50
|
get notation() { return this._notation; }
|
|
51
51
|
get defaultGapSymbol() { return this._defaultGapSymbol; }
|
|
52
52
|
get separator() {
|
|
53
|
-
|
|
54
|
-
|
|
55
|
-
|
|
56
|
-
|
|
57
|
-
|
|
53
|
+
var _a;
|
|
54
|
+
const separator = (_a = this.column.getTag("separator" /* TAGS.separator */)) !== null && _a !== void 0 ? _a : undefined;
|
|
55
|
+
if (this.notation === "separator" /* NOTATION.SEPARATOR */ && separator === undefined)
|
|
56
|
+
throw new Error(`Separator is mandatory for column '${this.column.name}' of notation '${this.notation}'.`);
|
|
57
|
+
return separator;
|
|
58
58
|
}
|
|
59
59
|
get aligned() {
|
|
60
60
|
const aligned = this.column.getTag("aligned" /* TAGS.aligned */);
|
|
@@ -73,7 +73,17 @@ export class UnitsHandler {
|
|
|
73
73
|
}
|
|
74
74
|
getAlphabetSize() {
|
|
75
75
|
if (this.notation == "helm" /* NOTATION.HELM */ || this.alphabet == "UN" /* ALPHABET.UN */) {
|
|
76
|
-
const
|
|
76
|
+
const alphabetSizeStr = this.column.getTag(".alphabetSize" /* TAGS.alphabetSize */);
|
|
77
|
+
let alphabetSize;
|
|
78
|
+
if (alphabetSizeStr) {
|
|
79
|
+
alphabetSize = parseInt(alphabetSizeStr);
|
|
80
|
+
}
|
|
81
|
+
else {
|
|
82
|
+
// calculate alphabetSize on demand
|
|
83
|
+
const splitter = getSplitterForColumn(this.column);
|
|
84
|
+
const stats = getStats(this.column, 1, splitter);
|
|
85
|
+
alphabetSize = Object.keys(stats.freq).length;
|
|
86
|
+
}
|
|
77
87
|
return alphabetSize;
|
|
78
88
|
}
|
|
79
89
|
else {
|
|
@@ -209,4 +219,4 @@ UnitsHandler.PeptideFastaAlphabet = new Set([
|
|
|
209
219
|
]);
|
|
210
220
|
UnitsHandler.DnaFastaAlphabet = new Set(['A', 'C', 'G', 'T']);
|
|
211
221
|
UnitsHandler.RnaFastaAlphabet = new Set(['A', 'C', 'G', 'U']);
|
|
212
|
-
//# sourceMappingURL=data:application/json;base64,{"version":3,"file":"units-handler.js","sourceRoot":"","sources":["units-handler.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,MAAM,iBAAiB,CAAC;AAItC,OAAO,EAEL,QAAQ,EAER,eAAe,EACf,cAAc,EACf,MAAM,iBAAiB,CAAC;AAEzB,iEAAiE;AACjE,MAAM,OAAO,YAAY;IA0NvB,YAAmB,GAAc;QAC/B,IAAI,CAAC,OAAO,GAAG,GAAG,CAAC;QACnB,MAAM,KAAK,GAAG,IAAI,CAAC,OAAO,CAAC,IAAI,CAAC,EAAE,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC;QAC/C,IAAI,KAAK,KAAK,IAAI;YAChB,IAAI,CAAC,MAAM,GAAG,KAAK,CAAC;;YAEpB,MAAM,IAAI,KAAK,CAAC,mCAAmC,CAAC,CAAC;QACvD,IAAI,CAAC,SAAS,GAAG,IAAI,CAAC,WAAW,EAAE,CAAC;QACpC,IAAI,CAAC,iBAAiB,GAAG,CAAC,IAAI,CAAC,OAAO,EAAE,CAAC,CAAC,CAAC,CAAC,YAAY,CAAC,sBAAsB,CAAC,KAAK,CAAC,CAAC;YACrF,CAAC,IAAI,CAAC,MAAM,EAAE,CAAC,CAAC,CAAC,CAAC,YAAY,CAAC,sBAAsB,CAAC,IAAI,CAAC,CAAC;gBAC1D,YAAY,CAAC,sBAAsB,CAAC,SAAS,CAAC;QAElD,IAAI,CAAC,IAAI,CAAC,MAAM,CAAC,IAAI,CAAC,GAAG,8BAAc,EAAE;YACvC,IAAI,IAAI,CAAC,OAAO,EAAE,IAAI,IAAI,CAAC,WAAW,EAAE;gBACtC,MAAM,IAAI,KAAK,CAAC,eAAe,IAAI,CAAC,MAAM,CAAC,IAAI,kBAAkB,IAAI,CAAC,QAAQ,IAAI;oBAChF,QAAQ,4BAAY,iBAAiB,CAAC,CAAC;SAC5C;QAED,IAAI,CAAC,IAAI,CAAC,MAAM,CAAC,IAAI,CAAC,GAAG,yCAAmB,EAAE;YAC5C,IAAI,IAAI,CAAC,MAAM,EAAE;gBACf,MAAM,IAAI,KAAK,CAAC,eAAe,IAAI,CAAC,MAAM,CAAC,IAAI,kBAAkB,IAAI,CAAC,QAAQ,IAAI;oBAChF,QAAQ,uCAAiB,iBAAiB,CAAC,CAAC;iBAC3C,IAAI,CAAC,IAAI,CAAC,CAAC,QAAQ,CAAC,IAAI,CAAC,QAAQ,CAAC;gBACrC,MAAM,IAAI,KAAK,CAAC,eAAe,IAAI,CAAC,MAAM,CAAC,IAAI,kBAAkB,IAAI,CAAC,QAAQ,IAAI;oBAChF,QAAQ,uCAAiB,iBAAiB,CAAC,CAAC;SACjD;QAED,IAAI,CAAC,IAAI,CAAC,MAAM,CAAC,IAAI,CAAC,GAAG,uDAA0B,EAAE;YACnD,IAAI,IAAI,CAAC,MAAM,EAAE;gBACf,MAAM,IAAI,KAAK,CAAC,eAAe,IAAI,CAAC,MAAM,CAAC,IAAI,kBAAkB,IAAI,CAAC,QAAQ,IAAI;oBAChF,QAAQ,qDAAwB,iBAAiB,CAAC,CAAC;iBAClD,IAAI,CAAC,IAAI,CAAC,CAAC,QAAQ,CAAC,IAAI,CAAC,QAAQ,CAAC;gBACrC,MAAM,IAAI,KAAK,CAAC,eAAe,IAAI,CAAC,MAAM,CAAC,IAAI,kBAAkB,IAAI,CAAC,QAAQ,IAAI;oBAChF,QAAQ,qDAAwB,iBAAiB,CAAC,CAAC;SACxD;IACH,CAAC;IA3OM,MAAM,CAAC,qBAAqB,CAAC,GAAc;QAChD,IAAI,GAAG,CAAC,OAAO,KAAK,EAAE,CAAC,OAAO,CAAC,aAAa;YAC1C,MAAM,IAAI,KAAK,CAAC,oCAAo
C,CAAC,CAAC;QAExD,MAAM,KAAK,GAAgB,QAAQ,CAAC,GAAG,EAAE,CAAC,EAAE,eAAe,CAAC,CAAC;QAC7D,MAAM,OAAO,GAAG,KAAK,CAAC,UAAU,CAAC,CAAC,CAAC,SAAS,CAAC,CAAC,CAAC,KAAK,CAAC;QACrD,MAAM,QAAQ,GAAG,cAAc,CAAC,KAAK,CAAC,CAAC;QAEvC,GAAG,CAAC,MAAM,CAAC,EAAE,CAAC,IAAI,CAAC,KAAK,+BAAiB,CAAC;QAC1C,GAAG,CAAC,MAAM,+BAAe,OAAO,CAAC,CAAC;QAClC,GAAG,CAAC,MAAM,iCAAgB,QAAQ,CAAC,CAAC;IACtC,CAAC;IAED,IAAc,KAAK,KAAa,OAAO,IAAI,CAAC,MAAM,CAAC,CAAC,CAAC;IAErD,IAAc,MAAM,KAAgB,OAAO,IAAI,CAAC,OAAO,CAAC,CAAC,CAAC;IAE1D,IAAW,QAAQ,KAAe,OAAO,IAAI,CAAC,SAAS,CAAC,CAAC,CAAC;IAE1D,IAAW,gBAAgB,KAAa,OAAO,IAAI,CAAC,iBAAiB,CAAC,CAAC,CAAC;IAExE,IAAW,SAAS;QAClB,MAAM,SAAS,GAAG,IAAI,CAAC,MAAM,CAAC,MAAM,kCAAgB,CAAC;QACrD,IAAI,SAAS,KAAK,IAAI;YACpB,OAAO,SAAS,CAAC;;YAEjB,MAAM,IAAI,KAAK,CAAC,mBAAmB,CAAC,CAAC;IACzC,CAAC;IAED,IAAW,OAAO;QAChB,MAAM,OAAO,GAAG,IAAI,CAAC,MAAM,CAAC,MAAM,8BAAc,CAAC;QAEjD,iFAAiF;QACjF,IAAI,CAAC,OAAO,IAAI,CAAC,IAAI,CAAC,OAAO,EAAE,IAAI,IAAI,CAAC,WAAW,EAAE,CAAC;YACpD,MAAM,IAAI,KAAK,CAAC,qBAAqB,CAAC,CAAC;QAEzC,OAAO,OAAO,CAAC;IACjB,CAAC;IAED,iCAAiC;IACjC,IAAW,QAAQ;QACjB,MAAM,QAAQ,GAAG,IAAI,CAAC,MAAM,CAAC,MAAM,gCAAe,CAAC;QAEnD,kFAAkF;QAClF,IAAI,CAAC,QAAQ,IAAI,CAAC,IAAI,CAAC,OAAO,EAAE,IAAI,IAAI,CAAC,WAAW,EAAE,CAAC;YACrD,MAAM,IAAI,KAAK,CAAC,sBAAsB,CAAC,CAAC;QAE1C,OAAO,QAAQ,CAAC;IAClB,CAAC;IAEM,eAAe;QACpB,IAAI,IAAI,CAAC,QAAQ,8BAAiB,IAAI,IAAI,CAAC,QAAQ,0BAAe,EAAE;YAClE,MAAM,YAAY,GAAG,QAAQ,CAAC,IAAI,CAAC,MAAM,CAAC,MAAM,yCAAmB,CAAC,CAAC;YACrE,OAAO,YAAY,CAAC;SACrB;aAAM;YACL,QAAQ,IAAI,CAAC,QAAQ,EAAE;gBACvB;oBACE,OAAO,EAAE,CAAC;gBACZ,8BAAkB;gBAClB;oBACE,OAAO,CAAC,CAAC;gBACX,KAAK,IAAI;oBACP,OAAO,CAAC,IAAI,CAAC,2BAA2B,CAAC,CAAC;oBAC1C,OAAO,CAAC,CAAC;gBACX;oBACE,MAAM,IAAI,KAAK,CAAC,wBAAwB,IAAI,CAAC,QAAQ,IAAI,CAAC,CAAC;aAC5D;SACF;IACH,CAAC;IAEM,sBAAsB;QAC3B,IAAI,IAAI,CAAC,QAAQ,8BAAiB,IAAI,IAAI,CAAC,QAAQ,0BAAe;YAChE,OAAO,IAAI,CAAC,MAAM,CAAC,MAAM,uDAA0B,IAAI,MAAM,CAAC;;YAE9D,OAAO,KAAK,CAAC;IACjB,CAAC;IAEM,OAAO,KAAc,OAAO,IAAI,CAAC,QAAQ,iCAAmB,CAAC,CAAC,CAAC;IAE/D,WAAW,KAAc,OAAO,IAAI,CAAC,QAAQ,yCAAuB,CAAC,CAAC,CAAC;IAEvE,MAAM
,KAAc,OAAO,IAAI,CAAC,QAAQ,+BAAkB,CAAC,CAAC,CAAC;IAE7D,KAAK,KAAc,OAAO,IAAI,CAAC,QAAQ,6BAAiB,CAAC,CAAC,CAAC;IAE3D,KAAK,KAAc,OAAO,IAAI,CAAC,QAAQ,6BAAiB,CAAC,CAAC,CAAC;IAE3D,SAAS,KAAc,OAAO,IAAI,CAAC,QAAQ,2BAAgB,CAAC,CAAC,CAAC;IAE9D,KAAK,KAAc,OAAO,IAAI,CAAC,OAAO,CAAC,CAAC,CAAC,IAAI,CAAC,OAAO,CAAC,WAAW,EAAE,CAAC,QAAQ,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC,KAAK,CAAC,CAAC,CAAC;IAErG,4DAA4D;IAC5D;;OAEG;IACO,WAAW;QACnB,IAAI,IAAI,CAAC,KAAK,CAAC,WAAW,EAAE,CAAC,UAAU,8BAAgB;YACrD,oCAAsB;aACnB,IAAI,IAAI,CAAC,KAAK,CAAC,WAAW,EAAE,CAAC,UAAU,sCAAoB;YAC9D,4CAA0B;aACvB,IAAI,IAAI,CAAC,KAAK,CAAC,WAAW,EAAE,CAAC,UAAU,4BAAe;YACzD,kCAAqB;;YAErB,MAAM,IAAI,KAAK,CAAC,WAAW,IAAI,CAAC,MAAM,CAAC,IAAI,8BAA8B,IAAI,CAAC,KAAK,IAAI,CAAC,CAAC;IAC7F,CAAC;IAED;;;;;;OAMG;IACO,YAAY,CAAC,cAAwB;QAC7C,MAAM,GAAG,GAAG,IAAI,CAAC,MAAM,CAAC;QACxB,MAAM,GAAG,GAAG,GAAG,CAAC,MAAM,CAAC;QACvB,MAAM,IAAI,GAAG,cAAc,CAAC,WAAW,EAAE,GAAG,GAAG,GAAG,GAAG,CAAC,IAAI,GAAG,GAAG,CAAC;QACjE,MAAM,UAAU,GAAG,GAAG,CAAC,SAAS,CAAC,OAAO,CAAC,aAAa,CAAC,IAAI,CAAC,CAAC;QAC7D,MAAM,SAAS,GAAG,EAAE,CAAC,MAAM,CAAC,QAAQ,CAAC,QAAQ,EAAE,UAAU,EAAE,IAAI,KAAK,CAAC,GAAG,CAAC,CAAC,IAAI,CAAC,EAAE,CAAC,CAAC,CAAC;QACpF,SAAS,CAAC,OAAO,GAAG,EAAE,CAAC,OAAO,CAAC,aAAa,CAAC;QAC7C,SAAS,CAAC,MAAM,CAAC,EAAE,CAAC,IAAI,CAAC,KAAK,EAAE,cAAc,CAAC,CAAC;QAChD,SAAS,CAAC,MAAM,CAAC,EAAE,CAAC,IAAI,CAAC,aAAa,EAAE,eAAe,CAAC,CAAC;QAEzD,MAAM,UAAU,GAAG,GAAG,CAAC,MAAM,8BAAc,CAAC;QAC5C,IAAI,UAAU;YACZ,SAAS,CAAC,MAAM,+BAAe,UAAU,CAAC,CAAC;QAE7C,MAAM,WAAW,GAAG,GAAG,CAAC,MAAM,gCAAe,CAAC;QAC9C,IAAI,WAAW;YACb,SAAS,CAAC,MAAM,iCAAgB,WAAW,CAAC,CAAC;QAE/C,IAAI,eAAe,GAAW,GAAG,CAAC,MAAM,yCAAmB,CAAC;QAC5D,IAAI,eAAe;YACjB,SAAS,CAAC,MAAM,0CAAoB,eAAe,CAAC,CAAC;QAEvD,MAAM,sBAAsB,GAAW,GAAG,CAAC,MAAM,uDAA0B,CAAC;QAC5E,IAAI,sBAAsB,KAAK,SAAS;YACtC,SAAS,CAAC,MAAM,wDAA2B,sBAAsB,CAAC,CAAC;QAErE,IAAI,cAAc,8BAAiB,EAAE;YACnC,eAAe,GAAG,IAAI,CAAC,eAAe,EAAE,CAAC,QAAQ,EAAE,CAAC;YACpD,SAAS,CAAC,MAAM,0CAAoB,eAAe,CAAC,CAAC;SACtD;QAED,OAAO,SAAS,CAAC;IACnB,CAAC;IAED;;;;;;OAMG;IACI,MAAM,CAAC,YAAY,CAAC,WAAsB;QAC/C,MAAM,GAAG,GAAiB,IAAI,
YAAY,CAAC,WAAW,CAAC,CAAC;QACxD,MAAM,cAAc,GAAG,GAAG,CAAC,QAAQ,CAAC;QACpC,OAAO,GAAG,CAAC,YAAY,CAAC,cAAc,CAAC,CAAC;IAC1C,CAAC;IAED;;;;;OAKG;IACI,MAAM,CAAC,kBAAkB,CAAC,KAAa;QAC5C,KAAK,GAAG,KAAK,CAAC,WAAW,EAAE,CAAC;QAC5B,MAAM,QAAQ,GAAG,gGAAmD,CAAC;QACrE,MAAM,SAAS,GAAG,CAAC,KAAK,EAAE,KAAK,EAAE,IAAI,CAAC,CAAC;QAEvC,MAAM,eAAe,GAAG,QAAQ,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,KAAK,CAAC,UAAU,CAAC,CAAC,CAAC,WAAW,EAAE,CAAC,CAAC,CAAC;QAChF,OAAO,eAAe,CAAC;IACzB,CAAC;IAED;;;;;;;;OAQG;IACI,MAAM,CAAC,sBAAsB,CAClC,GAAW,EACX,IAAY,EACZ,KAAa;QAEb,4EAA4E;QAC5E,2BAA2B;QAC3B,+CAA+C;QAC/C,IAAI,CAAC,YAAY,CAAC,kBAAkB,CAAC,KAAK,CAAC;YACzC,MAAM,IAAI,KAAK,CAAC,uCAAuC,CAAC,CAAC;QAC3D,MAAM,SAAS,GAAG,EAAE,CAAC,MAAM,CAAC,QAAQ,CAAC,QAAQ,EAAE,IAAI,EAAE,IAAI,KAAK,CAAC,GAAG,CAAC,CAAC,IAAI,CAAC,EAAE,CAAC,CAAC,CAAC;QAC9E,SAAS,CAAC,OAAO,GAAG,EAAE,CAAC,OAAO,CAAC,aAAa,CAAC;QAC7C,SAAS,CAAC,MAAM,CAAC,EAAE,CAAC,IAAI,CAAC,KAAK,EAAE,KAAK,CAAC,CAAC;QACvC,OAAO,SAAS,CAAC;IACnB,CAAC;;AAnNyB,mCAAsB,GAAG;IACjD,IAAI,EAAE,GAAG;IACT,SAAS,EAAE,EAAE;IACb,KAAK,EAAE,GAAG;CACX,CAAC;AAEqB,iCAAoB,GAAG,IAAI,GAAG,CAAC;IACpD,GAAG,EAAE,GAAG,EAAE,GAAG,EAAE,GAAG,EAAE,GAAG,EAAE,GAAG,EAAE,GAAG,EAAE,GAAG,EAAE,GAAG,EAAE,GAAG;IAChD,GAAG,EAAE,GAAG,EAAE,GAAG,EAAE,GAAG,EAAE,GAAG,EAAE,GAAG,EAAE,GAAG,EAAE,GAAG,EAAE,GAAG,EAAE,GAAG;CACjD,CAAC,CAAC;AACoB,6BAAgB,GAAG,IAAI,GAAG,CAAC,CAAC,GAAG,EAAE,GAAG,EAAE,GAAG,EAAE,GAAG,CAAC,CAAC,CAAC;AACjD,6BAAgB,GAAG,IAAI,GAAG,CAAC,CAAC,GAAG,EAAE,GAAG,EAAE,GAAG,EAAE,GAAG,CAAC,CAAC,CAAC","sourcesContent":["import * as DG from 'datagrok-api/dg';\nimport * as ui from 'datagrok-api/ui';\nimport * as grok from 'datagrok-api/grok';\n\nimport {\n  ALPHABET, NOTATION,\n  getStats,\n  SeqColStats,\n  splitterAsFasta,\n  detectAlphabet, TAGS\n} from './macromolecule';\n\n/** Class for handling notation units in Macromolecule columns */\nexport class UnitsHandler {\n  protected readonly _column: DG.Column; // the column to be converted\n  protected _units: string; // units, of the form fasta, separator\n  protected 
_notation: NOTATION; // current notation (without :SEQ:NT, etc.)\n  protected _defaultGapSymbol: string;\n  protected static readonly _defaultGapSymbolsDict = {\n    HELM: '*',\n    SEPARATOR: '',\n    FASTA: '-',\n  };\n\n  public static readonly PeptideFastaAlphabet = new Set([\n    'G', 'L', 'Y', 'S', 'E', 'Q', 'D', 'N', 'F', 'A',\n    'K', 'R', 'H', 'C', 'V', 'P', 'W', 'I', 'M', 'T',\n  ]);\n  public static readonly DnaFastaAlphabet = new Set(['A', 'C', 'G', 'T']);\n  public static readonly RnaFastaAlphabet = new Set(['A', 'C', 'G', 'U']);\n\n  public static setUnitsToFastaColumn(col: DG.Column) {\n    if (col.semType !== DG.SEMTYPE.MACROMOLECULE)\n      throw new Error('Fasta column must be MACROMOLECULE');\n\n    const stats: SeqColStats = getStats(col, 5, splitterAsFasta);\n    const aligned = stats.sameLength ? 'SEQ.MSA' : 'SEQ';\n    const alphabet = detectAlphabet(stats);\n\n    col.setTag(DG.TAGS.UNITS, NOTATION.FASTA);\n    col.setTag(TAGS.aligned, aligned);\n    col.setTag(TAGS.alphabet, alphabet);\n  }\n\n  protected get units(): string { return this._units; }\n\n  protected get column(): DG.Column { return this._column; }\n\n  public get notation(): NOTATION { return this._notation; }\n\n  public get defaultGapSymbol(): string { return this._defaultGapSymbol; }\n\n  public get separator(): string {\n    const separator = this.column.getTag(TAGS.separator);\n    if (separator !== null)\n      return separator;\n    else\n      throw new Error('Separator not set');\n  }\n\n  public get aligned(): string {\n    const aligned = this.column.getTag(TAGS.aligned);\n\n    // TAGS.aligned is mandatory for columns of NOTATION.FASTA and NOTATION.SEPARATOR\n    if (!aligned && (this.isFasta() || this.isSeparator()))\n      throw new Error('Tag aligned not set');\n\n    return aligned;\n  }\n\n  /** Alphabet name (upper case) */\n  public get alphabet(): string {\n    const alphabet = this.column.getTag(TAGS.alphabet);\n\n    // TAGS.alphabet is mandatory for 
columns of NOTATION.FASTA and NOTATION.SEPARATOR\n    if (!alphabet && (this.isFasta() || this.isSeparator()))\n      throw new Error('Tag alphabet not set');\n\n    return alphabet;\n  }\n\n  public getAlphabetSize(): number {\n    if (this.notation == NOTATION.HELM || this.alphabet == ALPHABET.UN) {\n      const alphabetSize = parseInt(this.column.getTag(TAGS.alphabetSize));\n      return alphabetSize;\n    } else {\n      switch (this.alphabet) {\n      case ALPHABET.PT:\n        return 20;\n      case ALPHABET.DNA:\n      case ALPHABET.RNA:\n        return 4;\n      case 'NT':\n        console.warn(`Unexpected alphabet 'NT'.`);\n        return 4;\n      default:\n        throw new Error(`Unexpected alphabet '${this.alphabet}'.`);\n      }\n    }\n  }\n\n  public getAlphabetIsMultichar(): boolean {\n    if (this.notation == NOTATION.HELM || this.alphabet == ALPHABET.UN)\n      return this.column.getTag(TAGS.alphabetIsMultichar) == 'true';\n    else\n      return false;\n  }\n\n  public isFasta(): boolean { return this.notation === NOTATION.FASTA; }\n\n  public isSeparator(): boolean { return this.notation === NOTATION.SEPARATOR; }\n\n  public isHelm(): boolean { return this.notation === NOTATION.HELM; }\n\n  public isRna(): boolean { return this.alphabet === ALPHABET.RNA; }\n\n  public isDna(): boolean { return this.alphabet === ALPHABET.DNA; }\n\n  public isPeptide(): boolean { return this.alphabet === ALPHABET.PT; }\n\n  public isMsa(): boolean { return this.aligned ? 
this.aligned.toUpperCase().includes('MSA') : false; }\n\n  /** Associate notation types with the corresponding units */\n  /**\n   * @return {NOTATION}     Notation associated with the units type\n   */\n  protected getNotation(): NOTATION {\n    if (this.units.toLowerCase().startsWith(NOTATION.FASTA))\n      return NOTATION.FASTA;\n    else if (this.units.toLowerCase().startsWith(NOTATION.SEPARATOR))\n      return NOTATION.SEPARATOR;\n    else if (this.units.toLowerCase().startsWith(NOTATION.HELM))\n      return NOTATION.HELM;\n    else\n      throw new Error(`Column '${this.column.name}' has unexpected notation '${this.units}'.`);\n  }\n\n  /**\n   * Create a new empty column of the specified notation type and the same\n   * length as column\n   *\n   * @param {NOTATION} targetNotation\n   * @return {DG.Column}\n   */\n  protected getNewColumn(targetNotation: NOTATION): DG.Column {\n    const col = this.column;\n    const len = col.length;\n    const name = targetNotation.toLowerCase() + '(' + col.name + ')';\n    const newColName = col.dataFrame.columns.getUnusedName(name);\n    const newColumn = DG.Column.fromList('string', newColName, new Array(len).fill(''));\n    newColumn.semType = DG.SEMTYPE.MACROMOLECULE;\n    newColumn.setTag(DG.TAGS.UNITS, targetNotation);\n    newColumn.setTag(DG.TAGS.CELL_RENDERER, 'Macromolecule');\n\n    const srcAligned = col.getTag(TAGS.aligned);\n    if (srcAligned)\n      newColumn.setTag(TAGS.aligned, srcAligned);\n\n    const srcAlphabet = col.getTag(TAGS.alphabet);\n    if (srcAlphabet)\n      newColumn.setTag(TAGS.alphabet, srcAlphabet);\n\n    let srcAlphabetSize: string = col.getTag(TAGS.alphabetSize);\n    if (srcAlphabetSize)\n      newColumn.setTag(TAGS.alphabetSize, srcAlphabetSize);\n\n    const srcAlphabetIsMultichar: string = col.getTag(TAGS.alphabetIsMultichar);\n    if (srcAlphabetIsMultichar !== undefined)\n      newColumn.setTag(TAGS.alphabetIsMultichar, srcAlphabetIsMultichar);\n\n    if (targetNotation == 
NOTATION.HELM) {\n      srcAlphabetSize = this.getAlphabetSize().toString();\n      newColumn.setTag(TAGS.alphabetSize, srcAlphabetSize);\n    }\n\n    return newColumn;\n  }\n\n  /**\n   * Create a new empty column using templateCol as a template\n   *\n   * @param {DG.Column} templateCol  the properties and units of this column are used as a\n   * template to build the new one\n   * @return {DG.Column}\n   */\n  public static getNewColumn(templateCol: DG.Column): DG.Column {\n    const col: UnitsHandler = new UnitsHandler(templateCol);\n    const targetNotation = col.notation;\n    return col.getNewColumn(targetNotation);\n  }\n\n  /**\n   * A helper function checking the validity of the 'units' string\n   *\n   * @param {string} units  the string to be validated\n   * @return {boolean}\n   */\n  public static unitsStringIsValid(units: string): boolean {\n    units = units.toLowerCase();\n    const prefixes = [NOTATION.FASTA, NOTATION.SEPARATOR, NOTATION.HELM];\n    const postfixes = ['rna', 'dna', 'pt'];\n\n    const prefixCriterion = prefixes.some((p) => units.startsWith(p.toLowerCase()));\n    return prefixCriterion;\n  }\n\n  /**\n   * Construct a new column of semantic type MACROMOLECULE from the list of\n   * specified parameters\n   *\n   * @param {number}    len  the length of the new column\n   * @param {string}    name  the name of the new column\n   * @param {string}    units  the units of the new column\n   * @return {DG.Column}\n   */\n  public static getNewColumnFromParams(\n    len: number,\n    name: string,\n    units: string\n  ): DG.Column {\n    // WARNING: in this implementation is is impossible to verify the uniqueness\n    // of the new column's name\n    // TODO: verify the validity of units parameter\n    if (!UnitsHandler.unitsStringIsValid(units))\n      throw new Error('Invalid format of \\'units\\' parameter');\n    const newColumn = DG.Column.fromList('string', name, new Array(len).fill(''));\n    newColumn.semType = 
DG.SEMTYPE.MACROMOLECULE;\n    newColumn.setTag(DG.TAGS.UNITS, units);\n    return newColumn;\n  }\n\n  public constructor(col: DG.Column) {\n    this._column = col;\n    const units = this._column.tags[DG.TAGS.UNITS];\n    if (units !== null)\n      this._units = units;\n    else\n      throw new Error('Units are not specified in column');\n    this._notation = this.getNotation();\n    this._defaultGapSymbol = (this.isFasta()) ? UnitsHandler._defaultGapSymbolsDict.FASTA :\n      (this.isHelm()) ? UnitsHandler._defaultGapSymbolsDict.HELM :\n        UnitsHandler._defaultGapSymbolsDict.SEPARATOR;\n\n    if (!this.column.tags.has(TAGS.aligned)) {\n      if (this.isFasta() || this.isSeparator())\n        throw new Error(`For column '${this.column.name}' of notation '${this.notation}' ` +\n          `tag '${TAGS.aligned}' is mandatory.`);\n    }\n\n    if (!this.column.tags.has(TAGS.alphabetSize)) {\n      if (this.isHelm())\n        throw new Error(`For column '${this.column.name}' of notation '${this.notation}' ` +\n          `tag '${TAGS.alphabetSize}' is mandatory.`);\n      else if (['UN'].includes(this.alphabet))\n        throw new Error(`For column '${this.column.name}' of alphabet '${this.alphabet}' ` +\n          `tag '${TAGS.alphabetSize}' is mandatory.`);\n    }\n\n    if (!this.column.tags.has(TAGS.alphabetIsMultichar)) {\n      if (this.isHelm())\n        throw new Error(`For column '${this.column.name}' of notation '${this.notation}' ` +\n          `tag '${TAGS.alphabetIsMultichar}' is mandatory.`);\n      else if (['UN'].includes(this.alphabet))\n        throw new Error(`For column '${this.column.name}' of alphabet '${this.alphabet}' ` +\n          `tag '${TAGS.alphabetIsMultichar}' is mandatory.`);\n    }\n  }\n}\n"]}
|
|
222
|
+
//# sourceMappingURL=data:application/json;base64,{"version":3,"file":"units-handler.js","sourceRoot":"","sources":["units-handler.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,MAAM,iBAAiB,CAAC;AAEtC,OAAO,EAEL,cAAc,EACd,oBAAoB,EACpB,QAAQ,EAGR,eAAe,EAGhB,MAAM,iBAAiB,CAAC;AAEzB,iEAAiE;AACjE,MAAM,OAAO,YAAY;IAkOvB,YAAmB,GAAc;QAC/B,IAAI,CAAC,OAAO,GAAG,GAAG,CAAC;QACnB,MAAM,KAAK,GAAG,IAAI,CAAC,OAAO,CAAC,IAAI,CAAC,EAAE,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC;QAC/C,IAAI,KAAK,KAAK,IAAI;YAChB,IAAI,CAAC,MAAM,GAAG,KAAK,CAAC;;YAEpB,MAAM,IAAI,KAAK,CAAC,mCAAmC,CAAC,CAAC;QACvD,IAAI,CAAC,SAAS,GAAG,IAAI,CAAC,WAAW,EAAE,CAAC;QACpC,IAAI,CAAC,iBAAiB,GAAG,CAAC,IAAI,CAAC,OAAO,EAAE,CAAC,CAAC,CAAC,CAAC,YAAY,CAAC,sBAAsB,CAAC,KAAK,CAAC,CAAC;YACrF,CAAC,IAAI,CAAC,MAAM,EAAE,CAAC,CAAC,CAAC,CAAC,YAAY,CAAC,sBAAsB,CAAC,IAAI,CAAC,CAAC;gBAC1D,YAAY,CAAC,sBAAsB,CAAC,SAAS,CAAC;QAElD,IAAI,CAAC,IAAI,CAAC,MAAM,CAAC,IAAI,CAAC,GAAG,8BAAc,EAAE;YACvC,IAAI,IAAI,CAAC,OAAO,EAAE,IAAI,IAAI,CAAC,WAAW,EAAE;gBACtC,MAAM,IAAI,KAAK,CAAC,eAAe,IAAI,CAAC,MAAM,CAAC,IAAI,kBAAkB,IAAI,CAAC,QAAQ,IAAI;oBAChF,QAAQ,4BAAY,iBAAiB,CAAC,CAAC;SAC5C;QAED,kDAAkD;QAClD,uBAAuB;QACvB,2FAA2F;QAC3F,qDAAqD;QACrD,6CAA6C;QAC7C,2FAA2F;QAC3F,qDAAqD;QACrD,IAAI;QAEJ,IAAI,CAAC,IAAI,CAAC,MAAM,CAAC,IAAI,CAAC,GAAG,uDAA0B,EAAE;YACnD,IAAI,IAAI,CAAC,MAAM,EAAE;gBACf,MAAM,IAAI,KAAK,CAAC,eAAe,IAAI,CAAC,MAAM,CAAC,IAAI,kBAAkB,IAAI,CAAC,QAAQ,IAAI;oBAChF,QAAQ,qDAAwB,iBAAiB,CAAC,CAAC;iBAClD,IAAI,CAAC,IAAI,CAAC,CAAC,QAAQ,CAAC,IAAI,CAAC,QAAQ,CAAC;gBACrC,MAAM,IAAI,KAAK,CAAC,eAAe,IAAI,CAAC,MAAM,CAAC,IAAI,kBAAkB,IAAI,CAAC,QAAQ,IAAI;oBAChF,QAAQ,qDAAwB,iBAAiB,CAAC,CAAC;SACxD;IACH,CAAC;IAnPM,MAAM,CAAC,qBAAqB,CAAC,GAAc;QAChD,IAAI,GAAG,CAAC,OAAO,KAAK,EAAE,CAAC,OAAO,CAAC,aAAa;YAC1C,MAAM,IAAI,KAAK,CAAC,oCAAoC,CAAC,CAAC;QAExD,MAAM,KAAK,GAAgB,QAAQ,CAAC,GAAG,EAAE,CAAC,EAAE,eAAe,CAAC,CAAC;QAC7D,MAAM,OAAO,GAAG,KAAK,CAAC,UAAU,CAAC,CAAC,CAAC,SAAS,CAAC,CAAC,CAAC,KAAK,CAAC;QACrD,MAAM,QAAQ,GAAG,cAAc,CAAC,KAAK,CAAC,CAAC;QAEvC,GAAG,CAAC,MAAM,CAAC,EAAE,CAAC,IAAI,CAAC,KAAK,+BAAiB,CAAC;QAC1C,GAAG,CAAC,
MAAM,+BAAe,OAAO,CAAC,CAAC;QAClC,GAAG,CAAC,MAAM,iCAAgB,QAAQ,CAAC,CAAC;IACtC,CAAC;IAED,IAAc,KAAK,KAAa,OAAO,IAAI,CAAC,MAAM,CAAC,CAAC,CAAC;IAErD,IAAc,MAAM,KAAgB,OAAO,IAAI,CAAC,OAAO,CAAC,CAAC,CAAC;IAE1D,IAAW,QAAQ,KAAe,OAAO,IAAI,CAAC,SAAS,CAAC,CAAC,CAAC;IAE1D,IAAW,gBAAgB,KAAa,OAAO,IAAI,CAAC,iBAAiB,CAAC,CAAC,CAAC;IAExE,IAAW,SAAS;;QAClB,MAAM,SAAS,GAAuB,MAAA,IAAI,CAAC,MAAM,CAAC,MAAM,kCAAgB,mCAAI,SAAS,CAAC;QACtF,IAAI,IAAI,CAAC,QAAQ,yCAAuB,IAAI,SAAS,KAAK,SAAS;YACjE,MAAM,IAAI,KAAK,CAAC,uCAAuC,IAAI,CAAC,MAAM,CAAC,IAAI,kBAAkB,IAAI,CAAC,QAAQ,IAAI,CAAC,CAAC;QAC9G,OAAO,SAAS,CAAC;IACnB,CAAC;IAED,IAAW,OAAO;QAChB,MAAM,OAAO,GAAG,IAAI,CAAC,MAAM,CAAC,MAAM,8BAAc,CAAC;QAEjD,iFAAiF;QACjF,IAAI,CAAC,OAAO,IAAI,CAAC,IAAI,CAAC,OAAO,EAAE,IAAI,IAAI,CAAC,WAAW,EAAE,CAAC;YACpD,MAAM,IAAI,KAAK,CAAC,qBAAqB,CAAC,CAAC;QAEzC,OAAO,OAAO,CAAC;IACjB,CAAC;IAED,iCAAiC;IACjC,IAAW,QAAQ;QACjB,MAAM,QAAQ,GAAG,IAAI,CAAC,MAAM,CAAC,MAAM,gCAAe,CAAC;QAEnD,kFAAkF;QAClF,IAAI,CAAC,QAAQ,IAAI,CAAC,IAAI,CAAC,OAAO,EAAE,IAAI,IAAI,CAAC,WAAW,EAAE,CAAC;YACrD,MAAM,IAAI,KAAK,CAAC,sBAAsB,CAAC,CAAC;QAE1C,OAAO,QAAQ,CAAC;IAClB,CAAC;IAEM,eAAe;QACpB,IAAI,IAAI,CAAC,QAAQ,8BAAiB,IAAI,IAAI,CAAC,QAAQ,0BAAe,EAAE;YAClE,MAAM,eAAe,GAAG,IAAI,CAAC,MAAM,CAAC,MAAM,yCAAmB,CAAC;YAC9D,IAAI,YAAoB,CAAC;YACzB,IAAI,eAAe,EAAE;gBACnB,YAAY,GAAG,QAAQ,CAAC,eAAe,CAAC,CAAC;aAC1C;iBAAM;gBACL,mCAAmC;gBACnC,MAAM,QAAQ,GAAiB,oBAAoB,CAAC,IAAI,CAAC,MAAM,CAAC,CAAC;gBACjE,MAAM,KAAK,GAAG,QAAQ,CAAC,IAAI,CAAC,MAAM,EAAE,CAAC,EAAE,QAAQ,CAAC,CAAC;gBACjD,YAAY,GAAG,MAAM,CAAC,IAAI,CAAC,KAAK,CAAC,IAAI,CAAC,CAAC,MAAM,CAAC;aAC/C;YACD,OAAO,YAAY,CAAC;SACrB;aAAM;YACL,QAAQ,IAAI,CAAC,QAAQ,EAAE;gBACvB;oBACE,OAAO,EAAE,CAAC;gBACZ,8BAAkB;gBAClB;oBACE,OAAO,CAAC,CAAC;gBACX,KAAK,IAAI;oBACP,OAAO,CAAC,IAAI,CAAC,2BAA2B,CAAC,CAAC;oBAC1C,OAAO,CAAC,CAAC;gBACX;oBACE,MAAM,IAAI,KAAK,CAAC,wBAAwB,IAAI,CAAC,QAAQ,IAAI,CAAC,CAAC;aAC5D;SACF;IACH,CAAC;IAEM,sBAAsB;QAC3B,IAAI,IAAI,CAAC,QAAQ,8BAAiB,IAAI,IAAI,CAAC,QAAQ,0BAAe;YAChE,OAAO,IAAI,CAAC,MAAM,CAAC,MAAM,uDAA0B,IAAI,MAAM,CAAC;;YAE9D,OAAO,KAAK,CAAC;IACjB,CAAC;IAEM,
OAAO,KAAc,OAAO,IAAI,CAAC,QAAQ,iCAAmB,CAAC,CAAC,CAAC;IAE/D,WAAW,KAAc,OAAO,IAAI,CAAC,QAAQ,yCAAuB,CAAC,CAAC,CAAC;IAEvE,MAAM,KAAc,OAAO,IAAI,CAAC,QAAQ,+BAAkB,CAAC,CAAC,CAAC;IAE7D,KAAK,KAAc,OAAO,IAAI,CAAC,QAAQ,6BAAiB,CAAC,CAAC,CAAC;IAE3D,KAAK,KAAc,OAAO,IAAI,CAAC,QAAQ,6BAAiB,CAAC,CAAC,CAAC;IAE3D,SAAS,KAAc,OAAO,IAAI,CAAC,QAAQ,2BAAgB,CAAC,CAAC,CAAC;IAE9D,KAAK,KAAc,OAAO,IAAI,CAAC,OAAO,CAAC,CAAC,CAAC,IAAI,CAAC,OAAO,CAAC,WAAW,EAAE,CAAC,QAAQ,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC,KAAK,CAAC,CAAC,CAAC;IAErG,4DAA4D;IAC5D;;OAEG;IACO,WAAW;QACnB,IAAI,IAAI,CAAC,KAAK,CAAC,WAAW,EAAE,CAAC,UAAU,8BAAgB;YACrD,oCAAsB;aACnB,IAAI,IAAI,CAAC,KAAK,CAAC,WAAW,EAAE,CAAC,UAAU,sCAAoB;YAC9D,4CAA0B;aACvB,IAAI,IAAI,CAAC,KAAK,CAAC,WAAW,EAAE,CAAC,UAAU,4BAAe;YACzD,kCAAqB;;YAErB,MAAM,IAAI,KAAK,CAAC,WAAW,IAAI,CAAC,MAAM,CAAC,IAAI,8BAA8B,IAAI,CAAC,KAAK,IAAI,CAAC,CAAC;IAC7F,CAAC;IAED;;;;;;OAMG;IACO,YAAY,CAAC,cAAwB;QAC7C,MAAM,GAAG,GAAG,IAAI,CAAC,MAAM,CAAC;QACxB,MAAM,GAAG,GAAG,GAAG,CAAC,MAAM,CAAC;QACvB,MAAM,IAAI,GAAG,cAAc,CAAC,WAAW,EAAE,GAAG,GAAG,GAAG,GAAG,CAAC,IAAI,GAAG,GAAG,CAAC;QACjE,MAAM,UAAU,GAAG,GAAG,CAAC,SAAS,CAAC,OAAO,CAAC,aAAa,CAAC,IAAI,CAAC,CAAC;QAC7D,MAAM,SAAS,GAAG,EAAE,CAAC,MAAM,CAAC,QAAQ,CAAC,QAAQ,EAAE,UAAU,EAAE,IAAI,KAAK,CAAC,GAAG,CAAC,CAAC,IAAI,CAAC,EAAE,CAAC,CAAC,CAAC;QACpF,SAAS,CAAC,OAAO,GAAG,EAAE,CAAC,OAAO,CAAC,aAAa,CAAC;QAC7C,SAAS,CAAC,MAAM,CAAC,EAAE,CAAC,IAAI,CAAC,KAAK,EAAE,cAAc,CAAC,CAAC;QAChD,SAAS,CAAC,MAAM,CAAC,EAAE,CAAC,IAAI,CAAC,aAAa,EAAE,eAAe,CAAC,CAAC;QAEzD,MAAM,UAAU,GAAG,GAAG,CAAC,MAAM,8BAAc,CAAC;QAC5C,IAAI,UAAU;YACZ,SAAS,CAAC,MAAM,+BAAe,UAAU,CAAC,CAAC;QAE7C,MAAM,WAAW,GAAG,GAAG,CAAC,MAAM,gCAAe,CAAC;QAC9C,IAAI,WAAW;YACb,SAAS,CAAC,MAAM,iCAAgB,WAAW,CAAC,CAAC;QAE/C,IAAI,eAAe,GAAW,GAAG,CAAC,MAAM,yCAAmB,CAAC;QAC5D,IAAI,eAAe;YACjB,SAAS,CAAC,MAAM,0CAAoB,eAAe,CAAC,CAAC;QAEvD,MAAM,sBAAsB,GAAW,GAAG,CAAC,MAAM,uDAA0B,CAAC;QAC5E,IAAI,sBAAsB,KAAK,SAAS;YACtC,SAAS,CAAC,MAAM,wDAA2B,sBAAsB,CAAC,CAAC;QAErE,IAAI,cAAc,8BAAiB,EAAE;YACnC,eAAe,GAAG,IAAI,CAAC,eAAe,EAAE,CAAC,QAAQ,EAAE,CAAC;YACpD,SAAS,CAAC,MAAM,0CAAoB,eAAe,
CAAC,CAAC;SACtD;QAED,OAAO,SAAS,CAAC;IACnB,CAAC;IAED;;;;;;OAMG;IACI,MAAM,CAAC,YAAY,CAAC,WAAsB;QAC/C,MAAM,GAAG,GAAiB,IAAI,YAAY,CAAC,WAAW,CAAC,CAAC;QACxD,MAAM,cAAc,GAAG,GAAG,CAAC,QAAQ,CAAC;QACpC,OAAO,GAAG,CAAC,YAAY,CAAC,cAAc,CAAC,CAAC;IAC1C,CAAC;IAED;;;;;OAKG;IACI,MAAM,CAAC,kBAAkB,CAAC,KAAa;QAC5C,KAAK,GAAG,KAAK,CAAC,WAAW,EAAE,CAAC;QAC5B,MAAM,QAAQ,GAAG,gGAAmD,CAAC;QACrE,MAAM,SAAS,GAAG,CAAC,KAAK,EAAE,KAAK,EAAE,IAAI,CAAC,CAAC;QAEvC,MAAM,eAAe,GAAG,QAAQ,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,KAAK,CAAC,UAAU,CAAC,CAAC,CAAC,WAAW,EAAE,CAAC,CAAC,CAAC;QAChF,OAAO,eAAe,CAAC;IACzB,CAAC;IAED;;;;;;;;OAQG;IACI,MAAM,CAAC,sBAAsB,CAClC,GAAW,EACX,IAAY,EACZ,KAAa;QAEb,4EAA4E;QAC5E,2BAA2B;QAC3B,+CAA+C;QAC/C,IAAI,CAAC,YAAY,CAAC,kBAAkB,CAAC,KAAK,CAAC;YACzC,MAAM,IAAI,KAAK,CAAC,uCAAuC,CAAC,CAAC;QAC3D,MAAM,SAAS,GAAG,EAAE,CAAC,MAAM,CAAC,QAAQ,CAAC,QAAQ,EAAE,IAAI,EAAE,IAAI,KAAK,CAAC,GAAG,CAAC,CAAC,IAAI,CAAC,EAAE,CAAC,CAAC,CAAC;QAC9E,SAAS,CAAC,OAAO,GAAG,EAAE,CAAC,OAAO,CAAC,aAAa,CAAC;QAC7C,SAAS,CAAC,MAAM,CAAC,EAAE,CAAC,IAAI,CAAC,KAAK,EAAE,KAAK,CAAC,CAAC;QACvC,OAAO,SAAS,CAAC;IACnB,CAAC;;AA3NyB,mCAAsB,GAAG;IACjD,IAAI,EAAE,GAAG;IACT,SAAS,EAAE,EAAE;IACb,KAAK,EAAE,GAAG;CACX,CAAC;AAEqB,iCAAoB,GAAG,IAAI,GAAG,CAAS;IAC5D,GAAG,EAAE,GAAG,EAAE,GAAG,EAAE,GAAG,EAAE,GAAG,EAAE,GAAG,EAAE,GAAG,EAAE,GAAG,EAAE,GAAG,EAAE,GAAG;IAChD,GAAG,EAAE,GAAG,EAAE,GAAG,EAAE,GAAG,EAAE,GAAG,EAAE,GAAG,EAAE,GAAG,EAAE,GAAG,EAAE,GAAG,EAAE,GAAG;CACjD,CAAC,CAAC;AACoB,6BAAgB,GAAG,IAAI,GAAG,CAAS,CAAC,GAAG,EAAE,GAAG,EAAE,GAAG,EAAE,GAAG,CAAC,CAAC,CAAC;AACzD,6BAAgB,GAAG,IAAI,GAAG,CAAS,CAAC,GAAG,EAAE,GAAG,EAAE,GAAG,EAAE,GAAG,CAAC,CAAC,CAAC","sourcesContent":["import * as DG from 'datagrok-api/dg';\n\nimport {\n  ALPHABET,\n  detectAlphabet,\n  getSplitterForColumn,\n  getStats,\n  NOTATION,\n  SeqColStats,\n  splitterAsFasta,\n  SplitterFunc,\n  TAGS\n} from './macromolecule';\n\n/** Class for handling notation units in Macromolecule columns */\nexport class UnitsHandler {\n  protected readonly _column: DG.Column; // the column to be converted\n  
protected _units: string; // units, of the form fasta, separator\n  protected _notation: NOTATION; // current notation (without :SEQ:NT, etc.)\n  protected _defaultGapSymbol: string;\n  protected static readonly _defaultGapSymbolsDict = {\n    HELM: '*',\n    SEPARATOR: '',\n    FASTA: '-',\n  };\n\n  public static readonly PeptideFastaAlphabet = new Set<string>([\n    'G', 'L', 'Y', 'S', 'E', 'Q', 'D', 'N', 'F', 'A',\n    'K', 'R', 'H', 'C', 'V', 'P', 'W', 'I', 'M', 'T',\n  ]);\n  public static readonly DnaFastaAlphabet = new Set<string>(['A', 'C', 'G', 'T']);\n  public static readonly RnaFastaAlphabet = new Set<string>(['A', 'C', 'G', 'U']);\n\n  public static setUnitsToFastaColumn(col: DG.Column) {\n    if (col.semType !== DG.SEMTYPE.MACROMOLECULE)\n      throw new Error('Fasta column must be MACROMOLECULE');\n\n    const stats: SeqColStats = getStats(col, 5, splitterAsFasta);\n    const aligned = stats.sameLength ? 'SEQ.MSA' : 'SEQ';\n    const alphabet = detectAlphabet(stats);\n\n    col.setTag(DG.TAGS.UNITS, NOTATION.FASTA);\n    col.setTag(TAGS.aligned, aligned);\n    col.setTag(TAGS.alphabet, alphabet);\n  }\n\n  protected get units(): string { return this._units; }\n\n  protected get column(): DG.Column { return this._column; }\n\n  public get notation(): NOTATION { return this._notation; }\n\n  public get defaultGapSymbol(): string { return this._defaultGapSymbol; }\n\n  public get separator(): string | undefined {\n    const separator: string | undefined = this.column.getTag(TAGS.separator) ?? 
undefined;\n    if (this.notation === NOTATION.SEPARATOR && separator === undefined)\n      throw new Error(`Separator is mandatory  for column '${this.column.name}' of notation '${this.notation}'.`);\n    return separator;\n  }\n\n  public get aligned(): string {\n    const aligned = this.column.getTag(TAGS.aligned);\n\n    // TAGS.aligned is mandatory for columns of NOTATION.FASTA and NOTATION.SEPARATOR\n    if (!aligned && (this.isFasta() || this.isSeparator()))\n      throw new Error('Tag aligned not set');\n\n    return aligned;\n  }\n\n  /** Alphabet name (upper case) */\n  public get alphabet(): string {\n    const alphabet = this.column.getTag(TAGS.alphabet);\n\n    // TAGS.alphabet is mandatory for columns of NOTATION.FASTA and NOTATION.SEPARATOR\n    if (!alphabet && (this.isFasta() || this.isSeparator()))\n      throw new Error('Tag alphabet not set');\n\n    return alphabet;\n  }\n\n  public getAlphabetSize(): number {\n    if (this.notation == NOTATION.HELM || this.alphabet == ALPHABET.UN) {\n      const alphabetSizeStr = this.column.getTag(TAGS.alphabetSize);\n      let alphabetSize: number;\n      if (alphabetSizeStr) {\n        alphabetSize = parseInt(alphabetSizeStr);\n      } else {\n        // calculate alphabetSize on demand\n        const splitter: SplitterFunc = getSplitterForColumn(this.column);\n        const stats = getStats(this.column, 1, splitter);\n        alphabetSize = Object.keys(stats.freq).length;\n      }\n      return alphabetSize;\n    } else {\n      switch (this.alphabet) {\n      case ALPHABET.PT:\n        return 20;\n      case ALPHABET.DNA:\n      case ALPHABET.RNA:\n        return 4;\n      case 'NT':\n        console.warn(`Unexpected alphabet 'NT'.`);\n        return 4;\n      default:\n        throw new Error(`Unexpected alphabet '${this.alphabet}'.`);\n      }\n    }\n  }\n\n  public getAlphabetIsMultichar(): boolean {\n    if (this.notation == NOTATION.HELM || this.alphabet == ALPHABET.UN)\n      return 
this.column.getTag(TAGS.alphabetIsMultichar) == 'true';\n    else\n      return false;\n  }\n\n  public isFasta(): boolean { return this.notation === NOTATION.FASTA; }\n\n  public isSeparator(): boolean { return this.notation === NOTATION.SEPARATOR; }\n\n  public isHelm(): boolean { return this.notation === NOTATION.HELM; }\n\n  public isRna(): boolean { return this.alphabet === ALPHABET.RNA; }\n\n  public isDna(): boolean { return this.alphabet === ALPHABET.DNA; }\n\n  public isPeptide(): boolean { return this.alphabet === ALPHABET.PT; }\n\n  public isMsa(): boolean { return this.aligned ? this.aligned.toUpperCase().includes('MSA') : false; }\n\n  /** Associate notation types with the corresponding units */\n  /**\n   * @return {NOTATION}     Notation associated with the units type\n   */\n  protected getNotation(): NOTATION {\n    if (this.units.toLowerCase().startsWith(NOTATION.FASTA))\n      return NOTATION.FASTA;\n    else if (this.units.toLowerCase().startsWith(NOTATION.SEPARATOR))\n      return NOTATION.SEPARATOR;\n    else if (this.units.toLowerCase().startsWith(NOTATION.HELM))\n      return NOTATION.HELM;\n    else\n      throw new Error(`Column '${this.column.name}' has unexpected notation '${this.units}'.`);\n  }\n\n  /**\n   * Create a new empty column of the specified notation type and the same\n   * length as column\n   *\n   * @param {NOTATION} targetNotation\n   * @return {DG.Column}\n   */\n  protected getNewColumn(targetNotation: NOTATION): DG.Column {\n    const col = this.column;\n    const len = col.length;\n    const name = targetNotation.toLowerCase() + '(' + col.name + ')';\n    const newColName = col.dataFrame.columns.getUnusedName(name);\n    const newColumn = DG.Column.fromList('string', newColName, new Array(len).fill(''));\n    newColumn.semType = DG.SEMTYPE.MACROMOLECULE;\n    newColumn.setTag(DG.TAGS.UNITS, targetNotation);\n    newColumn.setTag(DG.TAGS.CELL_RENDERER, 'Macromolecule');\n\n    const srcAligned = 
col.getTag(TAGS.aligned);\n    if (srcAligned)\n      newColumn.setTag(TAGS.aligned, srcAligned);\n\n    const srcAlphabet = col.getTag(TAGS.alphabet);\n    if (srcAlphabet)\n      newColumn.setTag(TAGS.alphabet, srcAlphabet);\n\n    let srcAlphabetSize: string = col.getTag(TAGS.alphabetSize);\n    if (srcAlphabetSize)\n      newColumn.setTag(TAGS.alphabetSize, srcAlphabetSize);\n\n    const srcAlphabetIsMultichar: string = col.getTag(TAGS.alphabetIsMultichar);\n    if (srcAlphabetIsMultichar !== undefined)\n      newColumn.setTag(TAGS.alphabetIsMultichar, srcAlphabetIsMultichar);\n\n    if (targetNotation == NOTATION.HELM) {\n      srcAlphabetSize = this.getAlphabetSize().toString();\n      newColumn.setTag(TAGS.alphabetSize, srcAlphabetSize);\n    }\n\n    return newColumn;\n  }\n\n  /**\n   * Create a new empty column using templateCol as a template\n   *\n   * @param {DG.Column} templateCol  the properties and units of this column are used as a\n   * template to build the new one\n   * @return {DG.Column}\n   */\n  public static getNewColumn(templateCol: DG.Column): DG.Column {\n    const col: UnitsHandler = new UnitsHandler(templateCol);\n    const targetNotation = col.notation;\n    return col.getNewColumn(targetNotation);\n  }\n\n  /**\n   * A helper function checking the validity of the 'units' string\n   *\n   * @param {string} units  the string to be validated\n   * @return {boolean}\n   */\n  public static unitsStringIsValid(units: string): boolean {\n    units = units.toLowerCase();\n    const prefixes = [NOTATION.FASTA, NOTATION.SEPARATOR, NOTATION.HELM];\n    const postfixes = ['rna', 'dna', 'pt'];\n\n    const prefixCriterion = prefixes.some((p) => units.startsWith(p.toLowerCase()));\n    return prefixCriterion;\n  }\n\n  /**\n   * Construct a new column of semantic type MACROMOLECULE from the list of\n   * specified parameters\n   *\n   * @param {number}    len  the length of the new column\n   * @param {string}    name  the name of the new column\n 
  * @param {string}    units  the units of the new column\n   * @return {DG.Column}\n   */\n  public static getNewColumnFromParams(\n    len: number,\n    name: string,\n    units: string\n  ): DG.Column {\n    // WARNING: in this implementation is is impossible to verify the uniqueness\n    // of the new column's name\n    // TODO: verify the validity of units parameter\n    if (!UnitsHandler.unitsStringIsValid(units))\n      throw new Error('Invalid format of \\'units\\' parameter');\n    const newColumn = DG.Column.fromList('string', name, new Array(len).fill(''));\n    newColumn.semType = DG.SEMTYPE.MACROMOLECULE;\n    newColumn.setTag(DG.TAGS.UNITS, units);\n    return newColumn;\n  }\n\n  public constructor(col: DG.Column) {\n    this._column = col;\n    const units = this._column.tags[DG.TAGS.UNITS];\n    if (units !== null)\n      this._units = units;\n    else\n      throw new Error('Units are not specified in column');\n    this._notation = this.getNotation();\n    this._defaultGapSymbol = (this.isFasta()) ? UnitsHandler._defaultGapSymbolsDict.FASTA :\n      (this.isHelm()) ? 
UnitsHandler._defaultGapSymbolsDict.HELM :\n        UnitsHandler._defaultGapSymbolsDict.SEPARATOR;\n\n    if (!this.column.tags.has(TAGS.aligned)) {\n      if (this.isFasta() || this.isSeparator())\n        throw new Error(`For column '${this.column.name}' of notation '${this.notation}' ` +\n          `tag '${TAGS.aligned}' is mandatory.`);\n    }\n\n    // if (!this.column.tags.has(TAGS.alphabetSize)) {\n    //   if (this.isHelm())\n    //     throw new Error(`For column '${this.column.name}' of notation '${this.notation}' ` +\n    //       `tag '${TAGS.alphabetSize}' is mandatory.`);\n    //   else if (['UN'].includes(this.alphabet))\n    //     throw new Error(`For column '${this.column.name}' of alphabet '${this.alphabet}' ` +\n    //       `tag '${TAGS.alphabetSize}' is mandatory.`);\n    // }\n\n    if (!this.column.tags.has(TAGS.alphabetIsMultichar)) {\n      if (this.isHelm())\n        throw new Error(`For column '${this.column.name}' of notation '${this.notation}' ` +\n          `tag '${TAGS.alphabetIsMultichar}' is mandatory.`);\n      else if (['UN'].includes(this.alphabet))\n        throw new Error(`For column '${this.column.name}' of alphabet '${this.alphabet}' ` +\n          `tag '${TAGS.alphabetIsMultichar}' is mandatory.`);\n    }\n  }\n}\n"]}
|
|
@@ -33,4 +33,4 @@ export function getPhylocanvasGlService() {
|
|
|
33
33
|
return svc;
|
|
34
34
|
});
|
|
35
35
|
}
|
|
36
|
-
//# sourceMappingURL=data:application/json;base64,eyJ2ZXJzaW9uIjozLCJmaWxlIjoicGh5bG9jYW52YXMtZ2wtdmlld2VyLmpzIiwic291cmNlUm9vdCI6IiIsInNvdXJjZXMiOlsicGh5bG9jYW52YXMtZ2wtdmlld2VyLnRzIl0sIm5hbWVzIjpbXSwibWFwcGluZ3MiOiI7Ozs7Ozs7OztBQUVBLE9BQU8sS0FBSyxFQUFFLE1BQU0saUJBQWlCLENBQUM7QUFXdEMsTUFBTSxDQUFOLElBQVksY0FRWDtBQVJELFdBQVksY0FBYztJQUN4QixtQ0FBaUIsQ0FBQTtJQUNqQixzREFBc0Q7SUFDdEQsNkNBQTJCLENBQUE7SUFDM0IsaUNBQWUsQ0FBQTtJQUNmLHVDQUFxQixDQUFBO0lBQ3JCLHdEQUF3RDtJQUN4RCwyQ0FBeUIsQ0FBQTtBQUMzQixDQUFDLEVBUlcsY0FBYyxLQUFkLGNBQWMsUUFRekI7QUFzQkQsTUFBTSxPQUFnQix3QkFBd0I7O0FBQzlCLG1DQUFVLEdBQWdDO0lBQ3RELElBQUksRUFBRSxPQUFPO0lBQ2IsSUFBSSxFQUFFLEVBQUMsSUFBSSxFQUFFLE1BQU0sRUFBRSxhQUFhLEVBQUUsQ0FBQyxFQUFFLFFBQVEsRUFBRSxFQUFFLEVBQUM7Q0FDckQsQ0FBQztBQWtCSixNQUFNLFVBQWdCLHVCQUF1Qjs7UUFDM0MsTUFBTSxRQUFRLEdBQUcsRUFBRSxDQUFDLElBQUksQ0FBQyxJQUFJLENBQUMsRUFBQyxPQUFPLEVBQUUsaUJBQWlCLEVBQUUsSUFBSSxFQUFFLHlCQUF5QixFQUFDLENBQUMsQ0FBQztRQUM3RixJQUFJLFFBQVEsQ0FBQyxNQUFNLEtBQUssQ0FBQztZQUN2QixNQUFNLElBQUksS0FBSyxDQUFDLDBFQUEwRSxDQUFDLENBQUM7UUFFOUYsTUFBTSxHQUFHLEdBQTZCLENBQUMsTUFBTSxRQUFRLENBQUMsQ0FBQyxDQUFDLENBQUMsT0FBTyxFQUFFLENBQUMsSUFBSSxFQUFFLENBQUMsQ0FBQyxtQkFBbUIsRUFBOEIsQ0FBQztRQUM3SCxPQUFPLEdBQUcsQ0FBQztJQUNiLENBQUM7Q0FBQSIsInNvdXJjZXNDb250ZW50IjpbImltcG9ydCAqIGFzIGdyb2sgZnJvbSAnZGF0YWdyb2stYXBpL2dyb2snO1xuaW1wb3J0ICogYXMgdWkgZnJvbSAnZGF0YWdyb2stYXBpL3VpJztcbmltcG9ydCAqIGFzIERHIGZyb20gJ2RhdGFncm9rLWFwaS9kZyc7XG5cbmltcG9ydCB7SVZpZXdlcn0gZnJvbSAnLi92aWV3ZXInO1xuaW1wb3J0IHtPYnNlcnZhYmxlfSBmcm9tICdyeGpzJztcbmltcG9ydCB7TWpvbG5pclBvaW50ZXJFdmVudH0gZnJvbSAnbWpvbG5pci5qcyc7XG5pbXBvcnQge1BpY2tpbmdJbmZvfSBmcm9tICdAZGVjay5nbC9jb3JlL3R5cGVkJztcblxuXG5leHBvcnQgdHlwZSBOb2RlU3R5bGVUeXBlID0geyBbcHJvcE5hbWU6IHN0cmluZ106IGFueSB9O1xuZXhwb3J0IHR5cGUgU3R5bGVzVHlwZSA9IHsgW25vZGVOYW1lOiBzdHJpbmddOiBOb2RlU3R5bGVUeXBlIH07XG5cbmV4cG9ydCBlbnVtIFRyZWVUeXBlc05hbWVzIHtcbiAgUmFkaWFsID0gJ1JhZGlhbCcsXG4gIC8qKiBSZWN0YW5ndWxhciBlZGdlcywgbGVhdmVzIGxpc3RlZCBfX3ZlcnRpY2FsbHlfXyAqL1xuICBSZWN0YW5ndWxhciA9ICdSZWN0YW5ndWxhcicsXG
4gIFBvbGFyID0gJ1BvbGFyJyxcbiAgRGlhZ29uYWwgPSAnRGlhZ29uYWwnLFxuICAvKiogUmVjdGFuZ3VsYXIgZWRnZXMsIGxlYXZlcyBsaXN0ZWQgX19ob3Jpem9udGFsbHlfXyAqL1xuICBPcnRob2dvbmFsID0gJ09ydGhvZ29uYWwnLFxufVxuXG5leHBvcnQgaW50ZXJmYWNlIElQaHlsb2NhbnZhc0dsVmlld2VyIGV4dGVuZHMgSVZpZXdlciB7XG4gIGdldCBud2tEZigpOiBERy5EYXRhRnJhbWU7XG5cbiAgc2V0IG53a0RmKHZhbHVlOiBERy5EYXRhRnJhbWUpO1xuXG4gIHNldFByb3BzKHVwZGF0ZXI6IHsgW3Byb3BOYW1lOiBzdHJpbmddOiBhbnkgfSk6IHZvaWQ7XG5cbiAgZ2V0IG9uQWZ0ZXJSZW5kZXIoKTogT2JzZXJ2YWJsZTx7IGdsOiBXZWJHTFJlbmRlcmluZ0NvbnRleHQgfT47XG5cbiAgZ2V0IG9uSG92ZXIoKTogT2JzZXJ2YWJsZTx7IGluZm86IFBpY2tpbmdJbmZvLCBldmVudDogTWpvbG5pclBvaW50ZXJFdmVudCB9Pjtcbn1cblxuLy8gZXhwb3J0IGludGVyZmFjZSBJUGh5bG9jYW52YXNHbFJlbmRlcmVyIHtcbi8vICAgZ2V0IG9uQWZ0ZXJSZW5kZXIoKTogT2JzZXJ2YWJsZTxIVE1MQ2FudmFzRWxlbWVudD47XG4vLyB9XG5cbmV4cG9ydCB0eXBlIFBoeWxvY2FudmFzR2xUYXNrID0geyBuYW1lOiBzdHJpbmcsIGJhY2tDb2xvcjogbnVtYmVyLCBwcm9wczogeyBbcHJvcE5hbWU6IHN0cmluZ106IGFueSB9LCBvbkFmdGVyUmVuZGVyOiBDYW52YXNDYWxsYmFjayB9O1xuXG5leHBvcnQgdHlwZSBDYW52YXNDYWxsYmFjayA9IChjYW52YXM6IEhUTUxDYW52YXNFbGVtZW50KSA9PiB2b2lkO1xuXG5leHBvcnQgYWJzdHJhY3QgY2xhc3MgUGh5bG9jYW52YXNHbFNlcnZpY2VCYXNlIHtcbiAgcHVibGljIHN0YXRpYyBub25lU291cmNlOiB7IHR5cGU6IHN0cmluZywgZGF0YTogYW55IH0gPSB7XG4gICAgdHlwZTogJ2Jpb2pzJyxcbiAgICBkYXRhOiB7bmFtZTogJ25vbmUnLCBicmFuY2hfbGVuZ3RoOiAxLCBjaGlsZHJlbjogW119XG4gIH07XG5cbiAgLyoqIFF1ZXVlcyBQaHlsb2NhbnZhc0dMIHJlbmRlciB0YXNrXG4gICAqIEBwYXJhbSBrZXkgIFNwZWNpZnkgdG8gc2tpcCBwcmV2aW91c2x5IHF1ZXVlZCB0YXNrcyB3aXRoIHRoZSBzYW1lIGtleVxuICAgKi9cbiAgYWJzdHJhY3QgcmVuZGVyKGFyZ3M6IFBoeWxvY2FudmFzR2xUYXNrLCBrZXk/
|
|
36
|
+
//# sourceMappingURL=data:application/json;base64,eyJ2ZXJzaW9uIjozLCJmaWxlIjoicGh5bG9jYW52YXMtZ2wtdmlld2VyLmpzIiwic291cmNlUm9vdCI6IiIsInNvdXJjZXMiOlsicGh5bG9jYW52YXMtZ2wtdmlld2VyLnRzIl0sIm5hbWVzIjpbXSwibWFwcGluZ3MiOiI7Ozs7Ozs7OztBQUVBLE9BQU8sS0FBSyxFQUFFLE1BQU0saUJBQWlCLENBQUM7QUFXdEMsTUFBTSxDQUFOLElBQVksY0FRWDtBQVJELFdBQVksY0FBYztJQUN4QixtQ0FBaUIsQ0FBQTtJQUNqQixzREFBc0Q7SUFDdEQsNkNBQTJCLENBQUE7SUFDM0IsaUNBQWUsQ0FBQTtJQUNmLHVDQUFxQixDQUFBO0lBQ3JCLHdEQUF3RDtJQUN4RCwyQ0FBeUIsQ0FBQTtBQUMzQixDQUFDLEVBUlcsY0FBYyxLQUFkLGNBQWMsUUFRekI7QUFzQkQsTUFBTSxPQUFnQix3QkFBd0I7O0FBQzlCLG1DQUFVLEdBQWdDO0lBQ3RELElBQUksRUFBRSxPQUFPO0lBQ2IsSUFBSSxFQUFFLEVBQUMsSUFBSSxFQUFFLE1BQU0sRUFBRSxhQUFhLEVBQUUsQ0FBQyxFQUFFLFFBQVEsRUFBRSxFQUFFLEVBQUM7Q0FDckQsQ0FBQztBQWtCSixNQUFNLFVBQWdCLHVCQUF1Qjs7UUFDM0MsTUFBTSxRQUFRLEdBQUcsRUFBRSxDQUFDLElBQUksQ0FBQyxJQUFJLENBQUMsRUFBQyxPQUFPLEVBQUUsaUJBQWlCLEVBQUUsSUFBSSxFQUFFLHlCQUF5QixFQUFDLENBQUMsQ0FBQztRQUM3RixJQUFJLFFBQVEsQ0FBQyxNQUFNLEtBQUssQ0FBQztZQUN2QixNQUFNLElBQUksS0FBSyxDQUFDLDBFQUEwRSxDQUFDLENBQUM7UUFFOUYsTUFBTSxHQUFHLEdBQTZCLENBQUMsTUFBTSxRQUFRLENBQUMsQ0FBQyxDQUFDLENBQUMsT0FBTyxFQUFFLENBQUMsSUFBSSxFQUFFLENBQUMsQ0FBQyxtQkFBbUIsRUFBOEIsQ0FBQztRQUM3SCxPQUFPLEdBQUcsQ0FBQztJQUNiLENBQUM7Q0FBQSIsInNvdXJjZXNDb250ZW50IjpbImltcG9ydCAqIGFzIGdyb2sgZnJvbSAnZGF0YWdyb2stYXBpL2dyb2snO1xuaW1wb3J0ICogYXMgdWkgZnJvbSAnZGF0YWdyb2stYXBpL3VpJztcbmltcG9ydCAqIGFzIERHIGZyb20gJ2RhdGFncm9rLWFwaS9kZyc7XG5cbmltcG9ydCB7SVZpZXdlcn0gZnJvbSAnLi92aWV3ZXInO1xuaW1wb3J0IHtPYnNlcnZhYmxlfSBmcm9tICdyeGpzJztcbmltcG9ydCB7TWpvbG5pclBvaW50ZXJFdmVudH0gZnJvbSAnbWpvbG5pci5qcyc7XG5pbXBvcnQge1BpY2tpbmdJbmZvfSBmcm9tICdAZGVjay5nbC9jb3JlL3R5cGVkJztcblxuXG5leHBvcnQgdHlwZSBOb2RlU3R5bGVUeXBlID0geyBbcHJvcE5hbWU6IHN0cmluZ106IGFueSB9O1xuZXhwb3J0IHR5cGUgU3R5bGVzVHlwZSA9IHsgW25vZGVOYW1lOiBzdHJpbmddOiBOb2RlU3R5bGVUeXBlIH07XG5cbmV4cG9ydCBlbnVtIFRyZWVUeXBlc05hbWVzIHtcbiAgUmFkaWFsID0gJ1JhZGlhbCcsXG4gIC8qKiBSZWN0YW5ndWxhciBlZGdlcywgbGVhdmVzIGxpc3RlZCBfX3ZlcnRpY2FsbHlfXyAqL1xuICBSZWN0YW5ndWxhciA9ICdSZWN0YW5ndWxhcicsXG
4gIFBvbGFyID0gJ1BvbGFyJyxcbiAgRGlhZ29uYWwgPSAnRGlhZ29uYWwnLFxuICAvKiogUmVjdGFuZ3VsYXIgZWRnZXMsIGxlYXZlcyBsaXN0ZWQgX19ob3Jpem9udGFsbHlfXyAqL1xuICBPcnRob2dvbmFsID0gJ09ydGhvZ29uYWwnLFxufVxuXG5leHBvcnQgaW50ZXJmYWNlIElQaHlsb2NhbnZhc0dsVmlld2VyIGV4dGVuZHMgSVZpZXdlciB7XG4gIGdldCBud2tEZigpOiBERy5EYXRhRnJhbWU7XG5cbiAgc2V0IG53a0RmKHZhbHVlOiBERy5EYXRhRnJhbWUpO1xuXG4gIHNldFByb3BzKHVwZGF0ZXI6IHsgW3Byb3BOYW1lOiBzdHJpbmddOiBhbnkgfSk6IHZvaWQ7XG5cbiAgZ2V0IG9uQWZ0ZXJSZW5kZXIoKTogT2JzZXJ2YWJsZTx7IGdsOiBXZWJHTFJlbmRlcmluZ0NvbnRleHQgfT47XG5cbiAgZ2V0IG9uSG92ZXIoKTogT2JzZXJ2YWJsZTx7IGluZm86IFBpY2tpbmdJbmZvLCBldmVudDogTWpvbG5pclBvaW50ZXJFdmVudCB9Pjtcbn1cblxuLy8gZXhwb3J0IGludGVyZmFjZSBJUGh5bG9jYW52YXNHbFJlbmRlcmVyIHtcbi8vICAgZ2V0IG9uQWZ0ZXJSZW5kZXIoKTogT2JzZXJ2YWJsZTxIVE1MQ2FudmFzRWxlbWVudD47XG4vLyB9XG5cbmV4cG9ydCB0eXBlIFBoeWxvY2FudmFzR2xUYXNrID0geyBuYW1lOiBzdHJpbmcsIGJhY2tDb2xvcjogbnVtYmVyLCBwcm9wczogeyBbcHJvcE5hbWU6IHN0cmluZ106IGFueSB9LCBvbkFmdGVyUmVuZGVyOiBDYW52YXNDYWxsYmFjayB9O1xuXG5leHBvcnQgdHlwZSBDYW52YXNDYWxsYmFjayA9IChjYW52YXM6IEhUTUxDYW52YXNFbGVtZW50KSA9PiB2b2lkO1xuXG5leHBvcnQgYWJzdHJhY3QgY2xhc3MgUGh5bG9jYW52YXNHbFNlcnZpY2VCYXNlIHtcbiAgcHVibGljIHN0YXRpYyBub25lU291cmNlOiB7IHR5cGU6IHN0cmluZywgZGF0YTogYW55IH0gPSB7XG4gICAgdHlwZTogJ2Jpb2pzJyxcbiAgICBkYXRhOiB7bmFtZTogJ25vbmUnLCBicmFuY2hfbGVuZ3RoOiAxLCBjaGlsZHJlbjogW119XG4gIH07XG5cbiAgLyoqIFF1ZXVlcyBQaHlsb2NhbnZhc0dMIHJlbmRlciB0YXNrXG4gICAqIEBwYXJhbSBrZXkgIFNwZWNpZnkgdG8gc2tpcCBwcmV2aW91c2x5IHF1ZXVlZCB0YXNrcyB3aXRoIHRoZSBzYW1lIGtleVxuICAgKi9cbiAgYWJzdHJhY3QgcmVuZGVyKGFyZ3M6IFBoeWxvY2FudmFzR2xUYXNrLCBrZXk/OiBrZXlvZiBhbnkpOiB2b2lkO1xuXG4gIC8qKiBEZWZhdWx0IGltcGxlbWVudGF0aW9uIG9mIHJlbmRlcmluZyB0cmVlIG9uIGdyaWQgY2VsbFxuICAgKiBAcGFyYW0gZ0N0eCAgICBDb250ZXh0IHRvIGRyYXcgb24gZ3JpZFxuICAgKiBAcGFyYW0gYmQgICAgICBCb3VuZCByZWN0IHRvIGNsaXAgZHJhd2luZyBvbiB0YXNrIG1vbWVudFxuICAgKiBAcGFyYW0gZ0NlbGwgICBHcmlkIGNlbGwgdG8gZHJhd1xuICAgKiBAcGFyYW0gY2FudmFzICBJbWFnZSBvZiB0aGUgdHJlZSByZW5kZXJlZFxuICAgKi9cbiAgYWJzdHJhY3QgcmVuZGVyT25HcmlkQ2VsbChcbiAgICBnQ3R4OiBDYW52YX
NSZW5kZXJpbmdDb250ZXh0MkQsIGJkOiBERy5SZWN0LCBnQ2VsbDogREcuR3JpZENlbGwsIGNhbnZhczogQ2FudmFzSW1hZ2VTb3VyY2UpOiB2b2lkO1xufVxuXG5cbmV4cG9ydCBhc3luYyBmdW5jdGlvbiBnZXRQaHlsb2NhbnZhc0dsU2VydmljZSgpOiBQcm9taXNlPFBoeWxvY2FudmFzR2xTZXJ2aWNlQmFzZT4ge1xuICBjb25zdCBmdW5jTGlzdCA9IERHLkZ1bmMuZmluZCh7cGFja2FnZTogJ1BoeWxvVHJlZVZpZXdlcicsIG5hbWU6ICdnZXRQaHlsb2NhbnZhc0dsU2VydmljZSd9KTtcbiAgaWYgKGZ1bmNMaXN0Lmxlbmd0aCA9PT0gMClcbiAgICB0aHJvdyBuZXcgRXJyb3IoJ1BhY2thZ2UgXCJQaHlsb1RyZWVWaWV3ZXJcIlwiIG11c3QgYmUgaW5zdGFsbGVkIGZvciBQaHlsb2NhbnZhc0dMIHNlcnZpY2VzLicpO1xuXG4gIGNvbnN0IHN2YzogUGh5bG9jYW52YXNHbFNlcnZpY2VCYXNlID0gKGF3YWl0IGZ1bmNMaXN0WzBdLnByZXBhcmUoKS5jYWxsKCkpLmdldE91dHB1dFBhcmFtVmFsdWUoKSBhcyBQaHlsb2NhbnZhc0dsU2VydmljZUJhc2U7XG4gIHJldHVybiBzdmM7XG59XG4iXX0=
|