@datagrok-libraries/bio 4.4.7 → 5.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/index.d.ts +16 -0
- package/index.d.ts.map +1 -0
- package/index.js +14 -0
- package/index.ts +62 -0
- package/package.json +1 -1
- package/src/utils/cell-renderer.d.ts.map +1 -1
- package/src/utils/cell-renderer.js +3 -3
- package/src/utils/macromolecule.d.ts +86 -0
- package/src/utils/macromolecule.d.ts.map +1 -0
- package/src/utils/macromolecule.js +190 -0
- package/src/utils/notation-converter.d.ts +2 -2
- package/src/utils/notation-converter.d.ts.map +1 -1
- package/src/utils/notation-converter.js +4 -4
- package/src/utils/splitter.d.ts.map +1 -1
- package/src/utils/splitter.js +4 -5
- package/src/utils/units-handler.d.ts +1 -19
- package/src/utils/units-handler.d.ts.map +1 -1
- package/src/utils/units-handler.js +24 -39
- package/src/viewers/phylocanvas-gl-viewer.d.ts +7 -0
- package/src/viewers/phylocanvas-gl-viewer.d.ts.map +1 -0
- package/src/viewers/phylocanvas-gl-viewer.js +2 -0
- package/src/viewers/vd-regions-viewer.d.ts +2 -2
- package/src/viewers/vd-regions-viewer.d.ts.map +1 -1
- package/src/viewers/vd-regions-viewer.js +1 -1
- package/src/viewers/viewer.d.ts +9 -0
- package/src/viewers/viewer.d.ts.map +1 -0
- package/src/viewers/viewer.js +2 -0
- package/src/viewers/web-logo.d.ts +8 -70
- package/src/viewers/web-logo.d.ts.map +1 -1
- package/src/viewers/web-logo.js +68 -246
package/index.d.ts
ADDED
|
@@ -0,0 +1,16 @@
|
|
|
1
|
+
import { AminoacidsPalettes } from './src/aminoacids';
|
|
2
|
+
import { NucleotidesPalettes } from './src/nucleotides';
|
|
3
|
+
import { SeqPalette, SeqPaletteBase } from './src/seq-palettes';
|
|
4
|
+
import { UnknownSeqPalettes } from './src/unknown';
|
|
5
|
+
import { DrawStyle, printLeftOrCentered } from './src/utils/cell-renderer';
|
|
6
|
+
import { FastaFileHandler } from './src/utils/fasta-handler';
|
|
7
|
+
import { getSplitter, splitterAsFasta, getSplitterForColumn, SplitterFunc, monomerToShort, splitterAsHelm, getStats, pickUpPalette, getPaletteByType, getAlphabetSimilarity, ALPHABET, NOTATION, TAGS } from './src/utils/macromolecule';
|
|
8
|
+
import { NotationConverter } from './src/utils/notation-converter';
|
|
9
|
+
import { splitAlignedSequences } from './src/utils/splitter';
|
|
10
|
+
import { UnitsHandler } from './src/utils/units-handler';
|
|
11
|
+
import { VdRegion, VdRegionType } from './src/vd-regions';
|
|
12
|
+
import { IPhylocanvasGlViewer } from './src/viewers/phylocanvas-gl-viewer';
|
|
13
|
+
import { IVdRegionsViewer } from './src/viewers/vd-regions-viewer';
|
|
14
|
+
import { PositionInfo, PositionMonomerInfo, WebLogo } from './src/viewers/web-logo';
|
|
15
|
+
export { ALPHABET, NOTATION, TAGS, NotationConverter, SplitterFunc, getStats, getAlphabetSimilarity, getSplitter, splitterAsFasta, splitterAsHelm, getSplitterForColumn, monomerToShort, splitAlignedSequences, SeqPalette, SeqPaletteBase, AminoacidsPalettes, NucleotidesPalettes, UnknownSeqPalettes, pickUpPalette, getPaletteByType, PositionInfo, PositionMonomerInfo, WebLogo, UnitsHandler, DrawStyle, printLeftOrCentered, FastaFileHandler, VdRegionType, VdRegion, IVdRegionsViewer, IPhylocanvasGlViewer, };
|
|
16
|
+
//# sourceMappingURL=index.d.ts.map
|
package/index.d.ts.map
ADDED
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"index.d.ts","sourceRoot":"","sources":["index.ts"],"names":[],"mappings":"AAAA,OAAO,EAAC,kBAAkB,EAAC,MAAM,kBAAkB,CAAC;AACpD,OAAO,EAAC,mBAAmB,EAAC,MAAM,mBAAmB,CAAC;AACtD,OAAO,EAAC,UAAU,EAAE,cAAc,EAAC,MAAM,oBAAoB,CAAC;AAC9D,OAAO,EAAC,kBAAkB,EAAC,MAAM,eAAe,CAAC;AACjD,OAAO,EAAC,SAAS,EAAE,mBAAmB,EAAC,MAAM,2BAA2B,CAAC;AACzE,OAAO,EAAC,gBAAgB,EAAC,MAAM,2BAA2B,CAAC;AAC3D,OAAO,EACL,WAAW,EACX,eAAe,EACf,oBAAoB,EACpB,YAAY,EACZ,cAAc,EACd,cAAc,EACd,QAAQ,EACR,aAAa,EACb,gBAAgB,EAChB,qBAAqB,EACrB,QAAQ,EACR,QAAQ,EACR,IAAI,EACL,MAAM,2BAA2B,CAAC;AACnC,OAAO,EAAC,iBAAiB,EAAC,MAAM,gCAAgC,CAAC;AACjE,OAAO,EAAC,qBAAqB,EAAC,MAAM,sBAAsB,CAAC;AAC3D,OAAO,EAAC,YAAY,EAAC,MAAM,2BAA2B,CAAC;AACvD,OAAO,EAAC,QAAQ,EAAE,YAAY,EAAC,MAAM,kBAAkB,CAAC;AACxD,OAAO,EAAC,oBAAoB,EAAC,MAAM,qCAAqC,CAAC;AACzE,OAAO,EAAC,gBAAgB,EAAC,MAAM,iCAAiC,CAAC;AACjE,OAAO,EAAC,YAAY,EAAE,mBAAmB,EAAE,OAAO,EAAC,MAAM,wBAAwB,CAAC;AAElF,OAAO,EACL,QAAQ,EACR,QAAQ,EACR,IAAI,EACJ,iBAAiB,EACjB,YAAY,EACZ,QAAQ,EACR,qBAAqB,EACrB,WAAW,EACX,eAAe,EACf,cAAc,EACd,oBAAoB,EACpB,cAAc,EACd,qBAAqB,EACrB,UAAU,EACV,cAAc,EACd,kBAAkB,EAClB,mBAAmB,EACnB,kBAAkB,EAClB,aAAa,EACb,gBAAgB,EAChB,YAAY,EACZ,mBAAmB,EACnB,OAAO,EACP,YAAY,EACZ,SAAS,EACT,mBAAmB,EACnB,gBAAgB,EAChB,YAAY,EACZ,QAAQ,EACR,gBAAgB,EAChB,oBAAoB,GACrB,CAAC"}
|
package/index.js
ADDED
|
@@ -0,0 +1,14 @@
|
|
|
1
|
+
import { AminoacidsPalettes } from './src/aminoacids';
|
|
2
|
+
import { NucleotidesPalettes } from './src/nucleotides';
|
|
3
|
+
import { SeqPaletteBase } from './src/seq-palettes';
|
|
4
|
+
import { UnknownSeqPalettes } from './src/unknown';
|
|
5
|
+
import { DrawStyle, printLeftOrCentered } from './src/utils/cell-renderer';
|
|
6
|
+
import { FastaFileHandler } from './src/utils/fasta-handler';
|
|
7
|
+
import { getSplitter, splitterAsFasta, getSplitterForColumn, monomerToShort, splitterAsHelm, getStats, pickUpPalette, getPaletteByType, getAlphabetSimilarity } from './src/utils/macromolecule';
|
|
8
|
+
import { NotationConverter } from './src/utils/notation-converter';
|
|
9
|
+
import { splitAlignedSequences } from './src/utils/splitter';
|
|
10
|
+
import { UnitsHandler } from './src/utils/units-handler';
|
|
11
|
+
import { VdRegion, VdRegionType } from './src/vd-regions';
|
|
12
|
+
import { PositionInfo, PositionMonomerInfo, WebLogo } from './src/viewers/web-logo';
|
|
13
|
+
export { NotationConverter, getStats, getAlphabetSimilarity, getSplitter, splitterAsFasta, splitterAsHelm, getSplitterForColumn, monomerToShort, splitAlignedSequences, SeqPaletteBase, AminoacidsPalettes, NucleotidesPalettes, UnknownSeqPalettes, pickUpPalette, getPaletteByType, PositionInfo, PositionMonomerInfo, WebLogo, UnitsHandler, DrawStyle, printLeftOrCentered, FastaFileHandler, VdRegionType, VdRegion, };
|
|
14
|
+
//# sourceMappingURL=data:application/json;base64,eyJ2ZXJzaW9uIjozLCJmaWxlIjoiaW5kZXguanMiLCJzb3VyY2VSb290IjoiIiwic291cmNlcyI6WyJpbmRleC50cyJdLCJuYW1lcyI6W10sIm1hcHBpbmdzIjoiQUFBQSxPQUFPLEVBQUMsa0JBQWtCLEVBQUMsTUFBTSxrQkFBa0IsQ0FBQztBQUNwRCxPQUFPLEVBQUMsbUJBQW1CLEVBQUMsTUFBTSxtQkFBbUIsQ0FBQztBQUN0RCxPQUFPLEVBQWEsY0FBYyxFQUFDLE1BQU0sb0JBQW9CLENBQUM7QUFDOUQsT0FBTyxFQUFDLGtCQUFrQixFQUFDLE1BQU0sZUFBZSxDQUFDO0FBQ2pELE9BQU8sRUFBQyxTQUFTLEVBQUUsbUJBQW1CLEVBQUMsTUFBTSwyQkFBMkIsQ0FBQztBQUN6RSxPQUFPLEVBQUMsZ0JBQWdCLEVBQUMsTUFBTSwyQkFBMkIsQ0FBQztBQUMzRCxPQUFPLEVBQ0wsV0FBVyxFQUNYLGVBQWUsRUFDZixvQkFBb0IsRUFFcEIsY0FBYyxFQUNkLGNBQWMsRUFDZCxRQUFRLEVBQ1IsYUFBYSxFQUNiLGdCQUFnQixFQUNoQixxQkFBcUIsRUFJdEIsTUFBTSwyQkFBMkIsQ0FBQztBQUNuQyxPQUFPLEVBQUMsaUJBQWlCLEVBQUMsTUFBTSxnQ0FBZ0MsQ0FBQztBQUNqRSxPQUFPLEVBQUMscUJBQXFCLEVBQUMsTUFBTSxzQkFBc0IsQ0FBQztBQUMzRCxPQUFPLEVBQUMsWUFBWSxFQUFDLE1BQU0sMkJBQTJCLENBQUM7QUFDdkQsT0FBTyxFQUFDLFFBQVEsRUFBRSxZQUFZLEVBQUMsTUFBTSxrQkFBa0IsQ0FBQztBQUd4RCxPQUFPLEVBQUMsWUFBWSxFQUFFLG1CQUFtQixFQUFFLE9BQU8sRUFBQyxNQUFNLHdCQUF3QixDQUFDO0FBRWxGLE9BQU8sRUFJTCxpQkFBaUIsRUFFakIsUUFBUSxFQUNSLHFCQUFxQixFQUNyQixXQUFXLEVBQ1gsZUFBZSxFQUNmLGNBQWMsRUFDZCxvQkFBb0IsRUFDcEIsY0FBYyxFQUNkLHFCQUFxQixFQUVyQixjQUFjLEVBQ2Qsa0JBQWtCLEVBQ2xCLG1CQUFtQixFQUNuQixrQkFBa0IsRUFDbEIsYUFBYSxFQUNiLGdCQUFnQixFQUNoQixZQUFZLEVBQ1osbUJBQW1CLEVBQ25CLE9BQU8sRUFDUCxZQUFZLEVBQ1osU0FBUyxFQUNULG1CQUFtQixFQUNuQixnQkFBZ0IsRUFDaEIsWUFBWSxFQUNaLFFBQVEsR0FHVCxDQUFDIiwic291cmNlc0NvbnRlbnQiOlsiaW1wb3J0IHtBbWlub2FjaWRzUGFsZXR0ZXN9IGZyb20gJy4vc3JjL2FtaW5vYWNpZHMnO1xuaW1wb3J0IHtOdWNsZW90aWRlc1BhbGV0dGVzfSBmcm9tICcuL3NyYy9udWNsZW90aWRlcyc7XG5pbXBvcnQge1NlcVBhbGV0dGUsIFNlcVBhbGV0dGVCYXNlfSBmcm9tICcuL3NyYy9zZXEtcGFsZXR0ZXMnO1xuaW1wb3J0IHtVbmtub3duU2VxUGFsZXR0ZXN9IGZyb20gJy4vc3JjL3Vua25vd24nO1xuaW1wb3J0IHtEcmF3U3R5bGUsIHByaW50TGVmdE9yQ2VudGVyZWR9IGZyb20gJy4vc3JjL3V0aWxzL2NlbGwtcmVuZGVyZXInO1xuaW1wb3J0IHtGYXN0YUZpbGVIYW5kbGVyfSBmcm9tICcuL3NyYy91dGlscy9mYXN0YS1oYW5kbGVyJztcbmltcG9ydCB7XG4gIGdldFNwbGl0dGVyLFxuICBzcGxpdHRlck
FzRmFzdGEsXG4gIGdldFNwbGl0dGVyRm9yQ29sdW1uLFxuICBTcGxpdHRlckZ1bmMsXG4gIG1vbm9tZXJUb1Nob3J0LFxuICBzcGxpdHRlckFzSGVsbSxcbiAgZ2V0U3RhdHMsXG4gIHBpY2tVcFBhbGV0dGUsXG4gIGdldFBhbGV0dGVCeVR5cGUsXG4gIGdldEFscGhhYmV0U2ltaWxhcml0eSxcbiAgQUxQSEFCRVQsXG4gIE5PVEFUSU9OLFxuICBUQUdTXG59IGZyb20gJy4vc3JjL3V0aWxzL21hY3JvbW9sZWN1bGUnO1xuaW1wb3J0IHtOb3RhdGlvbkNvbnZlcnRlcn0gZnJvbSAnLi9zcmMvdXRpbHMvbm90YXRpb24tY29udmVydGVyJztcbmltcG9ydCB7c3BsaXRBbGlnbmVkU2VxdWVuY2VzfSBmcm9tICcuL3NyYy91dGlscy9zcGxpdHRlcic7XG5pbXBvcnQge1VuaXRzSGFuZGxlcn0gZnJvbSAnLi9zcmMvdXRpbHMvdW5pdHMtaGFuZGxlcic7XG5pbXBvcnQge1ZkUmVnaW9uLCBWZFJlZ2lvblR5cGV9IGZyb20gJy4vc3JjL3ZkLXJlZ2lvbnMnO1xuaW1wb3J0IHtJUGh5bG9jYW52YXNHbFZpZXdlcn0gZnJvbSAnLi9zcmMvdmlld2Vycy9waHlsb2NhbnZhcy1nbC12aWV3ZXInO1xuaW1wb3J0IHtJVmRSZWdpb25zVmlld2VyfSBmcm9tICcuL3NyYy92aWV3ZXJzL3ZkLXJlZ2lvbnMtdmlld2VyJztcbmltcG9ydCB7UG9zaXRpb25JbmZvLCBQb3NpdGlvbk1vbm9tZXJJbmZvLCBXZWJMb2dvfSBmcm9tICcuL3NyYy92aWV3ZXJzL3dlYi1sb2dvJztcblxuZXhwb3J0IHtcbiAgQUxQSEFCRVQsXG4gIE5PVEFUSU9OLFxuICBUQUdTLFxuICBOb3RhdGlvbkNvbnZlcnRlcixcbiAgU3BsaXR0ZXJGdW5jLFxuICBnZXRTdGF0cyxcbiAgZ2V0QWxwaGFiZXRTaW1pbGFyaXR5LFxuICBnZXRTcGxpdHRlcixcbiAgc3BsaXR0ZXJBc0Zhc3RhLFxuICBzcGxpdHRlckFzSGVsbSxcbiAgZ2V0U3BsaXR0ZXJGb3JDb2x1bW4sXG4gIG1vbm9tZXJUb1Nob3J0LFxuICBzcGxpdEFsaWduZWRTZXF1ZW5jZXMsXG4gIFNlcVBhbGV0dGUsXG4gIFNlcVBhbGV0dGVCYXNlLFxuICBBbWlub2FjaWRzUGFsZXR0ZXMsXG4gIE51Y2xlb3RpZGVzUGFsZXR0ZXMsXG4gIFVua25vd25TZXFQYWxldHRlcyxcbiAgcGlja1VwUGFsZXR0ZSxcbiAgZ2V0UGFsZXR0ZUJ5VHlwZSxcbiAgUG9zaXRpb25JbmZvLFxuICBQb3NpdGlvbk1vbm9tZXJJbmZvLFxuICBXZWJMb2dvLFxuICBVbml0c0hhbmRsZXIsXG4gIERyYXdTdHlsZSxcbiAgcHJpbnRMZWZ0T3JDZW50ZXJlZCxcbiAgRmFzdGFGaWxlSGFuZGxlcixcbiAgVmRSZWdpb25UeXBlLFxuICBWZFJlZ2lvbixcbiAgSVZkUmVnaW9uc1ZpZXdlcixcbiAgSVBoeWxvY2FudmFzR2xWaWV3ZXIsXG59OyJdfQ==
|
package/index.ts
ADDED
|
@@ -0,0 +1,62 @@
|
|
|
1
|
+
import {AminoacidsPalettes} from './src/aminoacids';
|
|
2
|
+
import {NucleotidesPalettes} from './src/nucleotides';
|
|
3
|
+
import {SeqPalette, SeqPaletteBase} from './src/seq-palettes';
|
|
4
|
+
import {UnknownSeqPalettes} from './src/unknown';
|
|
5
|
+
import {DrawStyle, printLeftOrCentered} from './src/utils/cell-renderer';
|
|
6
|
+
import {FastaFileHandler} from './src/utils/fasta-handler';
|
|
7
|
+
import {
|
|
8
|
+
getSplitter,
|
|
9
|
+
splitterAsFasta,
|
|
10
|
+
getSplitterForColumn,
|
|
11
|
+
SplitterFunc,
|
|
12
|
+
monomerToShort,
|
|
13
|
+
splitterAsHelm,
|
|
14
|
+
getStats,
|
|
15
|
+
pickUpPalette,
|
|
16
|
+
getPaletteByType,
|
|
17
|
+
getAlphabetSimilarity,
|
|
18
|
+
ALPHABET,
|
|
19
|
+
NOTATION,
|
|
20
|
+
TAGS
|
|
21
|
+
} from './src/utils/macromolecule';
|
|
22
|
+
import {NotationConverter} from './src/utils/notation-converter';
|
|
23
|
+
import {splitAlignedSequences} from './src/utils/splitter';
|
|
24
|
+
import {UnitsHandler} from './src/utils/units-handler';
|
|
25
|
+
import {VdRegion, VdRegionType} from './src/vd-regions';
|
|
26
|
+
import {IPhylocanvasGlViewer} from './src/viewers/phylocanvas-gl-viewer';
|
|
27
|
+
import {IVdRegionsViewer} from './src/viewers/vd-regions-viewer';
|
|
28
|
+
import {PositionInfo, PositionMonomerInfo, WebLogo} from './src/viewers/web-logo';
|
|
29
|
+
|
|
30
|
+
export {
|
|
31
|
+
ALPHABET,
|
|
32
|
+
NOTATION,
|
|
33
|
+
TAGS,
|
|
34
|
+
NotationConverter,
|
|
35
|
+
SplitterFunc,
|
|
36
|
+
getStats,
|
|
37
|
+
getAlphabetSimilarity,
|
|
38
|
+
getSplitter,
|
|
39
|
+
splitterAsFasta,
|
|
40
|
+
splitterAsHelm,
|
|
41
|
+
getSplitterForColumn,
|
|
42
|
+
monomerToShort,
|
|
43
|
+
splitAlignedSequences,
|
|
44
|
+
SeqPalette,
|
|
45
|
+
SeqPaletteBase,
|
|
46
|
+
AminoacidsPalettes,
|
|
47
|
+
NucleotidesPalettes,
|
|
48
|
+
UnknownSeqPalettes,
|
|
49
|
+
pickUpPalette,
|
|
50
|
+
getPaletteByType,
|
|
51
|
+
PositionInfo,
|
|
52
|
+
PositionMonomerInfo,
|
|
53
|
+
WebLogo,
|
|
54
|
+
UnitsHandler,
|
|
55
|
+
DrawStyle,
|
|
56
|
+
printLeftOrCentered,
|
|
57
|
+
FastaFileHandler,
|
|
58
|
+
VdRegionType,
|
|
59
|
+
VdRegion,
|
|
60
|
+
IVdRegionsViewer,
|
|
61
|
+
IPhylocanvasGlViewer,
|
|
62
|
+
};
|
package/package.json
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"cell-renderer.d.ts","sourceRoot":"","sources":["cell-renderer.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,MAAM,iBAAiB,CAAC;
|
|
1
|
+
{"version":3,"file":"cell-renderer.d.ts","sourceRoot":"","sources":["cell-renderer.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,MAAM,iBAAiB,CAAC;AAStC,oBAAY,SAAS;IACnB,GAAG,QAAQ;IACX,OAAO,YAAY;CACpB;AAED;;;;;;;;;;;;;;;;;;;;;;GAsBG;AACH,wBAAgB,mBAAmB,CACjC,CAAC,EAAE,MAAM,EAAE,CAAC,EAAE,MAAM,EAAE,CAAC,EAAE,MAAM,EAAE,CAAC,EAAE,MAAM,EAC1C,CAAC,EAAE,wBAAwB,EAAE,CAAC,EAAE,MAAM,EAAE,KAAK,GAAE,MAAuB,EACtE,KAAK,GAAE,MAAU,EAAE,IAAI,UAAQ,EAAE,gBAAgB,GAAE,MAAY,EAC/D,SAAS,GAAE,MAAW,EAAE,IAAI,GAAE,OAAe,EAAE,SAAS,GAAE,SAA6B,EAAE,OAAO,GAAE;IAAE,CAAC,KAAK,EAAE,MAAM,GAAG,MAAM,CAAA;CAAO,EAAE,OAAO,GAAE,MAAU,EAAE,QAAQ,GAAE,EAAE,CAAC,QAAQ,GAAG,IAAW,EAAE,iBAAiB,GAAE,MAAM,EAAO,EAAE,kBAAkB,GAAE,MAAM,GAAG,IAAW,GAAG,MAAM,CA0DlR"}
|
|
@@ -1,8 +1,8 @@
|
|
|
1
|
-
import {
|
|
1
|
+
import { monomerToShort } from './macromolecule';
|
|
2
2
|
const undefinedColor = 'rgb(100,100,100)';
|
|
3
3
|
const grayColor = '#808080';
|
|
4
4
|
const blackColor = 'rgb(0,0,0)';
|
|
5
|
-
const monomerToShortFunction =
|
|
5
|
+
const monomerToShortFunction = monomerToShort;
|
|
6
6
|
export var DrawStyle;
|
|
7
7
|
(function (DrawStyle) {
|
|
8
8
|
DrawStyle["MSA"] = "MSA";
|
|
@@ -86,4 +86,4 @@ export function printLeftOrCentered(x, y, w, h, g, s, color = undefinedColor, pi
|
|
|
86
86
|
return x + dx + maxColorTextSize;
|
|
87
87
|
}
|
|
88
88
|
}
|
|
89
|
-
//# sourceMappingURL=data:application/json;base64,
|
|
89
|
+
//# sourceMappingURL=data:application/json;base64,{"version":3,"file":"cell-renderer.js","sourceRoot":"","sources":["cell-renderer.ts"],"names":[],"mappings":"AAEA,OAAO,EAAC,cAAc,EAAC,MAAM,iBAAiB,CAAC;AAE/C,MAAM,cAAc,GAAG,kBAAkB,CAAC;AAC1C,MAAM,SAAS,GAAG,SAAS,CAAC;AAC5B,MAAM,UAAU,GAAG,YAAY,CAAC;AAChC,MAAM,sBAAsB,GAA0D,cAAc,CAAC;AAErG,MAAM,CAAN,IAAY,SAGX;AAHD,WAAY,SAAS;IACnB,wBAAW,CAAA;IACX,gCAAmB,CAAA;AACrB,CAAC,EAHW,SAAS,KAAT,SAAS,QAGpB;AAED;;;;;;;;;;;;;;;;;;;;;;GAsBG;AACH,MAAM,UAAU,mBAAmB,CACjC,CAAS,EAAE,CAAS,EAAE,CAAS,EAAE,CAAS,EAC1C,CAA2B,EAAE,CAAS,EAAE,QAAgB,cAAc,EACtE,QAAgB,CAAC,EAAE,IAAI,GAAG,KAAK,EAAE,mBAA2B,GAAG,EAC/D,YAAoB,EAAE,EAAE,OAAgB,KAAK,EAAE,YAAuB,SAAS,CAAC,OAAO,EAAE,UAAuC,EAAE,EAAE,UAAkB,CAAC,EAAE,WAA+B,IAAI,EAAE,oBAA8B,EAAE,EAAE,qBAAoC,IAAI;;IACxQ,CAAC,CAAC,SAAS,GAAG,OAAO,CAAC;IACtB,IAAI,SAAS,GAAG,CAAC,CAAC,SAAS,CAAC,CAAC,CAAC,CAAC;IAC/B,IAAI,QAAQ,GAAG,IAAI,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,SAAS,CAAC;IACrC,IAAI,SAAS,KAAK,SAAS,CAAC,GAAG,EAAE;QAC/B,QAAQ,GAAG,EAAE,CAAC;KACf;IACD,IAAI,SAAS,GAAG,IAAI,CAAC;IACrB,IAAI,kBAAkB,GAAG,IAAI,CAAC;IAC9B,IAAI,QAAQ,IAAI,IAAI,EAAE;QACpB,SAAS,GAAG,CAAC,CAAA,MAAA,QAAQ,CAAC,IAAI,CAAC,MAAM,0CAAE,IAAI,CAAC,YAAY,CAAC,KAAI,IAAI,CAAC,CAAC,CAAC,CAAC,QAAQ,CAAC,IAAI,CAAC,MAAM,CAAC,IAAI,CAAC,YAAY,CAAC,CAAC,CAAC,CAAC,IAAI,CAAC;QAChH,kBAAkB,GAAG,CAAC,CAAA,MAAA,QAAQ,CAAC,IAAI,CAAC,MAAM,0CAAE,IAAI,CAAC,sBAAsB,CAAC,KAAI,IAAI,CAAC,CAAC,CAAC,CAAC,QAAQ,CAAC,IAAI,CAAC,MAAM,CAAC,IAAI,CAAC,sBAAsB,CAAC,CAAC,CAAC,CAAC,IAAI,CAAC;KAC9I;IACD,MAAM,cAAc,GAAW,iBAAiB,CAAC,OAAO,CAAC,CAAC;IAC1D,IAAI,kBAAkB,IAAI,CAAC,iBAAiB,CAAC,MAAM,GAAG,CAAC,CAAC,EAAE;QACxD,gBAAgB,GAAG,CAAC,SAAS,IAAI,cAAc,CAAC,CAAC,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC,gBAAgB,CAAC;KAC3E;IACD,IAAI,kBAAkB,IAAI,IAAI,EAAE;QAC9B,SAAS,GAAG,sBAAsB,CAAC,SAAS,EAAE,kBAAkB,CAAC,CAAC;KACnE;IAGD,IAAI,QAAQ,GAAQ,CAAC,CAAC,WAAW,CAAC,SAAS,GAAG,QAAQ,CAAC,CAAC;IACxD,MAAM,MAAM,GAAG,CAAC,CAAC;IAEjB,IAAI,gBAAgB,GAAG,CAAC,CAAC,WAAW,CAAC,SAAS,CAAC,CAAC,KAAK,CAAC;IACtD,IAAI,aAAa,GAAG,CAAC,CAAC,WAAW,CAAC,SAAS,CAA
C,CAAC,KAAK,CAAC;IACnD,MAAM,EAAE,GAAG,CAAC,QAAQ,CAAC,qBAAqB,GAAG,QAAQ,CAAC,sBAAsB,CAAC,GAAG,CAAC,CAAC;IAClF,QAAQ,GAAG,QAAQ,CAAC,KAAK,CAAC;IAC1B,IAAI,SAAS,KAAK,SAAS,CAAC,GAAG,EAAE;QAC/B,gBAAgB,GAAG,OAAO,CAAC,OAAO,CAAC,CAAC;QACpC,QAAQ,GAAG,OAAO,CAAC,OAAO,CAAC,CAAC;KAC7B;IAED,SAAS,IAAI,CAAC,GAAW,EAAE,GAAW;QACpC,MAAM,SAAS,GAAG,SAAS,CAAC,CAAC,CAAC,KAAK,CAAC,CAAC,CAAC,UAAU,CAAC;QACjD,CAAC,CAAC,SAAS,GAAG,SAAS,CAAC;QACxB,CAAC,CAAC,WAAW,GAAG,gBAAgB,CAAC;QACjC,IAAI,SAAS,KAAK,SAAS,CAAC,OAAO,EAAE;YACnC,CAAC,CAAC,QAAQ,CAAC,SAAS,EAAE,CAAC,GAAG,GAAG,EAAE,CAAC,GAAG,EAAE,CAAC,CAAC;YACvC,CAAC,CAAC,SAAS,GAAG,SAAS,CAAC;YACxB,CAAC,CAAC,QAAQ,CAAC,QAAQ,EAAE,CAAC,GAAG,GAAG,EAAE,CAAC,GAAG,EAAE,CAAC,CAAC;SACvC;QACD,IAAI,SAAS,KAAK,SAAS,CAAC,GAAG,EAAE;YAC/B,CAAC,CAAC,SAAS,GAAG,SAAS,CAAC;YACxB,CAAC,CAAC,QAAQ,CAAC,SAAS,EAAE,CAAC,GAAG,GAAG,GAAG,CAAC,CAAC,OAAO,CAAC,OAAO,CAAC,GAAG,aAAa,CAAC,GAAG,CAAC,CAAC,EAAE,CAAC,GAAG,EAAE,CAAC,CAAC;SACnF;IACH,CAAC;IAED,IAAI,IAAI,IAAI,QAAQ,GAAG,CAAC,EAAE;QACxB,IAAI,CAAC,MAAM,EAAE,MAAM,GAAG,gBAAgB,CAAC,CAAC;QACxC,OAAO,CAAC,GAAG,gBAAgB,GAAG,CAAC,CAAC,WAAW,CAAC,QAAQ,CAAC,CAAC,KAAK,CAAC;KAE7D;SAAM;QACL,MAAM,EAAE,GAAG,CAAC,CAAC,GAAG,QAAQ,CAAC,GAAG,CAAC,CAAC;QAC9B,IAAI,CAAC,EAAE,EAAE,EAAE,GAAG,gBAAgB,CAAC,CAAC;QAChC,OAAO,CAAC,GAAG,EAAE,GAAG,gBAAgB,CAAC;KAClC;AACH,CAAC","sourcesContent":["import * as DG from 'datagrok-api/dg';\n\nimport {monomerToShort} from './macromolecule';\n\nconst undefinedColor = 'rgb(100,100,100)';\nconst grayColor = '#808080';\nconst blackColor = 'rgb(0,0,0)';\nconst monomerToShortFunction: (amino: string, maxLengthOfMonomer: number) => string = monomerToShort;\n\nexport enum DrawStyle {\n  MSA = 'MSA',\n  classic = 'classic',\n}\n\n/**\n * A function that prints a string aligned to left or centered.\n *\n * @param {number} x x coordinate.\n * @param {number} y y coordinate.\n * @param {number} w Width.\n * @param {number} h Height.\n * @param {CanvasRenderingContext2D} g Canvas rendering context.\n * @param {string} s String to print.\n * 
@param {string} [color=undefinedColor] String color.\n * @param {number} [pivot=0] Pirvot.\n * @param {boolean} [left=false] Is left aligned.\n * @param {number} [transparencyRate=0.0] Transparency rate where 1.0 is fully transparent\n * @param {string} [separator=''] Is separator for sequence.\n * @param {boolean} [last=false] Is checker if element last or not.\n * @param drawStyle Is draw style. MSA - for multicharSeq, classic - for other seq.\n * @param maxWord Is array of max words for each line.\n * @param wordIdx Is index of word we currently draw.\n * @param gridCell Is grid cell.\n * @param referenceSequence Is reference sequence for diff mode.\n * @param maxLengthOfMonomer Is max length of monomer.\n * @return {number} x coordinate to start printing at.\n */\nexport function printLeftOrCentered(\n  x: number, y: number, w: number, h: number,\n  g: CanvasRenderingContext2D, s: string, color: string = undefinedColor,\n  pivot: number = 0, left = false, transparencyRate: number = 1.0,\n  separator: string = '', last: boolean = false, drawStyle: DrawStyle = DrawStyle.classic, maxWord: { [index: string]: number } = {}, wordIdx: number = 0, gridCell: DG.GridCell | null = null, referenceSequence: string[] = [], maxLengthOfMonomer: number | null = null): number {\n  g.textAlign = 'start';\n  let colorPart = s.substring(0);\n  let grayPart = last ? '' : separator;\n  if (drawStyle === DrawStyle.MSA) {\n    grayPart = '';\n  }\n  let colorCode = true;\n  let compareWithCurrent = true;\n  if (gridCell != null) {\n    colorCode = (gridCell.cell.column?.temp['color-code'] != null) ? gridCell.cell.column.temp['color-code'] : true;\n    compareWithCurrent = (gridCell.cell.column?.temp['compare-with-current'] != null) ? gridCell.cell.column.temp['compare-with-current'] : true;\n  }\n  const currentMonomer: string = referenceSequence[wordIdx];\n  if (compareWithCurrent && (referenceSequence.length > 0)) {\n    transparencyRate = (colorPart == currentMonomer) ? 
0.3 : transparencyRate;\n  }\n  if (maxLengthOfMonomer != null) {\n    colorPart = monomerToShortFunction(colorPart, maxLengthOfMonomer);\n  }\n\n\n  let textSize: any = g.measureText(colorPart + grayPart);\n  const indent = 5;\n\n  let maxColorTextSize = g.measureText(colorPart).width;\n  let colorTextSize = g.measureText(colorPart).width;\n  const dy = (textSize.fontBoundingBoxAscent + textSize.fontBoundingBoxDescent) / 2;\n  textSize = textSize.width;\n  if (drawStyle === DrawStyle.MSA) {\n    maxColorTextSize = maxWord[wordIdx];\n    textSize = maxWord[wordIdx];\n  }\n\n  function draw(dx1: number, dx2: number): void {\n    const drawColor = colorCode ? color : blackColor;\n    g.fillStyle = drawColor;\n    g.globalAlpha = transparencyRate;\n    if (drawStyle === DrawStyle.classic) {\n      g.fillText(colorPart, x + dx1, y + dy);\n      g.fillStyle = grayColor;\n      g.fillText(grayPart, x + dx2, y + dy);\n    }\n    if (drawStyle === DrawStyle.MSA) {\n      g.fillStyle = drawColor;\n      g.fillText(colorPart, x + dx1 + ((maxWord[wordIdx] - colorTextSize) / 2), y + dy);\n    }\n  }\n\n  if (left || textSize > w) {\n    draw(indent, indent + maxColorTextSize);\n    return x + maxColorTextSize + g.measureText(grayPart).width;\n\n  } else {\n    const dx = (w - textSize) / 2;\n    draw(dx, dx + maxColorTextSize);\n    return x + dx + maxColorTextSize;\n  }\n}\n\n"]}
|
|
@@ -0,0 +1,86 @@
|
|
|
1
|
+
import * as DG from 'datagrok-api/dg';
|
|
2
|
+
/** Stats of sequences with specified splitter func, returns { freq, sameLength }.
|
|
3
|
+
* @param {DG.Column} seqCol
|
|
4
|
+
* @param {number} minLength
|
|
5
|
+
* @param {SplitterFunc} splitter
|
|
6
|
+
* @return { SeqColStats }, sameLength: boolean } stats of column sequences
|
|
7
|
+
*/
|
|
8
|
+
import { SeqPalette } from '../seq-palettes';
|
|
9
|
+
import * as bio from '../../index';
|
|
10
|
+
/** enum type to simplify setting "user-friendly" notation if necessary */
|
|
11
|
+
export declare const enum NOTATION {
|
|
12
|
+
FASTA = "fasta",
|
|
13
|
+
SEPARATOR = "separator",
|
|
14
|
+
HELM = "helm"
|
|
15
|
+
}
|
|
16
|
+
export declare const enum ALPHABET {
|
|
17
|
+
DNA = "DNA",
|
|
18
|
+
RNA = "RNA",
|
|
19
|
+
PT = "PT",
|
|
20
|
+
UN = "UN"
|
|
21
|
+
}
|
|
22
|
+
export declare const enum TAGS {
|
|
23
|
+
aligned = "aligned",
|
|
24
|
+
alphabet = "alphabet",
|
|
25
|
+
alphabetSize = ".alphabetSize",
|
|
26
|
+
alphabetIsMultichar = ".alphabetIsMultichar",
|
|
27
|
+
separator = "separator"
|
|
28
|
+
}
|
|
29
|
+
export declare type SeqColStats = {
|
|
30
|
+
freq: MonomerFreqs;
|
|
31
|
+
sameLength: boolean;
|
|
32
|
+
};
|
|
33
|
+
export declare type SplitterFunc = (seq: string) => string[];
|
|
34
|
+
export declare type MonomerFreqs = {
|
|
35
|
+
[m: string]: number;
|
|
36
|
+
};
|
|
37
|
+
export declare function getStats(seqCol: DG.Column, minLength: number, splitter: SplitterFunc): SeqColStats;
|
|
38
|
+
export declare const monomerRe: RegExp;
|
|
39
|
+
/** Split sequence for single character monomers, square brackets multichar monomer names or gap symbol.
|
|
40
|
+
* @param {any} seq object with sequence
|
|
41
|
+
* @return {string[]} array of monomers
|
|
42
|
+
*/
|
|
43
|
+
export declare function splitterAsFasta(seq: any): string[];
|
|
44
|
+
/** Gets method to split sequence by separator
|
|
45
|
+
* @param {string} separator
|
|
46
|
+
* @param limit
|
|
47
|
+
* @return {SplitterFunc}
|
|
48
|
+
*/
|
|
49
|
+
export declare function getSplitterWithSeparator(separator: string, limit?: number | undefined): SplitterFunc;
|
|
50
|
+
/** Splits Helm string to monomers, but does not replace monomer names to other notation (e.g. for RNA).
|
|
51
|
+
* Only for linear polymers, does not split RNA for ribose and phosphate monomers.
|
|
52
|
+
* @param {string} seq Source string of HELM notation
|
|
53
|
+
* @return {string[]}
|
|
54
|
+
*/
|
|
55
|
+
export declare function splitterAsHelm(seq: any): string[];
|
|
56
|
+
/** Get splitter method to split sequences to monomers
|
|
57
|
+
* @param {string} units
|
|
58
|
+
* @param {string} separator
|
|
59
|
+
* @param limit
|
|
60
|
+
* @return {SplitterFunc}
|
|
61
|
+
*/
|
|
62
|
+
export declare function getSplitter(units: string, separator: string, limit?: number | undefined): SplitterFunc;
|
|
63
|
+
/** Generate splitter function for sequence column
|
|
64
|
+
* @param {DG.Column} col
|
|
65
|
+
* @return {SplitterFunc} Splitter function
|
|
66
|
+
*/
|
|
67
|
+
export declare function getSplitterForColumn(col: DG.Column): SplitterFunc;
|
|
68
|
+
/** Convert long monomer names to short ones */
|
|
69
|
+
export declare function monomerToShort(amino: string, maxLengthOfMonomer: number): string;
|
|
70
|
+
/** Calculate similarity in current sequence and alphabet.
|
|
71
|
+
* @param {MonomerFreqs} freq
|
|
72
|
+
* @param {Set<string>} alphabet
|
|
73
|
+
* @param {string} gapSymbol
|
|
74
|
+
* @return {number} Cosine similarity
|
|
75
|
+
*/
|
|
76
|
+
export declare function getAlphabetSimilarity(freq: MonomerFreqs, alphabet: Set<string>, gapSymbol?: string): number;
|
|
77
|
+
export declare function detectAlphabet(stats: SeqColStats): string;
|
|
78
|
+
/** Selects a suitable palette based on column data
|
|
79
|
+
* @param {DG.Column} seqCol Column to look for a palette
|
|
80
|
+
* @param {number} minLength minimum length of sequence to detect palette (empty strings are allowed)
|
|
81
|
+
* @return {SeqPalette} Palette corresponding to the alphabet of the sequences in the column
|
|
82
|
+
*/
|
|
83
|
+
export declare function pickUpPalette(seqCol: DG.Column, minLength?: number): SeqPalette;
|
|
84
|
+
export declare function getPaletteByType(paletteType: string): bio.SeqPalette;
|
|
85
|
+
export declare function pickUpSeqCol(df: DG.DataFrame): DG.Column | null;
|
|
86
|
+
//# sourceMappingURL=macromolecule.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"macromolecule.d.ts","sourceRoot":"","sources":["macromolecule.ts"],"names":[],"mappings":"AAEA,OAAO,KAAK,EAAE,MAAM,iBAAiB,CAAC;AAKtC;;;;;GAKG;AACH,OAAO,EAAC,UAAU,EAAC,MAAM,iBAAiB,CAAC;AAM3C,OAAO,KAAK,GAAG,MAAM,aAAa,CAAC;AAEnC,0EAA0E;AAC1E,0BAAkB,QAAQ;IACxB,KAAK,UAAU;IACf,SAAS,cAAc;IACvB,IAAI,SAAS;CACd;AAED,0BAAkB,QAAQ;IACxB,GAAG,QAAQ;IACX,GAAG,QAAQ;IACX,EAAE,OAAO;IACT,EAAE,OAAO;CACV;AAED,0BAAkB,IAAI;IACpB,OAAO,YAAY;IACnB,QAAQ,aAAa;IACrB,YAAY,kBAAkB;IAC9B,mBAAmB,yBAAyB;IAC5C,SAAS,cAAc;CACxB;AAED,oBAAY,WAAW,GAAG;IAAE,IAAI,EAAE,YAAY,CAAC;IAAC,UAAU,EAAE,OAAO,CAAA;CAAE,CAAA;AACrE,oBAAY,YAAY,GAAG,CAAC,GAAG,EAAE,MAAM,KAAK,MAAM,EAAE,CAAC;AACrD,oBAAY,YAAY,GAAG;IAAE,CAAC,CAAC,EAAE,MAAM,GAAG,MAAM,CAAA;CAAE,CAAC;AAEnD,wBAAgB,QAAQ,CAAC,MAAM,EAAE,EAAE,CAAC,MAAM,EAAE,SAAS,EAAE,MAAM,EAAE,QAAQ,EAAE,YAAY,GAAG,WAAW,CAsBlG;AAED,eAAO,MAAM,SAAS,EAAE,MAA8B,CAAC;AAEvD;;;GAGG;AACH,wBAAgB,eAAe,CAAC,GAAG,EAAE,GAAG,GAAG,MAAM,EAAE,CAYlD;AAED;;;;GAIG;AACH,wBAAgB,wBAAwB,CAAC,SAAS,EAAE,MAAM,EAAE,KAAK,GAAE,MAAM,GAAG,SAAqB,GAAG,YAAY,CAI/G;AAMD;;;;GAIG;AACH,wBAAgB,cAAc,CAAC,GAAG,EAAE,GAAG,GAAG,MAAM,EAAE,CAgBjD;AAED;;;;;GAKG;AACH,wBAAgB,WAAW,CAAC,KAAK,EAAE,MAAM,EAAE,SAAS,EAAE,MAAM,EAAE,KAAK,GAAE,MAAM,GAAG,SAAqB,GAAG,YAAY,CAWjH;AAED;;;GAGG;AACH,wBAAgB,oBAAoB,CAAC,GAAG,EAAE,EAAE,CAAC,MAAM,GAAG,YAAY,CAOjE;AAID,+CAA+C;AAC/C,wBAAgB,cAAc,CAAC,KAAK,EAAE,MAAM,EAAE,kBAAkB,EAAE,MAAM,GAAG,MAAM,CAKhF;AAED;;;;;GAKG;AACH,wBAAgB,qBAAqB,CAAC,IAAI,EAAE,YAAY,EAAE,QAAQ,EAAE,GAAG,CAAC,MAAM,CAAC,EAAE,SAAS,GAAE,MAAY,GAAG,MAAM,CAchH;AAED,wBAAgB,cAAc,CAAC,KAAK,EAAE,WAAW,GAAG,MAAM,CAazD;AAED;;;;GAIG;AACH,wBAAgB,aAAa,CAAC,MAAM,EAAE,EAAE,CAAC,MAAM,EAAE,SAAS,GAAE,MAAU,GAAG,UAAU,CAYlF;AAED,wBAAgB,gBAAgB,CAAC,WAAW,EAAE,MAAM,GAAG,GAAG,CAAC,UAAU,CAYpE;AAED,wBAAgB,YAAY,CAAC,EAAE,EAAE,EAAE,CAAC,SAAS,GAAG,EAAE,CAAC,MAAM,GAAG,IAAI,CAS/D"}
|
|
@@ -0,0 +1,190 @@
|
|
|
1
|
+
import * as DG from 'datagrok-api/dg';
|
|
2
|
+
import { Vector } from '@datagrok-libraries/utils/src/type-declarations';
|
|
3
|
+
import { vectorLength, vectorDotProduct } from '@datagrok-libraries/utils/src/vector-operations';
|
|
4
|
+
import wu from 'wu';
|
|
5
|
+
import { UnitsHandler } from '../utils/units-handler';
|
|
6
|
+
import * as bio from '../../index';
|
|
7
|
+
;
|
|
8
|
+
export function getStats(seqCol, minLength, splitter) {
|
|
9
|
+
const freq = {};
|
|
10
|
+
let sameLength = true;
|
|
11
|
+
let firstLength = null;
|
|
12
|
+
for (const seq of seqCol.categories) {
|
|
13
|
+
const mSeq = splitter(seq);
|
|
14
|
+
if (firstLength == null)
|
|
15
|
+
firstLength = mSeq.length;
|
|
16
|
+
else if (mSeq.length !== firstLength)
|
|
17
|
+
sameLength = false;
|
|
18
|
+
if (mSeq.length > minLength) {
|
|
19
|
+
for (const m of mSeq) {
|
|
20
|
+
if (!(m in freq))
|
|
21
|
+
freq[m] = 0;
|
|
22
|
+
freq[m] += 1;
|
|
23
|
+
}
|
|
24
|
+
}
|
|
25
|
+
}
|
|
26
|
+
return { freq: freq, sameLength: sameLength };
|
|
27
|
+
}
|
|
28
|
+
export const monomerRe = /\[(\w+)\]|(\w)|(-)/g;
|
|
29
|
+
/** Split sequence for single character monomers, square brackets multichar monomer names or gap symbol.
|
|
30
|
+
* @param {any} seq object with sequence
|
|
31
|
+
* @return {string[]} array of monomers
|
|
32
|
+
*/
|
|
33
|
+
export function splitterAsFasta(seq) {
|
|
34
|
+
return wu(seq.toString().matchAll(monomerRe))
|
|
35
|
+
.map((ma) => {
|
|
36
|
+
let mRes;
|
|
37
|
+
const m = ma[0];
|
|
38
|
+
if (m.length > 1) {
|
|
39
|
+
mRes = ma[1];
|
|
40
|
+
}
|
|
41
|
+
else {
|
|
42
|
+
mRes = m;
|
|
43
|
+
}
|
|
44
|
+
return mRes;
|
|
45
|
+
}).toArray();
|
|
46
|
+
}
|
|
47
|
+
/** Gets method to split sequence by separator
|
|
48
|
+
* @param {string} separator
|
|
49
|
+
* @param limit
|
|
50
|
+
* @return {SplitterFunc}
|
|
51
|
+
*/
|
|
52
|
+
export function getSplitterWithSeparator(separator, limit = undefined) {
|
|
53
|
+
return (seq) => {
|
|
54
|
+
return seq.split(separator, limit);
|
|
55
|
+
};
|
|
56
|
+
}
|
|
57
|
+
const helmRe = /(PEPTIDE1|DNA1|RNA1)\{([^}]+)}/g;
|
|
58
|
+
const helmPp1Re = /\[([^\[\]]+)]/g;
|
|
59
|
+
/** Splits Helm string to monomers, but does not replace monomer names to other notation (e.g. for RNA).
|
|
60
|
+
* Only for linear polymers, does not split RNA for ribose and phosphate monomers.
|
|
61
|
+
* @param {string} seq Source string of HELM notation
|
|
62
|
+
* @return {string[]}
|
|
63
|
+
*/
|
|
64
|
+
export function splitterAsHelm(seq) {
|
|
65
|
+
helmRe.lastIndex = 0;
|
|
66
|
+
const ea = helmRe.exec(seq.toString());
|
|
67
|
+
const inSeq = ea ? ea[2] : null;
|
|
68
|
+
const mmPostProcess = (mm) => {
|
|
69
|
+
helmPp1Re.lastIndex = 0;
|
|
70
|
+
const pp1M = helmPp1Re.exec(mm);
|
|
71
|
+
if (pp1M && pp1M.length >= 2)
|
|
72
|
+
return pp1M[1];
|
|
73
|
+
else
|
|
74
|
+
return mm;
|
|
75
|
+
};
|
|
76
|
+
const mmList = inSeq ? inSeq.split('.') : [];
|
|
77
|
+
return mmList.map(mmPostProcess);
|
|
78
|
+
}
|
|
79
|
+
/** Get splitter method to split sequences to monomers
|
|
80
|
+
* @param {string} units
|
|
81
|
+
* @param {string} separator
|
|
82
|
+
* @param limit
|
|
83
|
+
* @return {SplitterFunc}
|
|
84
|
+
*/
|
|
85
|
+
export function getSplitter(units, separator, limit = undefined) {
|
|
86
|
+
if (units.toLowerCase().startsWith("fasta" /* NOTATION.FASTA */))
|
|
87
|
+
return splitterAsFasta;
|
|
88
|
+
else if (units.toLowerCase().startsWith("separator" /* NOTATION.SEPARATOR */))
|
|
89
|
+
return getSplitterWithSeparator(separator, limit);
|
|
90
|
+
else if (units.toLowerCase().startsWith("helm" /* NOTATION.HELM */))
|
|
91
|
+
return splitterAsHelm;
|
|
92
|
+
else
|
|
93
|
+
throw new Error(`Unexpected units ${units} .`);
|
|
94
|
+
// TODO: Splitter for HELM
|
|
95
|
+
}
|
|
96
|
+
/** Builds the splitter function for a Macromolecule column.
 * Reads the units tag and the "separator" tag from the column and hands both to getSplitter().
 * @param {DG.Column} col Column whose semType must be DG.SEMTYPE.MACROMOLECULE
 * @return {SplitterFunc} Splitter function
 * @throws {Error} When the column has a different semType
 */
export function getSplitterForColumn(col) {
    const isMacromolecule = col.semType === DG.SEMTYPE.MACROMOLECULE;
    if (!isMacromolecule)
        throw new Error(`Get splitter for semType "${DG.SEMTYPE.MACROMOLECULE}" only.`);
    return getSplitter(col.getTag(DG.TAGS.UNITS), col.getTag("separator" /* TAGS.separator */));
}
|
|
107
|
+
const longMonomerPartRe = /(\w+)/g;
/** Converts a long monomer name to a short one.
 * The result is the first run of word characters in `amino` (a single space when there is none).
 * When `amino` is longer than `maxLengthOfMonomer`, or contains more than one run of word characters,
 * that first run is cut to `maxLengthOfMonomer` characters and an ellipsis ('…') is appended.
 * @param {string} amino Monomer name
 * @param {number} maxLengthOfMonomer Maximum number of characters kept before the ellipsis
 * @return {string} Short name
 */
export function monomerToShort(amino, maxLengthOfMonomer) {
    // With the global flag match() returns every word run, or null when there is none.
    const wordRuns = amino.match(longMonomerPartRe);
    const firstRun = wordRuns ? wordRuns[0] : ' ';
    const runCount = wordRuns ? wordRuns.length : 0;
    const tooLong = amino.length > maxLengthOfMonomer;
    const multiPart = runCount > 1;
    if (tooLong || multiPart)
        return firstRun.substring(0, maxLengthOfMonomer) + '…';
    return firstRun;
}
|
|
116
|
+
/** Calculates how similar the observed monomer frequencies are to an alphabet.
 * Builds two vectors over the union of the keys of `freq` and the members of `alphabet` (gap symbol
 * excluded): the observed counts, and a 0/1 membership indicator. The result is their dot product
 * divided by the product of their lengths.
 * @param {MonomerFreqs} freq Monomer -> count map
 * @param {Set<string>} alphabet Monomers of the candidate alphabet
 * @param {string} gapSymbol Symbol excluded from the comparison
 * @return {number} Cosine similarity; 0 when it is undefined because one of the vectors has zero length
 *   (e.g. `freq` is empty or contains only the gap symbol)
 */
export function getAlphabetSimilarity(freq, alphabet, gapSymbol = '-') {
    const keys = new Set([...Object.keys(freq), ...alphabet]);
    keys.delete(gapSymbol);
    const freqA = [];
    const alphabetA = [];
    for (const m of keys) {
        freqA.push(m in freq ? freq[m] : 0);
        alphabetA.push(alphabet.has(m) ? 1 : 0);
    }
    /* There were a few ideas: chi-squared, pearson correlation (variance?), scalar product */
    const freqV = new Vector(freqA);
    const alphabetV = new Vector(alphabetA);
    const normProduct = vectorLength(freqV) * vectorLength(alphabetV);
    // A zero norm would give 0 / 0 = NaN, and a single NaN poisons Math.max() in callers that rank
    // several alphabets. Report "no similarity" instead.
    if (!(normProduct > 0))
        return 0;
    return vectorDotProduct(freqV, alphabetV) / normProduct;
}
|
|
136
|
+
/** Guesses the alphabet of a column from its monomer statistics.
 * Scores `stats.freq` against the peptide, DNA and RNA FASTA alphabets of UnitsHandler with
 * getAlphabetSimilarity() and returns the name of the best-scoring one.
 * @param {SeqColStats} stats Column statistics; only `stats.freq` is read
 * @return {string} 'PT', 'DNA' or 'RNA'; 'UN' when the best score is not above 0.65
 */
export function detectAlphabet(stats) {
    const candidates = [
        ["PT" /* ALPHABET.PT */, UnitsHandler.PeptideFastaAlphabet],
        ["DNA" /* ALPHABET.DNA */, UnitsHandler.DnaFastaAlphabet],
        ["RNA" /* ALPHABET.RNA */, UnitsHandler.RnaFastaAlphabet],
    ];
    // One similarity score per candidate, in the same order as `candidates`.
    const scores = candidates.map(([, monomers]) => getAlphabetSimilarity(stats.freq, monomers));
    const bestScore = Math.max(...scores);
    if (bestScore > 0.65)
        return candidates[scores.indexOf(bestScore)][0];
    return 'UN';
}
|
|
148
|
+
/** Chooses a palette that fits the sequences of a column.
 * For a Macromolecule column the alphabet is taken from a UnitsHandler built on the column; for any
 * other column it is detected from statistics gathered with getStats() and the FASTA splitter.
 * @param {DG.Column} seqCol Column to look for a palette
 * @param {number} minLength Forwarded to getStats() when the alphabet has to be detected
 * @return {SeqPalette} Palette returned by getPaletteByType() for the resulting alphabet
 */
export function pickUpPalette(seqCol, minLength = 5) {
    const alphabet = seqCol.semType == DG.SEMTYPE.MACROMOLECULE ?
        new UnitsHandler(seqCol).alphabet :
        detectAlphabet(getStats(seqCol, minLength, splitterAsFasta));
    return getPaletteByType(alphabet);
}
|
|
166
|
+
/** Maps an alphabet / palette type name to a palette.
 * @param {string} paletteType 'PT' for peptides; 'NT', 'DNA' or 'RNA' for nucleotides
 * @return {SeqPalette} The amino acid or nucleotide palette; the unknown-sequence palette for any other value
 */
export function getPaletteByType(paletteType) {
    if (paletteType === 'PT')
        return bio.AminoacidsPalettes.GrokGroups;
    const isNucleotide = paletteType === 'NT' || paletteType === 'DNA' || paletteType === 'RNA';
    if (isNucleotide)
        return bio.NucleotidesPalettes.Chromatogram;
    // Anything else falls back to the palette for unknown sequences.
    return bio.UnknownSeqPalettes.Color;
}
|
|
179
|
+
/** Picks a sequence column from a data frame.
 * Considers the columns returned by `df.columns.bySemTypeAll()` for the Macromolecule semType and
 * prefers the first one whose units tag contains 'MSA'; otherwise takes the first such column.
 * @param {DG.DataFrame} df Data frame to search
 * @return {DG.Column|null} The chosen column, or null when there is no Macromolecule column
 */
export function pickUpSeqCol(df) {
    const candidates = df.columns.bySemTypeAll(DG.SEMTYPE.MACROMOLECULE);
    const msaCol = candidates.find((col) => {
        const units = col.getTag(DG.TAGS.UNITS);
        return !!units && units.indexOf('MSA') !== -1;
    });
    if (msaCol)
        return msaCol;
    return candidates.length > 0 ? candidates[0] : null;
}
|
|
190
|
+
//# sourceMappingURL=data:application/json;base64,{"version":3,"file":"macromolecule.js","sourceRoot":"","sources":["macromolecule.ts"],"names":[],"mappings":"AAEA,OAAO,KAAK,EAAE,MAAM,iBAAiB,CAAC;AAEtC,OAAO,EAAC,MAAM,EAAC,MAAM,iDAAiD,CAAC;AACvE,OAAO,EAAC,YAAY,EAAE,gBAAgB,EAAC,MAAM,iDAAiD,CAAC;AAY/F,OAAO,EAAE,MAAM,IAAI,CAAC;AACpB,OAAO,EAAC,YAAY,EAAC,MAAM,wBAAwB,CAAC;AACpD,OAAO,KAAK,GAAG,MAAM,aAAa,CAAC;AAsBlC,CAAC;AAMF,MAAM,UAAU,QAAQ,CAAC,MAAiB,EAAE,SAAiB,EAAE,QAAsB;IACnF,MAAM,IAAI,GAA4B,EAAE,CAAC;IACzC,IAAI,UAAU,GAAG,IAAI,CAAC;IACtB,IAAI,WAAW,GAAG,IAAI,CAAC;IAEvB,KAAK,MAAM,GAAG,IAAI,MAAM,CAAC,UAAU,EAAE;QACnC,MAAM,IAAI,GAAG,QAAQ,CAAC,GAAG,CAAC,CAAC;QAE3B,IAAI,WAAW,IAAI,IAAI;YACrB,WAAW,GAAG,IAAI,CAAC,MAAM,CAAC;aACvB,IAAI,IAAI,CAAC,MAAM,KAAK,WAAW;YAClC,UAAU,GAAG,KAAK,CAAC;QAErB,IAAI,IAAI,CAAC,MAAM,GAAG,SAAS,EAAE;YAC3B,KAAK,MAAM,CAAC,IAAI,IAAI,EAAE;gBACpB,IAAI,CAAC,CAAC,CAAC,IAAI,IAAI,CAAC;oBACd,IAAI,CAAC,CAAC,CAAC,GAAG,CAAC,CAAC;gBACd,IAAI,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC;aACd;SACF;KACF;IACD,OAAO,EAAC,IAAI,EAAE,IAAI,EAAE,UAAU,EAAE,UAAU,EAAC,CAAC;AAC9C,CAAC;AAED,MAAM,CAAC,MAAM,SAAS,GAAW,qBAAqB,CAAC;AAEvD;;;GAGG;AACH,MAAM,UAAU,eAAe,CAAC,GAAQ;IACtC,OAAO,EAAE,CAAmB,GAAG,CAAC,QAAQ,EAAE,CAAC,QAAQ,CAAC,SAAS,CAAC,CAAC;SAC5D,GAAG,CAAC,CAAC,EAAoB,EAAE,EAAE;QAC5B,IAAI,IAAY,CAAC;QACjB,MAAM,CAAC,GAAW,EAAE,CAAC,CAAC,CAAC,CAAC;QACxB,IAAI,CAAC,CAAC,MAAM,GAAG,CAAC,EAAE;YAChB,IAAI,GAAG,EAAE,CAAC,CAAC,CAAC,CAAC;SACd;aAAM;YACL,IAAI,GAAG,CAAC,CAAC;SACV;QACD,OAAO,IAAI,CAAC;IACd,CAAC,CAAC,CAAC,OAAO,EAAE,CAAC;AACjB,CAAC;AAED;;;;GAIG;AACH,MAAM,UAAU,wBAAwB,CAAC,SAAiB,EAAE,QAA4B,SAAS;IAC/F,OAAO,CAAC,GAAW,EAAE,EAAE;QACrB,OAAO,GAAG,CAAC,KAAK,CAAC,SAAS,EAAE,KAAK,CAAC,CAAC;IACrC,CAAC,CAAC;AACJ,CAAC;AAED,MAAM,MAAM,GAAW,iCAAiC,CAAC;AACzD,MAAM,SAAS,GAAW,gBAAgB,CAAC;AAG3C;;;;GAIG;AACH,MAAM,UAAU,cAAc,CAAC,GAAQ;IACrC,MAAM,CAAC,SAAS,GAAG,CAAC,CAAC;IACrB,MAAM,EAAE,GAA2B,MAAM,CAAC,IAAI,CAAC,GAAG,CAAC,QAAQ,EAAE,CAAC,CAAC;IAC/D,MAAM,KAAK,GAAkB,EAAE,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,IAAI,CAAC;IAE/C,
MAAM,aAAa,GAAG,CAAC,EAAU,EAAU,EAAE;QAC3C,SAAS,CAAC,SAAS,GAAG,CAAC,CAAC;QACxB,MAAM,IAAI,GAAG,SAAS,CAAC,IAAI,CAAC,EAAE,CAAC,CAAC;QAChC,IAAI,IAAI,IAAI,IAAI,CAAC,MAAM,IAAI,CAAC;YAC1B,OAAO,IAAI,CAAC,CAAC,CAAC,CAAC;;YAEf,OAAO,EAAE,CAAC;IACd,CAAC,CAAC;IAEF,MAAM,MAAM,GAAa,KAAK,CAAC,CAAC,CAAC,KAAK,CAAC,KAAK,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC,EAAE,CAAC;IACvD,OAAO,MAAM,CAAC,GAAG,CAAC,aAAa,CAAC,CAAC;AACnC,CAAC;AAED;;;;;GAKG;AACH,MAAM,UAAU,WAAW,CAAC,KAAa,EAAE,SAAiB,EAAE,QAA4B,SAAS;IACjG,IAAI,KAAK,CAAC,WAAW,EAAE,CAAC,UAAU,8BAAgB;QAChD,OAAO,eAAe,CAAC;SACpB,IAAI,KAAK,CAAC,WAAW,EAAE,CAAC,UAAU,sCAAoB;QACzD,OAAO,wBAAwB,CAAC,SAAS,EAAE,KAAK,CAAC,CAAC;SAC/C,IAAI,KAAK,CAAC,WAAW,EAAE,CAAC,UAAU,4BAAe;QACpD,OAAO,cAAc,CAAC;;QAEtB,MAAM,IAAI,KAAK,CAAC,oBAAoB,KAAK,IAAI,CAAC,CAAC;IAEjD,0BAA0B;AAC5B,CAAC;AAED;;;GAGG;AACH,MAAM,UAAU,oBAAoB,CAAC,GAAc;IACjD,IAAI,GAAG,CAAC,OAAO,KAAK,EAAE,CAAC,OAAO,CAAC,aAAa;QAC1C,MAAM,IAAI,KAAK,CAAC,6BAA6B,EAAE,CAAC,OAAO,CAAC,aAAa,SAAS,CAAC,CAAC;IAElF,MAAM,KAAK,GAAG,GAAG,CAAC,MAAM,CAAC,EAAE,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC;IACxC,MAAM,SAAS,GAAG,GAAG,CAAC,MAAM,kCAAgB,CAAC;IAC7C,OAAO,WAAW,CAAC,KAAK,EAAE,SAAS,CAAC,CAAC;AACvC,CAAC;AAED,MAAM,iBAAiB,GAAW,QAAQ,CAAC;AAE3C,+CAA+C;AAC/C,MAAM,UAAU,cAAc,CAAC,KAAa,EAAE,kBAA0B;;IACtE,MAAM,eAAe,GAA4B,KAAK,CAAC,KAAK,CAAC,iBAAiB,CAAC,CAAC;IAChF,MAAM,WAAW,GAAY,KAAK,CAAC,MAAM,GAAG,kBAAkB,IAAI,CAAC,MAAA,eAAe,aAAf,eAAe,uBAAf,eAAe,CAAE,MAAM,mCAAI,CAAC,CAAC,GAAG,CAAC,CAAC;IACrG,MAAM,UAAU,GAAG,MAAA,eAAe,aAAf,eAAe,uBAAf,eAAe,CAAG,CAAC,CAAC,mCAAI,GAAG,CAAC;IAC/C,OAAO,CAAC,WAAW,CAAC,CAAC,CAAC,UAAU,CAAC,CAAC,CAAC,UAAU,CAAC,SAAS,CAAC,CAAC,EAAE,kBAAkB,CAAC,GAAG,GAAG,CAAC;AACvF,CAAC;AAED;;;;;GAKG;AACH,MAAM,UAAU,qBAAqB,CAAC,IAAkB,EAAE,QAAqB,EAAE,YAAoB,GAAG;IACtG,MAAM,IAAI,GAAG,IAAI,GAAG,CAAS,CAAC,GAAG,IAAI,GAAG,CAAC,MAAM,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC,EAAE,GAAG,QAAQ,CAAC,CAAC,CAAC;IAC3E,IAAI,CAAC,MAAM,CAAC,SAAS,CAAC,CAAC;IAEvB,MAAM,KAAK,GAAa,EAAE,CAAC;IAC3B,MAAM,SAAS,GAAa,EAAE,CAAC;IAC/B,KAAK,MAAM,CAAC,IAAI,IAAI,EAAE;QACpB,KAAK,CAAC,IAAI,CAAC,CAAC,IAAI,IAAI,CA
AC,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC;QACpC,SAAS,CAAC,IAAI,CAAC,QAAQ,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC;KACzC;IACD,0FAA0F;IAC1F,MAAM,KAAK,GAAW,IAAI,MAAM,CAAC,KAAK,CAAC,CAAC;IACxC,MAAM,SAAS,GAAW,IAAI,MAAM,CAAC,SAAS,CAAC,CAAC;IAChD,OAAO,gBAAgB,CAAC,KAAK,EAAE,SAAS,CAAC,GAAG,CAAC,YAAY,CAAC,KAAK,CAAC,GAAG,YAAY,CAAC,SAAS,CAAC,CAAC,CAAC;AAC9F,CAAC;AAED,MAAM,UAAU,cAAc,CAAC,KAAkB;IAC/C,MAAM,kBAAkB,GAA4B;QAClD,yBAAc,YAAY,CAAC,oBAAoB,CAAC;QAChD,2BAAe,YAAY,CAAC,gBAAgB,CAAC;QAC7C,2BAAe,YAAY,CAAC,gBAAgB,CAAC;KAC9C,CAAC;IAEF,gDAAgD;IAChD,MAAM,qBAAqB,GAAa,kBAAkB,CAAC,GAAG,CAC5D,CAAC,CAAC,EAAE,EAAE,CAAC,qBAAqB,CAAC,KAAK,CAAC,IAAI,EAAE,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC;IAClD,MAAM,MAAM,GAAG,IAAI,CAAC,GAAG,CAAC,GAAG,qBAAqB,CAAC,CAAC;IAClD,MAAM,QAAQ,GAAG,MAAM,GAAG,IAAI,CAAC,CAAC,CAAC,kBAAkB,CAAC,qBAAqB,CAAC,OAAO,CAAC,MAAM,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,IAAI,CAAC;IACrG,OAAO,QAAQ,CAAC;AAClB,CAAC;AAED;;;;GAIG;AACH,MAAM,UAAU,aAAa,CAAC,MAAiB,EAAE,YAAoB,CAAC;IACpE,IAAI,QAAgB,CAAC;IACrB,IAAI,MAAM,CAAC,OAAO,IAAI,EAAE,CAAC,OAAO,CAAC,aAAa,EAAE;QAC9C,MAAM,EAAE,GAAiB,IAAI,YAAY,CAAC,MAAM,CAAC,CAAC;QAClD,QAAQ,GAAG,EAAE,CAAC,QAAQ,CAAC;KACxB;SAAM;QACL,MAAM,KAAK,GAAgB,QAAQ,CAAC,MAAM,EAAE,SAAS,EAAE,eAAe,CAAC,CAAC;QACxE,QAAQ,GAAG,cAAc,CAAC,KAAK,CAAC,CAAC;KAClC;IAED,MAAM,GAAG,GAAG,gBAAgB,CAAC,QAAQ,CAAC,CAAC;IACvC,OAAO,GAAG,CAAC;AACb,CAAC;AAED,MAAM,UAAU,gBAAgB,CAAC,WAAmB;IAClD,QAAQ,WAAW,EAAE;QACrB,KAAK,IAAI;YACP,OAAO,GAAG,CAAC,kBAAkB,CAAC,UAAU,CAAC;QAC3C,KAAK,IAAI,CAAC;QACV,KAAK,KAAK,CAAC;QACX,KAAK,KAAK;YACR,OAAO,GAAG,CAAC,mBAAmB,CAAC,YAAY,CAAC;QAC5C,QAAQ;QACV;YACE,OAAO,GAAG,CAAC,kBAAkB,CAAC,KAAK,CAAC;KACrC;AACH,CAAC;AAED,MAAM,UAAU,YAAY,CAAC,EAAgB;;IAC3C,MAAM,cAAc,GAAG,EAAE,CAAC,OAAO,CAAC,YAAY,CAAC,EAAE,CAAC,OAAO,CAAC,aAAa,CAAC,CAAC;IACzE,IAAI,MAAM,GAAqB,MAAA,cAAc,CAAC,IAAI,CAAC,CAAC,GAAG,EAAE,EAAE;QACzD,MAAM,KAAK,GAAG,GAAG,CAAC,MAAM,CAAC,EAAE,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC;QACxC,OAAO,KAAK,CAAC,CAAC,CAAC,KAAK,CAAC,OAAO,CAAC,KAAK,CAAC,
KAAK,CAAC,CAAC,CAAC,CAAC,CAAC,KAAK,CAAC;IACrD,CAAC,CAAC,mCAAI,IAAI,CAAC;IACX,IAAI,CAAC,MAAM,IAAI,cAAc,CAAC,MAAM,GAAG,CAAC;QACtC,MAAM,GAAG,cAAc,CAAC,CAAC,CAAC,CAAC;IAC7B,OAAO,MAAM,CAAC;AAChB,CAAC","sourcesContent":["import * as grok from 'datagrok-api/grok';\nimport * as ui from 'datagrok-api/ui';\nimport * as DG from 'datagrok-api/dg';\n\nimport {Vector} from '@datagrok-libraries/utils/src/type-declarations';\nimport {vectorLength, vectorDotProduct} from '@datagrok-libraries/utils/src/vector-operations';\n\n/** Stats of sequences with specified splitter func, returns { freq, sameLength }.\n * @param {DG.Column} seqCol\n * @param {number} minLength\n * @param {SplitterFunc} splitter\n * @return { SeqColStats }, sameLength: boolean } stats of column sequences\n */\nimport {SeqPalette} from '../seq-palettes';\nimport {Aminoacids, AminoacidsPalettes} from '../aminoacids';\nimport {Nucleotides, NucleotidesPalettes} from '../nucleotides';\nimport {UnknownSeqPalettes} from '../unknown';\nimport wu from 'wu';\nimport {UnitsHandler} from '../utils/units-handler';\nimport * as bio from '../../index';\n\n/** enum type to simplify setting \"user-friendly\" notation if necessary */\nexport const enum NOTATION {\n  FASTA = 'fasta',\n  SEPARATOR = 'separator',\n  HELM = 'helm',\n}\n\nexport const enum ALPHABET {\n  DNA = 'DNA',\n  RNA = 'RNA',\n  PT = 'PT',\n  UN = 'UN',\n}\n\nexport const enum TAGS {\n  aligned = 'aligned',\n  alphabet = 'alphabet',\n  alphabetSize = '.alphabetSize',\n  alphabetIsMultichar = '.alphabetIsMultichar',\n  separator = 'separator',\n};\n\nexport type SeqColStats = { freq: MonomerFreqs, sameLength: boolean }\nexport type SplitterFunc = (seq: string) => string[];\nexport type MonomerFreqs = { [m: string]: number };\n\nexport function getStats(seqCol: DG.Column, minLength: number, splitter: SplitterFunc): SeqColStats {\n  const freq: { [m: string]: number } = {};\n  let sameLength = true;\n  let firstLength = null;\n\n  for (const seq of 
seqCol.categories) {\n    const mSeq = splitter(seq);\n\n    if (firstLength == null)\n      firstLength = mSeq.length;\n    else if (mSeq.length !== firstLength)\n      sameLength = false;\n\n    if (mSeq.length > minLength) {\n      for (const m of mSeq) {\n        if (!(m in freq))\n          freq[m] = 0;\n        freq[m] += 1;\n      }\n    }\n  }\n  return {freq: freq, sameLength: sameLength};\n}\n\nexport const monomerRe: RegExp = /\\[(\\w+)\\]|(\\w)|(-)/g;\n\n/** Split sequence for single character monomers, square brackets multichar monomer names or gap symbol.\n * @param {any} seq object with sequence\n * @return {string[]} array of monomers\n */\nexport function splitterAsFasta(seq: any): string[] {\n  return wu<RegExpMatchArray>(seq.toString().matchAll(monomerRe))\n    .map((ma: RegExpMatchArray) => {\n      let mRes: string;\n      const m: string = ma[0];\n      if (m.length > 1) {\n        mRes = ma[1];\n      } else {\n        mRes = m;\n      }\n      return mRes;\n    }).toArray();\n}\n\n/** Gets method to split sequence by separator\n * @param {string} separator\n * @param limit\n * @return {SplitterFunc}\n */\nexport function getSplitterWithSeparator(separator: string, limit: number | undefined = undefined): SplitterFunc {\n  return (seq: string) => {\n    return seq.split(separator, limit);\n  };\n}\n\nconst helmRe: RegExp = /(PEPTIDE1|DNA1|RNA1)\\{([^}]+)}/g;\nconst helmPp1Re: RegExp = /\\[([^\\[\\]]+)]/g;\n\n\n/** Splits Helm string to monomers, but does not replace monomer names to other notation (e.g. for RNA).\n * Only for linear polymers, does not split RNA for ribose and phosphate monomers.\n * @param {string} seq Source string of HELM notation\n * @return {string[]}\n */\nexport function splitterAsHelm(seq: any): string[] {\n  helmRe.lastIndex = 0;\n  const ea: RegExpExecArray | null = helmRe.exec(seq.toString());\n  const inSeq: string | null = ea ? 
ea[2] : null;\n\n  const mmPostProcess = (mm: string): string => {\n    helmPp1Re.lastIndex = 0;\n    const pp1M = helmPp1Re.exec(mm);\n    if (pp1M && pp1M.length >= 2)\n      return pp1M[1];\n    else\n      return mm;\n  };\n\n  const mmList: string[] = inSeq ? inSeq.split('.') : [];\n  return mmList.map(mmPostProcess);\n}\n\n/** Get splitter method to split sequences to monomers\n * @param {string} units\n * @param {string} separator\n * @param limit\n * @return {SplitterFunc}\n */\nexport function getSplitter(units: string, separator: string, limit: number | undefined = undefined): SplitterFunc {\n  if (units.toLowerCase().startsWith(NOTATION.FASTA))\n    return splitterAsFasta;\n  else if (units.toLowerCase().startsWith(NOTATION.SEPARATOR))\n    return getSplitterWithSeparator(separator, limit);\n  else if (units.toLowerCase().startsWith(NOTATION.HELM))\n    return splitterAsHelm;\n  else\n    throw new Error(`Unexpected units ${units} .`);\n\n  // TODO: Splitter for HELM\n}\n\n/** Generate splitter function for sequence column\n * @param {DG.Column} col\n * @return {SplitterFunc} Splitter function\n */\nexport function getSplitterForColumn(col: DG.Column): SplitterFunc {\n  if (col.semType !== DG.SEMTYPE.MACROMOLECULE)\n    throw new Error(`Get splitter for semType \"${DG.SEMTYPE.MACROMOLECULE}\" only.`);\n\n  const units = col.getTag(DG.TAGS.UNITS);\n  const separator = col.getTag(TAGS.separator);\n  return getSplitter(units, separator);\n}\n\nconst longMonomerPartRe: RegExp = /(\\w+)/g;\n\n/** Convert long monomer names to short ones */\nexport function monomerToShort(amino: string, maxLengthOfMonomer: number): string {\n  const shortAminoMatch: RegExpMatchArray | null = amino.match(longMonomerPartRe);\n  const needAddDots: boolean = amino.length > maxLengthOfMonomer || (shortAminoMatch?.length ?? 0) > 1;\n  const shortAmino = shortAminoMatch?.[0] ?? ' ';\n  return !needAddDots ? 
shortAmino : shortAmino.substring(0, maxLengthOfMonomer) + '…';\n}\n\n/** Calculate similarity in current sequence and alphabet.\n * @param {MonomerFreqs} freq\n * @param {Set<string>} alphabet\n * @param {string} gapSymbol\n * @return {number} Cosine similarity\n */\nexport function getAlphabetSimilarity(freq: MonomerFreqs, alphabet: Set<string>, gapSymbol: string = '-'): number {\n  const keys = new Set<string>([...new Set(Object.keys(freq)), ...alphabet]);\n  keys.delete(gapSymbol);\n\n  const freqA: number[] = [];\n  const alphabetA: number[] = [];\n  for (const m of keys) {\n    freqA.push(m in freq ? freq[m] : 0);\n    alphabetA.push(alphabet.has(m) ? 1 : 0);\n  }\n  /* There were a few ideas: chi-squared, pearson correlation (variance?), scalar product */\n  const freqV: Vector = new Vector(freqA);\n  const alphabetV: Vector = new Vector(alphabetA);\n  return vectorDotProduct(freqV, alphabetV) / (vectorLength(freqV) * vectorLength(alphabetV));\n}\n\nexport function detectAlphabet(stats: SeqColStats): string {\n  const alphabetCandidates: [string, Set<string>][] = [\n    [ALPHABET.PT, UnitsHandler.PeptideFastaAlphabet],\n    [ALPHABET.DNA, UnitsHandler.DnaFastaAlphabet],\n    [ALPHABET.RNA, UnitsHandler.RnaFastaAlphabet],\n  ];\n\n  // Calculate likelihoods for alphabet_candidates\n  const alphabetCandidatesSim: number[] = alphabetCandidates.map(\n    (c) => getAlphabetSimilarity(stats.freq, c[1]));\n  const maxCos = Math.max(...alphabetCandidatesSim);\n  const alphabet = maxCos > 0.65 ? 
alphabetCandidates[alphabetCandidatesSim.indexOf(maxCos)][0] : 'UN';\n  return alphabet;\n}\n\n/** Selects a suitable palette based on column data\n * @param {DG.Column} seqCol Column to look for a palette\n * @param {number}  minLength minimum length of sequence to detect palette (empty strings are allowed)\n * @return {SeqPalette} Palette corresponding to the alphabet of the sequences in the column\n */\nexport function pickUpPalette(seqCol: DG.Column, minLength: number = 5): SeqPalette {\n  let alphabet: string;\n  if (seqCol.semType == DG.SEMTYPE.MACROMOLECULE) {\n    const uh: UnitsHandler = new UnitsHandler(seqCol);\n    alphabet = uh.alphabet;\n  } else {\n    const stats: SeqColStats = getStats(seqCol, minLength, splitterAsFasta);\n    alphabet = detectAlphabet(stats);\n  }\n\n  const res = getPaletteByType(alphabet);\n  return res;\n}\n\nexport function getPaletteByType(paletteType: string): bio.SeqPalette {\n  switch (paletteType) {\n  case 'PT':\n    return bio.AminoacidsPalettes.GrokGroups;\n  case 'NT':\n  case 'DNA':\n  case 'RNA':\n    return bio.NucleotidesPalettes.Chromatogram;\n    // other\n  default:\n    return bio.UnknownSeqPalettes.Color;\n  }\n}\n\nexport function pickUpSeqCol(df: DG.DataFrame): DG.Column | null {\n  const semTypeColList = df.columns.bySemTypeAll(DG.SEMTYPE.MACROMOLECULE);\n  let resCol: DG.Column | null = semTypeColList.find((col) => {\n    const units = col.getTag(DG.TAGS.UNITS);\n    return units ? units.indexOf('MSA') !== -1 : false;\n  }) ?? null;\n  if (!resCol && semTypeColList.length > 0)\n    resCol = semTypeColList[0];\n  return resCol;\n}\n"]}
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
import * as DG from 'datagrok-api/dg';
|
|
2
|
-
import {
|
|
3
|
-
import {
|
|
2
|
+
import { UnitsHandler } from './units-handler';
|
|
3
|
+
import { NOTATION, SplitterFunc } from './macromolecule';
|
|
4
4
|
/** Class for handling conversion of notation systems in Macromolecule columns */
|
|
5
5
|
export declare class NotationConverter extends UnitsHandler {
|
|
6
6
|
private _splitter;
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"notation-converter.d.ts","sourceRoot":"","sources":["notation-converter.ts"],"names":[],"mappings":"AAGA,OAAO,KAAK,EAAE,MAAM,iBAAiB,CAAC;
|
|
1
|
+
{"version":3,"file":"notation-converter.d.ts","sourceRoot":"","sources":["notation-converter.ts"],"names":[],"mappings":"AAGA,OAAO,KAAK,EAAE,MAAM,iBAAiB,CAAC;AAEtC,OAAO,EAAC,YAAY,EAAC,MAAM,iBAAiB,CAAC;AAC7C,OAAO,EAAuB,QAAQ,EAAE,YAAY,EAAO,MAAM,iBAAiB,CAAC;AAEnF,iFAAiF;AACjF,qBAAa,iBAAkB,SAAQ,YAAY;IACjD,OAAO,CAAC,SAAS,CAA6B;IAC9C,SAAS,KAAK,QAAQ,IAAI,YAAY,CAIrC;IAEM,OAAO,CAAC,cAAc,EAAE,QAAQ,GAAG,OAAO;IAE1C,WAAW,CAAC,cAAc,EAAE,QAAQ,GAAG,OAAO;IAE9C,MAAM,CAAC,cAAc,EAAE,QAAQ,GAAG,OAAO;IAEhD;;;;;;OAMG;IACH,OAAO,CAAC,uBAAuB;IAmB/B;;;;;OAKG;IACH,OAAO,CAAC,eAAe;IAiBvB,OAAO,CAAC,mBAAmB;IAkB3B;;;;;;;OAOG;IACI,mBAAmB,CACxB,aAAa,EAAE,MAAM,EACrB,eAAe,GAAE,MAAM,GAAG,IAAW,GACpC,MAAM;IAOT;;;;;OAKG;IACH,OAAO,CAAC,aAAa;IAerB;;;;;OAKG;IACH,OAAO,CAAC,uBAAuB;IA4B/B;;;;;;;;OAQG;IACH,OAAO,CAAC,WAAW;IAiDnB,OAAO,CAAC,sBAAsB;IAK9B;;;;;OAKG;IACI,OAAO,CAAC,WAAW,EAAE,QAAQ,EAAE,YAAY,GAAE,MAAM,GAAG,IAAW,GAAG,EAAE,CAAC,MAAM;gBAmBjE,GAAG,EAAE,EAAE,CAAC,MAAM;CAGlC"}
|