@datagrok/bio 2.1.12 → 2.4.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (61)
  1. package/.eslintrc.json +1 -1
  2. package/README.md +11 -12
  3. package/css/helm.css +10 -0
  4. package/detectors.js +97 -69
  5. package/dist/package-test.js +2 -13168
  6. package/dist/package-test.js.map +1 -0
  7. package/dist/package.js +2 -10560
  8. package/dist/package.js.map +1 -0
  9. package/dockerfiles/Dockerfile +86 -0
  10. package/files/icons/composition-analysis.svg +17 -0
  11. package/files/icons/sequence-diversity-viewer.svg +4 -0
  12. package/files/icons/sequence-similarity-viewer.svg +4 -0
  13. package/files/icons/vdregions-viewer.svg +22 -0
  14. package/files/icons/weblogo-viewer.svg +7 -0
  15. package/files/tests/testUrl.csv +11 -0
  16. package/files/tests/toAtomicLevelTest.csv +4 -0
  17. package/package.json +24 -25
  18. package/src/analysis/sequence-activity-cliffs.ts +11 -9
  19. package/src/analysis/sequence-search-base-viewer.ts +2 -1
  20. package/src/analysis/sequence-similarity-viewer.ts +3 -3
  21. package/src/analysis/sequence-space.ts +2 -1
  22. package/src/calculations/monomerLevelMols.ts +4 -4
  23. package/src/package-test.ts +10 -2
  24. package/src/package.ts +215 -131
  25. package/src/substructure-search/substructure-search.ts +19 -16
  26. package/src/tests/Palettes-test.ts +1 -1
  27. package/src/tests/WebLogo-positions-test.ts +113 -57
  28. package/src/tests/_first-tests.ts +9 -0
  29. package/src/tests/activity-cliffs-tests.ts +8 -7
  30. package/src/tests/activity-cliffs-utils.ts +17 -9
  31. package/src/tests/bio-tests.ts +4 -5
  32. package/src/tests/checkInputColumn-tests.ts +1 -1
  33. package/src/tests/converters-test.ts +52 -17
  34. package/src/tests/detectors-benchmark-tests.ts +3 -2
  35. package/src/tests/detectors-tests.ts +177 -172
  36. package/src/tests/detectors-weak-and-likely-tests.ts +129 -0
  37. package/src/tests/fasta-export-tests.ts +1 -1
  38. package/src/tests/monomer-libraries-tests.ts +34 -0
  39. package/src/tests/pepsea-tests.ts +21 -0
  40. package/src/tests/renderers-test.ts +21 -19
  41. package/src/tests/sequence-space-test.ts +6 -4
  42. package/src/tests/similarity-diversity-tests.ts +4 -4
  43. package/src/tests/splitters-test.ts +4 -5
  44. package/src/tests/substructure-filters-tests.ts +23 -1
  45. package/src/tests/utils/sequences-generators.ts +1 -1
  46. package/src/tests/utils.ts +2 -1
  47. package/src/tests/viewers.ts +16 -0
  48. package/src/utils/cell-renderer.ts +88 -35
  49. package/src/utils/constants.ts +7 -6
  50. package/src/utils/convert.ts +8 -2
  51. package/src/utils/monomer-lib.ts +174 -0
  52. package/src/utils/multiple-sequence-alignment.ts +44 -20
  53. package/src/utils/pepsea.ts +78 -0
  54. package/src/utils/save-as-fasta.ts +2 -1
  55. package/src/utils/ui-utils.ts +15 -3
  56. package/src/viewers/vd-regions-viewer.ts +113 -72
  57. package/src/viewers/web-logo-viewer.ts +1031 -0
  58. package/src/widgets/bio-substructure-filter.ts +38 -24
  59. package/tsconfig.json +71 -72
  60. package/webpack.config.js +4 -11
  61. package/dist/vendors-node_modules_datagrok-libraries_ml_src_workers_dimensionality-reducer_js.js +0 -9039
@@ -0,0 +1,174 @@
1
+ // import * as ui from 'datagrok-api/ui';
2
+ import * as DG from 'datagrok-api/dg';
3
+ import * as grok from 'datagrok-api/grok';
4
+
5
+ import {Observable, Subject} from 'rxjs';
6
+ import {IMonomerLib, Monomer} from '@datagrok-libraries/bio/src/types/index';
7
+ import {
8
+ createJsonMonomerLibFromSdf,
9
+ expectedMonomerData,
10
+ IMonomerLibHelper
11
+ } from '@datagrok-libraries/bio/src/monomer-works/monomer-utils';
12
+
13
// -- Monomer libraries --

/** Key passed to the user data storage to read the user's library settings. */
export const LIB_STORAGE_NAME = 'Libraries';

/** Location handed to the file source when library files are read. */
export const LIB_PATH = 'System:AppData/Bio/libraries/';

/** Library settings map, keyed by library file name (presumably the fallback selection - confirm against callers). */
export const LIB_DEFAULT: Record<string, string> = {
  'HELMCoreLibrary.json': 'HELMCoreLibrary.json',
};
17
+
18
/**
 * In-memory monomer library: monomers are grouped by type and, inside a type, keyed by monomer name.
 * Subscribers of {@link onChanged} are notified after {@link update}, {@link updateLibs} and {@link clear}.
 */
export class MonomerLib implements IMonomerLib {
  private _monomers: { [type: string]: { [name: string]: Monomer } } = {};
  private _onChanged = new Subject<any>();

  /** @param monomers Initial content keyed by type, then by monomer name. The object is kept by reference. */
  constructor(monomers: { [type: string]: { [name: string]: Monomer } }) {
    this._monomers = monomers;
  }

  /**
   * Looks up the monomers stored under a type.
   * Only own keys count, so names inherited from Object.prototype ('constructor', 'toString', ...) never match.
   * @param type Type key to look up.
   * @return The name-to-monomer map of the type, or null when the type is not present.
   */
  private _byType(type: string): { [name: string]: Monomer } | null {
    return Object.prototype.hasOwnProperty.call(this._monomers, type) ? this._monomers[type] : null;
  }

  /**
   * @param monomerType Type key of the monomer.
   * @param monomerName Name of the monomer inside the type.
   * @return The stored monomer, or null when either the type or the name is unknown.
   */
  getMonomer(monomerType: string, monomerName: string): Monomer | null {
    const monomersOfType = this._byType(monomerType);
    if (monomersOfType === null || !Object.prototype.hasOwnProperty.call(monomersOfType, monomerName))
      return null;
    return monomersOfType[monomerName];
  }

  /** @return All type keys currently present in the library. */
  getTypes(): string[] {
    return Object.keys(this._monomers);
  }

  /**
   * @param type Type key to list.
   * @return Map of monomer symbol to its molfile; empty when the type is unknown.
   */
  getMonomerMolsByType(type: string): { [symbol: string]: string } {
    const res: { [symbol: string]: string } = {};
    const monomersOfType = this._byType(type);
    if (monomersOfType !== null) {
      for (const [monomerSymbol, monomer] of Object.entries(monomersOfType))
        res[monomerSymbol] = monomer.molfile;
    }
    return res;
  }

  /**
   * @param type Type key to list.
   * @return Names of the monomers stored under the type; empty when the type is unknown.
   */
  getMonomerNamesByType(type: string): string[] {
    const monomersOfType = this._byType(type);
    return monomersOfType === null ? [] : Object.keys(monomersOfType);
  }

  /** Emits after the library content has been updated or cleared. */
  get onChanged(): Observable<any> {
    return this._onChanged;
  }

  /**
   * Merges the content of another library into this one without notifying subscribers.
   * A monomer already stored under the same type and name is overwritten.
   * @param lib Library to copy monomers from.
   */
  private _updateInt(lib: IMonomerLib): void {
    for (const type of lib.getTypes()) {
      //could possibly rewrite -> TODO: check duplicated monomer symbol
      let target = this._byType(type);
      if (target === null)
        target = this._monomers[type] = {};

      for (const monomerName of lib.getMonomerNamesByType(type)) {
        const monomer = lib.getMonomer(type, monomerName);
        // A name that is listed but cannot be resolved is skipped rather than stored as null.
        if (monomer !== null)
          target[monomerName] = monomer;
      }
    }
  }

  /** Merges a single library into this one and notifies subscribers. */
  public update(lib: IMonomerLib): void {
    this._updateInt(lib);
    this._onChanged.next();
  }

  /**
   * Merges several libraries into this one and notifies subscribers once.
   * @param libList Libraries to merge, applied in order (later ones overwrite earlier ones).
   * @param reload When true, the current content is dropped before merging.
   */
  public updateLibs(libList: IMonomerLib[], reload: boolean = false): void {
    if (reload) this._monomers = {};
    for (const lib of libList) this._updateInt(lib);
    this._onChanged.next();
  }

  /** Removes all monomers and notifies subscribers. */
  public clear(): void {
    this._monomers = {};
    this._onChanged.next();
  }
}
88
+
89
/**
 * Singleton helper that owns the shared {@link MonomerLib} and fills it from library files.
 * Obtain the instance through {@link MonomerLibHelper.instance}.
 */
export class MonomerLibHelper implements IMonomerLibHelper {
  private readonly _monomerLib: MonomerLib = new MonomerLib({});

  /** Protect constructor to prevent multiple instantiation. */
  protected constructor() {}

  /** Singleton monomer library */
  getBioLib(): IMonomerLib {
    return this._monomerLib;
  }

  /** Tail of the queue of load requests: a request starts only after the previous one has settled. */
  private loadLibrariesPromise: Promise<void> = Promise.resolve();

  /** Loads libraries based on settings in user storage {@link LIB_STORAGE_NAME}
   * @param {boolean} reload Clean {@link monomerLib} before load libraries [false]
   * @return Promise settled when this particular load request has finished; it rejects if the load fails.
   */
  async loadLibraries(reload: boolean = false): Promise<void> {
    // A failed load must not block the queue forever: the previous failure was already delivered
    // to its own caller, so it is swallowed here and this request runs regardless.
    const current = this.loadLibrariesPromise.catch(() => {}).then(async () => {
      const settings = (await grok.dapi.userDataStorage.get(LIB_STORAGE_NAME, true)) ?? {};
      const userLibrariesSettings: string[] = Object.keys(settings);
      //TODO handle whether files are in place
      const libs: IMonomerLib[] = await Promise.all(
        userLibrariesSettings.map((libFileName) => this.readLibrary(LIB_PATH, libFileName)));
      this._monomerLib.updateLibs(libs, reload);
    });
    this.loadLibrariesPromise = current;
    return current;
  }

  /** Reads library from file shares, handles .json and .sdf
   * @param path Location passed to the file source.
   * @param fileName Library file name; '.sdf' files are imported through Chem:importSdf, anything else is parsed as JSON.
   * @return Library with the monomers of the file grouped by their 'polymerType' and keyed by 'symbol'.
   *   An .sdf file yields an empty library (with a warning) when the Chem import function is not found.
   * @throws Error when the file content is not an array of monomers.
   */
  async readLibrary(path: string, fileName: string): Promise<IMonomerLib> {
    let data: any[] = [];
    const fileSource = new DG.FileSource(path);
    if (fileName.endsWith('.sdf')) {
      const funcList: DG.Func[] = DG.Func.find({package: 'Chem', name: 'importSdf'});
      if (funcList.length === 1) {
        const bytes = await fileSource.readAsBytes(fileName);
        const dfSdf = await grok.functions.call('Chem:importSdf', {bytes: bytes});
        data = createJsonMonomerLibFromSdf(dfSdf[0]);
      } else {
        grok.shell.warning('Chem package is not installed');
      }
    } else {
      const text = await fileSource.readAsText(fileName);
      data = JSON.parse(text);
    }

    if (!Array.isArray(data))
      throw new Error(`Monomer library '${fileName}' does not contain an array of monomers`);

    //group monomers by their type
    const monomers: { [type: string]: { [name: string]: Monomer } } = {};
    for (const monomer of data) {
      const monomerAdd: Monomer = {
        'symbol': monomer['symbol'],
        'name': monomer['name'],
        'naturalAnalog': monomer['naturalAnalog'],
        'molfile': monomer['molfile'],
        'rgroups': monomer['rgroups'],
        'polymerType': monomer['polymerType'],
        'monomerType': monomer['monomerType'],
        'data': {}
      };

      // Everything that is not one of the expected monomer fields is preserved under 'data'.
      for (const prop of Object.keys(monomer)) {
        if (!expectedMonomerData.includes(prop))
          monomerAdd.data[prop] = monomer[prop];
      }

      const polymerType = monomer['polymerType'];
      if (!Object.prototype.hasOwnProperty.call(monomers, polymerType))
        monomers[polymerType] = {};
      monomers[polymerType][monomer['symbol']] = monomerAdd;
    }

    return new MonomerLib(monomers);
  }

  // -- Instance singleton --
  private static _instance: MonomerLibHelper | null = null;

  /** The single shared helper, created on first access. */
  public static get instance(): MonomerLibHelper {
    if (!MonomerLibHelper._instance) MonomerLibHelper._instance = new MonomerLibHelper();
    return MonomerLibHelper._instance;
  }
}
@@ -1,15 +1,14 @@
1
1
  /* Do not change these import lines to match external modules in webpack configuration */
2
- import * as grok from 'datagrok-api/grok';
3
- import * as ui from 'datagrok-api/ui';
4
2
  import * as DG from 'datagrok-api/dg';
5
3
 
6
4
  import {FastaFileHandler} from '@datagrok-libraries/bio/src/utils/fasta-handler';
7
- import {TAGS as bioTAGS} from '@datagrok-libraries/bio';
8
- //@ts-ignore
5
+ import {TAGS as bioTAGS} from '@datagrok-libraries/bio/src/utils/macromolecule';
6
+ //@ts-ignore: there are no types for this library
9
7
  import Aioli from '@biowasm/aioli';
10
8
 
11
9
  import {AlignedSequenceEncoder} from '@datagrok-libraries/bio/src/sequence-encoder';
12
- import {UnitsHandler} from '@datagrok-libraries/bio/src/utils/units-handler';
10
+ const fastaInputFilename = 'input.fa';
11
+ const fastaOutputFilename = 'result.fasta';
13
12
 
14
13
  /**
15
14
  * Converts array of sequences into simple fasta string.
@@ -29,31 +28,56 @@ function _stringsToFasta(sequences: string[]): string {
29
28
  * @param {string} unUsedName
30
29
  * @return {Promise<DG.Column>} Aligned sequences.
31
30
  */
32
- export async function runKalign(srcCol: DG.Column, isAligned = false, unUsedName: string = ''): Promise<DG.Column> {
33
- let sequences = srcCol.toList();
31
+ export async function runKalign(srcCol: DG.Column<string>, isAligned: boolean = false, unUsedName: string = '',
32
+ clustersCol: DG.Column | null = null): Promise<DG.Column> {
33
+ let sequences: string[] = srcCol.toList();
34
34
 
35
35
  if (isAligned)
36
- sequences = sequences.map((v: string, _) => AlignedSequenceEncoder.clean(v).replace(/\-/g, ''));
36
+ sequences = sequences.map((v: string) => AlignedSequenceEncoder.clean(v).replace(/\-/g, ''));
37
+
38
+ const sequencesLength = srcCol.length;
39
+ clustersCol ??= DG.Column.string('Clusters', sequencesLength).init('0');
40
+ if (clustersCol.type != DG.COLUMN_TYPE.STRING)
41
+ clustersCol = clustersCol.convertTo(DG.TYPE.STRING);
42
+ clustersCol.compact();
43
+
44
+ //TODO: use fixed-size inner arrays, but first need to expose the method to get each category count
45
+ const clustersColCategories = clustersCol.categories;
46
+ const clustersColData = clustersCol.getRawData();
47
+ const fastaSequences: string[][] = new Array(clustersColCategories.length);
48
+ const clusterIndexes: number[][] = new Array(clustersColCategories.length);
49
+ for (let rowIdx = 0; rowIdx < sequencesLength; ++rowIdx) {
50
+ const clusterCategoryIdx = clustersColData[rowIdx];
51
+ (fastaSequences[clusterCategoryIdx] ??= []).push(sequences[rowIdx]);
52
+ (clusterIndexes[clusterCategoryIdx] ??= []).push(rowIdx);
53
+ }
37
54
 
38
- const fasta = _stringsToFasta(sequences);
39
55
  const CLI = await new Aioli([
40
56
  'base/1.0.0',
41
57
  {tool: 'kalign', version: '3.3.1', reinit: true}
42
58
  ]);
59
+ const tgtCol = DG.Column.string(unUsedName, sequencesLength);
43
60
 
44
- console.log(['fasta.length =', fasta.length]);
61
+ for (let clusterIdx = 0; clusterIdx < clustersColCategories.length; ++clusterIdx) {
62
+ const clusterSequences = fastaSequences[clusterIdx];
63
+ const fasta = _stringsToFasta(clusterSequences);
64
+
65
+ console.log(['fasta.length =', fasta.length]);
45
66
 
46
- await CLI.fs.writeFile('input.fa', fasta);
47
- const output = await CLI.exec('kalign input.fa -f fasta -o result.fasta');
48
- console.warn(output);
67
+ await CLI.fs.writeFile(fastaInputFilename, fasta);
68
+ const output = await CLI.exec(`kalign ${fastaInputFilename} -f fasta -o ${fastaOutputFilename}`);
69
+ console.warn(output);
49
70
 
50
- const buf = await CLI.cat('result.fasta');
51
- if (!buf)
52
- throw new Error(`kalign output no result`);
71
+ const buf = await CLI.cat(fastaOutputFilename);
72
+ if (!buf)
73
+ throw new Error(`kalign output no result`);
53
74
 
54
- const ffh = new FastaFileHandler(buf);
55
- const aligned = ffh.sequencesArray; // array of sequences extracted from FASTA
56
- const tgtCol = DG.Column.fromStrings(unUsedName, aligned);
75
+ const ffh = new FastaFileHandler(buf);
76
+ const aligned = ffh.sequencesArray; // array of sequences extracted from FASTA
77
+ const clusterRowIds = clusterIndexes[clusterIdx];
78
+ for (let clusterRowIdIdx = 0; clusterRowIdIdx < aligned.length; ++clusterRowIdIdx)
79
+ tgtCol.set(clusterRowIds[clusterRowIdIdx], aligned[clusterRowIdIdx]);
80
+ }
57
81
 
58
82
  // units
59
83
  const srcUnits = srcCol.getTag(DG.TAGS.UNITS);
@@ -70,7 +94,7 @@ export async function runKalign(srcCol: DG.Column, isAligned = false, unUsedName
70
94
  return tgtCol;
71
95
  }
72
96
 
73
- export async function testMSAEnoughMemory(col: DG.Column): Promise<void> {
97
+ export async function testMSAEnoughMemory(col: DG.Column<string>): Promise<void> {
74
98
  const sequencesCount = col.length;
75
99
  const delta = sequencesCount / 100;
76
100
 
@@ -0,0 +1,78 @@
1
+ /* Do not change these import lines to match external modules in webpack configuration */
2
+ import * as grok from 'datagrok-api/grok';
3
+ import * as DG from 'datagrok-api/dg';
4
+ import {NOTATION, TAGS as bioTAGS, ALIGNMENT, ALPHABET} from '@datagrok-libraries/bio/src/utils/macromolecule';
5
+ import * as C from './constants';
6
+
7
/** Method names offered for the `method` argument of the PepSeA alignment. */
export const pepseaMethods = [
  'mafft --auto', 'mafft', 'linsi', 'ginsi', 'einsi', 'fftns', 'fftnsi', 'nwns', 'nwnsi',
];

/** Keys of an alignment record that carry metadata; they are left out when the aligned sequence is assembled. */
const alignmentObjectMetaKeys = ['AlignedSeq', 'AlignedSubpeptide', 'HELM', 'ID', 'PolymerID'];

/** Shape of the parsed answer of the alignment request. */
type PepseaRepsonse = {
  Alignment: {
    PolymerID: string,
    AlignedSubpeptide: string,
    HELM: string,
    ID: string,
    AlignedSeq: string,
    [key: string]: string,
  }[],
  AlignmentScore: {[key: string]: number | null},
};

/** One sequence of a request body: `ID` is the source row index as a string, `HELM` is the sequence. */
type PepseaBodyUnit = {ID: string, HELM: string};
16
+
17
/**
 * Aligns the sequences of a column cluster by cluster through the 'bio' docker container
 * and returns the aligned sequences as a new separator-notation macromolecule column.
 * Rows with an empty cluster or an empty sequence are not sent for alignment and stay empty in the result.
 * @param srcCol Column with the sequences to align (sent in the `HELM` field of the request).
 * @param unUsedName Name of the resulting column.
 * @param method Alignment method, see {@link pepseaMethods}.
 * @param gapOpen Forwarded to the service as `gap_open`.
 * @param gapExtend Forwarded to the service as `gap_extend`.
 * @param clustersCol Optional cluster labels; each distinct label is aligned separately.
 *   When omitted, all rows form a single cluster.
 * @return Column of aligned sequences joined with C.PEPSEA.SEPARATOR.
 * @throws Error when the container is not found or the service returns no alignment for a cluster.
 */
export async function runPepsea(srcCol: DG.Column<string>, unUsedName: string,
  method: typeof pepseaMethods[number] = 'ginsi', gapOpen: number = 1.53, gapExtend: number = 0.0,
  clustersCol: DG.Column<string | number> | null = null,
): Promise<DG.Column<string>> {
  const peptideCount = srcCol.length;
  clustersCol ??= DG.Column.int('Clusters', peptideCount).init(0);
  if (clustersCol.type != DG.COLUMN_TYPE.STRING)
    clustersCol = clustersCol.convertTo(DG.TYPE.STRING);

  // Grouping data by clusters. A map holds only clusters that received at least one sequence,
  // so no empty request is ever produced for a label whose rows were all skipped.
  const bodies = new Map<string, PepseaBodyUnit[]>();
  for (let rowIndex = 0; rowIndex < peptideCount; ++rowIndex) {
    const cluster = clustersCol.get(rowIndex) as string;
    if (!cluster)
      continue;

    const helmSeq = srcCol.get(rowIndex);
    if (!helmSeq)
      continue;

    let body = bodies.get(cluster);
    if (body === undefined) {
      body = [];
      bodies.set(cluster, body);
    }
    body.push({ID: rowIndex.toString(), HELM: helmSeq});
  }

  //@ts-ignore: this is a temporary workaround for the issue with docker containers. This will be fixed in 1.14.0
  const pepseaContainer = await (grok.dapi.docker !== undefined ? grok.dapi.docker.dockerContainers : grok.dapi.dockerfiles).filter('bio').first();
  if (!pepseaContainer)
    throw new Error('PepSeA docker container is not found');

  // Pre-filled so that rows skipped above are empty strings rather than array holes.
  const alignedSequences: string[] = new Array<string>(peptideCount).fill('');
  for (const body of bodies.values()) { // getting aligned sequences for each cluster
    const alignedObject = await requestAlignedObjects(pepseaContainer.id, body, method, gapOpen, gapExtend);
    const alignments = alignedObject.Alignment;
    if (!Array.isArray(alignments))
      throw new Error('PepSeA response does not contain alignment');

    for (const alignment of alignments) { // filling alignedSequences by the source row index
      alignedSequences[parseInt(alignment.ID, 10)] = Object.entries(alignment)
        .filter(([key]) => !alignmentObjectMetaKeys.includes(key))
        .map(([, monomer]) => monomer !== '-' ? monomer : '') // '-' marks a gap
        .join(C.PEPSEA.SEPARATOR);
    }
  }

  const alignedSequencesCol: DG.Column<string> = DG.Column.fromStrings(unUsedName, alignedSequences);
  alignedSequencesCol.setTag(DG.TAGS.UNITS, NOTATION.SEPARATOR);
  alignedSequencesCol.setTag(bioTAGS.separator, C.PEPSEA.SEPARATOR);
  alignedSequencesCol.setTag(bioTAGS.aligned, ALIGNMENT.SEQ_MSA);
  alignedSequencesCol.setTag(bioTAGS.alphabet, ALPHABET.UN);
  alignedSequencesCol.semType = DG.SEMTYPE.MACROMOLECULE;

  return alignedSequencesCol;
}
65
+
66
/**
 * Posts one cluster of sequences to the `/align` endpoint of the container and parses the JSON answer.
 * @param dockerfileId Id of the container to send the request to.
 * @param body Sequences of the cluster, sent as the JSON request body.
 * @param method Alignment method, sent as the `method` query parameter.
 * @param gapOpen Sent as the `gap_open` query parameter.
 * @param gapExtend Sent as the `gap_extend` query parameter.
 * @return Parsed response; an empty object when the request yields no response text.
 */
async function requestAlignedObjects(dockerfileId: string, body: PepseaBodyUnit[], method: string, gapOpen: number,
  gapExtend: number): Promise<PepseaRepsonse> {
  const params = {
    method: 'POST',
    headers: {'Accept': 'application/json', 'Content-Type': 'application/json'},
    body: JSON.stringify(body),
  };
  // Values are escaped because a method name may contain characters that are not valid
  // in a query string as is (e.g. the space in 'mafft --auto').
  const query = [
    `method=${encodeURIComponent(method)}`,
    `gap_open=${encodeURIComponent(gapOpen)}`,
    `gap_extend=${encodeURIComponent(gapExtend)}`,
  ].join('&');
  const path = `/align?${query}`;
  //@ts-ignore: this is a temporary workaround for the issue with docker containers
  const response = await (grok.dapi.docker !== undefined ? grok.dapi.docker.dockerContainers : grok.dapi.dockerfiles)
    .request(dockerfileId, path, params);
  return JSON.parse(response ?? '{}');
}
@@ -3,7 +3,8 @@ import * as ui from 'datagrok-api/ui';
3
3
  import * as grok from 'datagrok-api/grok';
4
4
 
5
5
  import wu from 'wu';
6
- import {splitterAsFasta, SplitterFunc, UnitsHandler} from '@datagrok-libraries/bio';
6
+ import {splitterAsFasta, SplitterFunc} from '@datagrok-libraries/bio/src/utils/macromolecule';
7
+ import {UnitsHandler} from '@datagrok-libraries/bio/src/utils/units-handler';
7
8
 
8
9
  const FASTA_LINE_WIDTH = 60;
9
10
 
@@ -1,4 +1,16 @@
1
+ import * as grok from 'datagrok-api/grok';
2
+ import * as DG from 'datagrok-api/dg';
3
+
4
/**
 * Finds a column with the Macromolecule semantic type in the current table.
 * Shows an error in the shell when there is no such column (or no current table).
 * @return The column found, or undefined when there is none.
 */
export function getMacromoleculeColumn(): DG.Column | any {
  // grok.shell.t is guarded: without an open table the lookup must end in the error message, not in a TypeError.
  const col = grok.shell.t?.columns.bySemType(DG.SEMTYPE.MACROMOLECULE) ?? null;
  if (col === null) {
    grok.shell.error('Current table does not contain macromolecules');
    return;
  }
  return col;
}
12
+
1
13
  export function updateDivInnerHTML(div: HTMLElement, content: string | Node): void {
2
- div.innerHTML = '';
3
- div.append(content);
4
- }
14
+ div.innerHTML = '';
15
+ div.append(content);
16
+ }