@datagrok/peptides 0.8.13 → 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (39) hide show
  1. package/.eslintrc.json +5 -2
  2. package/dist/package-test.js +1268 -1766
  3. package/dist/package.js +1097 -1622
  4. package/dist/vendors-node_modules_datagrok-libraries_ml_src_workers_dimensionality-reducer_js.js +120 -62
  5. package/package.json +13 -17
  6. package/package.png +0 -0
  7. package/src/model.ts +504 -448
  8. package/src/monomer-library.ts +31 -30
  9. package/src/package-test.ts +5 -6
  10. package/src/package.ts +52 -70
  11. package/src/tests/core.ts +67 -0
  12. package/src/tests/msa-tests.ts +3 -3
  13. package/src/tests/peptide-space-test.ts +65 -45
  14. package/src/tests/utils.ts +20 -50
  15. package/src/utils/cell-renderer.ts +25 -151
  16. package/src/utils/chem-palette.ts +3 -14
  17. package/src/utils/constants.ts +5 -0
  18. package/src/utils/filtering-statistics.ts +2 -2
  19. package/src/utils/misc.ts +29 -0
  20. package/src/utils/multiple-sequence-alignment.ts +5 -18
  21. package/src/utils/multivariate-analysis.ts +5 -8
  22. package/src/utils/peptide-similarity-space.ts +12 -9
  23. package/src/utils/types.ts +5 -2
  24. package/src/viewers/peptide-space-viewer.ts +67 -39
  25. package/src/viewers/sar-viewer.ts +34 -37
  26. package/src/viewers/stacked-barchart-viewer.ts +38 -61
  27. package/src/widgets/analyze-peptides.ts +53 -75
  28. package/src/widgets/distribution.ts +34 -18
  29. package/src/widgets/manual-alignment.ts +8 -12
  30. package/src/widgets/peptide-molecule.ts +48 -25
  31. package/src/widgets/subst-table.ts +53 -52
  32. package/src/workers/dimensionality-reducer.ts +8 -13
  33. package/{test-Peptides-f8114def7953-4bf59d70.html → test-Peptides-69a4761f6044-40ac3a0c.html} +2 -2
  34. package/src/peptides.ts +0 -327
  35. package/src/semantics.ts +0 -5
  36. package/src/tests/peptides-tests.ts +0 -60
  37. package/src/utils/SAR-multiple-filter.ts +0 -439
  38. package/src/utils/SAR-multiple-selection.ts +0 -177
  39. package/src/viewers/logo-viewer.ts +0 -195
package/src/peptides.ts DELETED
@@ -1,327 +0,0 @@
1
- import * as grok from 'datagrok-api/grok';
2
- import * as DG from 'datagrok-api/dg';
3
- import {PeptidesModel} from './model';
4
- import {SARViewer, SARViewerVertical} from './viewers/sar-viewer';
5
- import {ChemPalette} from './utils/chem-palette';
6
- import {Observable} from 'rxjs';
7
- import {MonomerLibrary} from './monomer-library';
8
- import {_package} from './package';
9
- import {setAARRenderer} from './utils/cell-renderer';
10
- import * as C from './utils/constants';
11
- import {PeptideSpaceViewer} from './viewers/peptide-space-viewer';
12
- import { FilteringStatistics } from './utils/filtering-statistics';
13
-
14
/** Either of the two SAR viewer classes; lets both viewers be passed around as one list. */
type viewerTypes = SARViewerVertical | SARViewer;
15
/**
 * Per-data-frame controller: a thin facade over {@link PeptidesModel} that also creates and
 * docks the peptide viewers.
 *
 * The constructor is private; instances are cached in `dataFrame.temp` and must be obtained
 * through {@link PeptidesController.getInstance}.
 */
export class PeptidesController {
  private static _controllerName: string = 'peptidesController';
  private helpUrl = '/help/domains/bio/peptides.md';

  private _model: PeptidesModel;
  sarViewer!: SARViewer;
  sarViewerVertical!: SARViewerVertical;
  /** Set by {@link init} so that repeated calls become no-ops. */
  isInitialized = false;

  private constructor(dataFrame: DG.DataFrame) {
    this._model = PeptidesModel.getInstance(dataFrame);
  }

  /**
   * Returns the controller cached in `dataFrame.temp`, creating it on first use.
   * Also makes sure a {@link MonomerLibrary} (read from the package file
   * `HELMMonomers_June10.sdf`) is stored in `dataFrame.temp[MonomerLibrary.id]`.
   *
   * @param dataFrame Table the controller is bound to.
   * @returns The controller instance for this table.
   */
  static async getInstance(dataFrame: DG.DataFrame): Promise<PeptidesController> {
    dataFrame.temp[PeptidesController.controllerName] ??= new PeptidesController(dataFrame);
    // `== null` matches both null and undefined, consistent with the `??=` used below;
    // a strict `=== null` would skip loading the library when the slot is undefined.
    if (dataFrame.temp[MonomerLibrary.id] == null) {
      const sdf = await _package.files.readAsText('HELMMonomers_June10.sdf');
      dataFrame.temp[MonomerLibrary.id] ??= new MonomerLibrary(sdf);
    }
    return dataFrame.temp[PeptidesController.controllerName];
  }

  /** Key under which the controller is cached in `dataFrame.temp`. */
  static get controllerName() {return PeptidesController._controllerName;}

  /** Data frame held by the underlying model. */
  get dataFrame() {return this._model.dataFrame;}

  /** Forwards to the `setAARRenderer` helper from `utils/cell-renderer`. */
  static setAARRenderer(col: DG.Column, grid: DG.Grid) {
    return setAARRenderer(col, grid);
  }

  get onStatsDataFrameChanged(): Observable<DG.DataFrame> {return this._model.onStatsDataFrameChanged;}

  get onSARGridChanged(): Observable<DG.Grid> {return this._model.onSARGridChanged;}

  get onSARVGridChanged(): Observable<DG.Grid> {return this._model.onSARVGridChanged;}

  // get onGroupMappingChanged(): Observable<StringDictionary> {return this._model.onGroupMappingChanged;}

  get onSubstTableChanged(): Observable<DG.DataFrame> {return this._model.onSubstTableChanged;}

  async updateDefault() {await this._model.updateDefault();}

  get sarGrid() {return this._model._sarGrid;}

  get sarVGrid() {return this._model._sarVGrid;}

  get sourceGrid() {return this._model._sourceGrid!;}

  /**
   * Forwards all arguments to the model's `updateData`; `filterMode` defaults to `false`
   * when omitted.
   */
  async updateData(
    activityScaling?: string, sourceGrid?: DG.Grid, twoColorMode?: boolean, activityLimit?: number,
    maxSubstitutions?: number, isSubstitutionOn?: boolean, filterMode?: boolean,
  ) {
    filterMode ??= false;
    await this._model.updateData(
      activityScaling, sourceGrid, twoColorMode, activityLimit, maxSubstitutions, isSubstitutionOn, filterMode);
  }

  getSubstitutions() {
    return this._model.getSubstitutionTable();
  }

  /**
   * Clones the activity column of `df` into a new table and adds a calculated, scaled copy of it
   * named `C.COLUMNS_NAMES.ACTIVITY_SCALED`. As a side effect, records the method in
   * `df.tags['scaling']`.
   *
   * @param activityScaling Scaling method: `'none'`, `'lg'` or `'-lg'`.
   * @param df Source table.
   * @param originalActivityName Activity column to scale; when omitted or absent from `df`,
   * the column named `C.COLUMNS_NAMES.ACTIVITY` is used instead.
   * @param cloneBitset When true, only rows passing `df.filter` are cloned.
   * @returns The cloned table and a display name for the scaled column.
   * @throws Error if `activityScaling` is not one of the supported methods.
   */
  static async scaleActivity(
    activityScaling: string, df: DG.DataFrame, originalActivityName?: string, cloneBitset = false,
  ): Promise<[DG.DataFrame, string]> {
    const columnNames = (df.columns as DG.ColumnList).names();
    const sourceColName = originalActivityName != null && columnNames.includes(originalActivityName) ?
      originalActivityName : C.COLUMNS_NAMES.ACTIVITY;
    const tempDf = df.clone(cloneBitset ? df.filter : null, [sourceColName]);

    let formula = '${' + sourceColName + '}';
    let newColName = 'activity';
    switch (activityScaling) {
    case 'none':
      break;
    case 'lg':
      formula = `Log10(${formula})`;
      newColName = `Log10(${newColName})`;
      break;
    case '-lg':
      formula = `-1*Log10(${formula})`;
      newColName = `-Log10(${newColName})`;
      break;
    default:
      throw new Error(`ScalingError: method \`${activityScaling}\` is not available.`);
    }

    await (tempDf.columns as DG.ColumnList).addNewCalculated(C.COLUMNS_NAMES.ACTIVITY_SCALED, formula);
    df.tags['scaling'] = activityScaling;

    return [tempDf, newColName];
  }

  get originalActivityColumnName(): string {return this.dataFrame.temp[C.COLUMNS_NAMES.ACTIVITY];}

  get substTooltipData() {return this._model.substTooltipData;}

  /**
   * Splits `-`-separated aligned sequences into one string column per position.
   *
   * The expected number of monomers is the most frequent one in the column (the larger value
   * wins a tie). Shorter rows are padded with `'-'`, longer rows are truncated, and both are
   * reported as invalid. The first column is named `Nterminal`, the last `Cterminal`, and the
   * ones in between `01`, `02`, ...
   *
   * @param peptideColumn Column of aligned sequences.
   * @param filter When true, columns holding a single distinct value are dropped.
   * @returns The table of split columns and the indexes of rows whose length differs from the mode.
   * @throws Error if `peptideColumn` has no rows.
   */
  static splitAlignedPeptides(peptideColumn: DG.Column, filter: boolean = true): [DG.DataFrame, number[]] {
    const colLength = peptideColumn.length;
    if (colLength === 0)
      throw new Error('Cannot split aligned peptides: the sequence column is empty.');

    // Split every sequence and count how often each monomer count occurs.
    const splitPeptidesArray: string[][] = [];
    const lengthCounts = new Map<number, number>();
    for (let i = 0; i < colLength; i++) {
      // An empty token comes from two adjacent separators and is shown as a gap.
      const monomers: string[] = peptideColumn.get(i).split('-')
        .map((value: string) => value === '' ? '-' : value);
      splitPeptidesArray.push(monomers);
      lengthCounts.set(monomers.length, (lengthCounts.get(monomers.length) ?? 0) + 1);
    }

    let modeMonomerCount = 0;
    let modeFrequency = 0;
    for (const [monomerCount, frequency] of lengthCounts) {
      const winsTie = frequency === modeFrequency && monomerCount > modeMonomerCount;
      if (frequency > modeFrequency || winsTie) {
        modeMonomerCount = monomerCount;
        modeFrequency = frequency;
      }
    }

    // Bring every row to the same width and mark rows that did not fit.
    // The last monomer of a row (the C-terminal) always goes to the last column.
    const leadingCount = modeMonomerCount - 1;
    const invalidIndexes: number[] = [];
    const splitColumns: string[][] = Array.from({length: modeMonomerCount}, () => []);
    for (let i = 0; i < colLength; i++) {
      const monomers = splitPeptidesArray[i];
      const currentLeadingCount = monomers.length - 1; // split() yields at least one element
      if (currentLeadingCount !== leadingCount)
        invalidIndexes.push(i);

      for (let j = 0; j < leadingCount; j++)
        splitColumns[j].push(j < currentLeadingCount ? monomers[j] : '-');

      splitColumns[leadingCount].push(monomers[currentLeadingCount]);
    }

    // Column names: both terminals plus zero-padded position numbers in between.
    const columnNames = [
      'Nterminal',
      ...Array.from({length: Math.max(modeMonomerCount - 2, 0)}, (_, index) => `${index + 1}`.padStart(2, '0')),
      'Cterminal',
    ];

    // Pair each column with its name BEFORE filtering, so that dropping a column can never
    // shift the names of the columns that remain.
    let namedColumns = splitColumns.map((values, index) => ({name: columnNames[index], values}));
    if (filter)
      namedColumns = namedColumns.filter(({values}) => new Set(values).size > 1);

    return [
      DG.DataFrame.fromColumns(namedColumns.map(({name, values}) => DG.Column.fromList('string', name, values))),
      invalidIndexes,
    ];
  }

  static get chemPalette() {return ChemPalette;}

  /**
   * Checks that the instance field named `variable` holds a truthy value.
   *
   * @param variable Name of a field of this controller.
   * @param init When true and the field is falsy, it is first restored from
   * `dataFrame.temp[variable]`; a value that is still falsy raises an error.
   * @returns Whether the field holds a truthy value.
   * @throws Error if `init` is true and the value cannot be found.
   */
  assertVar(variable: string, init = false): boolean {
    //@ts-ignore
    let foundVariable: any = this[variable];
    if (!foundVariable && init) {
      //@ts-ignore
      this[variable] = foundVariable = this.dataFrame.temp[variable];
    }

    const assertionResult = Boolean(foundVariable);
    if (init && !assertionResult)
      throw new Error(`Variable assertion error: variable '${variable}' is not found in dataFrame`);

    return assertionResult;
  }

  /** Applies {@link assertVar} to each name; stops checking after the first failure. */
  assertVariables(variables: string[], init = false) {
    let result = true;
    for (const variable of variables)
      result &&= this.assertVar(variable, init);

    return result;
  }

  /**
   * Copies every property value from one SAR viewer to the other.
   *
   * @param isSourceSAR When true the horizontal viewer is the source and the vertical one the
   * target; when false the direction is reversed.
   */
  syncProperties(isSourceSAR = true) {
    this.assertVariables(['sarViewer', 'sarViewerVertical'], true);
    const sourceViewer = isSourceSAR ? this.sarViewer : this.sarViewerVertical;
    const targetViewer = isSourceSAR ? this.sarViewerVertical : this.sarViewer;
    const properties = sourceViewer.props.getProperties();
    for (const property of properties)
      targetViewer.props.set(property.name, property.get(sourceViewer));
  }

  /** Forwards to the model and, unless `notify` is false, fires its bitset-changed event. */
  modifyOrCreateSplitCol(aar: string, position: string, notify: boolean = true) {
    this._model.modifyOrCreateSplitCol(aar, position);
    if (notify)
      this._model.fireBitsetChanged();
  }

  /**
   * Makes the SAR grid cell for the given residue and position the current cell.
   * Position `C.CATEGORIES.ALL` maps to the amino-acid-residue column itself.
   */
  setSARGridCellAt(aar: string, position: string) {
    const sarDf = this.sarGrid.dataFrame;
    const aarCol = sarDf.getCol(C.COLUMNS_NAMES.AMINO_ACID_RESIDUE);
    const aarColLen = aarCol.length;
    // NOTE(review): when `aar` is not found, row index -1 is passed to `cell()` below;
    // presumably that clears the current cell - confirm against the DataFrame API.
    let index = -1;
    for (let i = 0; i < aarColLen; i++) {
      if (aarCol.get(i) === aar) {
        index = i;
        break;
      }
    }
    position = position === C.CATEGORIES.ALL ? C.COLUMNS_NAMES.AMINO_ACID_RESIDUE : position;
    sarDf.currentCell = sarDf.cell(index, position);
  }

  /**
   * One-time setup of the analysis; later calls return immediately.
   *
   * Stores selection statistics of the scaled activity in `table.temp[C.STATS]`, prepares the
   * source grid (renames and moves the scaled activity column first, hides columns that are
   * neither amino acids nor the scaled activity), runs the model update, then creates and docks
   * the two SAR viewers and the peptide space viewer.
   *
   * @param table Working table; must already contain the scaled activity column.
   */
  async init(table: DG.DataFrame) {
    if (this.isInitialized)
      return;
    this.isInitialized = true;

    //calculate initial stats
    const stats = new FilteringStatistics();
    const activityScaledCol = table.getCol(C.COLUMNS_NAMES.ACTIVITY_SCALED);
    stats.setData(activityScaledCol.getRawData() as Float32Array);
    stats.setMask(table.selection);
    table.temp[C.STATS] = stats;

    //set up views: reuse the current view only if it already shows a peptides analysis
    let currentView = grok.shell.v as DG.TableView;
    if (currentView.dataFrame.tags['isPeptidesAnalysis'] !== 'true')
      currentView = grok.shell.addTableView(table);
    const sourceGrid = currentView.grid;
    sourceGrid.col(C.COLUMNS_NAMES.ACTIVITY_SCALED)!.name = table.temp[C.COLUMNS_NAMES.ACTIVITY_SCALED];
    sourceGrid.columns.setOrder([table.temp[C.COLUMNS_NAMES.ACTIVITY_SCALED]]);

    this.dataFrame.temp[C.EMBEDDING_STATUS] = false;

    // Narrower cells for columns whose name parses as a number, wider for all others.
    function adjustCellSize(grid: DG.Grid) {
      const colNum = grid.columns.length;
      for (let i = 0; i < colNum; ++i) {
        const iCol = grid.columns.byIndex(i)!;
        iCol.width = isNaN(parseInt(iCol.name)) ? 50 : 40;
      }
      grid.props.rowHeight = 20;
    }

    for (let i = 0; i < sourceGrid.columns.length; i++) {
      const aarCol = sourceGrid.columns.byIndex(i);
      if (aarCol && aarCol.name && aarCol.column?.semType !== C.SEM_TYPES.AMINO_ACIDS &&
        aarCol.name !== this.dataFrame.temp[C.COLUMNS_NAMES.ACTIVITY_SCALED]
      )
        aarCol.visible = false;
    }

    const options = {scaling: table.tags['scaling']};
    await this.updateData(table.tags['scaling'], sourceGrid, false, 1, 2, false, false);

    const dockManager = currentView.dockManager;

    // The viewers are also stored in `temp` so that `assertVar(..., true)` can restore them.
    this.dataFrame.temp['sarViewer'] = this.sarViewer =
      await this.dataFrame.plot.fromType('peptide-sar-viewer', options) as SARViewer;
    this.sarViewer.helpUrl = this.helpUrl;

    this.dataFrame.temp['sarViewerVertical'] = this.sarViewerVertical =
      await this.dataFrame.plot.fromType('peptide-sar-viewer-vertical', options) as SARViewerVertical;
    this.sarViewerVertical.helpUrl = this.helpUrl;

    const sarViewersGroup: viewerTypes[] = [this.sarViewer, this.sarViewerVertical];

    const peptideSpaceViewerOptions = {method: 't-SNE', measure: 'Levenshtein', cyclesCount: 100};
    const peptideSpaceViewer =
      await this.dataFrame.plot.fromType('peptide-space-viewer', peptideSpaceViewerOptions) as PeptideSpaceViewer;
    dockManager.dock(peptideSpaceViewer, DG.DOCK_TYPE.RIGHT, null, 'Peptide Space Viewer');

    dockViewers(sarViewersGroup, DG.DOCK_TYPE.RIGHT, dockManager, DG.DOCK_TYPE.DOWN);

    sourceGrid.props.allowEdit = false;
    adjustCellSize(sourceGrid);

    this._model.invalidateGrids();
  }

  invalidateSourceGrid() {this.sourceGrid.invalidate();}
}
310
-
311
/**
 * Docks a list of viewers one after another.
 *
 * The first viewer is docked with `initialAttachDirection` and no reference node; every
 * following viewer is docked with `attachDirection` relative to the node created just before
 * it, using a ratio of `1 / viewerList.length`. A missing viewer name becomes an empty title.
 *
 * @param viewerList Viewers to dock, in order.
 * @param attachDirection Dock type for every viewer after the first.
 * @param dockManager Dock manager that performs the docking.
 * @param initialAttachDirection Dock type for the first viewer.
 * @returns The created dock nodes in docking order, or `null` when the list is empty.
 */
function dockViewers(
  viewerList: viewerTypes[], attachDirection: DG.DockType, dockManager: DG.DockManager,
  initialAttachDirection?: DG.DockType): DG.DockNode[] | null {
  if (viewerList.length === 0)
    return null;

  const [firstViewer, ...remainingViewers] = viewerList;
  const sizeRatio = 1 / viewerList.length;
  const dockedNodes = [dockManager.dock(firstViewer, initialAttachDirection, null, firstViewer.name ?? '')];

  for (const viewer of remainingViewers) {
    const previousNode = dockedNodes[dockedNodes.length - 1];
    dockedNodes.push(dockManager.dock(viewer, attachDirection, previousNode, viewer.name ?? '', sizeRatio));
  }
  return dockedNodes;
}
package/src/semantics.ts DELETED
@@ -1,5 +0,0 @@
1
/** Tag strings exported by this module (presumably column semantic types - confirm with callers). */
export const SEMTYPE = {ALIGNED: 'alignedSequence'};
4
-
5
-
package/src/tests/peptides-tests.ts DELETED
@@ -1,60 +0,0 @@
1
- import {after, before, category, test} from '@datagrok-libraries/utils/src/test';
2
- import {StringDictionary} from '@datagrok-libraries/utils/src/type-declarations';
3
- import * as DG from 'datagrok-api/dg';
4
- import * as grok from 'datagrok-api/grok';
5
- import {PeptidesController} from '../peptides';
6
- import {analyzePeptidesWidget} from '../widgets/analyze-peptides';
7
- import {manualAlignmentWidget} from '../widgets/manual-alignment';
8
- import {peptideMoleculeWidget} from '../widgets/peptide-molecule';
9
- import {_packageTest} from '../package-test';
10
-
11
// Smoke tests for the peptides package: each test only checks that the call completes
// without throwing, using the `aligned.csv` fixture shipped with the package.
category('peptides', async () => {
  let peptidesDf: DG.DataFrame;
  let options: StringDictionary;
  // let peptidesGrid: DG.Grid;
  let asCol: DG.Column;
  // let pepView: DG.TableView;

  before(async () => {
    peptidesDf = DG.DataFrame.fromCsv(await _packageTest.files.readAsText('aligned.csv'));
    options = {
      activityColumnName: 'IC50',
      scaling: '-lg',
    };
    asCol = peptidesDf.getCol('AlignedSequence');
    // pepView = grok.shell.addTableView(peptidesDf);
    // peptidesGrid = pepView.grid;
  });

  test('utils.split-sequence', async () => {
    PeptidesController.splitAlignedPeptides(peptidesDf.getCol('AlignedSequence'));
  });

  // test('describe', async () => {
  //   await describe(
  //     peptidesDf, options['activityColumnName'], options['scaling'], peptidesGrid, true,
  //     DG.BitSet.create(peptidesDf.rowCount, (i) => i % 2 === 0), true);
  // });

  test('Peptides-controller', async () => {
    const peptides = await PeptidesController.getInstance(peptidesDf);
    // init() is async: without `await` a rejection would not fail this test, and the
    // initialization could still be running when after() closes the table.
    await peptides.init(peptidesDf); //, peptidesDf.columns.names());
  });

  test('widgets.analyze-peptides', async () => {
    await analyzePeptidesWidget(peptidesDf, asCol);
  });

  test('widgets.manual-alignment', async () => {
    manualAlignmentWidget(asCol, peptidesDf);
  });

  test('widgets.peptide-molecule', async () => {
    await peptideMoleculeWidget('NH2--A-N-T-T-Y-K-N-Y-R-S-N-L-L--COOH');
  });

  after(async () => {
    // pepView.close();
    grok.shell.closeTable(peptidesDf);
  });
});