@datagrok/peptides 0.8.9 → 0.8.13

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (39)
  1. package/.eslintrc.json +2 -1
  2. package/dist/package-test.js +22626 -0
  3. package/dist/package.js +21429 -0
  4. package/dist/vendors-node_modules_datagrok-libraries_ml_src_workers_dimensionality-reducer_js.js +8840 -0
  5. package/jest.config.js +33 -0
  6. package/package.json +75 -62
  7. package/src/__jest__/remote.test.ts +50 -0
  8. package/src/__jest__/test-node.ts +96 -0
  9. package/src/model.ts +950 -86
  10. package/src/monomer-library.ts +8 -0
  11. package/src/package-test.ts +3 -2
  12. package/src/package.ts +57 -22
  13. package/src/peptides.ts +165 -119
  14. package/src/styles.css +8 -0
  15. package/src/tests/peptides-tests.ts +17 -78
  16. package/src/tests/utils.ts +1 -7
  17. package/src/utils/SAR-multiple-filter.ts +439 -0
  18. package/src/utils/SAR-multiple-selection.ts +177 -0
  19. package/src/utils/cell-renderer.ts +49 -50
  20. package/src/utils/chem-palette.ts +61 -163
  21. package/src/utils/constants.ts +56 -0
  22. package/src/utils/filtering-statistics.ts +62 -0
  23. package/src/utils/multiple-sequence-alignment.ts +33 -2
  24. package/src/utils/multivariate-analysis.ts +79 -0
  25. package/src/utils/peptide-similarity-space.ts +12 -31
  26. package/src/utils/types.ts +10 -0
  27. package/src/viewers/logo-viewer.ts +2 -1
  28. package/src/viewers/peptide-space-viewer.ts +121 -0
  29. package/src/viewers/sar-viewer.ts +111 -313
  30. package/src/viewers/stacked-barchart-viewer.ts +126 -173
  31. package/src/widgets/analyze-peptides.ts +39 -18
  32. package/src/widgets/distribution.ts +61 -0
  33. package/src/widgets/manual-alignment.ts +3 -3
  34. package/src/widgets/peptide-molecule.ts +4 -4
  35. package/src/widgets/subst-table.ts +30 -22
  36. package/test-Peptides-f8114def7953-4bf59d70.html +256 -0
  37. package/src/describe.ts +0 -534
  38. package/src/utils/split-aligned.ts +0 -72
  39. package/src/viewers/subst-viewer.ts +0 -320
package/src/model.ts CHANGED
@@ -1,125 +1,989 @@
1
+ import * as ui from 'datagrok-api/ui';
2
+ import * as grok from 'datagrok-api/grok';
1
3
  import * as DG from 'datagrok-api/dg';
2
4
 
3
- import {describe} from './describe';
4
5
  import {Subject, Observable} from 'rxjs';
5
- import {StringDictionary} from '@datagrok-libraries/utils/src/type-declarations';
6
6
  import {addViewerToHeader, StackedBarChart} from './viewers/stacked-barchart-viewer';
7
+ import {PeptidesController} from './peptides';
8
+ import {tTest} from '@datagrok-libraries/statistics/src/tests';
9
+ import {fdrcorrection} from '@datagrok-libraries/statistics/src/multiple-tests';
10
+ import {ChemPalette} from './utils/chem-palette';
11
+ import {MonomerLibrary} from './monomer-library';
12
+ import * as C from './utils/constants';
13
+ import * as type from './utils/types';
14
+ import {FilteringStatistics} from './utils/filtering-statistics';
7
15
 
8
16
  export class PeptidesModel {
9
- // private _viewerGrid: DG.Grid;
10
- // private viewerVGrid: DG.Grid;
11
- // private _statsDf: DG.DataFrame;
12
- // private groupMapping: StringDictionary;
13
- private dataFrame: DG.DataFrame | null;
14
- private activityColumn: string | null;
15
- private activityScaling: string | null;
16
- private sourceGrid: DG.Grid | null;
17
- private twoColorMode: boolean | null;
18
- private initialBitset: DG.BitSet | null;
19
- private isUpdating: boolean = false;
20
- private grouping: boolean = false;
21
- private substFlag = false;
22
- private statsDataFrameSubject = new Subject<DG.DataFrame>();
23
- private sarGridSubject = new Subject<DG.Grid>();
24
- private sarVGridSubject = new Subject<DG.Grid>();
25
- private groupMappingSubject = new Subject<StringDictionary>();
26
- private substFlagSubject = new Subject<boolean>();
27
- private static _modelName = 'peptidesModel';
17
+ static _modelName = 'peptidesModel';
18
+
19
+ _statsDataFrameSubject = new Subject<DG.DataFrame>();
20
+ _sarGridSubject = new Subject<DG.Grid>();
21
+ _sarVGridSubject = new Subject<DG.Grid>();
22
+ // _groupMappingSubject = new Subject<StringDictionary>();
23
+ _substitutionTableSubject = new Subject<DG.DataFrame>();
28
24
 
29
- private constructor(dataFrame: DG.DataFrame) {
30
- this.dataFrame = dataFrame;
31
- this.activityColumn = null;
32
- this.activityScaling = null;
33
- this.sourceGrid = null;
34
- this.twoColorMode = null;
35
- this.initialBitset = null;
25
+ _isUpdating: boolean = false;
26
+ _isSubstInitialized = false;
27
+ isBitsetChangedInitialized = false;
36
28
 
37
- // this._statsDf = DG.DataFrame.create();
38
- // this._viewerGrid = DG.Grid.create(this.statsDf);
39
- // this.viewerVGrid = DG.Grid.create(this.statsDf);
40
- // this.groupMapping = {};
29
+ //viewer properties
30
+ // _grouping!: boolean;
31
+ _filterMode!: boolean;
32
+ _twoColorMode!: boolean;
33
+ _activityScaling!: string;
34
+ _isSubstitutionOn!: boolean;
35
+ _activityLimit!: number;
36
+ _maxSubstitutions!: number;
41
37
 
42
- // this.statsDataFrameObservable = new Observable(subject => subject.next(this.statsDf));
43
- // this.sarGridObservable = new Observable(subject => subject.next(this.viewerGrid));
44
- // this.sarVGridObservable = new Observable(subject => subject.next(this.viewerVGrid));
45
- // this.groupMappingObservable = new Observable(subject => subject.next(this.groupMapping));
46
- }
38
+ _sarGrid!: DG.Grid;
39
+ _sarVGrid!: DG.Grid;
40
+ _sourceGrid!: DG.Grid;
41
+ _dataFrame: DG.DataFrame;
42
+ _substitutionTable!: DG.DataFrame;
43
+ splitCol!: DG.Column;
44
+ stackedBarchart!: StackedBarChart;
47
45
 
48
- // get statsDf() {
49
- // return this._statsDf;
50
- // }
46
+ // _substTableTooltipData!: { [aar: string]: number[][][]; };
47
+ _casesTable!: type.SubstitutionCases;
48
+ _substTableTooltipData!: type.SubstitutionTooltips;
51
49
 
52
- // get viewerGrid() {
53
- // return this._viewerGrid;
54
- // }
50
+ private constructor(dataFrame: DG.DataFrame) {
51
+ this._dataFrame = dataFrame;
52
+ this._dataFrame.temp[C.PEPTIDES_ANALYSIS] = true;
55
53
 
56
- get onStatsDataFrameChanged(): Observable<DG.DataFrame> {
57
- return this.statsDataFrameSubject.asObservable();
54
+ this.updateProperties();
58
55
  }
59
56
 
60
- get onSARGridChanged(): Observable<DG.Grid> {
61
- return this.sarGridSubject.asObservable();
57
+ static getInstance(dataFrame: DG.DataFrame): PeptidesModel {
58
+ dataFrame.temp[PeptidesModel.modelName] ??= new PeptidesModel(dataFrame);
59
+ return dataFrame.temp[PeptidesModel.modelName];
62
60
  }
63
61
 
64
- get onSARVGridChanged(): Observable<DG.Grid> {
65
- return this.sarVGridSubject.asObservable();
62
+ updateProperties() {
63
+ this._activityScaling = this._dataFrame.tags['scaling'];
64
+ this._filterMode = this.stringToBool(this._dataFrame.tags['filterMode']);
65
+ this._twoColorMode = this.stringToBool(this._dataFrame.tags['bidirectionalAnalysis']);
66
+ // this._grouping = this.stringToBool(this._dataFrame.tags['grouping']);
67
+ this._isSubstitutionOn = this.stringToBool(this._dataFrame.tags['showSubstitution']);
68
+ this._maxSubstitutions = parseInt(this._dataFrame.tags['maxSubstitutions']);
69
+ this._activityLimit = parseFloat(this._dataFrame.tags['activityLimit']);
66
70
  }
67
71
 
68
- get onGroupMappingChanged(): Observable<StringDictionary> {
69
- return this.groupMappingSubject.asObservable();
72
+ stringToBool(str: string) {
73
+ return str === 'true' ? true : false;
70
74
  }
71
75
 
72
- get onSubstFlagChanged(): Observable<boolean> {
73
- return this.substFlagSubject.asObservable();
76
+ setProperties(
77
+ activityScaling: string, filterMode: boolean, twoColorMode: boolean, isSubstitutionOn: boolean,
78
+ maxSubstitutions: number, activityLimit: number, forceUpdate = false,
79
+ ) {
80
+ const chooseAction = (value: string, defaultValue: any) => forceUpdate ? value : defaultValue ?? value;
81
+ this._dataFrame.tags['scaling'] = chooseAction(`${activityScaling}`, this._dataFrame.tags['scaling']);
82
+ this._dataFrame.tags['filterMode'] = chooseAction(`${filterMode}`, this._dataFrame.tags['filterMode']);
83
+ this._dataFrame.tags['bidirectionalAnalysis'] = chooseAction(`${twoColorMode}`, this._dataFrame.tags['bidirectionalAnalysis']);
84
+ // this._dataFrame.tags['grouping'] = chooseAction(`${grouping}`, this._dataFrame.tags['grouping']);
85
+ this._dataFrame.tags['showSubstitution'] = chooseAction(`${isSubstitutionOn}`, this._dataFrame.tags['showSubstitution']);
86
+ this._dataFrame.tags['maxSubstitutions'] = chooseAction(`${maxSubstitutions}`, this._dataFrame.tags['maxSubstitutions']);
87
+ this._dataFrame.tags['activityLimit'] = chooseAction(`${activityLimit}`, this._dataFrame.tags['activityLimit']);
88
+
89
+ this.updateProperties();
74
90
  }
75
91
 
92
+ get dataFrame(): DG.DataFrame {return this._dataFrame;}
93
+
94
+ get onStatsDataFrameChanged(): Observable<DG.DataFrame> {return this._statsDataFrameSubject.asObservable();}
95
+
96
+ get onSARGridChanged(): Observable<DG.Grid> {return this._sarGridSubject.asObservable();}
97
+
98
+ get onSARVGridChanged(): Observable<DG.Grid> {return this._sarVGridSubject.asObservable();}
99
+
100
+ // get onGroupMappingChanged(): Observable<StringDictionary> {return this._groupMappingSubject.asObservable();}
101
+
102
+ get onSubstTableChanged(): Observable<DG.DataFrame> {return this._substitutionTableSubject.asObservable();}
103
+
104
+ get substTooltipData(): type.SubstitutionTooltips {return this._substTableTooltipData!;}
105
+
76
106
  async updateData(
77
- df: DG.DataFrame | null, activityCol: string | null, activityScaling: string | null, sourceGrid: DG.Grid | null,
78
- twoColorMode: boolean | null, initialBitset: DG.BitSet | null, grouping: boolean | null) {
79
- this.dataFrame = df ?? this.dataFrame;
80
- this.activityColumn = activityCol ?? this.activityColumn;
81
- this.activityScaling = activityScaling ?? this.activityScaling;
82
- this.sourceGrid = sourceGrid ?? this.sourceGrid;
83
- this.twoColorMode = twoColorMode ?? this.twoColorMode;
84
- this.initialBitset = initialBitset ?? this.initialBitset;
85
- this.grouping = grouping ?? this.grouping;
107
+ activityScaling?: string, sourceGrid?: DG.Grid, twoColorMode?: boolean, activityLimit?: number,
108
+ maxSubstitutions?: number, isSubstitutionOn?: boolean, filterMode?: boolean,
109
+ ) {
110
+ //FIXME: threre are too many assignments, some are duplicating
111
+ this._activityScaling = activityScaling ?? this._activityScaling;
112
+ this._sourceGrid = sourceGrid ?? this._sourceGrid;
113
+ this._twoColorMode = twoColorMode ?? this._twoColorMode;
114
+ // this._grouping = grouping ?? this._grouping;
115
+ this._activityLimit = activityLimit ?? this._activityLimit;
116
+ this._maxSubstitutions = maxSubstitutions ?? this._maxSubstitutions;
117
+ this._isSubstitutionOn = isSubstitutionOn ?? this._isSubstitutionOn;
118
+ this._filterMode = filterMode ?? this._filterMode;
119
+ this.setProperties(this._activityScaling, this._filterMode, this._twoColorMode, this._isSubstitutionOn,
120
+ this._maxSubstitutions, this._activityLimit, true);
121
+
86
122
  await this.updateDefault();
87
123
  }
88
124
 
89
125
  async updateDefault() {
90
- if (this.dataFrame && this.activityColumn && this.activityScaling && this.sourceGrid &&
91
- this.twoColorMode !== null && !this.isUpdating) {
92
- this.isUpdating = true;
93
- const [viewerGrid, viewerVGrid, statsDf, groupMapping] = await describe(
94
- this.dataFrame, this.activityColumn, this.activityScaling, this.sourceGrid, this.twoColorMode,
95
- this.initialBitset, this.grouping);
96
- this.statsDataFrameSubject.next(statsDf);
97
- this.groupMappingSubject.next(groupMapping);
98
- this.sarGridSubject.next(viewerGrid);
99
- this.sarVGridSubject.next(viewerVGrid);
100
- this.substFlag = !this.substFlag;
101
- this.substFlagSubject.next(this.substFlag);
126
+ if (this._activityScaling && this._sourceGrid && this._twoColorMode !== null && !this._isUpdating) {
127
+ this._isUpdating = true;
128
+ const [viewerGrid, viewerVGrid, statsDf, substTable] = await this.initializeViewersComponents();
129
+ //FIXME: modify during the initializeViewersComponents stages
130
+ this._statsDataFrameSubject.next(statsDf);
131
+ // this._groupMappingSubject.next(groupMapping);
132
+ this._sarGridSubject.next(viewerGrid);
133
+ this._sarVGridSubject.next(viewerVGrid);
134
+ if (this._isSubstitutionOn) {
135
+ this._substitutionTableSubject.next(substTable);
136
+ this._isSubstInitialized = true;
137
+ }
138
+ }
139
+ await this.updateBarchart();
140
+ this.invalidateGrids();
141
+
142
+ this._isUpdating = false;
143
+ }
144
+
145
+ async updateBarchart() {
146
+ this.stackedBarchart ??= await this._dataFrame?.plot.fromType('StackedBarChartAA') as StackedBarChart;
147
+ if (this.stackedBarchart && this._sourceGrid)
148
+ addViewerToHeader(this._sourceGrid, this.stackedBarchart);
149
+ }
150
+
151
+ static get modelName() { return PeptidesModel._modelName; }
152
+
153
+ async initializeViewersComponents(): Promise<[DG.Grid, DG.Grid, DG.DataFrame, DG.DataFrame]> {
154
+ if (this._sourceGrid === null)
155
+ throw new Error(`Source grid is not initialized`);
156
+
157
+ //Split the aligned sequence into separate AARs
158
+ let splitSeqDf: DG.DataFrame | undefined;
159
+ let invalidIndexes: number[];
160
+ const col: DG.Column = (this._dataFrame.columns as DG.ColumnList).bySemType(C.SEM_TYPES.ALIGNED_SEQUENCE)!;
161
+ [splitSeqDf, invalidIndexes] = PeptidesController.splitAlignedPeptides(col);
102
162
 
103
- this.sourceGrid.invalidate();
163
+ const positionColumns = (splitSeqDf.columns as DG.ColumnList).names();
164
+ const renderColNames: string[] = (splitSeqDf.columns as DG.ColumnList).names();
104
165
 
105
- this.isUpdating = false;
166
+ (splitSeqDf.columns as DG.ColumnList).add(this._dataFrame.getCol(C.COLUMNS_NAMES.ACTIVITY));
167
+
168
+ this.joinDataFrames(this._dataFrame, positionColumns, splitSeqDf);
169
+
170
+ for (const dfCol of (this._dataFrame.columns as DG.ColumnList)) {
171
+ if (splitSeqDf.col(dfCol.name) && dfCol.name != C.COLUMNS_NAMES.ACTIVITY)
172
+ PeptidesController.setAARRenderer(dfCol, this._sourceGrid);
106
173
  }
107
174
 
108
- await this.updateBarchart();
175
+ this.sortSourceGrid(this._sourceGrid);
176
+
177
+ await this.createScaledCol(this._activityScaling!, this._dataFrame, this._sourceGrid, splitSeqDf);
178
+
179
+ //unpivot a table and handle duplicates
180
+ splitSeqDf = splitSeqDf.groupBy(positionColumns)
181
+ .add('med', C.COLUMNS_NAMES.ACTIVITY_SCALED, C.COLUMNS_NAMES.ACTIVITY_SCALED)
182
+ .aggregate();
183
+
184
+ const peptidesCount = splitSeqDf.getCol(C.COLUMNS_NAMES.ACTIVITY_SCALED).length;
185
+
186
+ let matrixDf = splitSeqDf.unpivot(
187
+ [C.COLUMNS_NAMES.ACTIVITY_SCALED], positionColumns, C.COLUMNS_NAMES.POSITION, C.COLUMNS_NAMES.AMINO_ACID_RESIDUE);
188
+
189
+ //TODO: move to chem palette
190
+ // let groupMapping: StringDictionary = {};
191
+ // if (this._grouping) {
192
+ // groupMapping = C.aarGroups;
193
+ // const aarCol = matrixDf.getCol(C.COLUMNS_NAMES.AMINO_ACID_RESIDUE);
194
+ // aarCol.init((index) => groupMapping[aarCol.get(index)[0]] ?? '-');
195
+ // aarCol.compact();
196
+ // } else
197
+ // Object.keys(C.aarGroups).forEach((value) => groupMapping[value] = value);
198
+
199
+
200
+ //FIXME: for some reason Mean difference is not calculated for all the AARs
201
+ //statistics for specific AAR at a specific position
202
+ const statsDf = await this.calculateStatistics(matrixDf, peptidesCount, splitSeqDf);
203
+
204
+ // SAR matrix table
205
+ //pivot a table to make it matrix-like
206
+ matrixDf = statsDf.groupBy([C.COLUMNS_NAMES.AMINO_ACID_RESIDUE])
207
+ .pivot(C.COLUMNS_NAMES.POSITION)
208
+ .add('first', C.COLUMNS_NAMES.MEAN_DIFFERENCE, '')
209
+ .aggregate();
210
+ matrixDf.name = 'SAR';
211
+
212
+ // Setting category order
213
+ await this.setCategoryOrder(this._twoColorMode!, statsDf, matrixDf);
214
+
215
+ // SAR vertical table (naive, choose best Mean difference from pVals <= 0.01)
216
+ const sequenceDf = this.createVerticalTable(statsDf, this._twoColorMode!);
217
+ renderColNames.push(C.COLUMNS_NAMES.MEAN_DIFFERENCE);
218
+
219
+ let substTable: DG.DataFrame | null = null;
220
+ if (this._isSubstitutionOn || !this._isSubstInitialized)
221
+ substTable = this.calcSubstitutions();
222
+
223
+ //TODO: move everything below out to controller
224
+ const [sarGrid, sarVGrid] = this.createGrids(matrixDf, positionColumns, sequenceDf);
225
+
226
+ this._sarGrid = sarGrid;
227
+ this._sarVGrid = sarVGrid;
228
+
229
+ this.setCellRenderers(
230
+ renderColNames, statsDf, this._twoColorMode, sarGrid, sarVGrid, this._isSubstitutionOn);
231
+
232
+ // show all the statistics in a tooltip over cell
233
+ this.setTooltips(renderColNames, statsDf, peptidesCount, sarGrid, sarVGrid, this._dataFrame);
234
+
235
+ this.setInteractionCallback();
236
+
237
+ this.modifyOrCreateSplitCol(C.CATEGORIES.ALL, C.CATEGORIES.ALL);
238
+
239
+ this.setBitsetCallback();
240
+
241
+ this.postProcessGrids(this._sourceGrid, invalidIndexes, sarGrid, sarVGrid);
242
+
243
+ if (this.dataFrame.tags[C.TAGS.AAR] && this.dataFrame.tags[C.TAGS.POSITION]) {
244
+ const sarDf = sarGrid.dataFrame;
245
+ const rowCount = sarDf.rowCount;
246
+ let index = -1;
247
+ for (let i = 0; i < rowCount; i++) {
248
+ if (sarDf.get(C.COLUMNS_NAMES.AMINO_ACID_RESIDUE, i) === this.dataFrame.tags[C.TAGS.AAR]) {
249
+ index = i;
250
+ break;
251
+ }
252
+ }
253
+ sarDf.currentCell = sarDf.cell(index, this.dataFrame.tags[C.TAGS.POSITION]);
254
+ }
255
+
256
+ //TODO: return class instead
257
+ return [sarGrid, sarVGrid, statsDf, substTable!];
109
258
  }
110
259
 
111
- async updateBarchart() {
112
- const stackedBarchart = await this.dataFrame?.plot.fromType('StackedBarChartAA') as StackedBarChart;
113
- if (stackedBarchart && this.sourceGrid)
114
- addViewerToHeader(this.sourceGrid, stackedBarchart);
260
+ calcSubstitutions() {
261
+ // const col: DG.Column = this.dataFrame.columns.bySemType(C.SEM_TYPES.ALIGNED_SEQUENCE);
262
+ // const values: number[] = this.dataFrame.getCol(C.COLUMNS_NAMES.ACTIVITY_SCALED).toList();
263
+ const activityValues = this.dataFrame.getCol(C.COLUMNS_NAMES.ACTIVITY_SCALED);
264
+ // const splitedMatrix = this.split(col);
265
+ const columnList = (this.dataFrame.columns as DG.ColumnList).toList()
266
+ .filter(col => col.semType === C.SEM_TYPES.AMINO_ACIDS);
267
+ if (columnList.length === 0)
268
+ throw new Error(`Couldn't find any column of semType '${C.SEM_TYPES.AMINO_ACIDS}'`);
269
+
270
+ const tableValues: { [aar: string]: number[]; } = {};
271
+ const tableTooltips: { [aar: string]: {}[][]; } = {};
272
+ const tableCases: { [aar: string]: number[][][]; } = {};
273
+
274
+ // const nRows = splitedMatrix.length;
275
+ // const nCols = splitedMatrix[0].length;
276
+ // const nColsArray = Array(nCols);
277
+ const nCols = columnList.length;
278
+ const nRows = this.dataFrame.rowCount;
279
+
280
+ //TODO: this looks **very** expensive
281
+ for (let i = 0; i < nRows - 1; i++) {
282
+ for (let j = i + 1; j < nRows; j++) {
283
+ let substCounter = 0;
284
+ const subst1: { [pos: number]: [string, {key: string, value: string, diff: number}] } = {};
285
+ const subst2: { [pos: number]: [string, {key: string, value: string, diff: number}] } = {};
286
+ const activityValI = activityValues.get(i) as number;
287
+ const activityValJ = activityValues.get(j) as number;
288
+ const strActivityValI = activityValI.toFixed(2);
289
+ const strActivityValJ = activityValJ.toFixed(2);
290
+ const delta = activityValI - activityValJ;
291
+
292
+ if (Math.abs(delta) < this._activityLimit)
293
+ continue;
294
+
295
+ for (let k = 0; k < nCols; k++) {
296
+ // const smik = splitedMatrix[i][k];
297
+ // const smjk = splitedMatrix[j][k];
298
+ const smik = columnList[k].get(i) as string;
299
+ const smjk = columnList[k].get(j) as string;
300
+
301
+ if (smik === smjk)
302
+ continue;
303
+
304
+ substCounter++;
305
+ subst1[k] = [
306
+ smik,
307
+ {
308
+ key: `${smik === '-' ? 'Empty' : smik} → ${smjk === '-' ? 'Empty' : smjk}`,
309
+ value: `${strActivityValI} → ${strActivityValJ}`,
310
+ diff: -delta,
311
+ },
312
+ ];
313
+ subst2[k] = [
314
+ smjk,
315
+ {
316
+ key: `${smjk === '-' ? 'Empty' : smjk} → ${smik === '-' ? 'Empty' : smik}`,
317
+ value: `${strActivityValJ} → ${strActivityValI}`,
318
+ diff: delta,
319
+ },
320
+ ];
321
+ }
322
+
323
+ if (substCounter > this._maxSubstitutions || substCounter === 0)
324
+ continue;
325
+
326
+ for (const subst of [subst1, subst2]) {
327
+ Object.keys(subst).forEach((pos) => {
328
+ const posInt = parseInt(pos);
329
+ const aar = subst[posInt][0];
330
+ if (!Object.keys(tableValues).includes(aar)) {
331
+ // tableValues[aar] = Array(...nColsArray).map(() => DG.INT_NULL);
332
+ // tableTooltips[aar] = Array(...nColsArray).map(() => []);
333
+ // tableCases[aar] = Array(...nColsArray).map(() => []);
334
+ tableValues[aar] = Array(nCols).fill(DG.INT_NULL);
335
+ tableTooltips[aar] = Array(nCols).fill([]);
336
+ tableCases[aar] = Array(nCols).fill([]);
337
+ }
338
+
339
+ tableValues[aar][posInt] = tableValues[aar][posInt] === DG.INT_NULL ? 1 : tableValues[aar][posInt] + 1;
340
+ tableTooltips[aar][posInt] = !tableTooltips[aar][posInt].length ?
341
+ [{key: 'Substitution', value: 'Values'}] : tableTooltips[aar][posInt];
342
+ tableTooltips[aar][posInt].push(subst[posInt][1]);
343
+ tableCases[aar][posInt].push([i, j, subst == subst1 ? delta : -delta]);
344
+ });
345
+ }
346
+ }
347
+ }
348
+
349
+ const tableValuesKeys = Object.keys(tableValues);
350
+ const dfLength = tableValuesKeys.length;
351
+ const cols = columnList.map(col => {
352
+ const newCol = DG.Column.int(`${col.name}`, dfLength);
353
+ newCol.semType = 'Substitution';
354
+ return newCol;
355
+ });
356
+ const aarCol = DG.Column.string(C.COLUMNS_NAMES.AMINO_ACID_RESIDUE, dfLength);
357
+ cols.splice(0, 0, aarCol);
358
+ const table = DG.DataFrame.fromColumns(cols);
359
+
360
+ for (let i = 0; i < dfLength; i++) {
361
+ const aar = tableValuesKeys[i];
362
+ // tableValues[aar].splice(0, 1);
363
+ table.rows.setValues(i, [aar, ...tableValues[aar]]);
364
+ }
365
+
366
+ // let groupMapping: { [key: string]: string } = {};
367
+
368
+ //TODO: enable grouping
369
+ // Object.keys(aarGroups).forEach((value) => groupMapping[value] = value);
370
+ this._substTableTooltipData = tableTooltips;
371
+ this._casesTable = tableCases;
372
+ this.substitutionTable = table;
373
+
374
+ return table;
115
375
  }
116
376
 
117
- static get modelName() {
118
- return PeptidesModel._modelName;
377
+ get substitutionTable() { return this._substitutionTable; }
378
+ set substitutionTable(table: DG.DataFrame) {
379
+ if (!table)
380
+ throw new Error(`Substitution table cannot be set to null`);
381
+ this._substitutionTable = table;
382
+ this._substitutionTableSubject.next(table);
119
383
  }
120
384
 
121
- static getOrInit(dataFrame: DG.DataFrame): PeptidesModel {
122
- dataFrame.temp[PeptidesModel.modelName] ??= new PeptidesModel(dataFrame);
123
- return dataFrame.temp[PeptidesModel.modelName];
385
+ joinDataFrames(df: DG.DataFrame, positionColumns: string[], splitSeqDf: DG.DataFrame) {
386
+ // append splitSeqDf columns to source table and make sure columns are not added more than once
387
+ const dfColsSet = new Set((df.columns as DG.ColumnList).names());
388
+ if (!positionColumns.every((col: string) => dfColsSet.has(col))) {
389
+ df.join(
390
+ splitSeqDf, [C.COLUMNS_NAMES.ACTIVITY], [C.COLUMNS_NAMES.ACTIVITY], (df.columns as DG.ColumnList).names(),
391
+ positionColumns, 'inner', true);
392
+ }
393
+ }
394
+
395
+ sortSourceGrid(sourceGrid: DG.Grid) {
396
+ if (sourceGrid) {
397
+ const colNames: DG.GridColumn[] = [];
398
+ for (let i = 1; i < sourceGrid.columns.length; i++)
399
+ colNames.push(sourceGrid.columns.byIndex(i)!);
400
+
401
+ colNames.sort((a, b)=>{
402
+ if (a.column!.semType == C.SEM_TYPES.AMINO_ACIDS) {
403
+ if (b.column!.semType == C.SEM_TYPES.AMINO_ACIDS)
404
+ return 0;
405
+ return -1;
406
+ }
407
+ if (b.column!.semType == C.SEM_TYPES.AMINO_ACIDS)
408
+ return 1;
409
+ return 0;
410
+ });
411
+ sourceGrid.columns.setOrder(colNames.map((v) => v.name));
412
+ }
413
+ }
414
+
415
+ async createScaledCol(
416
+ activityScaling: string, df: DG.DataFrame, sourceGrid: DG.Grid, splitSeqDf: DG.DataFrame,
417
+ ) {
418
+ const [scaledDf, newColName] = await PeptidesController.scaleActivity(
419
+ activityScaling, df, df.temp[C.COLUMNS_NAMES.ACTIVITY]);
420
+ //TODO: make another func
421
+ const scaledCol = scaledDf.getCol(C.COLUMNS_NAMES.ACTIVITY_SCALED);
422
+ (splitSeqDf.columns as DG.ColumnList).add(scaledCol);
423
+ const oldScaledCol = df.getCol(C.COLUMNS_NAMES.ACTIVITY_SCALED);
424
+ (df.columns as DG.ColumnList).replace(oldScaledCol, scaledCol);
425
+ const gridCol = sourceGrid.col(C.COLUMNS_NAMES.ACTIVITY_SCALED);
426
+ if (gridCol !== null) {
427
+ gridCol.name = newColName;
428
+ df.temp[C.COLUMNS_NAMES.ACTIVITY_SCALED] = newColName;
429
+ }
430
+
431
+ sourceGrid.columns.setOrder([newColName]);
432
+ }
433
+
434
+ async calculateStatistics(matrixDf: DG.DataFrame, peptidesCount: number, splitSeqDf: DG.DataFrame) {
435
+ matrixDf = matrixDf.groupBy([C.COLUMNS_NAMES.POSITION, C.COLUMNS_NAMES.AMINO_ACID_RESIDUE])
436
+ .add('count', C.COLUMNS_NAMES.ACTIVITY_SCALED, 'Count')
437
+ .aggregate();
438
+
439
+ const countThreshold = 4;
440
+ //@ts-ignore: never gets old
441
+ matrixDf.rows.filter((row) => row.Count >= countThreshold && row.Count <= peptidesCount - countThreshold);
442
+ matrixDf = matrixDf.clone(matrixDf.filter);
443
+
444
+ // calculate additional stats
445
+ await (matrixDf.columns as DG.ColumnList).addNewCalculated('Ratio', '${count}/'.concat(`${peptidesCount}`));
446
+
447
+ //calculate p-values based on t-test
448
+ let pvalues: Float32Array = new Float32Array(matrixDf.rowCount).fill(1);
449
+ const mdCol: DG.Column = (matrixDf.columns as DG.ColumnList).addNewFloat(C.COLUMNS_NAMES.MEAN_DIFFERENCE);
450
+ const pValCol: DG.Column = (matrixDf.columns as DG.ColumnList).addNewFloat(C.COLUMNS_NAMES.P_VALUE);
451
+ for (let i = 0; i < matrixDf.rowCount; i++) {
452
+ const position = matrixDf.get(C.COLUMNS_NAMES.POSITION, i);
453
+ const aar = matrixDf.get(C.COLUMNS_NAMES.AMINO_ACID_RESIDUE, i);
454
+
455
+ //@ts-ignore
456
+ splitSeqDf.rows.select((row) => row[position] === aar);
457
+ const currentActivity: number[] = splitSeqDf
458
+ .clone(splitSeqDf.selection, [C.COLUMNS_NAMES.ACTIVITY_SCALED])
459
+ .getCol(C.COLUMNS_NAMES.ACTIVITY_SCALED)
460
+ .toList();
461
+
462
+ //@ts-ignore
463
+ splitSeqDf.rows.select((row) => row[position] !== aar);
464
+ const otherActivity: number[] = splitSeqDf
465
+ .clone(splitSeqDf.selection, [C.COLUMNS_NAMES.ACTIVITY_SCALED])
466
+ .getCol(C.COLUMNS_NAMES.ACTIVITY_SCALED)
467
+ .toList();
468
+
469
+ const testResult = tTest(currentActivity, otherActivity);
470
+ // testResult = uTest(currentActivity, otherActivity);
471
+ const currentMeanDiff = testResult[C.COLUMNS_NAMES.MEAN_DIFFERENCE]!;
472
+ const pvalue = testResult[currentMeanDiff >= 0 ? 'p-value more' : 'p-value less'];
473
+
474
+ mdCol.set(i, currentMeanDiff);
475
+ pvalues[i] = pvalue;
476
+ }
477
+
478
+ pvalues = fdrcorrection(pvalues)[1];
479
+
480
+ for (let i = 0; i < pvalues.length; ++i)
481
+ pValCol.set(i, pvalues[i]);
482
+
483
+ return matrixDf.clone();
484
+ }
485
+
486
+ async setCategoryOrder(twoColorMode: boolean, statsDf: DG.DataFrame, matrixDf: DG.DataFrame) {
487
+ const absMD = 'Absolute Mean difference';
488
+ const sortArgument = twoColorMode ? absMD : C.COLUMNS_NAMES.MEAN_DIFFERENCE;
489
+ if (twoColorMode)
490
+ await (statsDf.columns as DG.ColumnList).addNewCalculated(absMD, 'Abs(${Mean difference})');
491
+
492
+ const aarWeightsDf = statsDf.groupBy([C.COLUMNS_NAMES.AMINO_ACID_RESIDUE]).sum(sortArgument, 'weight').aggregate();
493
+ const aarList = aarWeightsDf.getCol(C.COLUMNS_NAMES.AMINO_ACID_RESIDUE).toList();
494
+ const getWeight = (aar: string) => aarWeightsDf
495
+ .groupBy(['weight'])
496
+ .where(`${C.COLUMNS_NAMES.AMINO_ACID_RESIDUE} = ${aar}`)
497
+ .aggregate()
498
+ .get('weight', 0);
499
+ aarList.sort((first, second) => getWeight(second) - getWeight(first));
500
+
501
+ matrixDf.getCol(C.COLUMNS_NAMES.AMINO_ACID_RESIDUE).setCategoryOrder(aarList);
502
+ }
503
+
504
+ createVerticalTable(statsDf: DG.DataFrame, twoColorMode: boolean) {
505
+ // TODO: aquire ALL of the positions
506
+ const columns = [C.COLUMNS_NAMES.MEAN_DIFFERENCE, C.COLUMNS_NAMES.AMINO_ACID_RESIDUE, C.COLUMNS_NAMES.POSITION,
507
+ 'Count', 'Ratio', C.COLUMNS_NAMES.P_VALUE];
508
+ let sequenceDf = statsDf.groupBy(columns)
509
+ .where('pValue <= 0.1')
510
+ .aggregate();
511
+
512
+ let tempStats: DG.Stats;
513
+ const maxAtPos: {[index: string]: number} = {};
514
+ for (const pos of sequenceDf.getCol(C.COLUMNS_NAMES.POSITION).categories) {
515
+ tempStats = DG.Stats.fromColumn(
516
+ sequenceDf.getCol(C.COLUMNS_NAMES.MEAN_DIFFERENCE),
517
+ DG.BitSet.create(sequenceDf.rowCount, (i) => sequenceDf.get(C.COLUMNS_NAMES.POSITION, i) === pos),
518
+ );
519
+ maxAtPos[pos] = twoColorMode ?
520
+ (tempStats.max > Math.abs(tempStats.min) ? tempStats.max : tempStats.min) : tempStats.max;
521
+ }
522
+ sequenceDf = sequenceDf.clone(DG.BitSet.create(sequenceDf.rowCount, (i) =>
523
+ sequenceDf.get(C.COLUMNS_NAMES.MEAN_DIFFERENCE, i) === maxAtPos[sequenceDf.get(C.COLUMNS_NAMES.POSITION, i)]));
524
+
525
+ return sequenceDf;
526
+ }
527
+
528
+ createGrids(matrixDf: DG.DataFrame, positionColumns: string[], sequenceDf: DG.DataFrame) {
529
+ const sarGrid = matrixDf.plot.grid();
530
+ sarGrid.sort([C.COLUMNS_NAMES.AMINO_ACID_RESIDUE]);
531
+ sarGrid.columns.setOrder([C.COLUMNS_NAMES.AMINO_ACID_RESIDUE].concat(positionColumns as C.COLUMNS_NAMES[]));
532
+
533
+ const sarVGrid = sequenceDf.plot.grid();
534
+ sarVGrid.sort([C.COLUMNS_NAMES.POSITION]);
535
+ sarVGrid.col(C.COLUMNS_NAMES.P_VALUE)!.format = 'four digits after comma';
536
+ sarVGrid.col(C.COLUMNS_NAMES.P_VALUE)!.name = 'P-Value';
537
+
538
+ let tempCol = (matrixDf.columns as DG.ColumnList).byName(C.COLUMNS_NAMES.AMINO_ACID_RESIDUE);
539
+ if (tempCol)
540
+ PeptidesController.setAARRenderer(tempCol, sarGrid);
541
+
542
+ tempCol = (sequenceDf.columns as DG.ColumnList).byName(C.COLUMNS_NAMES.AMINO_ACID_RESIDUE);
543
+ if (tempCol)
544
+ PeptidesController.setAARRenderer(tempCol, sarGrid);
545
+
546
+ return [sarGrid, sarVGrid];
547
+ }
548
+
549
/**
 * Subscribes one shared circle renderer to the `onCellRender` event of both SAR grids.
 *
 * For a table cell whose column is listed in `renderColNames` the default rendering is suppressed. If the cell
 * value is truthy, a filled circle is drawn in the middle of the cell:
 *  - the colour is chosen from the p-value stored in `statsDf` for the cell's residue/position pair
 *    (thresholds 0.01, 0.05 and 0.1; in two-colour mode negative values use the orange palette);
 *  - the radius grows with the cell value relative to the range of the mean difference column of `statsDf`
 *    and is never smaller than 3 pixels.
 * When `isSubstitutionOn` is set, the value found in `this.substitutionTable` for the same residue/position is
 * printed on top of the circle.
 *
 * A row header cell whose grid column is still visible hides that column instead of being rendered.
 *
 * @param renderColNames names of the table columns that get the custom rendering
 * @param statsDf statistics table queried by residue and position for the p-value
 * @param twoColorMode whether negative values are shown by absolute size and a separate palette
 * @param sarGrid first grid to subscribe to
 * @param sarVGrid second grid to subscribe to
 * @param isSubstitutionOn whether the substitution value is printed over the circle
 */
setCellRenderers(
  renderColNames: string[], statsDf: DG.DataFrame, twoColorMode: boolean, sarGrid: DG.Grid, sarVGrid: DG.Grid,
  isSubstitutionOn: boolean,
) {
  const mdCol = statsDf.getCol(C.COLUMNS_NAMES.MEAN_DIFFERENCE);
  const cellRendererAction = (args: DG.GridCellRenderArgs) => {
    const canvasContext = args.g;
    const bound = args.bounds;
    const cell = args.cell;
    const tableColName = cell.tableColumn?.name;
    const tableRowIndex = cell.tableRowIndex!;
    const cellValue = cell.cell.value;
    const midX = bound.x + bound.width / 2;
    const midY = bound.y + bound.height / 2;

    // Handled before the canvas state is touched, so that no save()/clip() is left without a matching restore().
    if (cell.isRowHeader && cell.gridColumn.visible) {
      cell.gridColumn.visible = false;
      args.preventDefault();
      return;
    }

    canvasContext.save();
    try {
      // Keep all drawing inside the cell rectangle.
      canvasContext.beginPath();
      canvasContext.rect(bound.x, bound.y, bound.width, bound.height);
      canvasContext.clip();

      if (cell.isTableCell && tableColName && tableRowIndex !== null && renderColNames.includes(tableColName)) {
        const gridTable = cell.grid.table;
        // In the mean difference column the position is stored in the row, otherwise the column name is used.
        const currentPosition = tableColName !== C.COLUMNS_NAMES.MEAN_DIFFERENCE ?
          tableColName : gridTable.get(C.COLUMNS_NAMES.POSITION, tableRowIndex);
        const currentAAR = gridTable.get(C.COLUMNS_NAMES.AMINO_ACID_RESIDUE, tableRowIndex);
        const queryAAR = `${C.COLUMNS_NAMES.AMINO_ACID_RESIDUE} = ${currentAAR}`;

        if (cellValue) {
          const query = `${queryAAR} and ${C.COLUMNS_NAMES.POSITION} = ${currentPosition}`;
          const pVal: number = statsDf
            .groupBy([C.COLUMNS_NAMES.P_VALUE])
            .where(query)
            .aggregate()
            .get(C.COLUMNS_NAMES.P_VALUE, 0);

          const isNegativeVariant = cellValue < 0 && twoColorMode;
          let fillColor: string;
          if (pVal < 0.01)
            fillColor = isNegativeVariant ? '#FF7900' : '#299617';
          else if (pVal < 0.05)
            fillColor = isNegativeVariant ? '#FFA500' : '#32CD32';
          else if (pVal < 0.1)
            fillColor = isNegativeVariant ? '#FBCEB1' : '#98FF98';
          else
            fillColor = DG.Color.toHtml(DG.Color.lightLightGray);

          const minValue = twoColorMode ? 0 : mdCol.min;
          const maxValue = twoColorMode ? Math.max(Math.abs(mdCol.min), mdCol.max) : mdCol.max;
          const currentValue = twoColorMode ? Math.abs(cellValue) : cellValue;
          const rCoef = (currentValue - minValue) / (maxValue - minValue);

          const maxRadius = 0.9 * Math.min(bound.width, bound.height) / 2;
          const scaledRadius = Math.floor(maxRadius * rCoef);
          // A zero value range makes the ratio NaN/Infinity; fall back to the minimal radius in that case so
          // the circle is still drawn.
          const radius = Number.isFinite(scaledRadius) && scaledRadius >= 3 ? scaledRadius : 3;

          canvasContext.beginPath();
          canvasContext.fillStyle = fillColor;
          canvasContext.arc(midX, midY, radius, 0, Math.PI * 2, true);
          canvasContext.closePath();
          canvasContext.fill();

          if (isSubstitutionOn) {
            canvasContext.textBaseline = 'middle';
            canvasContext.textAlign = 'center';
            canvasContext.fillStyle = DG.Color.toHtml(DG.Color.black);
            canvasContext.font = '13px Roboto, Roboto Local, sans-serif';
            const substValue = this.substitutionTable.groupBy([currentPosition])
              .where(queryAAR)
              .aggregate()
              .get(currentPosition, 0);
            if (substValue && substValue !== DG.INT_NULL)
              canvasContext.fillText(substValue, midX, midY);
          }
        }
        args.preventDefault();
      }
    } finally {
      canvasContext.restore();
    }
  };
  sarGrid.onCellRender.subscribe(cellRendererAction);
  sarVGrid.onCellRender.subscribe(cellRendererAction);
}
640
+
641
/**
 * Registers one shared tooltip handler on both SAR grids.
 *
 * For a non-header cell with a non-null value in one of `renderColNames`, the tooltip is a table with one line
 * per column of `statsDf` (except the residue and position columns), looked up for the cell's residue/position
 * pair. 'Count' is shown as `count / peptidesCount`; other numeric values are shown with five decimals, and a
 * p-value that rounds to zero is shown as '<0.01'.
 * For a non-header cell of the amino acid residue column the monomer tooltip of `ChemPalette` is shown, using
 * the monomer library stored in `sourceDf.temp`.
 *
 * The handler always returns `true`.
 *
 * @param renderColNames names of the table columns that get the statistics tooltip
 * @param statsDf statistics table queried by residue and position
 * @param peptidesCount total shown after the 'Count' value
 * @param sarGrid first grid to register the handler on
 * @param sarVGrid second grid to register the handler on
 * @param sourceDf table whose `temp` holds the monomer library
 */
setTooltips(
  renderColNames: string[], statsDf: DG.DataFrame, peptidesCount: number, sarGrid: DG.Grid, sarVGrid: DG.Grid,
  sourceDf: DG.DataFrame,
) {
  // Deliberately synchronous: nothing is awaited, and the handler is meant to return a plain boolean.
  const onCellTooltipAction = (cell: DG.GridCell, x: number, y: number) => {
    if (
      !cell.isRowHeader && !cell.isColHeader && cell.tableColumn !== null && cell.cell.value !== null &&
      cell.tableRowIndex !== null && renderColNames.includes(cell.tableColumn.name)) {
      const tooltipMap: { [index: string]: string } = {};
      const gridTable = cell.grid.table;

      // The residue/position query does not depend on the statistics column, so it is built once.
      const currentPosition = cell.tableColumn.name !== C.COLUMNS_NAMES.MEAN_DIFFERENCE ?
        cell.tableColumn.name : gridTable.get(C.COLUMNS_NAMES.POSITION, cell.tableRowIndex);
      const query =
        `${C.COLUMNS_NAMES.AMINO_ACID_RESIDUE} = ` +
        `${gridTable.get(C.COLUMNS_NAMES.AMINO_ACID_RESIDUE, cell.tableRowIndex)} ` +
        `and ${C.COLUMNS_NAMES.POSITION} = ${currentPosition}`;

      for (const col of (statsDf.columns as DG.ColumnList).names()) {
        if (col === C.COLUMNS_NAMES.AMINO_ACID_RESIDUE || col === C.COLUMNS_NAMES.POSITION)
          continue;

        const textNum = statsDf.groupBy([col]).where(query).aggregate().get(col, 0);
        // Only numbers have toFixed(); anything else is shown as is instead of throwing.
        let text = col !== 'Count' && typeof textNum === 'number' ? textNum.toFixed(5) : `${textNum}`;

        if (col === 'Count')
          text += ` / ${peptidesCount}`;
        else if (col === C.COLUMNS_NAMES.P_VALUE)
          text = parseFloat(text) !== 0 ? text : '<0.01';

        tooltipMap[col === C.COLUMNS_NAMES.P_VALUE ? 'p-value' : col] = text;
      }

      ui.tooltip.show(ui.tableFromMap(tooltipMap), x, y);
    }
    if (!cell.isColHeader && cell.tableColumn?.name === C.COLUMNS_NAMES.AMINO_ACID_RESIDUE) {
      const monomerLib = sourceDf.temp[MonomerLibrary.id];
      ChemPalette.showTooltip(cell, x, y, monomerLib);
    }
    return true;
  };
  sarGrid.onCellTooltip(onCellTooltipAction);
  sarVGrid.onCellTooltip(onCellTooltipAction);
}
688
+
689
/**
 * Subscribes to current-cell changes of both SAR grids.
 *
 * A change in the first SAR grid synchronises the other grid, stores the chosen residue and position in the
 * tags of `this.dataFrame` (or `null` for both when the current row index is -1), refreshes the substitution
 * table kept in `this.dataFrame.temp['substTable']`, rebuilds the split column, fires the bitset change,
 * invalidates the grids and makes `this.dataFrame` the current shell object.
 * A change in the vertical SAR grid only synchronises the other grid.
 */
setInteractionCallback() {
  const sarDf = this._sarGrid.dataFrame;
  const sarVDf = this._sarVGrid.dataFrame;

  /** Returns the [residue, position] pair of the current cell of the chosen SAR table. */
  const getAARandPosition = (isVertical = false): [string, string] => {
    if (isVertical) {
      const currentRowIdx = sarVDf.currentRowIdx;
      // The residue is stored in the amino acid residue column, not in the mean difference column.
      return [
        sarVDf.get(C.COLUMNS_NAMES.AMINO_ACID_RESIDUE, currentRowIdx),
        sarVDf.get(C.COLUMNS_NAMES.POSITION, currentRowIdx),
      ];
    }
    return [sarDf.get(C.COLUMNS_NAMES.AMINO_ACID_RESIDUE, sarDf.currentRowIdx), sarDf.currentCol.name];
  };

  this._sarGrid.onCurrentCellChanged.subscribe(() => {
    const isNegativeRowIndex = sarDf.currentRowIdx === -1;
    // Ignore cells without a value, unless the current row was reset (index -1).
    if (!sarDf.currentCol || (!sarDf.currentCell.value && !isNegativeRowIndex))
      return;
    this.syncGrids(false, sarDf, sarVDf);
    let aar: string = C.CATEGORIES.ALL;
    let position: string = C.CATEGORIES.ALL;
    if (!isNegativeRowIndex) {
      [aar, position] = getAARandPosition();
      this.dataFrame.tags[C.TAGS.AAR] = aar;
      this.dataFrame.tags[C.TAGS.POSITION] = position;
    } else {
      this.dataFrame.tags[C.TAGS.AAR] = this.dataFrame.tags[C.TAGS.POSITION] = null;
    }
    this.dataFrame.temp['substTable'] = this.getSubstitutionTable();
    this.modifyOrCreateSplitCol(aar, position);
    this.fireBitsetChanged();
    this.invalidateGrids();
    grok.shell.o = this.dataFrame;
  });

  this._sarVGrid.onCurrentCellChanged.subscribe(() => {
    if (!sarVDf.currentCol || sarVDf.currentRowIdx === -1)
      return;
    this.syncGrids(true, sarDf, sarVDf);
  });
}
734
+
735
/**
 * Recomputes the stacked bar chart data (when the chart exists) and requests a redraw of both SAR grids and,
 * when it exists, of the source grid.
 */
invalidateGrids() {
  this.stackedBarchart?.computeData();
  [this._sarGrid, this._sarVGrid].forEach((sarGrid) => sarGrid.invalidate());
  this._sourceGrid?.invalidate();
  //TODO: this.peptideSpaceGrid.invalidate();
}
742
+
743
/**
 * Subscribes, once per instance, to changes of the filter and of the selection of `this.dataFrame`.
 *
 * When either bitset changes, the other one is set to `!this._filterMode` for every row, the changed one is
 * re-initialised from the split column (a row is set when its category is neither OTHER nor ALL) and the
 * statistics object stored in `this.dataFrame.temp` receives the changed bitset as its mask. Both bitset
 * updates are made with the notification argument set to `false`.
 */
setBitsetCallback() {
  if (this.isBitsetChangedInitialized)
    return;

  const {filter, selection} = this.dataFrame;

  // True for rows that belong to the currently chosen residue/position category.
  const isChosenCategory = (rowIdx: number) => {
    const category = this.splitCol.get(rowIdx);
    return !(category === C.CATEGORIES.OTHER || category === C.CATEGORIES.ALL);
  };

  // Wires one bitset as the "changed" one and the other as the one to reset.
  const bind = (changed: DG.BitSet, other: DG.BitSet) => {
    changed.onChanged.subscribe(() => {
      other.setAll(!this._filterMode, false);
      changed.init(isChosenCategory, false);
      (this.dataFrame.temp[C.STATS] as FilteringStatistics).setMask(changed);
    });
  };

  bind(filter, selection);
  bind(selection, filter);
  this.isBitsetChangedInitialized = true;
}
770
+
771
/** Fires the change event of the filter when in filter mode, otherwise of the selection. */
fireBitsetChanged() {
  const activeBitset = this._filterMode ? this._dataFrame.filter : this._dataFrame.selection;
  activeBitset.fireChanged();
}
772
+
773
/**
 * Applies the final look to the grids.
 *
 * - Cells of `sourceGrid` (except row headers) whose table row index is listed in `invalidIndexes` get a light
 *   gray background.
 * - The mean difference column of `sarVGrid` is renamed to 'Diff'.
 * - Both SAR grids get a row height of 20 and a row header width of 20. In `sarVGrid` every column other than
 *   'Diff' and the amino acid residue column is 45 wide; all remaining columns are as wide as the row height.
 * - Editing is disabled in both SAR grids.
 *
 * @param sourceGrid grid whose invalid rows are highlighted
 * @param invalidIndexes table row indexes to highlight
 * @param sarGrid first SAR grid
 * @param sarVGrid vertical SAR grid
 */
postProcessGrids(sourceGrid: DG.Grid, invalidIndexes: number[], sarGrid: DG.Grid, sarVGrid: DG.Grid) {
  sourceGrid.onCellPrepare((cell: DG.GridCell) => {
    const currentRowIndex = cell.tableRowIndex;
    // Explicit null check: a truthiness test would skip row 0.
    if (currentRowIndex !== null && invalidIndexes.includes(currentRowIndex) && !cell.isRowHeader)
      cell.style.backColor = DG.Color.lightLightGray;
  });

  const mdCol: DG.GridColumn = sarVGrid.col(C.COLUMNS_NAMES.MEAN_DIFFERENCE)!;
  mdCol.name = 'Diff';

  for (const grid of [sarGrid, sarVGrid]) {
    grid.props.rowHeight = 20;
    grid.columns.rowHeader!.width = 20;
    for (let i = 0; i < grid.columns.length; ++i) {
      const col = grid.columns.byIndex(i)!;
      const isWideColumn =
        grid === sarVGrid && col.name !== 'Diff' && col.name !== C.COLUMNS_NAMES.AMINO_ACID_RESIDUE;
      col.width = isWideColumn ? 45 : grid.props.rowHeight;
    }
  }

  sarGrid.props.allowEdit = false;
  sarVGrid.props.allowEdit = false;
}
803
+
804
/**
 * Splits every value of `peptideColumn` into its monomers.
 *
 * Each value is split on '-', empty parts are replaced with '-', and the last part of every sequence is
 * dropped. An empty column yields an empty array.
 *
 * NOTE(review): the previous implementation also built per-position columns, position names and a list of
 * invalid row indexes, but never returned or stored them, so that computation (and its crash on an empty
 * column) was removed. Confirm no per-position output is expected from this method.
 *
 * @param peptideColumn column of '-'-separated sequences; a null value makes the call throw
 * @param filter kept for backward compatibility; it has never influenced the returned value
 * @returns one array of monomers per row, without the last monomer of each sequence
 */
split(peptideColumn: DG.Column, filter: boolean = true): string[][] {
  const colLength = peptideColumn.length;
  const splitPeptidesArray: string[][] = [];

  for (let i = 0; i < colLength; i++) {
    const monomers: string[] = peptideColumn.get(i).split('-').map((value: string) => value ? value : '-');
    // String.split always yields at least one element, so there is always a trailing part to drop.
    monomers.pop();
    splitPeptidesArray.push(monomers);
  }

  return splitPeptidesArray;
}
861
+
862
/**
 * Builds a table of substitution cases for the current cell of the SAR grid.
 *
 * The cases are taken from `this._casesTable` (calculated on first use) for the residue of the current row and
 * the position given by the current column name. Each case contributes one row: the two sequences read from
 * the aligned sequence column of the source table, and the difference value.
 *
 * @returns a table with 'Initial', 'Substituted' and 'Difference' columns, or `null` when there is no current
 * row or column, the current column is the amino acid residue column, the substitution count is null, or no
 * cases are stored for the residue/position pair
 */
getSubstitutionTable() {
  if (!this._casesTable)
    this.calcSubstitutions();
  const sarDf = this._sarGrid.dataFrame;
  const sourceDf = this._sourceGrid.dataFrame;
  if (sarDf.currentRowIdx === -1)
    return null;

  const currentColName = sarDf.currentCol?.name;
  if (currentColName === undefined || currentColName === C.COLUMNS_NAMES.AMINO_ACID_RESIDUE)
    return null;

  const col: DG.Column = sourceDf.columns.bySemType(C.SEM_TYPES.ALIGNED_SEQUENCE);
  const aar = sarDf.get(C.COLUMNS_NAMES.AMINO_ACID_RESIDUE, sarDf.currentRowIdx);
  const pos = parseInt(currentColName, 10);
  const substitutionsCount = this.substitutionTable.groupBy([currentColName])
    .where(`${C.COLUMNS_NAMES.AMINO_ACID_RESIDUE} = ${aar}`)
    .aggregate()
    .get(currentColName, 0);
  if (substitutionsCount === DG.INT_NULL)
    return null;

  // A non-numeric column name gives NaN here, and a residue may have no stored cases at all.
  const currentCase = this._casesTable[aar]?.[pos];
  if (!currentCase)
    return null;

  const tempDfLength = currentCase.length;
  const initCol = DG.Column.string('Initial', tempDfLength);
  const subsCol = DG.Column.string('Substituted', tempDfLength);

  const tempDf = DG.DataFrame.fromColumns([
    initCol,
    subsCol,
    DG.Column.float('Difference', tempDfLength),
  ]);

  for (let i = 0; i < tempDfLength; i++) {
    // Each case holds two row indexes of the source table followed by the difference value.
    const row = currentCase[i];
    tempDf.rows.setValues(i, [col.get(row[0]), col.get(row[1]), row[2]]);
  }

  initCol.semType = C.SEM_TYPES.ALIGNED_SEQUENCE;
  initCol.temp['isAnalysisApplicable'] = false;
  subsCol.semType = C.SEM_TYPES.ALIGNED_SEQUENCE;
  subsCol.temp['isAnalysisApplicable'] = false;

  return tempDf;
}
908
+
909
+ //TODO: refactor, use this.sarDf and accept aar & position as parameters
910
/**
 * Moves the current cell of the "other" SAR table so that it matches the current cell of the table that was
 * clicked.
 *
 * - Click in the vertical table (only in its mean difference column): the other table's current cell becomes
 *   the row with the same residue and the column named after the row's position.
 * - Click in the SAR table (any column but the amino acid residue one): the vertical table's current cell
 *   becomes the mean difference cell of the row with the same residue and position.
 * When no matching row is found, row index -1 is used.
 *
 * The CELL_CHANGING flag in the other table's `temp` is raised while its current cell is assigned, and the
 * method returns immediately when it finds that flag raised.
 *
 * @param sourceVertical `true` when the change originates from the vertical SAR table
 * @param sarDf table of the SAR grid
 * @param sarVDf table of the vertical SAR grid
 */
syncGrids(sourceVertical: boolean, sarDf: DG.DataFrame, sarVDf: DG.DataFrame) {
  let otherColName: string;
  let otherRowIndex = -1;
  const otherDf = sourceVertical ? sarDf : sarVDf;

  if (otherDf.temp[C.FLAGS.CELL_CHANGING])
    return;

  if (sourceVertical) {
    //on vertical SAR viewer click
    const currentRowIdx = sarVDf.currentRowIdx;
    const currentColName = sarVDf.currentCol.name;
    if (currentColName !== C.COLUMNS_NAMES.MEAN_DIFFERENCE)
      return;

    otherColName = sarVDf.get(C.COLUMNS_NAMES.POSITION, currentRowIdx);
    const otherRowName: string = sarVDf.get(C.COLUMNS_NAMES.AMINO_ACID_RESIDUE, currentRowIdx);
    const rows = otherDf.rowCount;
    for (let i = 0; i < rows; i++) {
      if (otherDf.get(C.COLUMNS_NAMES.AMINO_ACID_RESIDUE, i) === otherRowName) {
        otherRowIndex = i;
        break;
      }
    }
  } else {
    //on SAR viewer click
    otherColName = C.COLUMNS_NAMES.MEAN_DIFFERENCE;
    const otherPos: string = sarDf.currentCol.name;
    if (otherPos === C.COLUMNS_NAMES.AMINO_ACID_RESIDUE)
      return;

    const otherAAR: string =
      sarDf.get(C.COLUMNS_NAMES.AMINO_ACID_RESIDUE, sarDf.currentRowIdx);
    for (let i = 0; i < sarVDf.rowCount; i++) {
      if (
        sarVDf.get(C.COLUMNS_NAMES.AMINO_ACID_RESIDUE, i) === otherAAR &&
        sarVDf.get(C.COLUMNS_NAMES.POSITION, i) === otherPos
      ) {
        otherRowIndex = i;
        break;
      }
    }
  }

  otherDf.temp[C.FLAGS.CELL_CHANGING] = true;
  try {
    otherDf.currentCell = otherDf.cell(otherRowIndex, otherColName);
  } finally {
    // Always lower the flag, otherwise a failed assignment would disable synchronisation for good.
    otherDf.temp[C.FLAGS.CELL_CHANGING] = false;
  }
}
959
+
960
/**
 * Returns the split column category for one row of `this.dataFrame`.
 *
 * @param index row index
 * @param aar residue to look for
 * @param position name of the column to read the residue from
 * @param aarLabel category returned when the row holds `aar` at `position`
 * @returns `aarLabel` on a match, otherwise the OTHER category
 */
getSplitColValueAt(index: number, aar: string, position: string, aarLabel: string): string {
  const residueAtPosition = this.dataFrame.get(position, index) as string;
  if (residueAtPosition === aar)
    return aarLabel;
  return C.CATEGORIES.OTHER;
}
964
+
965
/**
 * Fills the split column of `this.dataFrame`, creating it as a string column when it does not exist yet.
 *
 * When both arguments equal the ALL category, every row gets the ALL category and nothing else is done.
 * Otherwise each row gets either the "<residue> : <position>" label ('-' is shown as 'Gap') or the OTHER
 * category, the label is put first in the category order, the column is compacted, and the categories are
 * coloured: OTHER in blue, the label in orange.
 *
 * @param aar residue to split by
 * @param position name of the position column to split by
 */
modifyOrCreateSplitCol(aar: string, position: string): void {
  const df = this.dataFrame;
  const existingSplitCol = df.col(C.COLUMNS_NAMES.SPLIT_COL);
  this.splitCol = existingSplitCol ?? df.columns.addNew(C.COLUMNS_NAMES.SPLIT_COL, 'string') as DG.Column;

  const nothingChosen = aar === C.CATEGORIES.ALL && position === C.CATEGORIES.ALL;
  if (nothingChosen) {
    this.splitCol.init(() => C.CATEGORIES.ALL);
    return;
  }

  const shownResidue = aar === '-' ? 'Gap' : aar;
  const aarLabel = `${shownResidue} : ${position}`;
  this.splitCol.init((rowIdx) => this.getSplitColValueAt(rowIdx, aar, position, aarLabel));

  this.splitCol.setCategoryOrder([aarLabel]);
  this.splitCol.compact();

  // The label entry comes last so that it wins if both keys happen to be equal.
  const colorMap: {[index: string]: string | number} = {
    [C.CATEGORIES.OTHER]: DG.Color.blue,
    [aarLabel]: DG.Color.orange,
  };
  df.getCol(C.COLUMNS_NAMES.SPLIT_COL).colors.setCategorical(colorMap);
}
125
989
  }