@datagrok/peptides 1.19.1 → 1.21.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/package.json CHANGED
@@ -1,7 +1,7 @@
1
1
  {
2
2
  "name": "@datagrok/peptides",
3
3
  "friendlyName": "Peptides",
4
- "version": "1.19.1",
4
+ "version": "1.21.0",
5
5
  "author": {
6
6
  "name": "Davit Rizhinashvili",
7
7
  "email": "drizhinashvili@datagrok.ai"
@@ -14,12 +14,12 @@
14
14
  },
15
15
  "dependencies": {
16
16
  "@datagrok-libraries/bio": "^5.46.1",
17
- "@datagrok-libraries/math": "^1.2.3",
18
- "@datagrok-libraries/ml": "^6.7.5",
17
+ "@datagrok-libraries/math": "^1.2.4",
18
+ "@datagrok-libraries/ml": "^6.7.6",
19
19
  "@datagrok-libraries/statistics": "^1.2.12",
20
20
  "@datagrok-libraries/tutorials": "^1.4.2",
21
- "@datagrok-libraries/utils": "^4.3.10",
22
- "datagrok-api": "^1.22.0",
21
+ "@datagrok-libraries/utils": "^4.4.0",
22
+ "datagrok-api": "^1.23.0",
23
23
  "@webgpu/types": "^0.1.40",
24
24
  "cash-dom": "^8.1.5",
25
25
  "file-loader": "^6.2.0",
@@ -30,10 +30,11 @@
30
30
  "devDependencies": {
31
31
  "@datagrok-libraries/helm-web-editor": "^1.1.7",
32
32
  "@datagrok-libraries/js-draw-lite": "^0.0.5",
33
- "@datagrok/bio": "^2.16.9",
34
- "@datagrok/chem": "^1.12.0",
33
+ "@datagrok/eda": "^1.3.1",
34
+ "@datagrok/bio": "^2.18.0",
35
+ "@datagrok/chem": "^1.13.0",
35
36
  "@datagrok/dendrogram": "^1.2.33",
36
- "@datagrok/helm": "^2.5.9",
37
+ "@datagrok/helm": "^2.7.0",
37
38
  "@types/uuid": "^10.0.0",
38
39
  "@types/wu": "^2.1.44",
39
40
  "@typescript-eslint/eslint-plugin": "^8.8.1",
@@ -79,4 +80,4 @@
79
80
  "All users"
80
81
  ],
81
82
  "category": "Bioinformatics"
82
- }
83
+ }
package/src/model.ts CHANGED
@@ -51,11 +51,12 @@ import {DimReductionMethods} from '@datagrok-libraries/ml/src/multi-column-dimen
51
51
  import {AggregationColumns, MonomerPositionStats} from './utils/statistics';
52
52
  import {splitAlignedSequences} from '@datagrok-libraries/bio/src/utils/splitter';
53
53
  import {getDbscanWorker} from '@datagrok-libraries/math';
54
- import {markovCluster} from '@datagrok-libraries/ml/src/MCL/clustering-view';
55
54
  import {DistanceAggregationMethods} from '@datagrok-libraries/ml/src/distance-matrix/types';
56
55
  import {ClusterMaxActivityViewer, IClusterMaxActivity} from './viewers/cluster-max-activity-viewer';
57
- import {MCL_OPTIONS_TAG, MCLSerializableOptions} from '@datagrok-libraries/ml/src/MCL';
56
+ import {MCLSerializableOptions} from '@datagrok-libraries/ml/src/MCL';
58
57
  import {PeptideUtils} from './peptideUtils';
58
+ import {getGPUAdapterDescription} from '@datagrok-libraries/math/src/webGPU/getGPUDevice';
59
+ import {MCLViewer} from '@datagrok-libraries/ml/src/MCL/mcl-viewer';
59
60
 
60
61
  export enum VIEWER_TYPE {
61
62
  SEQUENCE_VARIABILITY_MAP = 'Sequence Variability Map',
@@ -63,6 +64,7 @@ export enum VIEWER_TYPE {
63
64
  LOGO_SUMMARY_TABLE = 'Logo Summary Table',
64
65
  DENDROGRAM = 'Dendrogram',
65
66
  CLUSTER_MAX_ACTIVITY = 'Active peptide selection',
67
+ MCL = 'MCL',
66
68
  }
67
69
 
68
70
  export type CachedWebLogoTooltip = { bar: string, tooltip: HTMLDivElement | null };
@@ -106,7 +108,7 @@ export class PeptidesModel {
106
108
  // sequence space viewer
107
109
  _sequenceSpaceViewer: DG.ScatterPlotViewer | null = null;
108
110
  //MCL viewer
109
- _mclViewer: DG.ScatterPlotViewer | null = null;
111
+ _mclViewer: MCLViewer | null = null;
110
112
  /**
111
113
  * @param {DG.DataFrame}dataFrame - DataFrame to use for analysis
112
114
  */
@@ -143,7 +145,7 @@ export class PeptidesModel {
143
145
  */
144
146
  get analysisView(): DG.TableView {
145
147
  if (this._analysisView === undefined) {
146
- this._analysisView = wu(grok.shell.tableViews).find(({dataFrame}) => dataFrame?.getTag(DG.TAGS.ID) === this.id);
148
+ this._analysisView = this.id ? wu(grok.shell.tableViews).find(({dataFrame}) => dataFrame?.getTag(DG.TAGS.ID) === this.id) : undefined;
147
149
  if (typeof this._analysisView === 'undefined')
148
150
  this._analysisView = grok.shell.addTableView(this.df);
149
151
  }
@@ -166,11 +168,13 @@ export class PeptidesModel {
166
168
  */
167
169
  get settings(): type.PeptidesSettings | null {
168
170
  const settingsStr = this.df.getTag(C.TAGS.SETTINGS);
169
- if (settingsStr == null)
171
+ if (!settingsStr)
170
172
  return null;
171
173
 
172
174
 
173
175
  this._settings ??= JSON.parse(settingsStr);
176
+ if (this._settings?.mclSettings && !(this._settings.mclSettings.webGPUDescriptionPromise instanceof Promise))
177
+ this._settings.mclSettings.webGPUDescriptionPromise = getGPUAdapterDescription();
174
178
  return this._settings!;
175
179
  }
176
180
 
@@ -358,7 +362,14 @@ export class PeptidesModel {
358
362
  dataFrame.getCol(C.COLUMNS_NAMES.ACTIVITY_SCALED).name = C.COLUMNS_NAMES.ACTIVITY;
359
363
 
360
364
 
361
- dataFrame.temp[PeptidesModel.modelName] ??= new PeptidesModel(dataFrame);
365
+ //dataFrame.temp[PeptidesModel.modelName] ??= new PeptidesModel(dataFrame);
366
+ if (!dataFrame.temp[PeptidesModel.modelName]) {
367
+ const model = dataFrame.temp[PeptidesModel.modelName] = new PeptidesModel(dataFrame);
368
+ const settings = model.settings;
369
+ // this is important bit. settings are written by startAnalysis function or other viewers, but separate viewers will not init the peptides model
370
+ if (settings)
371
+ model.init(settings);
372
+ }
362
373
  return dataFrame.temp[PeptidesModel.modelName] as PeptidesModel;
363
374
  }
364
375
 
@@ -690,20 +701,23 @@ export class PeptidesModel {
690
701
  const cols = this.df.columns;
691
702
  const splitSeqDf = splitAlignedSequences(this.df.getCol(this.settings!.sequenceColumnName), PeptideUtils.getSeqHelper());
692
703
  const positionColumns = splitSeqDf.columns.names();
693
- for (const colName of positionColumns) {
694
- let col = this.df.col(colName);
695
- const newCol = splitSeqDf.getCol(colName);
696
- if (col !== null)
697
- cols.remove(colName);
698
-
699
-
700
- const newColCat = newCol.categories;
701
- const newColData = newCol.getRawData();
702
- col = cols.addNew(newCol.name, newCol.type).init((i) => newColCat[newColData[i]]);
703
- col.setTag(C.TAGS.ANALYSIS_COL, `${true}`);
704
- col.setTag(C.TAGS.POSITION_COL, `${true}`);
705
- CR.setMonomerRenderer(col, this.alphabet);
704
+ if (positionColumns.every((colName) => cols.contains(colName)))
705
+ positionColumns.forEach((colName) => CR.setMonomerRenderer(this.df.col(colName)!, this.alphabet));
706
+ else {
707
+ for (const colName of positionColumns) {
708
+ let col = this.df.col(colName);
709
+ const newCol = splitSeqDf.getCol(colName);
710
+ if (col !== null)
711
+ cols.remove(colName);
712
+ const newColCat = newCol.categories;
713
+ const newColData = newCol.getRawData();
714
+ col = cols.addNew(newCol.name, newCol.type).init((i) => newColCat[newColData[i]]);
715
+ col.setTag(C.TAGS.ANALYSIS_COL, `${true}`);
716
+ col.setTag(C.TAGS.POSITION_COL, `${true}`);
717
+ CR.setMonomerRenderer(col, this.alphabet);
718
+ }
706
719
  }
720
+
707
721
  this.df.name = name;
708
722
  }
709
723
 
@@ -1133,7 +1147,7 @@ export class PeptidesModel {
1133
1147
  activityColumnName: this.settings!.activityColumnName,
1134
1148
  clusterColumnName: potentialClusterCol ?? wu(this.df.columns.categorical).next().value?.name,
1135
1149
  activityTarget: C.ACTIVITY_TARGET.HIGH,
1136
- connectivityColumnName: this._mclCols.find((colName) => colName.toLowerCase().startsWith('connectivity')),
1150
+ connectivityColumnName: this._mclCols.find((colName) => colName.toLowerCase().startsWith('connectivity')) ?? this.df.columns.names().find((colName) => colName.toLowerCase().includes('connectivity') && this.df.col(colName)?.isNumerical) ?? '',
1137
1151
  clusterSizeThreshold: 20,
1138
1152
  activityThreshold: 1000,
1139
1153
  };
@@ -1296,51 +1310,44 @@ export class PeptidesModel {
1296
1310
  }
1297
1311
  });
1298
1312
 
1299
- const bioPreprocessingFunc = DG.Func.find({package: 'Bio', name: 'macromoleculePreprocessingFunction'})[0];
1300
- const mclViewer = await markovCluster(
1301
- this.df, [seqCol], [mclParams!.distanceF], [1],
1302
- DistanceAggregationMethods.MANHATTAN, [bioPreprocessingFunc], [{
1313
+ const serializedOptions: string = JSON.stringify({
1314
+ cols: [seqCol].map((col) => col.name),
1315
+ metrics: [mclParams!.distanceF],
1316
+ weights: [1],
1317
+ aggregationMethod: DistanceAggregationMethods.MANHATTAN,
1318
+ preprocessingFuncs: ['macromoleculePreprocessingFunction'],
1319
+ preprocessingFuncArgs: [{
1303
1320
  gapOpen: mclParams!.gapOpen, gapExtend: mclParams!.gapExtend,
1304
1321
  fingerprintType: mclParams!.fingerprintType,
1305
1322
  }],
1306
- mclParams!.threshold, mclParams!.maxIterations, mclParams.useWebGPU,
1307
- mclParams!.inflation, mclParams.minClusterSize,
1308
- );
1309
- mclAdditionSub.unsubscribe();
1310
-
1311
- // find logo summery viewer and make it rerender
1312
- const lstViewer = this.findViewer(VIEWER_TYPE.LOGO_SUMMARY_TABLE) as LogoSummaryTable | null;
1313
- if (lstViewer) { // beware, this is accessing private things
1314
- lstViewer._clusterStats = null;
1315
- lstViewer._clusterSelection = null;
1316
- lstViewer._viewerGrid = null;
1317
- lstViewer._logoSummaryTable = null;
1318
- lstViewer.render();
1319
- }
1320
-
1321
- if (mclViewer?.sc) {
1322
- const serializedOptions: string = JSON.stringify({
1323
- cols: [seqCol].map((col) => col.name),
1324
- metrics: [mclParams!.distanceF],
1325
- weights: [1],
1326
- aggregationMethod: DistanceAggregationMethods.MANHATTAN,
1327
- preprocessingFuncs: [bioPreprocessingFunc].map((func) => func?.name ?? null),
1328
- preprocessingFuncArgs: [{
1329
- gapOpen: mclParams!.gapOpen, gapExtend: mclParams!.gapExtend,
1330
- fingerprintType: mclParams!.fingerprintType,
1331
- }],
1332
- threshold: mclParams!.threshold,
1333
- maxIterations: mclParams!.maxIterations,
1334
- useWebGPU: mclParams.useWebGPU,
1335
- inflate: mclParams!.inflation,
1336
- minClusterSize: mclParams.minClusterSize,
1337
- } satisfies MCLSerializableOptions);
1338
- this.df.setTag(MCL_OPTIONS_TAG, serializedOptions);
1339
-
1340
-
1341
- //@ts-ignore
1342
- mclViewer.sc.props['initializationFunction'] = 'EDA:MCLInitializationFunction';
1343
- this._mclViewer = mclViewer?.sc ?? null;
1323
+ threshold: mclParams!.threshold,
1324
+ maxIterations: mclParams!.maxIterations,
1325
+ useWebGPU: mclParams.useWebGPU,
1326
+ inflate: mclParams!.inflation,
1327
+ minClusterSize: mclParams.minClusterSize,
1328
+ } satisfies MCLSerializableOptions);
1329
+
1330
+ const tv = grok.shell.getTableView(this.df.name);
1331
+ if (tv) {
1332
+ const func = DG.Func.find({package: 'EDA', name: 'markovClusteringViewer'})[0];
1333
+ if (!func)
1334
+ throw new Error('Markov clustering function is not found');
1335
+ // make sure eda is loaded
1336
+ await func.apply();
1337
+ tv.addViewer(VIEWER_TYPE.MCL, {mclProps: serializedOptions}) as MCLViewer;
1338
+ //tv.addViewer(VIEWER_TYPE.MCL, {mclProps: serializedOptions});
1339
+ // the addviewer method goes through dart, so it returns JSViewer instead of MCLViewer, so also need to wait a bit
1340
+ await DG.delay(500);
1341
+ this._mclViewer = this.findViewer(VIEWER_TYPE.MCL) as MCLViewer;
1342
+ await this._mclViewer.initPromise;
1343
+ const lstViewer = this.findViewer(VIEWER_TYPE.LOGO_SUMMARY_TABLE) as LogoSummaryTable | null;
1344
+ if (lstViewer) { // beware, this is accessing private things
1345
+ lstViewer._clusterStats = null;
1346
+ lstViewer._clusterSelection = null;
1347
+ lstViewer._viewerGrid = null;
1348
+ lstViewer._logoSummaryTable = null;
1349
+ lstViewer.render();
1350
+ }
1344
1351
  }
1345
1352
  }
1346
1353
 
@@ -112,7 +112,7 @@ export class ClusterMaxActivityViewer extends DG.JsViewer implements IClusterMax
112
112
  return null;
113
113
  }
114
114
  const clusterSizeCol = this.dataFrame.columns.getOrCreate(ClusterMaxActivityViewer.clusterSizeColName,
115
- DG.TYPE.INT, this.dataFrame.rowCount);
115
+ DG.TYPE.INT);
116
116
  const clusterSizeMap: {[key: number | string]: number} = {};
117
117
  for (let i = 0; i < this.dataFrame.rowCount; i++) {
118
118
  const cluster: string | number = clusterCol.get(i);
@@ -156,7 +156,7 @@ export class ClusterMaxActivityViewer extends DG.JsViewer implements IClusterMax
156
156
  }
157
157
 
158
158
  const maxAtivityInClusterSizeCol = this.dataFrame.columns.getOrCreate(
159
- ClusterMaxActivityViewer.maxActivityInClusterColName, DG.COLUMN_TYPE.INT, this.dataFrame.rowCount);
159
+ ClusterMaxActivityViewer.maxActivityInClusterColName, DG.COLUMN_TYPE.INT);
160
160
  maxAtivityInClusterSizeCol.init((i) => {
161
161
  if (clusterCol.isNone(i))
162
162
  return 0;
@@ -164,7 +164,7 @@ export class ClusterMaxActivityViewer extends DG.JsViewer implements IClusterMax
164
164
  });
165
165
 
166
166
  const maxConnectivityInClusterSizeCol = this.dataFrame.columns.getOrCreate(
167
- ClusterMaxActivityViewer.maxConnectivityInClusterColName, DG.COLUMN_TYPE.INT, this.dataFrame.rowCount);
167
+ ClusterMaxActivityViewer.maxConnectivityInClusterColName, DG.COLUMN_TYPE.INT);
168
168
  maxConnectivityInClusterSizeCol.init((i) => {
169
169
  if (clusterCol.isNone(i))
170
170
  return 0;
@@ -172,7 +172,7 @@ export class ClusterMaxActivityViewer extends DG.JsViewer implements IClusterMax
172
172
  });
173
173
 
174
174
  const synSelectionCol = this.dataFrame.columns.getOrCreate(
175
- ClusterMaxActivityViewer.synSelectionColName, DG.TYPE.STRING, this.dataFrame.rowCount);
175
+ ClusterMaxActivityViewer.synSelectionColName, DG.TYPE.STRING);
176
176
 
177
177
  synSelectionCol.init((i) => {
178
178
  if (clusterCol.isNone(i))
@@ -243,8 +243,10 @@ function cliffsPairsWidgetParts(table: DG.DataFrame, options: MutationCliffsOpti
243
243
  pairsGrid.root.style.setProperty('width', '100%');
244
244
  uniqueSequencesGrid.root.style.removeProperty('width');
245
245
  uniqueSequencesGrid.root.style.setProperty('width', '100%');
246
- pairsGrid.root.style.minHeight = '250px';
247
- uniqueSequencesGrid.root.style.minHeight = '250px';
246
+ pairsGrid.root.style.minHeight = '200px';
247
+ uniqueSequencesGrid.root.style.minHeight = '200px';
248
+ pairsGrid.root.style.height = 'unset';
249
+ uniqueSequencesGrid.root.style.height = 'unset';
248
250
  }, 200);
249
251
 
250
252
  return {pairsGrid, uniqueSequencesGrid, aminoToInput};
@@ -263,20 +263,28 @@ export async function startAnalysis(activityColumn: DG.Column<number>, peptidesC
263
263
  const progress = DG.TaskBarProgressIndicator.create('Loading SAR...');
264
264
 
265
265
  // Prepare new DF
266
- const newDf = DG.DataFrame.create(sourceDf.rowCount);
267
- newDf.name = 'Peptides analysis';
268
- const newDfCols = newDf.columns;
269
- newDfCols.add(scaledCol);
270
- for (const col of sourceDf.columns) {
271
- if (col.getTag(C.TAGS.ANALYSIS_COL) !== `${true}`) {
272
- if (col.name.toLowerCase() === scaledCol.name.toLowerCase())
273
- col.name = sourceDf.columns.getUnusedName(col.name);
274
-
275
-
276
- newDfCols.add(col);
277
- }
278
- }
279
-
266
+ // const newDf = DG.DataFrame.create(sourceDf.rowCount);
267
+ // newDf.name = 'Peptides analysis';
268
+ // const newDfCols = newDf.columns;
269
+ // newDfCols.add(scaledCol);
270
+ // for (const col of sourceDf.columns) {
271
+ // if (col.getTag(C.TAGS.ANALYSIS_COL) !== `${true}`) {
272
+ // if (col.name.toLowerCase() === scaledCol.name.toLowerCase())
273
+ // col.name = sourceDf.columns.getUnusedName(col.name);
274
+ // newDfCols.add(col);
275
+ // }
276
+ // }
277
+
278
+ //make sure the data sync is turned off for the dataframe:
279
+ sourceDf.tags.delete && sourceDf.tags.delete('.script');
280
+
281
+ const sourceCols = sourceDf.columns;
282
+ const oldActivityCol = sourceDf.col(scaledCol.name);
283
+ if (oldActivityCol)
284
+ oldActivityCol.name = sourceCols.getUnusedName(oldActivityCol.name);
285
+ const scaleColRawData = scaledCol.getRawData();
286
+ sourceDf.columns.addNew(scaledCol.name, scaledCol.type).init((i) => scaleColRawData[i]);
287
+ sourceCols.setOrder([scaledCol.name, peptidesCol.name, ...sourceCols.names().filter((name) => name !== peptidesCol.name && name !== scaledCol.name)]);
280
288
  const settings: type.PeptidesSettings = {
281
289
  sequenceColumnName: peptidesCol.name, activityColumnName: activityColumn.name, activityScaling: scaling,
282
290
  columns: {}, showDendrogram: false, showSequenceSpace: false,
@@ -285,18 +293,18 @@ export async function startAnalysis(activityColumn: DG.Column<number>, peptidesC
285
293
  };
286
294
 
287
295
  if (clustersColumn) {
288
- const clusterCol = newDf.getCol(clustersColumn.name);
296
+ const clusterCol = sourceDf.getCol(clustersColumn.name);
289
297
  if (clusterCol.type !== DG.COLUMN_TYPE.STRING)
290
- newDfCols.replace(clusterCol, clusterCol.convertTo(DG.COLUMN_TYPE.STRING));
298
+ sourceCols.replace(clusterCol, clusterCol.convertTo(DG.COLUMN_TYPE.STRING));
291
299
  }
292
- newDf.setTag(C.TAGS.SETTINGS, JSON.stringify(settings));
300
+ sourceDf.setTag(C.TAGS.SETTINGS, JSON.stringify(settings));
293
301
 
294
- const bitset = DG.BitSet.create(sourceDf.rowCount,
295
- (i) => !activityColumn.isNone(i) && !peptidesCol.isNone(i) && sourceDf.filter.get(i));
302
+ // const bitset = DG.BitSet.create(sourceDf.rowCount,
303
+ // (i) => !activityColumn.isNone(i) && !peptidesCol.isNone(i) && sourceDf.filter.get(i));
296
304
 
297
305
  // Cloning dataframe with applied filter. If filter is not applied, cloning is
298
306
  // needed anyway to allow filtering on the original dataframe
299
- model = PeptidesModel.getInstance(newDf.clone(bitset));
307
+ model = PeptidesModel.getInstance(sourceDf);
300
308
  model.init(settings);
301
309
  if (clustersColumn) {
302
310
  const lstProps: ILogoSummaryTable = {