@datagrok/peptides 1.17.0 → 1.17.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (58) hide show
  1. package/.eslintrc.json +17 -6
  2. package/dist/196.js +2 -0
  3. package/dist/23.js +2 -0
  4. package/dist/282.js +2 -0
  5. package/dist/361.js +2 -2
  6. package/dist/40.js +2 -0
  7. package/dist/436.js +2 -2
  8. package/dist/65.js +2 -0
  9. package/dist/704.js +2 -0
  10. package/dist/package-test.js +2 -3
  11. package/dist/package.js +2 -3
  12. package/package.json +14 -14
  13. package/src/demo/fasta.ts +8 -2
  14. package/src/model.ts +780 -531
  15. package/src/package-test.ts +1 -3
  16. package/src/package.ts +15 -28
  17. package/src/tests/benchmarks.ts +31 -11
  18. package/src/tests/core.ts +11 -6
  19. package/src/tests/misc.ts +6 -6
  20. package/src/tests/model.ts +79 -44
  21. package/src/tests/table-view.ts +48 -38
  22. package/src/tests/utils.ts +0 -76
  23. package/src/tests/viewers.ts +30 -12
  24. package/src/tests/widgets.ts +30 -11
  25. package/src/utils/algorithms.ts +115 -38
  26. package/src/utils/cell-renderer.ts +181 -72
  27. package/src/utils/constants.ts +33 -7
  28. package/src/utils/misc.ts +244 -10
  29. package/src/utils/parallel-mutation-cliffs.ts +18 -15
  30. package/src/utils/statistics.ts +70 -15
  31. package/src/utils/tooltips.ts +42 -17
  32. package/src/utils/types.ts +29 -26
  33. package/src/utils/worker-creator.ts +5 -0
  34. package/src/viewers/logo-summary.ts +591 -130
  35. package/src/viewers/sar-viewer.ts +893 -239
  36. package/src/widgets/distribution.ts +305 -64
  37. package/src/widgets/manual-alignment.ts +18 -11
  38. package/src/widgets/mutation-cliffs.ts +44 -18
  39. package/src/widgets/peptides.ts +86 -91
  40. package/src/widgets/selection.ts +56 -22
  41. package/src/widgets/settings.ts +94 -44
  42. package/src/workers/dimensionality-reducer.ts +5 -6
  43. package/src/workers/mutation-cliffs-worker.ts +3 -16
  44. package/dist/209.js +0 -2
  45. package/dist/381.js +0 -2
  46. package/dist/770.js +0 -2
  47. package/dist/831.js +0 -2
  48. package/dist/868.js +0 -2
  49. package/dist/931.js +0 -3
  50. package/dist/931.js.LICENSE.txt +0 -51
  51. package/dist/932.js +0 -2
  52. package/dist/package-test.js.LICENSE.txt +0 -51
  53. package/dist/package.js.LICENSE.txt +0 -51
  54. package/src/tests/peptide-space-test.ts +0 -48
  55. package/src/tests/test-data.ts +0 -649
  56. package/src/utils/molecular-measure.ts +0 -174
  57. package/src/utils/peptide-similarity-space.ts +0 -216
  58. package/src/viewers/peptide-space-viewer.ts +0 -150
@@ -2,8 +2,6 @@ import * as grok from 'datagrok-api/grok';
2
2
  import * as ui from 'datagrok-api/ui';
3
3
  import * as DG from 'datagrok-api/dg';
4
4
 
5
- import * as uuid from 'uuid';
6
-
7
5
  import '../styles.css';
8
6
  import * as C from '../utils/constants';
9
7
  import * as type from '../utils/types';
@@ -11,28 +9,31 @@ import {PeptidesModel} from '../model';
11
9
  import $ from 'cash-dom';
12
10
  import {scaleActivity} from '../utils/misc';
13
11
  import {ALIGNMENT, NOTATION, TAGS as bioTAGS} from '@datagrok-libraries/bio/src/utils/macromolecule';
12
+ import {ILogoSummaryTable} from '../viewers/logo-summary';
13
+
14
+ export type DialogParameters = { host: HTMLElement, callback: () => Promise<boolean> };
14
15
 
15
- /** Peptide analysis widget.
16
- * @param {DG.DataFrame} df Working table
17
- * @param {DG.Column} col Aligned sequence column
18
- * @return {Promise<DG.Widget>} Widget containing peptide analysis */
19
- export function analyzePeptidesUI(df: DG.DataFrame, col?: DG.Column<string>): {host: HTMLElement, callback: () => Promise<boolean>} {
16
+ /**
17
+ * Peptides analysis parameters UI
18
+ * @param df - Dataframe with peptides
19
+ * @param [col] - Peptides column
20
+ * @return - UI host and analysis start callback
21
+ */
22
+ export function analyzePeptidesUI(df: DG.DataFrame, col?: DG.Column<string>): DialogParameters {
20
23
  const logoHost = ui.div();
21
24
  let seqColInput: DG.InputBase | null = null;
22
25
  if (typeof col === 'undefined') {
23
- const sequenceColumns = df.columns.toList().filter((dfCol) => dfCol.semType === DG.SEMTYPE.MACROMOLECULE);
24
- const potentialCol = DG.Utils.firstOrNull(sequenceColumns);
26
+ // Building UI for starting analysis from dialog (top menu)
27
+ const potentialCol = DG.Utils.firstOrNull(
28
+ df.columns.toList().filter((dfCol) => dfCol.semType === DG.SEMTYPE.MACROMOLECULE));
25
29
  if (potentialCol === null)
26
30
  throw new Error('Peptides Error: table doesn\'t contain sequence columns');
27
31
  else if (potentialCol.stats.missingValueCount !== 0)
28
32
  grok.shell.info('Sequences column contains missing values. They will be ignored during analysis');
29
33
 
34
+
30
35
  seqColInput = ui.columnInput('Sequence', df, potentialCol, () => {
31
36
  const seqCol = seqColInput!.value;
32
- if (!(seqCol.getTag(DG.TAGS.SEMTYPE) === DG.SEMTYPE.MACROMOLECULE)) {
33
- grok.shell.warning('Peptides analysis only works with macromolecules');
34
- seqColInput!.value = potentialCol;
35
- }
36
37
  $(logoHost).empty().append(ui.wait(async () => {
37
38
  const viewer = await df.plot.fromType('WebLogo', {sequenceColumnName: seqCol.name});
38
39
  viewer.root.style.setProperty('height', '130px');
@@ -66,6 +67,7 @@ export function analyzePeptidesUI(df: DG.DataFrame, col?: DG.Column<string>): {h
66
67
  };
67
68
  }
68
69
 
70
+ // Activity column properties
69
71
  let scaledCol: DG.Column<number>;
70
72
  const defaultActivityColumn: DG.Column<number> | null = df.col('activity') || df.col('IC50') ||
71
73
  DG.Utils.firstOrNull(df.columns.numerical);
@@ -77,13 +79,8 @@ export function analyzePeptidesUI(df: DG.DataFrame, col?: DG.Column<string>): {h
77
79
  scaledCol = scaleActivity(activityColumnChoice.value!, currentMethod);
78
80
 
79
81
  const hist = DG.DataFrame.fromColumns([scaledCol]).plot.histogram({
80
- filteringEnabled: false,
81
- valueColumnName: C.COLUMNS_NAMES.ACTIVITY,
82
- legendVisibility: 'Never',
83
- showXAxis: true,
84
- showColumnSelector: false,
85
- showRangeSlider: false,
86
- showBinSelector: false,
82
+ filteringEnabled: false, valueColumnName: C.COLUMNS_NAMES.ACTIVITY, legendVisibility: 'Never', showXAxis: true,
83
+ showColumnSelector: false, showRangeSlider: false, showBinSelector: false,
87
84
  });
88
85
  histogramHost.lastChild?.remove();
89
86
  histogramHost.appendChild(hist.root);
@@ -96,7 +93,6 @@ export function analyzePeptidesUI(df: DG.DataFrame, col?: DG.Column<string>): {h
96
93
  if (activityColumnChoice.value!.stats.missingValueCount !== 0)
97
94
  grok.shell.info('Activity column contains missing values. They will be ignored during analysis');
98
95
  };
99
- //TODO: add when new version of datagrok-api is available
100
96
  const activityColumnChoice = ui.columnInput('Activity', df, defaultActivityColumn, activityScalingMethodState,
101
97
  {filter: (col: DG.Column) => col.type === DG.TYPE.INT || col.type === DG.TYPE.FLOAT});
102
98
  activityColumnChoice.setTooltip('Numerical activity column');
@@ -106,15 +102,11 @@ export function analyzePeptidesUI(df: DG.DataFrame, col?: DG.Column<string>): {h
106
102
  activityColumnChoice.fireChanged();
107
103
  activityScalingMethod.fireChanged();
108
104
 
109
- const targetColumnChoice = ui.columnInput('Target', df, null, null, {filter: (col: DG.Column) => col.type === DG.TYPE.STRING});
110
- targetColumnChoice.setTooltip('Optional. Target represents a unique binding construct for every peptide in the data. ' +
111
- 'Target can be used to split mutation cliff analysis for peptides specific to a certain set of targets');
112
- targetColumnChoice.nullable = true;
113
-
114
- const inputsList = [activityColumnChoice, activityScalingMethod, clustersColumnChoice, targetColumnChoice];
105
+ const inputsList = [activityColumnChoice, activityScalingMethod, clustersColumnChoice];
115
106
  if (seqColInput !== null)
116
107
  inputsList.splice(0, 0, seqColInput);
117
108
 
109
+
118
110
  const bitsetChanged = df.filter.onChanged.subscribe(() => activityScalingMethodState());
119
111
 
120
112
  const startAnalysisCallback = async (): Promise<boolean> => {
@@ -122,7 +114,7 @@ export function analyzePeptidesUI(df: DG.DataFrame, col?: DG.Column<string>): {h
122
114
  bitsetChanged.unsubscribe();
123
115
  if (sequencesCol) {
124
116
  const model = await startAnalysis(activityColumnChoice.value!, sequencesCol, clustersColumnChoice.value, df,
125
- scaledCol, activityScalingMethod.value ?? C.SCALING_METHODS.NONE, targetColumnChoice.value, {addSequenceSpace: true});
117
+ scaledCol, activityScalingMethod.value ?? C.SCALING_METHODS.NONE, {addSequenceSpace: true});
126
118
  return model !== null;
127
119
  }
128
120
  return false;
@@ -155,75 +147,78 @@ export function analyzePeptidesUI(df: DG.DataFrame, col?: DG.Column<string>): {h
155
147
  return {host: mainHost, callback: startAnalysisCallback};
156
148
  }
157
149
 
158
- type AnalysisOptions = {addSequenceSpace?: boolean};
159
-
150
+ type AnalysisOptions = { addSequenceSpace?: boolean };
151
+
152
+ /**
153
+ * Creates dataframe to use in analysis, model instance and adds viewers
154
+ * @param activityColumn - Activity column
155
+ * @param peptidesCol - Peptides column
156
+ * @param clustersColumn - Clusters column or null
157
+ * @param sourceDf - Source dataframe
158
+ * @param scaledCol - Scaled activity column
159
+ * @param scaling - Activity scaling method
160
+ * @param options - Additional options
161
+ * @return - Peptides model instance or null
162
+ */
160
163
  export async function startAnalysis(activityColumn: DG.Column<number>, peptidesCol: DG.Column<string>,
161
- clustersColumn: DG.Column | null, currentDf: DG.DataFrame, scaledCol: DG.Column<number>, scaling: C.SCALING_METHODS,
162
- targetColumn: DG.Column<string> | null = null, options: AnalysisOptions = {}): Promise<PeptidesModel | null> {
163
- const progress = DG.TaskBarProgressIndicator.create('Loading SAR...');
164
+ clustersColumn: DG.Column | null, sourceDf: DG.DataFrame, scaledCol: DG.Column<number>, scaling: C.SCALING_METHODS,
165
+ options: AnalysisOptions = {}): Promise<PeptidesModel | null> {
164
166
  let model = null;
165
- if (activityColumn.type === DG.COLUMN_TYPE.FLOAT || activityColumn.type === DG.COLUMN_TYPE.INT) {
166
- //prepare new DF
167
- const newDf = DG.DataFrame.create(currentDf.rowCount);
168
- const newDfCols = newDf.columns;
169
- newDfCols.add(scaledCol);
170
- for (const col of currentDf.columns) {
171
- if (col.getTag(C.TAGS.ANALYSIS_COL) !== `${true}`) {
172
- if (col.name.toLowerCase() === scaledCol.name.toLowerCase())
173
- col.name = currentDf.columns.getUnusedName(col.name);
174
- newDfCols.add(col);
175
- }
167
+ if (activityColumn.type !== DG.COLUMN_TYPE.FLOAT && activityColumn.type !== DG.COLUMN_TYPE.INT) {
168
+ grok.shell.error('The activity column must be of numeric type!');
169
+ return model;
170
+ }
171
+ const progress = DG.TaskBarProgressIndicator.create('Loading SAR...');
172
+
173
+ // Prepare new DF
174
+ const newDf = DG.DataFrame.create(sourceDf.rowCount);
175
+ newDf.name = 'Peptides analysis';
176
+ const newDfCols = newDf.columns;
177
+ newDfCols.add(scaledCol);
178
+ for (const col of sourceDf.columns) {
179
+ if (col.getTag(C.TAGS.ANALYSIS_COL) !== `${true}`) {
180
+ if (col.name.toLowerCase() === scaledCol.name.toLowerCase())
181
+ col.name = sourceDf.columns.getUnusedName(col.name);
182
+
183
+
184
+ newDfCols.add(col);
176
185
  }
186
+ }
187
+
188
+ const settings: type.PeptidesSettings = {
189
+ sequenceColumnName: peptidesCol.name, activityColumnName: activityColumn.name, activityScaling: scaling,
190
+ columns: {}, showDendrogram: false, sequenceSpaceParams: new type.SequenceSpaceParams(),
191
+ };
177
192
 
178
- newDf.name = 'Peptides analysis';
179
- const settings: type.PeptidesSettings = {
180
- sequenceColumnName: peptidesCol.name,
193
+ if (clustersColumn) {
194
+ const clusterCol = newDf.getCol(clustersColumn.name);
195
+ if (clusterCol.type !== DG.COLUMN_TYPE.STRING)
196
+ newDfCols.replace(clusterCol, clusterCol.convertTo(DG.COLUMN_TYPE.STRING));
197
+ }
198
+ newDf.setTag(C.TAGS.SETTINGS, JSON.stringify(settings));
199
+
200
+ const bitset = DG.BitSet.create(sourceDf.rowCount,
201
+ (i) => !activityColumn.isNone(i) && !peptidesCol.isNone(i) && sourceDf.filter.get(i));
202
+
203
+ // Cloning dataframe with applied filter. If filter is not applied, cloning is
204
+ // needed anyway to allow filtering on the original dataframe
205
+ model = PeptidesModel.getInstance(newDf.clone(bitset));
206
+ model.init(settings);
207
+ if (clustersColumn) {
208
+ const lstProps: ILogoSummaryTable = {
209
+ clustersColumnName: clustersColumn.name, sequenceColumnName: peptidesCol.name, activityScaling: scaling,
181
210
  activityColumnName: activityColumn.name,
182
- scaling: scaling,
183
- columns: {},
184
- maxMutations: 1,
185
- minActivityDelta: 0,
186
- showDendrogram: false,
187
211
  };
188
- if (targetColumn !== null)
189
- settings.targetColumnName = targetColumn.name;
190
-
191
- if (clustersColumn) {
192
- const clusterCol = newDf.getCol(clustersColumn.name);
193
- if (clusterCol.type !== DG.COLUMN_TYPE.STRING)
194
- newDfCols.replace(clusterCol, clusterCol.convertTo(DG.COLUMN_TYPE.STRING));
195
- settings.clustersColumnName = clustersColumn.name;
196
- }
197
- newDf.setTag(C.TAGS.SETTINGS, JSON.stringify(settings));
198
-
199
- let monomerType: string;
200
- if (peptidesCol.getTag(DG.TAGS.UNITS) === NOTATION.HELM) {
201
- const sampleSeq = peptidesCol.get(0)!;
202
- monomerType = sampleSeq.startsWith('PEPTIDE') ? 'HELM_AA' : 'HELM_BASE';
203
- } else {
204
- const alphabet = peptidesCol.tags[C.TAGS.ALPHABET];
205
- monomerType = alphabet === 'DNA' || alphabet === 'RNA' ? 'HELM_BASE' : 'HELM_AA';
206
- }
207
- const dfUuid = uuid.v4();
208
- newDf.setTag(C.TAGS.UUID, dfUuid);
209
- newDf.setTag('monomerType', monomerType);
210
-
211
- const bitset = DG.BitSet.create(currentDf.rowCount,
212
- (i) => !activityColumn.isNone(i) && !peptidesCol.isNone(i) && currentDf.filter.get(i));
213
-
214
- // Cloning dataframe with applied filter. If filter is not applied, cloning is
215
- // needed anyway to allow filtering on the original dataframe
216
- model = PeptidesModel.getInstance(newDf.clone(bitset));
217
- if (clustersColumn)
218
- await model.addLogoSummaryTable();
219
- await model.addMonomerPosition();
220
- await model.addMostPotentResidues();
221
-
222
- // FIXME: enable by default for tests
223
- if (options.addSequenceSpace ?? false)
224
- model.addSequenceSpace();
225
- } else
226
- grok.shell.error('The activity column must be of numeric type!');
212
+ await model.addLogoSummaryTable(lstProps);
213
+ }
214
+ await model.addMonomerPosition();
215
+ await model.addMostPotentResidues();
216
+
217
+ // FIXME: enable by default for tests
218
+ if (options.addSequenceSpace ?? false)
219
+ model.addSequenceSpace();
220
+
221
+
227
222
  progress.close();
228
223
  return model;
229
224
  }
@@ -1,35 +1,52 @@
1
1
  import * as ui from 'datagrok-api/ui';
2
2
  import * as DG from 'datagrok-api/dg';
3
3
 
4
- import {PeptidesModel} from '../model';
5
-
6
4
  import wu from 'wu';
7
- import {COLUMNS_NAMES} from '../utils/constants';
8
5
  import {addExpandIcon} from '../utils/misc';
9
- import {CellRendererOptions, setWebLogoRenderer, WebLogoBounds} from '../utils/cell-renderer';
6
+ import {setWebLogoRenderer, WebLogoBounds, WebLogoCellRendererOptions} from '../utils/cell-renderer';
10
7
  import {CachedWebLogoTooltip, SelectionItem} from '../utils/types';
11
8
  import {TooltipOptions} from '../utils/tooltips';
12
9
  import {calculateMonomerPositionStatistics} from '../utils/algorithms';
10
+ import {AggregationColumns} from '../utils/statistics';
11
+ import {SeqPalette} from '@datagrok-libraries/bio/src/seq-palettes';
12
+
13
+ export type SelectionWidgetOptions = {
14
+ tableSelection: DG.BitSet, gridColumns: DG.GridColumnList, positionColumns: DG.Column<string>[],
15
+ activityColumn: DG.Column<number>, columns: AggregationColumns, colorPalette: SeqPalette, isAnalysis: boolean,
16
+ };
13
17
 
14
- export function getSelectionWidget(table: DG.DataFrame, model: PeptidesModel): HTMLElement {
15
- const compBitset = model.getVisibleSelection();
16
- if (compBitset.trueCount === 0)
18
+ /**
19
+ * Creates selection grid with WebLogo rendered in its header.
20
+ * @param table - table with selected sequences.
21
+ * @param options - options for selection widget.
22
+ * @return - selection grid.
23
+ */
24
+ export function getSelectionWidget(table: DG.DataFrame, options: SelectionWidgetOptions): HTMLElement {
25
+ if (options.tableSelection.trueCount === 0)
17
26
  return ui.divText('No compounds selected');
27
+
28
+
18
29
  const newTable = DG.DataFrame.create(table.rowCount);
19
30
  newTable.name = 'Selected compounds';
20
- newTable.filter.copyFrom(compBitset);
21
- const sourceGrid = model.analysisView.grid;
31
+ newTable.filter.copyFrom(options.tableSelection);
22
32
  const numericalCols = wu(table.columns.numerical);
23
- for (let gridColIdx = 1; gridColIdx < sourceGrid.columns.length; gridColIdx++) {
24
- const gridCol = sourceGrid.columns.byIndex(gridColIdx)!;
33
+ for (let gridColIdx = 1; gridColIdx < options.gridColumns.length; gridColIdx++) {
34
+ const gridCol = options.gridColumns.byIndex(gridColIdx)!;
25
35
  if (!gridCol.visible)
26
36
  continue;
37
+
38
+
27
39
  const sourceCol = gridCol.column!;
40
+ if (sourceCol.type === DG.COLUMN_TYPE.BOOL)
41
+ continue;
42
+
43
+
28
44
  const sourceColRawData = sourceCol.getRawData();
29
45
  const sourceColCategories = sourceCol.categories;
30
- const getValue = numericalCols.some((col) => col.name === sourceCol.name) ? (i: number): number => sourceColRawData[i] :
46
+ const getValue = numericalCols
47
+ .some((col) => col.name === sourceCol.name) ? (i: number): number => sourceColRawData[i] :
31
48
  (i: number): string => sourceColCategories[sourceColRawData[i]];
32
- const col = sourceCol.name === COLUMNS_NAMES.ACTIVITY ?
49
+ const col = sourceCol.name === options.activityColumn.name ?
33
50
  newTable.columns.addNewFloat(gridCol.name).init((i) => getValue(i)) :
34
51
  newTable.columns.addNewVirtual(gridCol.name, (i) => getValue(i), sourceCol.type as DG.TYPE);
35
52
  for (const [tag, value] of sourceCol.tags)
@@ -55,22 +72,39 @@ export function getSelectionWidget(table: DG.DataFrame, model: PeptidesModel): H
55
72
  const gridHost = ui.box(grid.root);
56
73
  gridHost.style.marginLeft = '0px';
57
74
  setTimeout(() => {
58
- for (let gridColIdx = 1; gridColIdx < sourceGrid.columns.length; gridColIdx++) {
59
- const gridCol = sourceGrid.columns.byIndex(gridColIdx)!;
60
- if (!gridCol.visible)
75
+ for (let gridColIdx = 1; gridColIdx < options.gridColumns.length; gridColIdx++) {
76
+ const originalGridCol = options.gridColumns.byIndex(gridColIdx)!;
77
+ if (!originalGridCol.visible)
78
+ continue;
79
+
80
+
81
+ if (!grid.col(originalGridCol.name))
61
82
  continue;
62
- grid.col(gridCol.name)!.width = gridCol.width;
83
+
84
+
85
+ grid.col(originalGridCol.name)!.width = originalGridCol.width;
63
86
  }
64
87
  }, 500);
65
88
 
66
- const mpStats = calculateMonomerPositionStatistics(grid.dataFrame, model.positionColumns.toArray());
89
+ const mpStats = calculateMonomerPositionStatistics(options.activityColumn, newTable.filter,
90
+ options.positionColumns, {isFiltered: newTable.filter.anyTrue || newTable.filter.anyFalse});
67
91
 
68
- const cachedWebLogoTooltip: CachedWebLogoTooltip = {bar: '', tooltip: null};
69
- const webLogoBounds: WebLogoBounds = {};
70
- const cellRendererOptions: CellRendererOptions = {isSelectionTable: true, cachedWebLogoTooltip, webLogoBounds};
92
+ const cachedWebLogoTooltip: () => CachedWebLogoTooltip = () => {
93
+ return {bar: '', tooltip: null};
94
+ };
95
+ const webLogoBounds: () => WebLogoBounds = () => {
96
+ return {};
97
+ };
98
+ const cellRendererOptions: WebLogoCellRendererOptions = {
99
+ isSelectionTable: true, cachedWebLogoTooltip, webLogoBounds,
100
+ colorPalette: () => options.colorPalette,
101
+ };
71
102
  const tooltipOptions: TooltipOptions = {x: 0, y: 0, monomerPosition: {} as SelectionItem, mpStats};
72
103
 
73
- setWebLogoRenderer(grid, model, cellRendererOptions, tooltipOptions);
104
+ if (options.isAnalysis) {
105
+ setWebLogoRenderer(grid, mpStats, options.positionColumns, options.activityColumn, cellRendererOptions,
106
+ tooltipOptions);
107
+ }
74
108
 
75
109
  return gridHost;
76
110
  }
@@ -1,4 +1,5 @@
1
1
  import * as ui from 'datagrok-api/ui';
2
+ import * as grok from 'datagrok-api/grok';
2
3
  import * as DG from 'datagrok-api/dg';
3
4
 
4
5
  import * as type from '../utils/types';
@@ -8,16 +9,17 @@ import {PeptidesModel, VIEWER_TYPE} from '../model';
8
9
  import $ from 'cash-dom';
9
10
  import wu from 'wu';
10
11
  import {getTreeHelperInstance} from '../package';
12
+ import { MmDistanceFunctionsNames as distFNames } from '@datagrok-libraries/ml/src/macromolecule-distance-functions';
11
13
 
12
- type PaneInputs = {[paneName: string]: DG.InputBase[]};
13
- type SettingsElements = {dialog: DG.Dialog, accordion: DG.Accordion, inputs: PaneInputs};
14
+ type PaneInputs = { [paneName: string]: DG.InputBase[] };
15
+ type SettingsElements = { dialog: DG.Dialog, accordion: DG.Accordion, inputs: PaneInputs };
14
16
 
15
17
  export enum SETTINGS_PANES {
16
18
  GENERAL = 'General',
17
19
  VIEWERS = 'Viewers',
18
- MUTATION_CLIFFS = 'Mutation Cliffs',
19
20
  COLUMNS = 'Columns',
20
- };
21
+ SEQUENCE_SPACE = 'Sequence space',
22
+ }
21
23
 
22
24
  export enum GENERAL_INPUTS {
23
25
  ACTIVITY = 'Activity',
@@ -28,45 +30,57 @@ export enum VIEWERS_INPUTS {
28
30
  DENDROGRAM = VIEWER_TYPE.DENDROGRAM,
29
31
  }
30
32
 
31
- export enum MUTATION_CLIFFS_INPUTS {
32
- MAX_MUTATIONS = 'Max mutations',
33
- MIN_ACTIVITY_DELTA = 'Min activity delta',
34
- }
35
-
36
33
  export enum COLUMNS_INPUTS {
37
34
  IS_INCLUDED = '',
38
35
  AGGREGATION = 'Aggregation',
39
36
  }
37
+ export enum SEQUENCE_SPACE_INPUTS {
38
+ DISTANCE_FUNCTION = 'Distance function',
39
+ GAP_OPEN = 'Gap open penalty',
40
+ GAP_EXTEND = 'Gap extend penalty',
41
+ CLUSTER_EMBEDDINGS = 'Cluster embeddings',
42
+ EPSILON = 'Epsilon',
43
+ MIN_PTS = 'Minimum points',
44
+ FINGERPRINT_TYPE = 'Fingerprint type',
45
+ }
46
+
40
47
 
41
48
  export const PANES_INPUTS = {
42
49
  [SETTINGS_PANES.GENERAL]: GENERAL_INPUTS,
43
50
  [SETTINGS_PANES.VIEWERS]: VIEWERS_INPUTS,
44
- [SETTINGS_PANES.MUTATION_CLIFFS]: MUTATION_CLIFFS_INPUTS,
45
51
  [SETTINGS_PANES.COLUMNS]: COLUMNS_INPUTS,
52
+ [SETTINGS_PANES.SEQUENCE_SPACE]: SEQUENCE_SPACE_INPUTS,
46
53
  };
47
54
 
48
- //TODO: show sliderInput values
55
+ /**
56
+ * Creates settings dialog for peptides analysis.
57
+ * @param model - Peptides analysis model.
58
+ * @return - Settings dialog elements.
59
+ */
49
60
  export function getSettingsDialog(model: PeptidesModel): SettingsElements {
61
+ if (model.settings == null)
62
+ grok.log.error('PeptidesError: Settings are not initialized');
63
+
50
64
  const accordion = ui.accordion();
51
65
  const settings = model.settings;
52
- const currentScaling = settings.scaling ?? C.SCALING_METHODS.NONE;
53
- // const currentBidirectional = settings.isBidirectional ?? false;
54
- const currentMaxMutations = settings.maxMutations ?? 1;
55
- const currentMinActivityDelta = settings.minActivityDelta ?? 0;
56
- const currentColumns = settings.columns ?? {};
66
+ const currentScaling = settings?.activityScaling ?? C.SCALING_METHODS.NONE;
67
+ const currentColumns = settings?.columns ?? {};
57
68
 
58
- const result: type.PeptidesSettings = {};
69
+ const result: type.PartialPeptidesSettings = {};
59
70
  const inputs: PaneInputs = {};
71
+ const seqSpaceParams = settings?.sequenceSpaceParams ?? new type.SequenceSpaceParams();
60
72
 
61
73
  // General pane options
62
74
  const activityCol = ui.columnInput(GENERAL_INPUTS.ACTIVITY, model.df,
63
- model.df.getCol(model.settings.activityColumnName!), () => result.activityColumnName = activityCol.value!.name,
64
- {filter: (col: DG.Column) => (col.type === DG.TYPE.FLOAT || col.type === DG.TYPE.INT) &&
65
- col.name !== C.COLUMNS_NAMES.ACTIVITY && col.stats.missingValueCount === 0});
75
+ model.df.getCol(model.settings!.activityColumnName!), () => result.activityColumnName = activityCol.value!.name,
76
+ {
77
+ filter: (col: DG.Column) => (col.type === DG.TYPE.FLOAT || col.type === DG.TYPE.INT) &&
78
+ col.name !== C.COLUMNS_NAMES.ACTIVITY && col.stats.missingValueCount === 0,
79
+ });
66
80
  activityCol.setTooltip('Numeric activity column');
67
81
  const activityScaling =
68
82
  ui.choiceInput(GENERAL_INPUTS.ACTIVITY_SCALING, currentScaling, Object.values(C.SCALING_METHODS),
69
- () => result.scaling = activityScaling.value as C.SCALING_METHODS) as DG.InputBase<C.SCALING_METHODS>;
83
+ () => result.activityScaling = activityScaling.value as C.SCALING_METHODS) as DG.InputBase<C.SCALING_METHODS>;
70
84
  activityScaling.setTooltip('Activity column transformation method');
71
85
 
72
86
  accordion.addPane(SETTINGS_PANES.GENERAL, () => ui.inputs([activityCol, activityScaling]), true);
@@ -94,35 +108,15 @@ export function getSettingsDialog(model: PeptidesModel): SettingsElements {
94
108
  accordion.addPane(SETTINGS_PANES.VIEWERS, () => ui.inputs([dendrogram]), true);
95
109
  inputs[SETTINGS_PANES.VIEWERS] = [dendrogram];
96
110
 
97
- // Mutation Cliffs pane options
98
- const maxMutations = ui.sliderInput(MUTATION_CLIFFS_INPUTS.MAX_MUTATIONS, currentMaxMutations, 1, 50, () => {
99
- const val = Math.round(maxMutations.value);
100
- $(maxMutations.root).find('label.ui-input-description').remove();
101
- result.maxMutations = val;
102
- maxMutations.addPostfix(val.toString());
103
- }) as DG.InputBase<number>;
104
- maxMutations.setTooltip('Maximum number of mutations between reference and mutated sequences');
105
- maxMutations.addPostfix((settings.maxMutations ?? 1).toString());
106
- const minActivityDelta = ui.sliderInput(MUTATION_CLIFFS_INPUTS.MIN_ACTIVITY_DELTA, currentMinActivityDelta, 0,
107
- 100, () => {
108
- const val = minActivityDelta.value.toFixed(3);
109
- result.minActivityDelta = parseFloat(val);
110
- $(minActivityDelta.root).find('label.ui-input-description').remove();
111
- minActivityDelta.addPostfix(val);
112
- }) as DG.InputBase<number>;
113
- minActivityDelta.setTooltip('Minimum activity difference between reference and mutated sequences');
114
- minActivityDelta.addPostfix((settings.minActivityDelta ?? 0).toString());
115
- accordion.addPane(SETTINGS_PANES.MUTATION_CLIFFS, () => ui.inputs([maxMutations, minActivityDelta]), true);
116
- inputs[SETTINGS_PANES.MUTATION_CLIFFS] = [maxMutations, minActivityDelta];
117
-
118
111
  // Columns to include pane options
119
112
  const inputsRows: HTMLElement[] = [];
120
113
  const includedColumnsInputs: DG.InputBase[] = [];
121
114
  for (const col of model.df.columns.numerical) {
122
115
  const colName = col.name;
123
- if (colName === settings.activityColumnName || colName === C.COLUMNS_NAMES.ACTIVITY)
116
+ if (colName === settings!.activityColumnName || colName === C.COLUMNS_NAMES.ACTIVITY)
124
117
  continue;
125
118
 
119
+
126
120
  const isIncludedInput = ui.boolInput(COLUMNS_INPUTS.IS_INCLUDED, typeof (currentColumns)[colName] !== 'undefined',
127
121
  () => {
128
122
  result.columns ??= {};
@@ -134,7 +128,8 @@ export function getSettingsDialog(model: PeptidesModel): SettingsElements {
134
128
  delete result.columns;
135
129
  }
136
130
  }) as DG.InputBase<boolean>;
137
- isIncludedInput.setTooltip('Include aggregated column value in tooltips, Logo Summary Table and Distribution panel');
131
+ isIncludedInput.setTooltip('Include aggregated column value in tooltips, Logo Summary Table and ' +
132
+ 'Distribution panel');
138
133
 
139
134
  const aggregationInput = ui.choiceInput(COLUMNS_INPUTS.AGGREGATION, (currentColumns)[colName] ?? DG.AGG.AVG,
140
135
  Object.values(DG.STATS), () => {
@@ -159,6 +154,61 @@ export function getSettingsDialog(model: PeptidesModel): SettingsElements {
159
154
  inputs[SETTINGS_PANES.COLUMNS] = includedColumnsInputs;
160
155
  }
161
156
 
157
+ // Sequence space pane options
158
+ const modifiedSeqSpaceParams: Partial<type.SequenceSpaceParams> = {};
159
+ function onSeqSpaceParamsChange(fieldName: keyof type.SequenceSpaceParams, value: any) {
160
+ correctSeqSpaceInputs();
161
+ if (value === null || value === undefined || value === '')
162
+ return;
163
+ modifiedSeqSpaceParams[fieldName] = value;
164
+ let isAllSame = true;
165
+ for (const [key, val] of Object.entries(modifiedSeqSpaceParams)) {
166
+ if (val !== seqSpaceParams[key as keyof type.SequenceSpaceParams]) {
167
+ isAllSame = false;
168
+ break;
169
+ }
170
+ }
171
+ if (isAllSame)
172
+ delete result.sequenceSpaceParams;
173
+ else
174
+ result.sequenceSpaceParams = {...seqSpaceParams, ...modifiedSeqSpaceParams};
175
+ }
176
+
177
+ function toggleInputs(nwInputs: DG.InputBase[], condition: boolean) {
178
+ nwInputs.forEach((input) => {
179
+ if (condition)
180
+ input.root.style.display = 'flex';
181
+ else
182
+ input.root.style.display = 'none';
183
+ });
184
+ }
185
+
186
+ const distanceFunctionInput = ui.choiceInput(SEQUENCE_SPACE_INPUTS.DISTANCE_FUNCTION, seqSpaceParams.distanceF,
187
+ [distFNames.NEEDLEMANN_WUNSCH, distFNames.HAMMING, distFNames.LEVENSHTEIN, distFNames.MONOMER_CHEMICAL_DISTANCE],
188
+ () => onSeqSpaceParamsChange('distanceF', distanceFunctionInput.value));
189
+ distanceFunctionInput.setTooltip('Distance function');
190
+ const gapOpenInput = ui.floatInput(SEQUENCE_SPACE_INPUTS.GAP_OPEN, seqSpaceParams.gapOpen, () => onSeqSpaceParamsChange('gapOpen', gapOpenInput.value));
191
+ const gapExtendInput = ui.floatInput(SEQUENCE_SPACE_INPUTS.GAP_EXTEND, seqSpaceParams.gapExtend, () => onSeqSpaceParamsChange('gapExtend', gapExtendInput.value));
192
+ const clusterEmbeddingsInput = ui.boolInput(SEQUENCE_SPACE_INPUTS.CLUSTER_EMBEDDINGS, seqSpaceParams.clusterEmbeddings ?? false,
193
+ () => onSeqSpaceParamsChange('clusterEmbeddings', clusterEmbeddingsInput.value));
194
+ clusterEmbeddingsInput.setTooltip('Cluster embeddings using DBSCAN algorithm');
195
+ const epsilonInput = ui.floatInput(SEQUENCE_SPACE_INPUTS.EPSILON, seqSpaceParams.epsilon, () => onSeqSpaceParamsChange('epsilon', epsilonInput.value));
196
+ epsilonInput.setTooltip('Epsilon parameter for DBSCAN. Minimum distance between two points to be considered as a cluster');
197
+ const minPtsInput = ui.intInput(SEQUENCE_SPACE_INPUTS.MIN_PTS, seqSpaceParams.minPts, () => onSeqSpaceParamsChange('minPts', minPtsInput.value));
198
+ minPtsInput.setTooltip('Minimum number of points in a cluster');
199
+ const fingerprintTypesInput = ui.choiceInput('Fingerprint type', seqSpaceParams.fingerprintType, ['Morgan', 'RDKit', 'Pattern'],
200
+ () => onSeqSpaceParamsChange('fingerprintType', fingerprintTypesInput.value));
201
+ function correctSeqSpaceInputs() {
202
+ toggleInputs([gapOpenInput, gapExtendInput], distanceFunctionInput.value === distFNames.NEEDLEMANN_WUNSCH);
203
+ toggleInputs([epsilonInput, minPtsInput], clusterEmbeddingsInput.value === true);
204
+ toggleInputs([fingerprintTypesInput],
205
+ distanceFunctionInput.value === distFNames.MONOMER_CHEMICAL_DISTANCE || distanceFunctionInput.value === distFNames.NEEDLEMANN_WUNSCH);
206
+ }
207
+ correctSeqSpaceInputs();
208
+
209
+ const seqSpaceInputs = [distanceFunctionInput, fingerprintTypesInput, gapOpenInput, gapExtendInput, clusterEmbeddingsInput, epsilonInput, minPtsInput];
210
+ accordion.addPane(SETTINGS_PANES.SEQUENCE_SPACE, () => ui.inputs(seqSpaceInputs), true);
211
+ inputs[SETTINGS_PANES.SEQUENCE_SPACE] = seqSpaceInputs;
162
212
  const dialog = ui.dialog('Peptides settings').add(accordion);
163
213
  dialog.root.style.width = '400px';
164
214
  dialog.onOK(() => model.settings = result);
@@ -3,12 +3,11 @@ import {KnownMetrics} from '@datagrok-libraries/ml/src/typed-metrics';
3
3
 
4
4
  /**
5
5
  * Worker thread receiving data function.
6
- *
7
- * @param {any[]} columnData Samples to process.
8
- * @param {string} method Embedding method.
9
- * @param {string} measure Distance metric.
10
- * @param {any} options Options to pass to algorithm.
11
- * @return {any} Embedding (and distance matrix where applicable).
6
+ * @param columnData Samples to process.
7
+ * @param method Embedding method.
8
+ * @param measure Distance metric.
9
+ * @param options Options to pass to algorithm.
10
+ * @return Embedding (and distance matrix where applicable).
12
11
  */
13
12
  function onMessage(columnData: any[], method: KnownMethods, measure: KnownMetrics, options: any): any {
14
13
  const reducer = new DimensionalityReducer(columnData, method, measure, options);
@@ -1,14 +1,9 @@
1
-
2
1
  onmessage = async (event): Promise<void> => {
3
- const {startIdx, endIdx, activityArray, monomerInfoArray, settings, currentTargetIdx, targetOptions} = event.data;
4
- // const monomers1: string[] = [];
5
- // const monomers2: string[] = [];
2
+ const {startIdx, endIdx, activityArray, monomerInfoArray, settings, currentTargetIdx} = event.data;
6
3
  const pos: string[] = [];
7
4
  const seq1Idxs: number[] = [];
8
5
  const seq2Idxs: number[] = [];
9
6
  const chunkSize = endIdx - startIdx;
10
- //const mi = startRow;
11
- //const mj = startCol;
12
7
  let cnt = 0;
13
8
  const startRow = activityArray.length - 2 - Math.floor(
14
9
  Math.sqrt(-8 * startIdx + 4 * activityArray.length * (activityArray.length - 1) - 7) / 2 - 0.5);
@@ -17,8 +12,8 @@ onmessage = async (event): Promise<void> => {
17
12
  let seq2Idx = startCol;
18
13
  const tempData = new Array(monomerInfoArray.length);
19
14
  while (cnt < chunkSize) {
20
- if (!(currentTargetIdx !== -1 && (targetOptions.targetCol?.rawData[seq1Idx] !== currentTargetIdx ||
21
- targetOptions.targetCol?.rawData[seq2Idx] !== currentTargetIdx))) {
15
+ if (!(currentTargetIdx !== -1 && (settings.targetCol?.rawData[seq1Idx] !== currentTargetIdx ||
16
+ settings.targetCol?.rawData[seq2Idx] !== currentTargetIdx))) {
22
17
  let substCounter = 0;
23
18
  const activityValSeq1 = activityArray[seq1Idx];
24
19
  const activityValSeq2 = activityArray[seq2Idx];
@@ -39,8 +34,6 @@ onmessage = async (event): Promise<void> => {
39
34
 
40
35
  tempData[tempDataIdx++] = {
41
36
  pos: monomerInfo.name,
42
- // seq1monomer: monomerInfo.cat![seq1categoryIdx],
43
- // seq2monomer: monomerInfo.cat![seq2categoryIdx],
44
37
  seq1Idx: seq1Idx,
45
38
  seq2Idx: seq2Idx,
46
39
  };
@@ -49,10 +42,6 @@ onmessage = async (event): Promise<void> => {
49
42
  for (let i = 0; i < tempDataIdx; i++) {
50
43
  const tempDataElement = tempData[i];
51
44
  const position = tempDataElement.pos;
52
- // const seq1monomer = tempDataElement.seq1monomer;
53
- // const seq2monomer = tempDataElement.seq2monomer;
54
- // monomers1.push(seq1monomer);
55
- // monomers2.push(seq2monomer);
56
45
  pos.push(position);
57
46
  seq1Idxs.push(seq1Idx);
58
47
  seq2Idxs.push(seq2Idx);
@@ -68,8 +57,6 @@ onmessage = async (event): Promise<void> => {
68
57
  }
69
58
  }
70
59
  postMessage({
71
- // monomers1: monomers1,
72
- // monomers2: monomers2,
73
60
  pos: pos,
74
61
  seq1Idxs: new Uint32Array(seq1Idxs),
75
62
  seq2Idxs: new Uint32Array(seq2Idxs),