@datagrok/eda 1.4.11 → 1.4.13

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (38)
  1. package/.eslintrc.json +0 -1
  2. package/CHANGELOG.md +15 -0
  3. package/CLAUDE.md +185 -0
  4. package/README.md +8 -0
  5. package/css/pmpo.css +35 -0
  6. package/dist/package-test.js +1 -1
  7. package/dist/package-test.js.map +1 -1
  8. package/dist/package.js +1 -1
  9. package/dist/package.js.map +1 -1
  10. package/eslintrc.json +45 -0
  11. package/files/drugs-props-test.csv +126 -0
  12. package/files/drugs-props-train-scores.csv +664 -0
  13. package/files/drugs-props-train.csv +664 -0
  14. package/package.json +9 -3
  15. package/src/anova/anova-tools.ts +1 -1
  16. package/src/anova/anova-ui.ts +1 -1
  17. package/src/package-api.ts +18 -0
  18. package/src/package-test.ts +4 -1
  19. package/src/package.g.ts +25 -0
  20. package/src/package.ts +55 -15
  21. package/src/pareto-optimization/pareto-computations.ts +6 -0
  22. package/src/pareto-optimization/utils.ts +6 -4
  23. package/src/probabilistic-scoring/data-generator.ts +157 -0
  24. package/src/probabilistic-scoring/nelder-mead.ts +204 -0
  25. package/src/probabilistic-scoring/pmpo-defs.ts +218 -0
  26. package/src/probabilistic-scoring/pmpo-utils.ts +603 -0
  27. package/src/probabilistic-scoring/prob-scoring.ts +991 -0
  28. package/src/probabilistic-scoring/stat-tools.ts +303 -0
  29. package/src/softmax-classifier.ts +1 -1
  30. package/src/tests/anova-tests.ts +1 -1
  31. package/src/tests/classifiers-tests.ts +1 -1
  32. package/src/tests/dim-reduction-tests.ts +1 -1
  33. package/src/tests/linear-methods-tests.ts +1 -1
  34. package/src/tests/mis-vals-imputation-tests.ts +1 -1
  35. package/src/tests/pareto-tests.ts +253 -0
  36. package/src/tests/pmpo-tests.ts +157 -0
  37. package/test-console-output-1.log +175 -209
  38. package/test-record-1.mp4 +0 -0
package/package.json CHANGED
@@ -1,11 +1,12 @@
1
1
  {
2
2
  "name": "@datagrok/eda",
3
3
  "friendlyName": "EDA",
4
- "version": "1.4.11",
4
+ "version": "1.4.13",
5
5
  "description": "Exploratory Data Analysis Tools",
6
6
  "dependencies": {
7
7
  "@datagrok-libraries/math": "^1.2.6",
8
8
  "@datagrok-libraries/ml": "^6.10.8",
9
+ "@datagrok-libraries/statistics": "^1.10.0",
9
10
  "@datagrok-libraries/tutorials": "^1.7.4",
10
11
  "@datagrok-libraries/utils": "^4.6.5",
11
12
  "@keckelt/tsne": "^1.0.2",
@@ -14,10 +15,12 @@
14
15
  "datagrok-api": "^1.26.3",
15
16
  "dayjs": "^1.11.9",
16
17
  "jstat": "^1.9.6",
18
+ "mathjs": "^15.1.0",
17
19
  "source-map-loader": "^4.0.1",
18
20
  "umap-js": "^1.3.3",
19
21
  "worker-loader": "^3.0.8",
20
- "wu": "^2.1.0"
22
+ "wu": "^2.1.0",
23
+ "@datagrok-libraries/test": "^1.1.0"
21
24
  },
22
25
  "author": {
23
26
  "name": "Viktor Makarichev",
@@ -27,7 +30,7 @@
27
30
  "@typescript-eslint/eslint-plugin": "^5.32.0",
28
31
  "@typescript-eslint/parser": "^5.32.0",
29
32
  "css-loader": "^7.1.2",
30
- "datagrok-tools": "^4.14.55",
33
+ "datagrok-tools": "^5.1.5",
31
34
  "eslint": "^8.21.0",
32
35
  "eslint-config-google": "^0.14.0",
33
36
  "style-loader": "^4.0.0",
@@ -95,5 +98,8 @@
95
98
  "Reduce Dimensionality": null
96
99
  }
97
100
  }
101
+ },
102
+ "overrides": {
103
+ "datagrok-api": "$datagrok-api"
98
104
  }
99
105
  }
@@ -1,4 +1,4 @@
1
- // Analysis of Variances (ANOVA): computations
1
+ // Analysis of Variances (ANOVA) - computations
2
2
 
3
3
  /* REFERENCES
4
4
 
@@ -1,4 +1,4 @@
1
- // Analysis of Variances (ANOVA): UI
1
+ // Analysis of Variances (ANOVA) - UI
2
2
 
3
3
  import * as grok from 'datagrok-api/grok';
4
4
  import * as ui from 'datagrok-api/ui';
@@ -274,4 +274,22 @@ export namespace funcs {
274
274
  export async function paretoFrontViewer(): Promise<any> {
275
275
  return await grok.functions.call('EDA:ParetoFrontViewer', {});
276
276
  }
277
+
278
+ /**
279
+ Train probabilistic multi-parameter optimization (pMPO) model
280
+ */
281
+ export async function trainPmpo(): Promise<void> {
282
+ return await grok.functions.call('EDA:TrainPmpo', {});
283
+ }
284
+
285
+ export async function getPmpoAppItems(view: DG.View ): Promise<any> {
286
+ return await grok.functions.call('EDA:GetPmpoAppItems', { view });
287
+ }
288
+
289
+ /**
290
+ Generates synthetic dataset oriented on the pMPO modeling
291
+ */
292
+ export async function generatePmpoDataset(samples: number ): Promise<DG.DataFrame> {
293
+ return await grok.functions.call('EDA:GeneratePmpoDataset', { samples });
294
+ }
277
295
  }
@@ -1,10 +1,13 @@
1
1
  import * as DG from 'datagrok-api/dg';
2
- import {runTests, tests, TestContext, initAutoTests as initTests} from '@datagrok-libraries/utils/src/test';
2
+ import {runTests, tests, TestContext, initAutoTests as initTests} from '@datagrok-libraries/test/src/test';
3
3
  import './tests/dim-reduction-tests';
4
4
  import './tests/linear-methods-tests';
5
5
  import './tests/classifiers-tests';
6
6
  import './tests/mis-vals-imputation-tests';
7
7
  import './tests/anova-tests';
8
+ import './tests/pmpo-tests';
9
+ import './tests/pareto-tests';
10
+
8
11
  export const _package = new DG.Package();
9
12
  export {tests};
10
13
 
package/src/package.g.ts CHANGED
@@ -7,6 +7,7 @@ export function info() : void {
7
7
  }
8
8
 
9
9
  //tags: init
10
+ //meta.role: init
10
11
  export async function init() : Promise<void> {
11
12
  await PackageFunctions.init();
12
13
  }
@@ -43,6 +44,7 @@ export async function PCA(table: DG.DataFrame, features: DG.ColumnList, componen
43
44
  //input: double epsilon = 0.01 { description: Minimum distance between two points to be considered as in the same neighborhood. }
44
45
  //input: int minimumPoints = 5 { description: Minimum number of points to form a dense region. }
45
46
  //meta.defaultPostProcessingFunction: true
47
+ //meta.role: dim-red-postprocessing-function
46
48
  export async function dbscanPostProcessingFunction(col1: DG.Column, col2: DG.Column, epsilon: number, minimumPoints: number) : Promise<void> {
47
49
  await PackageFunctions.dbscanPostProcessingFunction(col1, col2, epsilon, minimumPoints);
48
50
  }
@@ -54,6 +56,7 @@ export async function dbscanPostProcessingFunction(col1: DG.Column, col2: DG.Col
54
56
  //output: object result
55
57
  //meta.supportedTypes: int,float,double,qnum
56
58
  //meta.supportedDistanceFunctions: Difference
59
+ //meta.role: dim-red-preprocessing-function
57
60
  export function numberPreprocessingFunction(col: DG.Column, _metric: string) {
58
61
  return PackageFunctions.numberPreprocessingFunction(col, _metric);
59
62
  }
@@ -65,6 +68,7 @@ export function numberPreprocessingFunction(col: DG.Column, _metric: string) {
65
68
  //output: object result
66
69
  //meta.supportedTypes: string
67
70
  //meta.supportedDistanceFunctions: One-Hot,Levenshtein,Hamming
71
+ //meta.role: dim-red-preprocessing-function
68
72
  export function stringPreprocessingFunction(col: DG.Column, _metric: string) {
69
73
  return PackageFunctions.stringPreprocessingFunction(col, _metric);
70
74
  }
@@ -77,6 +81,7 @@ export async function reduceDimensionality() : Promise<void> {
77
81
 
78
82
  //tags: editor
79
83
  //input: funccall call
84
+ //meta.role: editor
80
85
  export function GetMCLEditor(call: DG.FuncCall) : void {
81
86
  PackageFunctions.GetMCLEditor(call);
82
87
  }
@@ -105,6 +110,7 @@ export async function MCLClustering(df: DG.DataFrame, cols: DG.Column[], metrics
105
110
  //tags: viewer
106
111
  //output: viewer result
107
112
  //meta.showInGallery: false
113
+ //meta.role: viewer
108
114
  export function markovClusteringViewer() : any {
109
115
  return PackageFunctions.markovClusteringViewer();
110
116
  }
@@ -535,6 +541,25 @@ export function paretoFront() : void {
535
541
  //tags: viewer
536
542
  //output: viewer result
537
543
  //meta.icon: icons/pareto-front-viewer.svg
544
+ //meta.role: viewer
538
545
  export function paretoFrontViewer() : any {
539
546
  return PackageFunctions.paretoFrontViewer();
540
547
  }
548
+
549
+ //description: Train probabilistic multi-parameter optimization (pMPO) model
550
+ export function trainPmpo() : void {
551
+ PackageFunctions.trainPmpo();
552
+ }
553
+
554
+ //input: view view
555
+ //output: object result
556
+ export function getPmpoAppItems(view: any) : any {
557
+ return PackageFunctions.getPmpoAppItems(view);
558
+ }
559
+
560
+ //description: Generates syntethetic dataset oriented on the pMPO modeling
561
+ //input: int samples
562
+ //output: dataframe Synthetic
563
+ export async function generatePmpoDataset(samples: number) : Promise<any> {
564
+ return await PackageFunctions.generatePmpoDataset(samples);
565
+ }
package/src/package.ts CHANGED
@@ -36,9 +36,13 @@ import {SoftmaxClassifier} from './softmax-classifier';
36
36
 
37
37
  import {initXgboost} from '../wasm/xgbooster';
38
38
  import {XGBooster} from './xgbooster';
39
+
39
40
  import {ParetoOptimizer} from './pareto-optimization/pareto-optimizer';
40
41
  import {ParetoFrontViewer} from './pareto-optimization/pareto-front-viewer';
41
42
 
43
+ import {Pmpo} from './probabilistic-scoring/prob-scoring';
44
+ import {getSynteticPmpoData} from './probabilistic-scoring/data-generator';
45
+
42
46
  export const _package = new DG.Package();
43
47
  export * from './package.g';
44
48
 
@@ -51,7 +55,7 @@ export class PackageFunctions {
51
55
  }
52
56
 
53
57
 
54
- @grok.decorators.init({})
58
+ @grok.decorators.init({tags: ['init']})
55
59
  static async init(): Promise<void> {
56
60
  await _initEDAAPI();
57
61
  await initXgboost();
@@ -113,13 +117,9 @@ export class PackageFunctions {
113
117
 
114
118
 
115
119
  @grok.decorators.func({
116
- 'meta': {
117
- 'defaultPostProcessingFunction': 'true',
118
- },
119
- 'tags': [
120
- 'dim-red-postprocessing-function',
121
- ],
120
+ 'meta': {'defaultPostProcessingFunction': 'true', 'role': 'dim-red-postprocessing-function'},
122
121
  'name': 'DBSCAN clustering',
122
+ 'tags': ['dim-red-postprocessing-function'],
123
123
  })
124
124
  static async dbscanPostProcessingFunction(
125
125
  col1: DG.Column,
@@ -148,9 +148,10 @@ export class PackageFunctions {
148
148
  'meta': {
149
149
  'supportedTypes': 'int,float,double,qnum',
150
150
  'supportedDistanceFunctions': 'Difference',
151
+ 'role': 'dim-red-preprocessing-function',
151
152
  },
152
- 'tags': ['dim-red-preprocessing-function'],
153
153
  'name': 'None (number)',
154
+ 'tags': ['dim-red-preprocessing-function'],
154
155
  'outputs': [{name: 'result', type: 'object'}],
155
156
  })
156
157
  static numberPreprocessingFunction(
@@ -166,6 +167,7 @@ export class PackageFunctions {
166
167
  'meta': {
167
168
  'supportedTypes': 'string',
168
169
  'supportedDistanceFunctions': 'One-Hot,Levenshtein,Hamming',
170
+ 'role': 'dim-red-preprocessing-function',
169
171
  },
170
172
  'tags': ['dim-red-preprocessing-function'],
171
173
  'name': 'None (string)',
@@ -222,7 +224,7 @@ export class PackageFunctions {
222
224
  }
223
225
 
224
226
 
225
- @grok.decorators.editor()
227
+ @grok.decorators.editor({tags: ['editor']})
226
228
  static GetMCLEditor(
227
229
  call: DG.FuncCall): void {
228
230
  try {
@@ -289,10 +291,8 @@ export class PackageFunctions {
289
291
 
290
292
  @grok.decorators.func({
291
293
  'outputs': [{'name': 'result', 'type': 'viewer'}],
292
- 'tags': [
293
- 'viewer',
294
- ],
295
- 'meta': {showInGallery: 'false'},
294
+ 'meta': {showInGallery: 'false', role: 'viewer'},
295
+ 'tags': ['viewer'],
296
296
  'name': 'MCL',
297
297
  'description': 'Markov clustering viewer',
298
298
  })
@@ -984,11 +984,51 @@ export class PackageFunctions {
984
984
  @grok.decorators.func({
985
985
  'name': 'Pareto front',
986
986
  'description': 'Pareto front viewer',
987
- 'tags': ['viewer'],
988
987
  'outputs': [{'name': 'result', 'type': 'viewer'}],
989
- 'meta': {'icon': 'icons/pareto-front-viewer.svg'},
988
+ 'meta': {'icon': 'icons/pareto-front-viewer.svg', 'role': 'viewer'},
989
+ 'tags': ['viewer'],
990
990
  })
991
991
  static paretoFrontViewer(): DG.Viewer {
992
992
  return new ParetoFrontViewer();
993
993
  }
994
+
995
+ @grok.decorators.func({
996
+ 'name': 'trainPmpo',
997
+ 'description': 'Train probabilistic multi-parameter optimization (pMPO) model',
998
+ })
999
+ static trainPmpo(): void {
1000
+ const df = grok.shell.t;
1001
+ if (df === null) {
1002
+ grok.shell.warning('No dataframe is opened');
1003
+ return;
1004
+ }
1005
+
1006
+ if (!Pmpo.isTableValid(df))
1007
+ return;
1008
+
1009
+ const pMPO = new Pmpo(df);
1010
+ pMPO.runTrainingApp();
1011
+ }
1012
+
1013
+ @grok.decorators.func({'name': 'getPmpoAppItems', 'outputs': [{name: 'result', type: 'object'}]})
1014
+ static getPmpoAppItems(@grok.decorators.param({type: 'view'}) view: DG.TableView): any | null {
1015
+ const df = view.dataFrame;
1016
+ if (!Pmpo.isTableValid(df))
1017
+ return null;
1018
+
1019
+ const pMPO = new Pmpo(df, view);
1020
+
1021
+ return pMPO.getPmpoAppItems();
1022
+ }
1023
+
1024
+ @grok.decorators.func({
1025
+ 'name': 'generatePmpoDataset',
1026
+ 'description': 'Generates syntethetic dataset oriented on the pMPO modeling',
1027
+ 'outputs': [{name: 'Synthetic', type: 'dataframe'}],
1028
+ })
1029
+ static async generatePmpoDataset(@grok.decorators.param({'type': 'int'}) samples: number): Promise<DG.DataFrame> {
1030
+ const df = await getSynteticPmpoData(samples);
1031
+ df.name = 'Synthetic';
1032
+ return df;
1033
+ }
994
1034
  }
@@ -2,6 +2,12 @@
2
2
 
3
3
  import {NumericArray, OPT_TYPE} from './defs';
4
4
 
5
+ /** Computes the Pareto front mask for a given dataset and optimization sense
6
+ * @param rawData Array of numeric arrays representing the dataset (each array corresponds to a feature/dimension)
7
+ * @param sense Array of optimization types (OPT_TYPE.MIN or OPT_TYPE.MAX) for each dimension
8
+ * @param nPoints Number of data points in the dataset
9
+ * @param nullIndices Optional set of indices corresponding to missing values (these points will be marked as non-optimal)
10
+ * @returns Boolean array where true indicates that the point is on the Pareto front */
5
11
  export function getParetoMask(rawData: NumericArray[], sense: OPT_TYPE[], nPoints: number,
6
12
  nullIndices?: Set<number>): boolean[] {
7
13
  if (nPoints === 0)
@@ -6,6 +6,7 @@ import {OPT_TYPE} from './defs';
6
6
 
7
7
  export const PALETTE = [DG.Color.darkGreen, DG.Color.yellow, DG.Color.darkRed];
8
8
 
9
+ /** Return output color palette w.r.t. the specified type of optimization */
9
10
  export function getOutputPalette(type: OPT_TYPE): number[] {
10
11
  if (type === OPT_TYPE.MIN)
11
12
  return [...PALETTE];
@@ -13,13 +14,14 @@ export function getOutputPalette(type: OPT_TYPE): number[] {
13
14
  return [...PALETTE].reverse();
14
15
  }
15
16
 
16
- export function getColorScaleDiv(type: OPT_TYPE): HTMLElement {
17
+ /** Return div with color scale description */
18
+ export function getColorScaleDiv(type: OPT_TYPE, useMinMax: boolean = true): HTMLElement {
17
19
  const scale = ui.label('Color scale:');
18
20
  scale.style.paddingRight = '7px';
19
21
  const elems = [scale];
20
- const minLbl = ui.label('min');
22
+ const minLbl = ui.label(useMinMax ? 'min' : 'worst');
21
23
  const midLbl = ui.label('. . .');
22
- const maxLbl = ui.label('max');
24
+ const maxLbl = ui.label(useMinMax ? 'max' : 'best');
23
25
  const palette = getOutputPalette(type);
24
26
 
25
27
  const colorElems = [minLbl, midLbl, maxLbl].map((el, idx) => {
@@ -36,4 +38,4 @@ export function getColorScaleDiv(type: OPT_TYPE): HTMLElement {
36
38
  elems.push(...colorElems);
37
39
 
38
40
  return ui.divH(elems);
39
- }
41
+ } // getColorScaleDiv
@@ -0,0 +1,157 @@
1
+ import * as grok from 'datagrok-api/grok';
2
+ import * as ui from 'datagrok-api/ui';
3
+ import * as DG from 'datagrok-api/dg';
4
+
5
+ import {DescriptorStatistics, SOURCE_PATH, SYNTHETIC_DRUG_NAME} from './pmpo-defs';
6
+ import {getDescriptorStatistics, getDesiredTables} from './stat-tools';
7
+
8
+ //@ts-ignore: no types
9
+ import * as jStat from 'jstat';
10
+
11
+ /** Generates synthetic data for pMPO model training and testing
12
+ * @param samplesCount Number of samples to generate
13
+ * @returns DataFrame with generated data */
14
+ export async function getSynteticPmpoData(samplesCount: number): Promise<DG.DataFrame> {
15
+ const df = await grok.dapi.files.readCsv(SOURCE_PATH);
16
+ const generator = new PmpoDataGenerator(df, 'Drug', 'CNS', 'Smiles');
17
+
18
+ return generator.getGenerated(samplesCount);
19
+ }
20
+
21
+ /** Class for generating synthetic data for pMPO model training and testing */
22
+ export class PmpoDataGenerator {
23
+ private sourceDf: DG.DataFrame;
24
+ private drugName: string;
25
+ private desirabilityColName: string;
26
+ private smilesColName: string;
27
+ private desiredProbability: number;
28
+ private descriptorStats: Map<string, DescriptorStatistics>;
29
+
30
+ constructor(df: DG.DataFrame, drugName: string, desirabilityColName: string, smilesColName: string) {
31
+ this.sourceDf = df;
32
+ this.drugName = drugName;
33
+ this.desirabilityColName = desirabilityColName;
34
+ this.smilesColName = smilesColName;
35
+
36
+ const descriptorNames = df.columns.toList().filter((col) => col.isNumerical).map((col) => col.name);
37
+ const {desired, nonDesired} = getDesiredTables(df, df.col(desirabilityColName)!);
38
+
39
+ // Compute descriptors' statistics
40
+ this.descriptorStats = new Map<string, DescriptorStatistics>();
41
+ descriptorNames.forEach((name) => {
42
+ this.descriptorStats.set(name, getDescriptorStatistics(desired.col(name)!, nonDesired.col(name)!));
43
+ });
44
+
45
+ // Probability of desired class
46
+ this.desiredProbability = desired.rowCount / df.rowCount;
47
+ } // constructor
48
+
49
+ /** Generates synthetic data for pMPO model training and testing
50
+ * @param samplesCount Number of samples to generate
51
+ * @returns DataFrame with generated data */
52
+ public getGenerated(samplesCount: number): DG.DataFrame {
53
+ if (samplesCount <= 1)
54
+ throw new Error('Failed to generate pMPO data: sample count must be greater than 1.');
55
+
56
+ let result: DG.DataFrame;
57
+
58
+ /* Use rows from the source dataframe if the requested sample count
59
+ is less than or equal to the source dataframe row count */
60
+ if (samplesCount <= this.sourceDf.rowCount) {
61
+ const rowMask = DG.BitSet.create(this.sourceDf.rowCount);
62
+
63
+ for (let i = 0; i < samplesCount; ++i)
64
+ rowMask.set(i, true);
65
+
66
+ result = this.sourceDf.clone(rowMask);
67
+ } else {
68
+ const cloneDf = this.getClonedSourceDfWithFloatNumericCols();
69
+ result = cloneDf.append(this.getSyntheticTable(samplesCount - this.sourceDf.rowCount));
70
+ }
71
+
72
+ // Check boolean columns and ensure non-zero stdev
73
+ for (const col of result.columns) {
74
+ if (col.type === DG.COLUMN_TYPE.BOOL && col.stats.stdev === 0) {
75
+ // All values are the same, flip the first two values
76
+ let value = col.get(0);
77
+ col.set(0, !value);
78
+
79
+ value = col.get(1);
80
+ col.set(1, !value);
81
+ }
82
+ }
83
+
84
+ return result;
85
+ } // getGenerated
86
+
87
+ /** Generates a synthetic data table
88
+ * @param samplesCount Number of samples to generate
89
+ * @returns DataFrame with synthetic data */
90
+ private getSyntheticTable(samplesCount: number): DG.DataFrame {
91
+ const desirabilityRaw = new Array<boolean>(samplesCount);
92
+
93
+ for (let i = 0; i < samplesCount; ++i)
94
+ desirabilityRaw[i] = (Math.random() < this.desiredProbability);
95
+
96
+
97
+ const cols = [
98
+ this.getDrugColumn(samplesCount),
99
+ this.getSmilesColumn(samplesCount),
100
+ DG.Column.fromList(DG.COLUMN_TYPE.BOOL, this.desirabilityColName, desirabilityRaw),
101
+ ];
102
+
103
+ this.descriptorStats.forEach((stat, name) => {
104
+ const arr = new Float32Array(samplesCount);
105
+
106
+ for (let i = 0; i < samplesCount; ++i) {
107
+ if (desirabilityRaw[i])
108
+ arr[i] = jStat.normal.sample(stat.desAvg, stat.desStd);
109
+ else
110
+ arr[i] = jStat.normal.sample(stat.nonDesAvg, stat.nonDesStd);
111
+ }
112
+
113
+ // @ts-ignore
114
+ cols.push(DG.Column.fromFloat32Array(name, arr));
115
+ });
116
+
117
+ return DG.DataFrame.fromColumns(cols);
118
+ } // getSyntheticTable
119
+
120
+ /** Generates a column with synthetic drug names
121
+ * @param samplesCount Number of samples to generate
122
+ * @returns Column with synthetic drug names */
123
+ private getDrugColumn(samplesCount: number): DG.Column<string> {
124
+ return DG.Column.fromList(
125
+ DG.COLUMN_TYPE.STRING,
126
+ this.drugName,
127
+ Array.from({length: samplesCount}, (_, i) => `${SYNTHETIC_DRUG_NAME} ${i + 1}`));
128
+ }
129
+
130
+ /** Generates a column with synthetic SMILES strings
131
+ * @param samplesCount Number of samples to generate
132
+ * @returns Column with synthetic SMILES strings */
133
+ private getSmilesColumn(samplesCount: number): DG.Column<string> {
134
+ return DG.Column.fromList(
135
+ DG.COLUMN_TYPE.STRING,
136
+ this.smilesColName,
137
+ Array.from({length: samplesCount}, () => 'C'));
138
+ }
139
+
140
+ /** Clones the source dataframe converting numerical columns to Float type
141
+ * @returns Cloned dataframe */
142
+ private getClonedSourceDfWithFloatNumericCols(): DG.DataFrame {
143
+ const cols: DG.Column[] = [];
144
+
145
+ this.sourceDf.columns.toList().forEach((col) => {
146
+ if (col.isNumerical)
147
+ cols.push(col.clone().convertTo(DG.COLUMN_TYPE.FLOAT));
148
+ else
149
+ cols.push(col.clone());
150
+ });
151
+
152
+ const clone = DG.DataFrame.fromColumns(cols);
153
+ clone.name = this.sourceDf.name;
154
+
155
+ return clone;
156
+ }
157
+ } // PmpoDataGenerator