@datagrok/eda 1.1.9 → 1.1.11
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.eslintignore +1 -0
- package/.eslintrc.json +45 -0
- package/CHANGELOG.md +21 -13
- package/README.md +2 -0
- package/dist/100.js +2 -2
- package/dist/42.js +2 -0
- package/dist/729.js +1 -1
- package/dist/935.js +3 -0
- package/dist/package-test.js +2 -2
- package/dist/package.js +2 -2
- package/package.json +7 -3
- package/src/data-generators.ts +13 -13
- package/src/eda-tools.ts +42 -42
- package/src/eda-ui.ts +65 -58
- package/src/missing-values-imputation/knn-imputer.ts +468 -0
- package/src/missing-values-imputation/ui-constants.ts +64 -0
- package/src/missing-values-imputation/ui.ts +246 -0
- package/src/package-test.ts +2 -2
- package/src/package.ts +61 -60
- package/src/stat-tools.ts +72 -61
- package/src/svm.ts +144 -151
- package/src/utils.ts +13 -17
- package/src/workers/tsne-worker.ts +6 -6
- package/src/workers/umap-worker.ts +3 -3
- package/dist/943.js +0 -3
- /package/dist/{943.js.LICENSE.txt → 935.js.LICENSE.txt} +0 -0
|
@@ -0,0 +1,246 @@
|
|
|
1
|
+
import * as grok from 'datagrok-api/grok';
|
|
2
|
+
import * as ui from 'datagrok-api/ui';
|
|
3
|
+
import * as DG from 'datagrok-api/dg';
|
|
4
|
+
|
|
5
|
+
import { TITLE, KNN_IMPUTER, ERROR_MSG, HINT } from './ui-constants';
|
|
6
|
+
import { SUPPORTED_COLUMN_TYPES, METRIC_TYPE, DISTANCE_TYPE, MetricInfo, DEFAULT, MIN_NEIGHBORS,
|
|
7
|
+
impute, getMissingValsIndices, areThereFails, imputeFailed } from "./knn-imputer";
|
|
8
|
+
|
|
9
|
+
/**
 * Initial configuration of the metric inputs shown for a single feature column.
 * Produced by `getFeatureInputSettings` and used to pre-fill the per-feature UI.
 */
type FeatureInputSettings = {
  /** Weight assigned to the feature before the user changes it */
  defaultWeight: number;
  /** Metric selected for the feature before the user changes it */
  defaultMetric: METRIC_TYPE;
  /** Metrics offered for the feature's column type */
  availableMetrics: Array<METRIC_TYPE>;
};
|
|
15
|
+
|
|
16
|
+
/**
 * Provide the default metric-input settings for a feature column of the given type.
 *
 * String and date-time columns get the one-hot metric only; int, float and qnum columns
 * default to the difference metric and may also use one-hot.
 *
 * @param type type of the feature column
 * @returns default weight, default metric and the list of selectable metrics
 * @throws Error with `ERROR_MSG.UNSUPPORTED_COLUMN_TYPE` for any other column type
 */
function getFeatureInputSettings(type: DG.COLUMN_TYPE): FeatureInputSettings {
  const isCategorical = type === DG.COLUMN_TYPE.STRING || type === DG.COLUMN_TYPE.DATE_TIME;

  if (isCategorical) {
    return {
      defaultWeight: DEFAULT.WEIGHT,
      defaultMetric: METRIC_TYPE.ONE_HOT,
      availableMetrics: [METRIC_TYPE.ONE_HOT],
    };
  }

  const isNumerical = type === DG.COLUMN_TYPE.INT ||
    type === DG.COLUMN_TYPE.FLOAT ||
    type === DG.COLUMN_TYPE.QNUM;

  if (isNumerical) {
    return {
      defaultWeight: DEFAULT.WEIGHT,
      defaultMetric: METRIC_TYPE.DIFFERENCE,
      availableMetrics: [METRIC_TYPE.DIFFERENCE, METRIC_TYPE.ONE_HOT],
    };
  }

  throw new Error(ERROR_MSG.UNSUPPORTED_COLUMN_TYPE);
}
|
|
40
|
+
|
|
41
|
+
/**
 * Run the KNN missing values imputer.
 *
 * Takes the table currently open in the shell (`grok.shell.t`), collects its columns of supported
 * types and shows a dialog where the user picks target columns (columns with missing values),
 * feature columns, per-feature metric and weight, the distance type, the number of neighbors and
 * the "in-place" / "keep empty" flags. Pressing the RUN button calls `impute`; unless "keep empty"
 * is set, the items reported by `impute` as failed are passed to `imputeFailed`.
 *
 * The dialog is not shown (a shell message is displayed instead) when there is no current table,
 * when no supported column has missing values, or when only one supported column exists.
 */
export function runKNNImputer(): void {
  /** current dataframe */
  const df: DG.DataFrame | null = grok.shell.t;

  // `== null` also covers `undefined`; `df` is a narrowed const, so closures below need no `!`
  if (df == null) {
    grok.shell.warning(ERROR_MSG.NO_DATAFRAME);
    return;
  }

  /** columns with missing values */
  const colsWithMissingVals: DG.Column[] = [];

  /** names of columns with missing values */
  const availableTargetColsNames: string[] = [];

  /** names of columns that can be used as features */
  const availableFeatureColsNames: string[] = [];

  // get columns with missing vals & available feature cols
  df.columns.toList()
    .filter((col) => SUPPORTED_COLUMN_TYPES.includes(col.type))
    .forEach((col) => {
      availableFeatureColsNames.push(col.name);

      if (col.stats.missingValueCount > 0) {
        colsWithMissingVals.push(col);
        availableTargetColsNames.push(col.name);
      }
    });

  if (colsWithMissingVals.length === 0) {
    grok.shell.info(ERROR_MSG.NO_MISSING_VALUES);
    return;
  }

  if (availableFeatureColsNames.length === 1) {
    grok.shell.error(ERROR_MSG.ONE_AVAILABLE_FEATURE);
    return;
  }

  // get indices of missing values: col name -> array of indices
  // (computed only after the early exits above, i.e. when the dialog is actually going to be shown)
  const misValsInds = getMissingValsIndices(colsWithMissingVals);

  // In-place components
  let inPlace = DEFAULT.IN_PLACE > 0;
  const inPlaceInput = ui.boolInput(TITLE.IN_PLACE, inPlace, () => {
    inPlace = inPlaceInput.value ?? false;
  });
  inPlaceInput.setTooltip(HINT.IN_PLACE);

  // Keep empty feature
  let keepEmpty = DEFAULT.KEEP_EMPTY > 0;
  const keepEmptyInput = ui.boolInput(TITLE.KEEP_EMPTY, keepEmpty, () => {
    keepEmpty = keepEmptyInput.value ?? false;
  });
  keepEmptyInput.setTooltip(HINT.KEEP_EMPTY);

  // Neighbors components
  let neighbors = DEFAULT.NEIGHBORS;
  const neighborsInput = ui.intInput(TITLE.NEIGHBORS, neighbors, () => {
    const val = neighborsInput.value;

    // accept only valid values; otherwise restore the last accepted one in the input
    if (val !== null && val >= MIN_NEIGHBORS)
      neighbors = val;
    else
      neighborsInput.value = neighbors;
  });
  neighborsInput.setTooltip(HINT.NEIGHBORS);

  // Distance components
  let distType = DISTANCE_TYPE.EUCLIDEAN;
  const distTypeInput = ui.choiceInput(TITLE.DISTANCE, distType, [DISTANCE_TYPE.EUCLIDEAN, DISTANCE_TYPE.MANHATTAN],
    () => distType = distTypeInput.value ?? DISTANCE_TYPE.EUCLIDEAN);
  distTypeInput.setTooltip(HINT.DISTANCE);

  // Target columns components (cols with missing values to be imputed)
  let targetColNames = colsWithMissingVals.map((col) => col.name);
  const targetColInput = ui.columnsInput(TITLE.COLUMNS, df, () => {
    targetColNames = targetColInput.value.map((col) => col.name);
    checkApplicability();
  }, {available: availableTargetColsNames, checked: availableTargetColsNames});
  targetColInput.setTooltip(HINT.TARGET);

  // Feature columns components
  let selectedFeatureColNames = availableFeatureColsNames;
  const featuresInput = ui.columnsInput(TITLE.FEATURES, df, () => {
    selectedFeatureColNames = featuresInput.value.map((col) => col.name);

    // hides everything when no feature is selected, shows the widgets otherwise
    checkApplicability();

    if (selectedFeatureColNames.length > 0) {
      metricInfoInputs.forEach((div, name) => {
        div.hidden = !selectedFeatureColNames.includes(name);
      });
    }
  }, {available: availableFeatureColsNames, checked: availableFeatureColsNames});
  featuresInput.setTooltip(HINT.FEATURES);

  // NOTE: the helpers below refer to `dlg`, `distDiv`, `metricsDiv` and `metricInfoInputs`, which are
  // declared further down; they are invoked only from input callbacks, i.e. after those exist.

  /** Hide widgets (use if run is not applicable) */
  const hideWidgets = () => {
    dlg.getButton(TITLE.RUN).disabled = true;
    inPlaceInput.root.hidden = true;
    keepEmptyInput.root.hidden = true;
    neighborsInput.root.hidden = true;
    distDiv.hidden = true;
    metricsDiv.hidden = true;
  };

  /** Show widgets (use if run is applicable) */
  const showWidgets = () => {
    dlg.getButton(TITLE.RUN).disabled = false;
    distDiv.hidden = false;
    inPlaceInput.root.hidden = false;
    neighborsInput.root.hidden = false;
    distTypeInput.root.hidden = false;
    keepEmptyInput.root.hidden = !areThereFails(targetColNames, selectedFeatureColNames, misValsInds);
  };

  /** Check applicability of the imputation and show/hide the widgets accordingly */
  const checkApplicability = () => {
    // Without features nothing can be imputed. This guard matters when the callback of the
    // target columns input fires while the features selection is empty: RUN must stay disabled.
    if (selectedFeatureColNames.length === 0) {
      hideWidgets();
      return;
    }

    showWidgets();

    // a single feature cannot be used to impute itself
    if (selectedFeatureColNames.length === 1) {
      const name = selectedFeatureColNames[0];

      if (targetColNames.includes(name)) {
        hideWidgets();
        grok.shell.warning(`${ERROR_MSG.ONE_FEATURE_SELECTED} the column '${name}'`);
      }
    }
  };

  // Metrics components
  const featuresMetrics = new Map<string, MetricInfo>();
  const metricInfoInputs = new Map<string, HTMLDivElement>();
  const metricsDiv = ui.divV([]);
  metricsDiv.style.overflow = 'auto';

  // Create metrics UI
  availableFeatureColsNames.forEach((name) => {
    // initialization
    const type = df.col(name)!.type as DG.COLUMN_TYPE;
    const settings = getFeatureInputSettings(type);
    featuresMetrics.set(name, {weight: settings.defaultWeight, type: settings.defaultMetric});

    // metric input of the feature (named differently from the outer distance input to avoid shadowing)
    const metricInput = ui.choiceInput(name, settings.defaultMetric, settings.availableMetrics, () => {
      const distInfo = featuresMetrics.get(name) ?? {weight: settings.defaultWeight, type: settings.defaultMetric};
      distInfo.type = metricInput.value ?? settings.defaultMetric;
      featuresMetrics.set(name, distInfo);
    });
    metricInput.root.style.width = '50%';
    metricInput.setTooltip(HINT.METRIC);
    metricInput.root.hidden = true; // this input will be used further

    // The following should provide a slider (see the bug https://reddata.atlassian.net/browse/GROK-14431)
    // @ts-ignore
    const prop = DG.Property.fromOptions({'name': name, 'inputType': 'Float', 'min': 0, 'max': 10, 'showSlider': true, 'step': 1});
    const weightInput = ui.input.forProperty(prop);
    weightInput.value = settings.defaultWeight;
    weightInput.onChanged(() => {
      const distInfo = featuresMetrics.get(name) ?? {weight: settings.defaultWeight, type: settings.defaultMetric};
      distInfo.weight = weightInput.value ?? settings.defaultWeight;
      featuresMetrics.set(name, distInfo);
    });
    weightInput.setTooltip(HINT.WEIGHT);

    const div = ui.divH([metricInput.root, weightInput.root]);
    metricInfoInputs.set(name, div);
    metricsDiv.append(div);
  });

  // The main dialog
  const dlg = ui.dialog({title: TITLE.KNN_IMPUTER, helpUrl: KNN_IMPUTER});
  grok.shell.v.root.appendChild(dlg.root);

  metricsDiv.hidden = true;
  keepEmptyInput.root.hidden = !areThereFails(targetColNames, selectedFeatureColNames, misValsInds);

  // Icon showing/hiding metrics UI
  const settingsIcon = ui.icons.settings(() => {
    metricsDiv.hidden = !metricsDiv.hidden;
  }, HINT.METRIC_SETTINGS);

  const distDiv = ui.divH([distTypeInput.root, settingsIcon]);

  dlg.addButton(TITLE.RUN, () => {
    dlg.close();

    // only the selected features take part in the imputation
    availableFeatureColsNames
      .filter((name) => !selectedFeatureColNames.includes(name))
      .forEach((name) => featuresMetrics.delete(name));

    try {
      const failedToImpute = impute(df, targetColNames, featuresMetrics, misValsInds, distType, neighbors, inPlace);

      if (!keepEmpty)
        imputeFailed(df, failedToImpute);
    } catch (err) {
      if (err instanceof Error)
        grok.shell.error(`${ERROR_MSG.KNN_FAILS}: ${err.message}`);
      else
        grok.shell.error(`${ERROR_MSG.KNN_FAILS}: ${ERROR_MSG.CORE_ISSUE}`);
    }
  })
    .add(targetColInput)
    .add(featuresInput)
    .add(distDiv)
    .add(metricsDiv)
    .add(neighborsInput)
    .add(inPlaceInput)
    .add(keepEmptyInput)
    .show();
} // runKNNImputer
|
package/src/package-test.ts
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
|
-
import * as DG from
|
|
1
|
+
import * as DG from 'datagrok-api/dg';
|
|
2
2
|
import {runTests, tests, TestContext} from '@datagrok-libraries/utils/src/test';
|
|
3
3
|
|
|
4
|
-
export
|
|
4
|
+
export const _package = new DG.Package();
|
|
5
5
|
export {tests};
|
|
6
6
|
|
|
7
7
|
//name: test
|
package/src/package.ts
CHANGED
|
@@ -1,3 +1,5 @@
|
|
|
1
|
+
/* eslint-disable camelcase */
|
|
2
|
+
/* eslint-disable max-len */
|
|
1
3
|
/* Do not change these import lines to match external modules in webpack configuration */
|
|
2
4
|
import * as grok from 'datagrok-api/grok';
|
|
3
5
|
import * as ui from 'datagrok-api/ui';
|
|
@@ -7,22 +9,24 @@ import {DemoScript} from '@datagrok-libraries/tutorials/src/demo-script';
|
|
|
7
9
|
|
|
8
10
|
import {_initEDAAPI} from '../wasm/EDAAPI';
|
|
9
11
|
import {computePCA, computePLS} from './eda-tools';
|
|
10
|
-
import {addPrefixToEachColumnName, addPLSvisualization, regressionCoefficientsBarChart,
|
|
12
|
+
import {addPrefixToEachColumnName, addPLSvisualization, regressionCoefficientsBarChart,
|
|
11
13
|
scoresScatterPlot, predictedVersusReferenceScatterPlot, addOneWayAnovaVizualization} from './eda-ui';
|
|
12
14
|
import {carsDataframe, testDataForBinaryClassification} from './data-generators';
|
|
13
|
-
import {LINEAR, RBF, POLYNOMIAL, SIGMOID,
|
|
15
|
+
import {LINEAR, RBF, POLYNOMIAL, SIGMOID,
|
|
14
16
|
getTrainedModel, getPrediction, showTrainReport, getPackedModel} from './svm';
|
|
15
17
|
|
|
16
18
|
import {oneWayAnova} from './stat-tools';
|
|
17
|
-
import {
|
|
19
|
+
import {getDbscanWorker} from '@datagrok-libraries/math';
|
|
18
20
|
|
|
19
21
|
import {DistanceAggregationMethods} from '@datagrok-libraries/ml/src/distance-matrix/types';
|
|
20
22
|
import {MultiColumnDimReductionEditor} from
|
|
21
23
|
'@datagrok-libraries/ml/src/multi-column-dimensionality-reduction/multi-column-dim-reduction-editor';
|
|
22
24
|
import {multiColReduceDimensionality} from
|
|
23
25
|
'@datagrok-libraries/ml/src/multi-column-dimensionality-reduction/reduce-dimensionality';
|
|
24
|
-
import {
|
|
25
|
-
import {
|
|
26
|
+
import {KnownMetrics} from '@datagrok-libraries/ml/src/typed-metrics';
|
|
27
|
+
import {DimReductionMethods} from '@datagrok-libraries/ml/src/multi-column-dimensionality-reduction/types';
|
|
28
|
+
|
|
29
|
+
import {runKNNImputer} from './missing-values-imputation/ui';
|
|
26
30
|
|
|
27
31
|
export const _package = new DG.Package();
|
|
28
32
|
|
|
@@ -63,8 +67,7 @@ export async function dbScan(df: DG.DataFrame, xCol: DG.Column, yCol: DG.Column,
|
|
|
63
67
|
//input: bool scale = false [Indicating whether the variables should be scaled to have unit variance.]
|
|
64
68
|
//output: dataframe result {action:join(table)}
|
|
65
69
|
export async function PCA(table: DG.DataFrame, features: DG.ColumnList, components: number,
|
|
66
|
-
center: boolean, scale: boolean): Promise<DG.DataFrame>
|
|
67
|
-
{
|
|
70
|
+
center: boolean, scale: boolean): Promise<DG.DataFrame> {
|
|
68
71
|
const pcaTable = await computePCA(table, features, components, center, scale);
|
|
69
72
|
addPrefixToEachColumnName('PCA', pcaTable.columns);
|
|
70
73
|
return pcaTable;
|
|
@@ -120,9 +123,8 @@ export async function reduceDimensionality(): Promise<void> {
|
|
|
120
123
|
//input: column_list features {type: numerical}
|
|
121
124
|
//input: column predict {type: numerical}
|
|
122
125
|
//input: int components = 3
|
|
123
|
-
export async function PLS(table: DG.DataFrame, names: DG.Column, features: DG.ColumnList,
|
|
124
|
-
predict: DG.Column, components: number): Promise<void>
|
|
125
|
-
{
|
|
126
|
+
export async function PLS(table: DG.DataFrame, names: DG.Column, features: DG.ColumnList,
|
|
127
|
+
predict: DG.Column, components: number): Promise<void> {
|
|
126
128
|
const plsResults = await computePLS(table, features, predict, components);
|
|
127
129
|
addPLSvisualization(table, names, features, predict, plsResults);
|
|
128
130
|
}
|
|
@@ -131,17 +133,17 @@ export async function PLS(table: DG.DataFrame, names: DG.Column, features: DG.Co
|
|
|
131
133
|
//description: Multidimensional data analysis using partial least squares (PLS) regression. It reduces the predictors to a smaller set of uncorrelated components and performs least squares regression on them.
|
|
132
134
|
//meta.demoPath: Compute | Multivariate analysis
|
|
133
135
|
//meta.isDemoScript: True
|
|
134
|
-
export async function demoMultivariateAnalysis(): Promise<any>
|
|
135
|
-
const demoScript = new DemoScript('Partial least squares regression',
|
|
136
|
-
'Analysis of multidimensional data.');
|
|
137
|
-
|
|
136
|
+
export async function demoMultivariateAnalysis(): Promise<any> {
|
|
137
|
+
const demoScript = new DemoScript('Partial least squares regression',
|
|
138
|
+
'Analysis of multidimensional data.');
|
|
139
|
+
|
|
138
140
|
const cars = carsDataframe();
|
|
139
141
|
|
|
140
142
|
const components = 3;
|
|
141
143
|
const names = cars.columns.byName('model');
|
|
142
144
|
const predict = cars.columns.byName('price');
|
|
143
145
|
const features = cars.columns.remove('price').remove('model');
|
|
144
|
-
const plsOutput = await computePLS(cars, features, predict, components);
|
|
146
|
+
const plsOutput = await computePLS(cars, features, predict, components);
|
|
145
147
|
|
|
146
148
|
const sourceCars = carsDataframe();
|
|
147
149
|
sourceCars.name = 'Cars';
|
|
@@ -154,7 +156,7 @@ export async function demoMultivariateAnalysis(): Promise<any> {
|
|
|
154
156
|
view = grok.shell.getTableView(sourceCars.name);
|
|
155
157
|
}, {description: 'Each car has many features - patterns extraction is complicated.', delay: 0})
|
|
156
158
|
.step('Model', async () => {
|
|
157
|
-
dialog = ui.dialog({title:'Multivariate Analysis (PLS)'})
|
|
159
|
+
dialog = ui.dialog({title: 'Multivariate Analysis (PLS)'})
|
|
158
160
|
.add(ui.tableInput('Table', sourceCars))
|
|
159
161
|
.add(ui.columnsInput('Features', cars, features.toList, {available: undefined, checked: features.names()}))
|
|
160
162
|
.add(ui.columnInput('Names', cars, names, undefined))
|
|
@@ -165,16 +167,14 @@ export async function demoMultivariateAnalysis(): Promise<any> {
|
|
|
165
167
|
})
|
|
166
168
|
.show({x: 400, y: 140});
|
|
167
169
|
}, {description: 'Predict car price by its other features.', delay: 0})
|
|
168
|
-
.step('Regression coeffcicients', async () =>
|
|
169
|
-
|
|
170
|
-
|
|
171
|
-
|
|
172
|
-
|
|
173
|
-
.step('Scores', async () =>
|
|
174
|
-
{view.addViewer(scoresScatterPlot(names, plsOutput[2], plsOutput[3]))},
|
|
170
|
+
.step('Regression coeffcicients', async () => {
|
|
171
|
+
dialog.close();
|
|
172
|
+
view.addViewer(regressionCoefficientsBarChart(features, plsOutput[1]));
|
|
173
|
+
},
|
|
174
|
+
{description: 'The feature "diesel" affects the price the most.', delay: 0})
|
|
175
|
+
.step('Scores', async () => {view.addViewer(scoresScatterPlot(names, plsOutput[2], plsOutput[3]));},
|
|
175
176
|
{description: 'Similarities & dissimilarities: alfaromeo and mercedes are different.', delay: 0})
|
|
176
|
-
.step('Prediction', async () =>
|
|
177
|
-
{view.addViewer(predictedVersusReferenceScatterPlot(names, predict, plsOutput[0]))},
|
|
177
|
+
.step('Prediction', async () => {view.addViewer(predictedVersusReferenceScatterPlot(names, predict, plsOutput[0]));},
|
|
178
178
|
{description: 'Closer to the line means better price prediction.', delay: 0})
|
|
179
179
|
.start();
|
|
180
180
|
}
|
|
@@ -188,9 +188,8 @@ export async function demoMultivariateAnalysis(): Promise<any> {
|
|
|
188
188
|
//input: double max = 173 {caption: max; category: Range}
|
|
189
189
|
//input: double violatorsPercentage = 5 {caption: violators; units: %; category: Dataset}
|
|
190
190
|
//output: dataframe df
|
|
191
|
-
export async function testDataLinearSeparable(name: string, samplesCount: number, featuresCount: number,
|
|
192
|
-
min: number, max: number, violatorsPercentage: number): Promise<DG.DataFrame>
|
|
193
|
-
{
|
|
191
|
+
export async function testDataLinearSeparable(name: string, samplesCount: number, featuresCount: number,
|
|
192
|
+
min: number, max: number, violatorsPercentage: number): Promise<DG.DataFrame> {
|
|
194
193
|
return await testDataForBinaryClassification(LINEAR, [0, 0], name, samplesCount, featuresCount,
|
|
195
194
|
min, max, violatorsPercentage);
|
|
196
195
|
}
|
|
@@ -205,9 +204,8 @@ export async function testDataLinearSeparable(name: string, samplesCount: number
|
|
|
205
204
|
//input: double max = 173 {caption: max; category: Range}
|
|
206
205
|
//input: double violatorsPercentage = 5 {caption: violators; units: %; category: Dataset}
|
|
207
206
|
//output: dataframe df
|
|
208
|
-
export async function testDataLinearNonSeparable(name: string, sigma: number, samplesCount: number,
|
|
209
|
-
featuresCount: number, min: number, max: number, violatorsPercentage: number): Promise<DG.DataFrame>
|
|
210
|
-
{
|
|
207
|
+
export async function testDataLinearNonSeparable(name: string, sigma: number, samplesCount: number,
|
|
208
|
+
featuresCount: number, min: number, max: number, violatorsPercentage: number): Promise<DG.DataFrame> {
|
|
211
209
|
return await testDataForBinaryClassification(RBF, [sigma, 0], name, samplesCount, featuresCount,
|
|
212
210
|
min, max, violatorsPercentage);
|
|
213
211
|
}
|
|
@@ -220,10 +218,9 @@ export async function testDataLinearNonSeparable(name: string, sigma: number, sa
|
|
|
220
218
|
//input: double gamma = 1.0 {category: Hyperparameters}
|
|
221
219
|
//input: bool toShowReport = false {caption: to show report; category: Report}
|
|
222
220
|
//output: dynamic model
|
|
223
|
-
export async function trainLinearKernelSVM(df: DG.DataFrame, predict_column: string,
|
|
224
|
-
gamma: number, toShowReport: boolean): Promise<any>
|
|
225
|
-
{
|
|
226
|
-
const trainedModel = await getTrainedModel({gamma: gamma, kernel: LINEAR}, df, predict_column);
|
|
221
|
+
export async function trainLinearKernelSVM(df: DG.DataFrame, predict_column: string,
|
|
222
|
+
gamma: number, toShowReport: boolean): Promise<any> {
|
|
223
|
+
const trainedModel = await getTrainedModel({gamma: gamma, kernel: LINEAR}, df, predict_column);
|
|
227
224
|
|
|
228
225
|
if (toShowReport)
|
|
229
226
|
showTrainReport(df, trainedModel);
|
|
@@ -237,8 +234,8 @@ export async function trainLinearKernelSVM(df: DG.DataFrame, predict_column: str
|
|
|
237
234
|
//input: dataframe df
|
|
238
235
|
//input: dynamic model
|
|
239
236
|
//output: dataframe table
|
|
240
|
-
export async function applyLinearKernelSVM(df: DG.DataFrame, model: any): Promise<DG.DataFrame> {
|
|
241
|
-
return await getPrediction(df, model);
|
|
237
|
+
export async function applyLinearKernelSVM(df: DG.DataFrame, model: any): Promise<DG.DataFrame> {
|
|
238
|
+
return await getPrediction(df, model);
|
|
242
239
|
}
|
|
243
240
|
|
|
244
241
|
//name: trainRBFkernelSVM
|
|
@@ -250,12 +247,11 @@ export async function applyLinearKernelSVM(df: DG.DataFrame, model: any): Promis
|
|
|
250
247
|
//input: double sigma = 1.5 {category: Hyperparameters}
|
|
251
248
|
//input: bool toShowReport = false {caption: to show report; category: Report}
|
|
252
249
|
//output: dynamic model
|
|
253
|
-
export async function trainRBFkernelSVM(df: DG.DataFrame, predict_column: string,
|
|
254
|
-
gamma: number, sigma: number, toShowReport: boolean): Promise<any>
|
|
255
|
-
{
|
|
250
|
+
export async function trainRBFkernelSVM(df: DG.DataFrame, predict_column: string,
|
|
251
|
+
gamma: number, sigma: number, toShowReport: boolean): Promise<any> {
|
|
256
252
|
const trainedModel = await getTrainedModel(
|
|
257
|
-
{gamma: gamma, kernel: RBF, sigma: sigma},
|
|
258
|
-
df, predict_column);
|
|
253
|
+
{gamma: gamma, kernel: RBF, sigma: sigma},
|
|
254
|
+
df, predict_column);
|
|
259
255
|
|
|
260
256
|
if (toShowReport)
|
|
261
257
|
showTrainReport(df, trainedModel);
|
|
@@ -269,9 +265,9 @@ export async function trainRBFkernelSVM(df: DG.DataFrame, predict_column: string
|
|
|
269
265
|
//input: dataframe df
|
|
270
266
|
//input: dynamic model
|
|
271
267
|
//output: dataframe table
|
|
272
|
-
export async function applyRBFkernelSVM(df: DG.DataFrame, model: any): Promise<DG.DataFrame> {
|
|
273
|
-
return await getPrediction(df, model);
|
|
274
|
-
}
|
|
268
|
+
export async function applyRBFkernelSVM(df: DG.DataFrame, model: any): Promise<DG.DataFrame> {
|
|
269
|
+
return await getPrediction(df, model);
|
|
270
|
+
}
|
|
275
271
|
|
|
276
272
|
//name: trainPolynomialKernelSVM
|
|
277
273
|
//meta.mlname: polynomial kernel LS-SVM
|
|
@@ -283,12 +279,11 @@ export async function applyRBFkernelSVM(df: DG.DataFrame, model: any): Promise<D
|
|
|
283
279
|
//input: double d = 2 {category: Hyperparameters}
|
|
284
280
|
//input: bool toShowReport = false {caption: to show report; category: Report}
|
|
285
281
|
//output: dynamic model
|
|
286
|
-
export async function trainPolynomialKernelSVM(df: DG.DataFrame, predict_column: string,
|
|
287
|
-
gamma: number, c: number, d: number, toShowReport: boolean): Promise<any>
|
|
288
|
-
{
|
|
282
|
+
export async function trainPolynomialKernelSVM(df: DG.DataFrame, predict_column: string,
|
|
283
|
+
gamma: number, c: number, d: number, toShowReport: boolean): Promise<any> {
|
|
289
284
|
const trainedModel = await getTrainedModel(
|
|
290
|
-
{gamma: gamma, kernel: POLYNOMIAL, cParam: c, dParam: d},
|
|
291
|
-
df, predict_column);
|
|
285
|
+
{gamma: gamma, kernel: POLYNOMIAL, cParam: c, dParam: d},
|
|
286
|
+
df, predict_column);
|
|
292
287
|
|
|
293
288
|
if (toShowReport)
|
|
294
289
|
showTrainReport(df, trainedModel);
|
|
@@ -302,8 +297,8 @@ export async function trainPolynomialKernelSVM(df: DG.DataFrame, predict_column:
|
|
|
302
297
|
//input: dataframe df
|
|
303
298
|
//input: dynamic model
|
|
304
299
|
//output: dataframe table
|
|
305
|
-
export async function applyPolynomialKernelSVM(df: DG.DataFrame, model: any): Promise<DG.DataFrame> {
|
|
306
|
-
return await getPrediction(df, model);
|
|
300
|
+
export async function applyPolynomialKernelSVM(df: DG.DataFrame, model: any): Promise<DG.DataFrame> {
|
|
301
|
+
return await getPrediction(df, model);
|
|
307
302
|
}
|
|
308
303
|
|
|
309
304
|
//name: trainSigmoidKernelSVM
|
|
@@ -316,12 +311,11 @@ export async function applyPolynomialKernelSVM(df: DG.DataFrame, model: any): Pr
|
|
|
316
311
|
//input: double theta = 1 {category: Hyperparameters}
|
|
317
312
|
//input: bool toShowReport = false {caption: to show report; category: Report}
|
|
318
313
|
//output: dynamic model
|
|
319
|
-
export async function trainSigmoidKernelSVM(df: DG.DataFrame, predict_column: string,
|
|
320
|
-
gamma: number, kappa: number, theta: number, toShowReport: boolean): Promise<any>
|
|
321
|
-
{
|
|
314
|
+
export async function trainSigmoidKernelSVM(df: DG.DataFrame, predict_column: string,
|
|
315
|
+
gamma: number, kappa: number, theta: number, toShowReport: boolean): Promise<any> {
|
|
322
316
|
const trainedModel = await getTrainedModel(
|
|
323
|
-
{gamma: gamma, kernel: SIGMOID, kappa: kappa, theta: theta},
|
|
324
|
-
df, predict_column);
|
|
317
|
+
{gamma: gamma, kernel: SIGMOID, kappa: kappa, theta: theta},
|
|
318
|
+
df, predict_column);
|
|
325
319
|
|
|
326
320
|
if (toShowReport)
|
|
327
321
|
showTrainReport(df, trainedModel);
|
|
@@ -335,8 +329,8 @@ export async function trainSigmoidKernelSVM(df: DG.DataFrame, predict_column: st
|
|
|
335
329
|
//input: dataframe df
|
|
336
330
|
//input: dynamic model
|
|
337
331
|
//output: dataframe table
|
|
338
|
-
export async function applySigmoidKernelSVM(df: DG.DataFrame, model: any): Promise<DG.DataFrame> {
|
|
339
|
-
return await getPrediction(df, model);
|
|
332
|
+
export async function applySigmoidKernelSVM(df: DG.DataFrame, model: any): Promise<DG.DataFrame> {
|
|
333
|
+
return await getPrediction(df, model);
|
|
340
334
|
}
|
|
341
335
|
|
|
342
336
|
//top-menu: ML | Analyze | ANOVA...
|
|
@@ -349,5 +343,12 @@ export async function applySigmoidKernelSVM(df: DG.DataFrame, model: any): Promi
|
|
|
349
343
|
//input: bool validate = false [Indicates whether the normality of distribution and an eqaulity of varainces should be checked.]
|
|
350
344
|
export function anova(table: DG.DataFrame, factor: DG.Column, feature: DG.Column, significance: number, validate: boolean) {
|
|
351
345
|
const res = oneWayAnova(factor, feature, significance, validate);
|
|
352
|
-
addOneWayAnovaVizualization(table, factor, feature, res);
|
|
346
|
+
addOneWayAnovaVizualization(table, factor, feature, res);
|
|
347
|
+
}
|
|
348
|
+
|
|
349
|
+
//top-menu: ML | Missing Values Imputation ...
//name: KNN impute
//description: Missing values imputation using the k-nearest neighbors method
export function kNNImputation(): void {
  // Package entry point registered through the annotation comments above:
  // it only delegates to the dialog-based imputer.
  runKNNImputer();
}
|