@datagrok/eda 1.1.18 → 1.1.19
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +5 -0
- package/dist/523.js +2 -2
- package/dist/902.js +1 -1
- package/dist/972.js +2 -2
- package/dist/{191bd97af33c713bf78e.wasm → f5343e2c2e15952ce916.wasm} +0 -0
- package/dist/package.js +2 -2
- package/package.json +1 -1
- package/scripts/func.json +1 -1
- package/src/eda-tools.ts +1 -1
- package/src/package.ts +35 -60
- package/src/pls/pls-constants.ts +129 -0
- package/src/pls/pls-tools.ts +376 -0
- package/src/utils.ts +10 -2
- package/wasm/EDA.js +8 -1
- package/wasm/EDA.wasm +0 -0
- package/wasm/PLS/PLS.h +2 -1
- package/wasm/PLS/pls.cpp +3 -2
- package/wasm/plsExport.cpp +21 -16
|
@@ -0,0 +1,376 @@
|
|
|
1
|
+
// Tools for multivariate analysis by PLS
|
|
2
|
+
|
|
3
|
+
import * as grok from 'datagrok-api/grok';
|
|
4
|
+
import * as ui from 'datagrok-api/ui';
|
|
5
|
+
import * as DG from 'datagrok-api/dg';
|
|
6
|
+
|
|
7
|
+
import {PLS_ANALYSIS, ERROR_MSG, TITLE, HINT, LINK, COMPONENTS, INT, TIMEOUT,
|
|
8
|
+
RESULT_NAMES, WASM_OUTPUT_IDX, RADIUS, LINE_WIDTH, COLOR, X_COORD, Y_COORD,
|
|
9
|
+
DEMO_INTRO_MD, DEMO_RESULTS_MD, DELAY, DEMO_RESULTS} from './pls-constants';
|
|
10
|
+
import {checkWasmDimensionReducerInputs, checkColumnType, checkMissingVals} from '../utils';
|
|
11
|
+
import {_partialLeastSquareRegressionInWebWorker} from '../../wasm/EDAAPI';
|
|
12
|
+
import {carsDataframe} from '../data-generators';
|
|
13
|
+
|
|
14
|
+
// Short aliases of the standard math routines used when clamping the components count
const {min, max} = Math;
|
|
16
|
+
|
|
17
|
+
/**
 * Results of the PLS analysis.
 * Each field is read from the corresponding slot of the WASM routine output.
 */
export type PlsOutput = {
  /** Predicted values of the response column */
  prediction: DG.Column<DG.COLUMN_TYPE.FLOAT>,
  /** Regression coefficients (paired with the names of features when visualized) */
  regressionCoefficients: DG.Column<DG.COLUMN_TYPE.FLOAT>,
  /** T-scores (shown as X-scores), one column per PLS component */
  tScores: DG.Column<DG.COLUMN_TYPE.FLOAT>[],
  /** U-scores (shown as Y-scores), one column per PLS component */
  uScores: DG.Column<DG.COLUMN_TYPE.FLOAT>[],
  /** X-loadings, one column per PLS component */
  xLoadings: DG.Column<DG.COLUMN_TYPE.FLOAT>[],
  /** Y-loadings, a single column with one value per PLS component */
  yLoadings: DG.Column<DG.COLUMN_TYPE.FLOAT>,
};
|
|
26
|
+
|
|
27
|
+
/** Input of the PLS analysis */
export type PlsInput = {
  /** Source table; the computed columns are added to it */
  table: DG.DataFrame,
  /** Predictors (features) */
  features: DG.ColumnList,
  /** Response, i.e. the column to be predicted */
  predict: DG.Column,
  /** Number of PLS components to be computed */
  components: number,
  /** Column with names of samples used for labeling markers; null, if there is none */
  names: DG.Column | null,
};
|
|
35
|
+
|
|
36
|
+
/**
 * Partial least square regression (PLS).
 *
 * Validates the inputs, runs the WASM computations in a web worker
 * and repacks the raw output into a named structure.
 *
 * @param input table, features, response column and the number of components
 * @returns prediction, regression coefficients, scores and loadings
 * @throws Error if the response column is neither float nor int, or has missing values
 *   (the check of features and components count is done by a helper defined in utils)
 */
export async function getPlsAnalysis(input: PlsInput): Promise<PlsOutput> {
  const {table, features, predict, components} = input;

  // Check the features and the components count
  checkWasmDimensionReducerInputs(features, components);

  // Check the response column: numeric type & no missing values
  checkColumnType(predict);
  checkMissingVals(predict);

  const wasmOutput = await _partialLeastSquareRegressionInWebWorker(table, features, predict, components);

  // Pick each result from its slot of the WASM output
  const prediction = wasmOutput[WASM_OUTPUT_IDX.PREDICTION];
  const regressionCoefficients = wasmOutput[WASM_OUTPUT_IDX.REGR_COEFFS];
  const tScores = wasmOutput[WASM_OUTPUT_IDX.T_SCORES];
  const uScores = wasmOutput[WASM_OUTPUT_IDX.U_SCORES];
  const xLoadings = wasmOutput[WASM_OUTPUT_IDX.X_LOADINGS];
  const yLoadings = wasmOutput[WASM_OUTPUT_IDX.Y_LOADINGS];

  return {prediction, regressionCoefficients, tScores, uScores, xLoadings, yLoadings};
}
|
|
60
|
+
|
|
61
|
+
/**
 * Perform multivariate analysis using the PLS regression.
 *
 * The PLS scores are always added to the source table. Unless only the components
 * are requested, the following viewers are added to the table view:
 *   1) "predicted vs. reference" scatter plot;
 *   2) regression coefficients bar chart;
 *   3) loadings scatter plot;
 *   4) scores scatter plot with axes and circles given by formula lines;
 *   5) explained variances bar chart.
 * In the demo case, a hints wizard describing these viewers is shown.
 *
 * @param input table, features, response, components count and names of samples
 * @param analysisType specifies what is computed and shown
 */
async function performMVA(input: PlsInput, analysisType: PLS_ANALYSIS): Promise<void> {
  const result = await getPlsAnalysis(input);

  const plsCols = result.tScores;
  const cols = input.table.columns;
  const featuresNames = input.features.names();
  const prefix = (analysisType === PLS_ANALYSIS.COMPUTE_COMPONENTS) ? RESULT_NAMES.PREFIX : TITLE.XSCORE;

  // add PLS components to the table
  plsCols.forEach((col, idx) => {
    col.name = cols.getUnusedName(`${prefix}${idx + 1}`);
    cols.add(col);
  });

  // nothing else is required, when just components are computed
  if (analysisType === PLS_ANALYSIS.COMPUTE_COMPONENTS)
    return;

  const view = grok.shell.tableView(input.table.name);

  // 0.1 Buffer table: per-feature results (coefficients & loadings)
  const buffer = DG.DataFrame.fromColumns([
    DG.Column.fromStrings(TITLE.FEATURE, featuresNames),
    result.regressionCoefficients,
  ]);

  // 0.2. Add X-Loadings
  result.xLoadings.forEach((col, idx) => {
    col.name = buffer.columns.getUnusedName(`${TITLE.XLOADING}${idx + 1}`);
    buffer.columns.add(col);
  });

  // 1. Predicted vs Reference scatter plot
  const pred = result.prediction;
  pred.name = cols.getUnusedName(`${input.predict.name} ${RESULT_NAMES.SUFFIX}`);
  cols.add(pred);
  const predictVsReferScatter = view.addViewer(DG.Viewer.scatterPlot(input.table, {
    title: TITLE.MODEL,
    xColumnName: input.predict.name,
    yColumnName: pred.name,
    showRegressionLine: true,
    markerType: DG.MARKER_TYPE.CIRCLE,
    labels: input.names?.name,
    help: LINK.MODEL,
  }));

  // 2. Regression Coefficients Bar Chart
  result.regressionCoefficients.name = TITLE.REGR_COEFS;
  const regrCoeffsBar = view.addViewer(DG.Viewer.barChart(buffer, {
    title: TITLE.REGR_COEFS,
    splitColumnName: TITLE.FEATURE,
    valueColumnName: result.regressionCoefficients.name,
    valueAggrType: DG.AGG.AVG,
    help: LINK.COEFFS,
    showValueSelector: false,
    showStackSelector: false,
  }));

  // 3. Loadings Scatter Plot
  // The names assigned in 0.2 are used directly: renaming the columns again
  // would discard the unique names provided by getUnusedName.
  const xLoadingNames = result.xLoadings.map((col) => col.name);
  const loadingsScatter = view.addViewer(DG.Viewer.scatterPlot(buffer, {
    title: TITLE.LOADINGS,
    xColumnName: xLoadingNames[0],
    yColumnName: xLoadingNames[xLoadingNames.length > 1 ? 1 : 0],
    markerType: DG.MARKER_TYPE.CIRCLE,
    labels: TITLE.FEATURE,
    help: LINK.LOADINGS,
  }));

  // 4. Scores Scatter Plot

  // 4.1) data: names of all scores columns (X-scores & Y-scores)
  const scoreNames = plsCols.map((col) => col.name);
  result.uScores.forEach((col, idx) => {
    col.name = cols.getUnusedName(`${TITLE.YSCORE}${idx + 1}`);
    cols.add(col);
    scoreNames.push(col.name);
  });

  // 4.2) create scatter
  const scoresScatter = DG.Viewer.scatterPlot(input.table, {
    title: TITLE.SCORES,
    xColumnName: plsCols[0].name,
    // in the case of a single component, the first Y-score is used;
    // the name of the column (not the column itself) must be passed
    yColumnName: (plsCols.length > 1) ? plsCols[1].name : result.uScores[0].name,
    markerType: DG.MARKER_TYPE.CIRCLE,
    labels: input.names?.name,
    help: LINK.SCORES,
    showViewerFormulaLines: true,
  });

  // 4.3) create lines & circles
  const lines = [] as DG.FormulaLine[];

  // adds a half of a circle: the line is bounded by [-radius, radius]
  const addLine = (formula: string, radius: number) => {
    lines.push({
      type: 'line',
      formula: formula,
      width: LINE_WIDTH,
      visible: true,
      title: ' ',
      min: -radius,
      max: radius,
      color: COLOR.CIRCLE,
    });
  };

  // lines are created for each pair of scores, so that they are shown for any choice of axes
  scoreNames.forEach((xName) => {
    const x = '${' + xName + '}';

    // axis
    lines.push({type: 'line', formula: `${x} = 0`, width: LINE_WIDTH, visible: true, title: ' ', color: COLOR.AXIS});

    scoreNames.forEach((yName) => {
      const y = '${' + yName + '}';

      // each circle is composed of the upper and lower halves
      RADIUS.forEach((r) => {
        addLine(y + ` = sqrt(${r*r} - ${x} * ${x})`, r);
        addLine(y + ` = -sqrt(${r*r} - ${x} * ${x})`, r);
      });
    });
  });

  scoresScatter.meta.formulaLines.addAll(lines);
  view.addViewer(scoresScatter);

  // 5. Explained Variances

  // 5.1) computation, source: the paper https://doi.org/10.1002/cem.2589
  //      here, we use notations from this paper
  const q = result.yLoadings.getRawData();
  const p = result.xLoadings.map((col) => col.getRawData());
  const n = input.table.rowCount;
  const m = featuresNames.length;
  const A = input.components;
  const yExplVars = new Float32Array(A);
  const compNames = [] as string[];
  const xExplVars: Float32Array[] = [];
  for (let i = 0; i < m; ++i)
    xExplVars.push(new Float32Array(A));

  // the first component
  yExplVars[0] = q[0]**2 / n;
  compNames.push(`1 ${RESULT_NAMES.COMP}`);
  xExplVars.forEach((arr, idx) => {arr[0] = p[0][idx]**2 / n;});

  // the rest of components: cumulative sums
  for (let comp = 1; comp < A; ++comp) {
    yExplVars[comp] = yExplVars[comp - 1] + q[comp]**2 / n;
    xExplVars.forEach((arr, idx) => arr[comp] = arr[comp - 1] + p[comp][idx]**2 / n);
    compNames.push(`${comp + 1} ${RESULT_NAMES.COMPS}`);
  }

  // 5.2) create df
  const explVarsDF = DG.DataFrame.fromColumns([
    DG.Column.fromStrings(TITLE.COMPONENTS, compNames),
    DG.Column.fromFloat32Array(input.predict.name, yExplVars),
  ]);

  xExplVars.forEach((arr, idx) => explVarsDF.columns.add(DG.Column.fromFloat32Array(featuresNames[idx], arr)));

  // 5.3) bar chart
  const explVarsBar = view.addViewer(DG.Viewer.barChart(explVarsDF, {
    title: TITLE.EXPL_VAR,
    splitColumnName: TITLE.COMPONENTS,
    valueColumnName: input.predict.name,
    valueAggrType: DG.AGG.AVG,
    help: LINK.EXPL_VARS,
    showCategorySelector: false,
    showStackSelector: false,
  }));

  // emphasize viewers in the demo case
  if (analysisType === PLS_ANALYSIS.DEMO) {
    // the order of viewers must correspond to the order of items in DEMO_RESULTS
    const viewers = [predictVsReferScatter, scoresScatter, loadingsScatter, regrCoeffsBar, explVarsBar];
    const pages = viewers.map((viewer, idx) => {
      return {
        text: DEMO_RESULTS[idx].text,
        showNextTo: viewer.root,
      };
    });

    const wizard = ui.hints.addTextHint({title: TITLE.EXPLORE, pages: pages});
    wizard.helpUrl = LINK.MVA;
    grok.shell.windows.help.showHelp(ui.markdown(DEMO_RESULTS_MD));
  }
} // performMVA
|
|
241
|
+
|
|
242
|
+
/**
 * Run multivariate analysis (PLS).
 *
 * Takes the current table, checks that it has at least two numeric columns
 * without missing values and shows a dialog for specifying the response,
 * features, components count and names of samples. The analysis is performed,
 * when the run button is pressed. A warning is shown and nothing is done,
 * if the table does not satisfy the requirements.
 *
 * @param analysisType specifies titles, help links and what is computed
 */
export async function runMVA(analysisType: PLS_ANALYSIS): Promise<void> {
  const table = grok.shell.t;

  if (table === null) {
    grok.shell.warning(ERROR_MSG.NO_DF);
    return;
  }

  if (table.rowCount === 0) {
    grok.shell.warning(ERROR_MSG.EMPTY_DF);
    return;
  }

  const numColNames = [] as string[];
  const numCols = [] as DG.Column[];
  const strCols = [] as DG.Column[];

  // numeric column without missing values
  const isValidNumeric = (col: DG.Column) =>
    ((col.type === DG.COLUMN_TYPE.INT) || (col.type === DG.COLUMN_TYPE.FLOAT)) &&
    (col.stats.missingValueCount === 0);

  table.columns.toList().forEach((col) => {
    if (isValidNumeric(col)) {
      numColNames.push(col.name);
      numCols.push(col);
    } else if (col.type === DG.COLUMN_TYPE.STRING)
      strCols.push(col);
  });

  if (numColNames.length === 0) {
    grok.shell.warning(ERROR_MSG.NO_COLS);
    return;
  }

  // both the response and at least one feature are required
  if (numColNames.length === 1) {
    grok.shell.warning(ERROR_MSG.ONE_COL);
    return;
  }

  // response (to predict): the last valid numeric column is used by default
  let predict = numCols[numCols.length - 1];
  const predictInput = ui.columnInput(TITLE.PREDICT, table, predict, () => {
    predict = predictInput.value!;
    updateInputs();
  },
  {filter: (col: DG.Column) => isValidNumeric(col)},
  );
  predictInput.setTooltip(HINT.PREDICT);

  // predictors (features): initialized right away, so that pressing the run button
  // before the delayed initialization of the input (see below) uses a valid value
  let features: DG.Column[] = numCols.filter((col) => col !== predict);
  const featuresInput = ui.columnsInput(TITLE.USING, table, () => {}, {available: numColNames});
  featuresInput.onInput(() => updateInputs());
  featuresInput.setTooltip(HINT.FEATURES);

  // components count
  let components = min(numColNames.length - 1, COMPONENTS.DEFAULT as number);
  const componentsInput = ui.input.forProperty(DG.Property.fromOptions({
    name: TITLE.COMPONENTS,
    inputType: INT,
    defaultValue: components,
    //@ts-ignore
    showPlusMinus: true,
    min: COMPONENTS.MIN,
  }));
  componentsInput.onInput(() => updateInputs());
  componentsInput.setTooltip(HINT.COMPONENTS);

  let dlgTitle: string;
  let dlgHelpUrl: string;
  let dlgRunBtnTooltip: string;

  if (analysisType === PLS_ANALYSIS.COMPUTE_COMPONENTS) {
    dlgTitle = TITLE.PLS;
    dlgHelpUrl = LINK.PLS;
    dlgRunBtnTooltip = HINT.PLS;
  } else {
    dlgTitle = TITLE.MVA;
    dlgHelpUrl = LINK.MVA;
    dlgRunBtnTooltip = HINT.MVA;
  }

  // keeps the inputs consistent: the response is excluded from features,
  // the components count is clamped, and the run button is disabled for invalid inputs
  const updateInputs = () => {
    featuresInput.value = featuresInput.value.filter((col) => col !== predict);
    features = featuresInput.value;

    componentsInput.value = min(max(componentsInput.value ?? components, COMPONENTS.MIN), features.length);
    components = componentsInput.value;

    dlg.getButton(TITLE.RUN).disabled = (features.length === 0) || (components <= 0);
  };

  // names of samples: the first string column is used by default
  let names = (strCols.length > 0) ? strCols[0] : null;
  const namesInputs = ui.columnInput(TITLE.NAMES, table, names, () => names = namesInputs.value,
    {filter: (col: DG.Column) => col.type === DG.COLUMN_TYPE.STRING},
  );
  namesInputs.setTooltip(HINT.NAMES);
  namesInputs.root.hidden = (strCols.length === 0) || (analysisType === PLS_ANALYSIS.COMPUTE_COMPONENTS);

  const dlg = ui.dialog({title: dlgTitle, helpUrl: dlgHelpUrl})
    .add(ui.form([predictInput, featuresInput, componentsInput, namesInputs]))
    .addButton(TITLE.RUN, async () => {
      dlg.close();

      await performMVA({
        table: table,
        features: DG.DataFrame.fromColumns(features).columns,
        predict: predict,
        components: components,
        names: names,
      }, analysisType);
    }, undefined, dlgRunBtnTooltip)
    .show({x: X_COORD, y: Y_COORD});

  // the following delay provides correct styles (see https://reddata.atlassian.net/browse/GROK-15196)
  setTimeout(() => {
    featuresInput.value = numCols.filter((col) => col !== predict);
    features = featuresInput.value;
  }, TIMEOUT);

  grok.shell.v.append(dlg.root);
} // runMVA
|
|
366
|
+
|
|
367
|
+
/**
 * Run multivariate analysis demo: opens the cars dataset, shows the introduction
 * in the help panel, hides the context panel and launches the analysis dialog.
 */
export async function runDemoMVA(): Promise<void> {
  // demo data
  const demoTable = carsDataframe();
  grok.shell.addTableView(demoTable);

  // panels: the help with the introduction is shown, the context panel is hidden
  const wnds = grok.shell.windows;
  wnds.help.visible = true;
  wnds.help.showHelp(ui.markdown(DEMO_INTRO_MD));
  wnds.showContextPanel = false;
  wnds.showProperties = false;

  await runMVA(PLS_ANALYSIS.DEMO);
}
|
package/src/utils.ts
CHANGED
|
@@ -32,12 +32,18 @@ const INCORRECT_STEPS_MES = 'steps must be non-negative.';
|
|
|
32
32
|
const INCORRECT_CYCLES_MES = 'cycles must be positive.';
|
|
33
33
|
const INCORRECT_CUTOFF_MES = 'cutoff must be non-negative.';
|
|
34
34
|
|
|
35
|
-
|
|
35
|
+
/**
 * Check column type: only float and int columns are supported.
 * @param col column to be checked
 * @throws Error if the column is of any other type
 */
export function checkColumnType(col: DG.Column): void {
  const isNumeric = (col.type === DG.COLUMN_TYPE.FLOAT) || (col.type === DG.COLUMN_TYPE.INT);

  if (!isNumeric)
    throw new Error(UNSUPPORTED_COLUMN_TYPE_MES + col.type);
}
|
|
40
40
|
|
|
41
|
+
/**
 * Check missing values.
 * @param col column to be checked
 * @throws Error if the column has at least one missing value
 */
export function checkMissingVals(col: DG.Column): void {
  const missingCount = col.stats.missingValueCount;

  if (missingCount > 0)
    throw new Error(`The column '${col.name}' has missing values.`);
}
|
|
46
|
+
|
|
41
47
|
// Check dimension reducer inputs
|
|
42
48
|
export function checkDimensionReducerInputs(features: DG.ColumnList, components: number): void {
|
|
43
49
|
if (components < COMP_MIN)
|
|
@@ -46,8 +52,10 @@ export function checkDimensionReducerInputs(features: DG.ColumnList, components:
|
|
|
46
52
|
if (components > features.length)
|
|
47
53
|
throw new Error(COMP_EXCESS);
|
|
48
54
|
|
|
49
|
-
for (const col of features)
|
|
55
|
+
for (const col of features) {
|
|
50
56
|
checkColumnType(col);
|
|
57
|
+
checkMissingVals(col);
|
|
58
|
+
}
|
|
51
59
|
}
|
|
52
60
|
|
|
53
61
|
// Check UMAP inputs
|
package/wasm/EDA.js
CHANGED
|
@@ -125,11 +125,18 @@ var partialLeastSquareRegression = {
|
|
|
125
125
|
ref: 'componentsCount',
|
|
126
126
|
value: 'data'
|
|
127
127
|
}
|
|
128
|
+
},
|
|
129
|
+
yLoadings: {
|
|
130
|
+
type: 'newFloatColumn',
|
|
131
|
+
numOfRows: {
|
|
132
|
+
ref: 'componentsCount',
|
|
133
|
+
value: 'data'
|
|
134
|
+
}
|
|
128
135
|
}
|
|
129
136
|
},
|
|
130
137
|
output: {
|
|
131
138
|
type: 'objects',
|
|
132
|
-
source: ['prediction', 'regressionCoefficients', 'tScores', 'uScores', 'xLoadings']
|
|
139
|
+
source: ['prediction', 'regressionCoefficients', 'tScores', 'uScores', 'xLoadings', 'yLoadings']
|
|
133
140
|
}
|
|
134
141
|
}; // partialLeastSquareRegression
|
|
135
142
|
|
package/wasm/EDA.wasm
CHANGED
|
Binary file
|
package/wasm/PLS/PLS.h
CHANGED
package/wasm/PLS/pls.cpp
CHANGED
|
@@ -33,7 +33,8 @@ int pls::partialLeastSquareExtended(Float * predictorColumnsDataPtr,
|
|
|
33
33
|
Float * regressionCoefficientsPtr,
|
|
34
34
|
Float * predictorScoresPtr,
|
|
35
35
|
Float * responceScoresPtr,
|
|
36
|
-
Float * predictorLoadingsPtr
|
|
36
|
+
Float * predictorLoadingsPtr,
|
|
37
|
+
Float * responceLoadingsPtr) noexcept
|
|
37
38
|
{
|
|
38
39
|
// check correctness of arguments
|
|
39
40
|
if (componentsCount <= 0 || componentsCount > columnCount)
|
|
@@ -108,7 +109,7 @@ int pls::partialLeastSquareExtended(Float * predictorColumnsDataPtr,
|
|
|
108
109
|
Map<Matrix<Float, Dynamic, Dynamic, ColMajor>> U(responceScoresPtr, rowCount, componentsCount);
|
|
109
110
|
|
|
110
111
|
// Y-loadings, q
|
|
111
|
-
Vector<Float, Dynamic
|
|
112
|
+
Map<Vector<Float, Dynamic>> q(responceLoadingsPtr, componentsCount);
|
|
112
113
|
|
|
113
114
|
// PLS1 routine auxiliary vectors
|
|
114
115
|
Vector<Float, Dynamic> normTau(componentsCount);
|
package/wasm/plsExport.cpp
CHANGED
|
@@ -30,7 +30,9 @@ extern "C" {
|
|
|
30
30
|
int predictionScoresColumnsColumnCount,
|
|
31
31
|
float * predictionLoadingsColumns,
|
|
32
32
|
int predictionLoadingsColumnsRowCount,
|
|
33
|
-
int predictionLoadingsColumnsColumnCount
|
|
33
|
+
int predictionLoadingsColumnsColumnCount,
|
|
34
|
+
float * yLoadingsColumn,
|
|
35
|
+
int yLoadingsColumnLength);
|
|
34
36
|
}
|
|
35
37
|
|
|
36
38
|
#include "PLS\PLS.h"
|
|
@@ -45,30 +47,33 @@ extern "C" {
|
|
|
45
47
|
//output: column_list tScores [new(predict.rowCount, componentsCount)]
|
|
46
48
|
//output: column_list uScores [new(predict.rowCount, componentsCount)]
|
|
47
49
|
//output: column_list xLoadings [new(features.columnCount, componentsCount)]
|
|
50
|
+
//output: column yLoadings [new(componentsCount)]
|
|
48
51
|
EMSCRIPTEN_KEEPALIVE
|
|
49
|
-
int partialLeastSquareRegression(float *
|
|
52
|
+
int partialLeastSquareRegression(float * featuresColumns,
|
|
50
53
|
int rowCount,
|
|
51
54
|
int columnCount,
|
|
52
|
-
float *
|
|
53
|
-
int
|
|
55
|
+
float * predictColumn,
|
|
56
|
+
int predictColumnLength,
|
|
54
57
|
int componentsCount,
|
|
55
58
|
float * predictionColumn,
|
|
56
59
|
int predictionColumnLength,
|
|
57
60
|
float * regressionCoefficients,
|
|
58
61
|
int regressionCoefficientsLength,
|
|
59
|
-
float *
|
|
60
|
-
int
|
|
61
|
-
int
|
|
62
|
-
float *
|
|
63
|
-
int
|
|
64
|
-
int
|
|
65
|
-
float *
|
|
66
|
-
int
|
|
67
|
-
int
|
|
62
|
+
float * tScoresColumns,
|
|
63
|
+
int tScoresColumnsRowCount,
|
|
64
|
+
int tScoresColumnsColumnCount,
|
|
65
|
+
float * uScoresColumns,
|
|
66
|
+
int uScoresColumnsRowCount,
|
|
67
|
+
int uScoresColumnsColumnCount,
|
|
68
|
+
float * xLoadingsColumns,
|
|
69
|
+
int xLoadingsColumnsRowCount,
|
|
70
|
+
int xLoadingsColumnsColumnCount,
|
|
71
|
+
float * yLoadingsColumn,
|
|
72
|
+
int yLoadingsColumnLength)
|
|
68
73
|
{
|
|
69
|
-
return pls::partialLeastSquareExtended(
|
|
70
|
-
|
|
71
|
-
|
|
74
|
+
return pls::partialLeastSquareExtended(featuresColumns, rowCount, columnCount,
|
|
75
|
+
predictColumn, componentsCount, predictionColumn, regressionCoefficients,
|
|
76
|
+
tScoresColumns, uScoresColumns, xLoadingsColumns, yLoadingsColumn);
|
|
72
77
|
}
|
|
73
78
|
|
|
74
79
|
|