@datagrok/eda 1.1.29 → 1.1.30
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +4 -0
- package/README.md +2 -1
- package/dist/package.js +1 -1
- package/dist/package.js.map +1 -1
- package/package.json +1 -1
- package/src/package.ts +66 -0
- package/src/pls/pls-constants.ts +1 -0
- package/src/pls/pls-ml.ts +376 -0
- package/src/pls/pls-tools.ts +41 -33
- package/src/regression.ts +1 -1
package/package.json
CHANGED
package/src/package.ts
CHANGED
|
@@ -31,6 +31,7 @@ import {markovCluster} from '@datagrok-libraries/ml/src/MCL/clustering-view';
|
|
|
31
31
|
import {MCL_OPTIONS_TAG, MCLSerializableOptions} from '@datagrok-libraries/ml/src/MCL';
|
|
32
32
|
|
|
33
33
|
import {getLinearRegressionParams, getPredictionByLinearRegression} from './regression';
|
|
34
|
+
import {PlsModel} from './pls/pls-ml';
|
|
34
35
|
import {SoftmaxClassifier} from './softmax-classifier';
|
|
35
36
|
|
|
36
37
|
export const _package = new DG.Package();
|
|
@@ -668,3 +669,68 @@ export function isApplicableSoftmax(df: DG.DataFrame, predictColumn: DG.Column):
|
|
|
668
669
|
export function isInteractiveSoftmax(df: DG.DataFrame, predictColumn: DG.Column): boolean {
  // The decision is delegated to the classifier, which receives all columns of df as features.
  const featureColumns = df.columns;
  const interactive = SoftmaxClassifier.isInteractive(featureColumns, predictColumn);

  return interactive;
}
|
|
672
|
+
|
|
673
|
+
//name: trainPLSRegression
|
|
674
|
+
//meta.mlname: PLS Regression
|
|
675
|
+
//meta.mlrole: train
|
|
676
|
+
//input: dataframe df
|
|
677
|
+
//input: column predictColumn
|
|
678
|
+
//input: int components = 3 {min: 1; max: 10} [Number of latent components]
|
|
679
|
+
//output: dynamic model
|
|
680
|
+
export async function trainPLSRegression(df: DG.DataFrame, predictColumn: DG.Column, components: number): Promise<Uint8Array> {
  // Every column of df is passed to the model as a feature.
  const featureColumns = df.columns;
  const featuresCount = featureColumns.length;

  // Reject a request for more latent components than there are feature columns.
  if (featuresCount < components)
    throw new Error('Number of components is greater than features count');

  // Fit a fresh model and hand back its packed (binary) representation.
  const plsModel = new PlsModel();
  await plsModel.fit(featureColumns, predictColumn, components);

  return plsModel.toBytes();
}
|
|
691
|
+
|
|
692
|
+
//name: applyPLSRegression
|
|
693
|
+
//meta.mlname: PLS Regression
|
|
694
|
+
//meta.mlrole: apply
|
|
695
|
+
//input: dataframe df
|
|
696
|
+
//input: dynamic model
|
|
697
|
+
//output: dataframe table
|
|
698
|
+
export function applyPLSRegression(df: DG.DataFrame, model: any): DG.DataFrame {
  // Restore the model from its packed form, then predict using all columns of df.
  const plsModel = new PlsModel(model);
  const prediction = plsModel.predict(df.columns);

  // The result is a dataframe holding the single prediction column.
  return DG.DataFrame.fromColumns([prediction]);
}
|
|
702
|
+
|
|
703
|
+
//name: isApplicablePLSRegression
|
|
704
|
+
//meta.mlname: PLS Regression
|
|
705
|
+
//meta.mlrole: isApplicable
|
|
706
|
+
//input: dataframe df
|
|
707
|
+
//input: column predictColumn
|
|
708
|
+
//output: bool result
|
|
709
|
+
export function isApplicablePLSRegression(df: DG.DataFrame, predictColumn: DG.Column): boolean {
  // Applicability is decided by the model class from all columns of df and the predict column.
  const featureColumns = df.columns;
  const applicable = PlsModel.isApplicable(featureColumns, predictColumn);

  return applicable;
}
|
|
712
|
+
|
|
713
|
+
//name: visualizePLSRegression
|
|
714
|
+
//meta.mlname: PLS Regression
|
|
715
|
+
//meta.mlrole: visualize
|
|
716
|
+
//input: dataframe df
|
|
717
|
+
//input: column targetColumn
|
|
718
|
+
//input: column predictColumn
|
|
719
|
+
//input: dynamic model
|
|
720
|
+
//output: dynamic widget
|
|
721
|
+
export async function visualizePLSRegression(df: DG.DataFrame, targetColumn: DG.Column, predictColumn: DG.Column, model: any): Promise<any> {
  // Only the packed model is read here; df and the two column arguments are not used.
  const plsModel = new PlsModel(model);

  // Return the root element of each viewer produced by the model.
  return plsModel.viewers().map(({root}) => root);
}
|
|
727
|
+
|
|
728
|
+
//name: isInteractivePLSRegression
|
|
729
|
+
//meta.mlname: PLS Regression
|
|
730
|
+
//meta.mlrole: isInteractive
|
|
731
|
+
//input: dataframe df
|
|
732
|
+
//input: column predictColumn
|
|
733
|
+
//output: bool result
|
|
734
|
+
export function isInteractivePLSRegression(df: DG.DataFrame, predictColumn: DG.Column): boolean {
  // Interactivity is decided by the model class from all columns of df and the predict column.
  const featureColumns = df.columns;
  const interactive = PlsModel.isInteractive(featureColumns, predictColumn);

  return interactive;
}
|
package/src/pls/pls-constants.ts
CHANGED
package/src/pls/pls-ml.ts
CHANGED
|
@@ -0,0 +1,376 @@
|
|
|
1
|
+
// Predictive tools based on the PLS method
|
|
2
|
+
|
|
3
|
+
import * as grok from 'datagrok-api/grok';
|
|
4
|
+
import * as ui from 'datagrok-api/ui';
|
|
5
|
+
import * as DG from 'datagrok-api/dg';
|
|
6
|
+
|
|
7
|
+
import {TITLE, RESULT_NAMES} from './pls-constants';
|
|
8
|
+
import {getPlsAnalysis, PlsOutput, getLines} from './pls-tools';
|
|
9
|
+
import {LINK} from './pls-constants';
|
|
10
|
+
import {getPredictionByLinearRegression} from '../regression';
|
|
11
|
+
|
|
12
|
+
// PLS ML specific constants

/** Number of extra rows appended to the model columns (names, coefficients, loadings) */
const EXTRA_ROWS = 1;

/** Index of the first loadings column in the packed model dataframe */
const SHIFT = 2;

/** Minimal number of loadings columns in a packed model */
const MIN_LOADINGS = 1;

/** Minimal number of columns in the packed model dataframe */
const MIN_COLS_COUNT = SHIFT + MIN_LOADINGS;

/** Number of items in the sizes header of a packed model */
const SIZE_ARR_LEN = 2;

/** Position of the model dataframe bytes count in the sizes header */
const MODEL_IDX = 0;

/** Position of the scores dataframe bytes count in the sizes header */
const SCORES_IDX = 1;

/** Bytes occupied by the sizes header (each size is a 32-bit unsigned integer) */
const BYTES_PER_SIZES = SIZE_ARR_LEN * Uint32Array.BYTES_PER_ELEMENT;

/** Packed model length is a multiple of this value */
const BLOCK_SIZE = 64;
|
|
22
|
+
|
|
23
|
+
/** Interactivity thresholds: upper bounds used by the interactivity check of the PLS model */
enum INTERACTIVITY {
  /** Largest length of the predict column for which the model is considered interactive */
  MAX_SAMLPES = 100000,

  /** Largest number of feature columns for which the model is considered interactive */
  MAX_FEATURES = 1000,
}
|
|
28
|
+
|
|
29
|
+
/** Model specification: everything that is stored for a trained PLS model */
type PlsModelSpecification = {
  /** Regression coefficients, one per feature, followed by one extra item holding the bias */
  params: Float32Array;

  /** Names of features followed by one extra placeholder item */
  names: string[];

  /** X-loadings, one array per component; each array has one extra trailing item
   * that holds the explained variance for the component */
  loadings: Float32Array[];

  /** Number of features */
  dim: number;

  /** Number of latent components */
  components: number;

  /** Dataframe with scores */
  scores: DG.DataFrame;
};
|
|
38
|
+
|
|
39
|
+
/** PLS regression modeling tool.
 *
 * A model is either trained with {@link PlsModel.fit} or restored from the bytes
 * produced by {@link PlsModel.toBytes}. Once it has a specification, it can predict
 * and create viewers (loadings, regression coefficients, explained variances, scores).
 */
export class PlsModel {
  /** Check applicability: every feature column and the predict column must match 'numerical'.
   * @param features columns to be used as features
   * @param predictColumn column to be predicted
   * @returns true if all the columns are numerical
   */
  static isApplicable(features: DG.ColumnList, predictColumn: DG.Column): boolean {
    for (const col of features) {
      if (!col.matches('numerical'))
        return false;
    }

    return predictColumn.matches('numerical');
  }

  /** Check interactivity: features count and samples count must not exceed the thresholds.
   * @param features columns to be used as features
   * @param predictColumn column to be predicted (its length is the samples count)
   * @returns true if both counts are within the INTERACTIVITY limits
   */
  static isInteractive(features: DG.ColumnList, predictColumn: DG.Column): boolean {
    return (features.length <= INTERACTIVITY.MAX_FEATURES) &&
      (predictColumn.length <= INTERACTIVITY.MAX_SAMLPES);
  }

  /** Specification of the PLS model (null until the model is trained or unpacked) */
  private specn: PlsModelSpecification | null = null;

  /** Create a model.
   * @param packedModel optional bytes produced by {@link PlsModel.toBytes}; when given,
   * the model specification is restored from them, otherwise the model is untrained
   * @throws Error with the 'Failed to load model: ...' message if unpacking fails
   */
  constructor(packedModel?: Uint8Array) {
    if (packedModel) {
      try {
        if (packedModel.length < BYTES_PER_SIZES)
          throw new Error('incorrect size of the packed model');

        // Sizes header: bytes counts of the model and scores dataframes.
        // The header is copied, so that the 32-bit view starts at offset 0 of its own buffer.
        // This keeps the reading correct when packedModel is a view with a non-zero byteOffset.
        const sizeArr = new Uint32Array(packedModel.slice(0, BYTES_PER_SIZES).buffer);
        const modelDfBytesCount = sizeArr[MODEL_IDX];
        const scoresDfBytesCount = sizeArr[SCORES_IDX];
        const scoresStart = BYTES_PER_SIZES + modelDfBytesCount;

        if (scoresStart + scoresDfBytesCount > packedModel.length)
          throw new Error('incorrect size of the packed model');

        // Model's bytes (subarray offsets are relative to packedModel, not to its buffer)
        const modelBytes = packedModel.subarray(BYTES_PER_SIZES, scoresStart);

        // Model as dataframe
        const modelDf = DG.DataFrame.fromByteArray(modelBytes);
        const rowCount = modelDf.rowCount;
        const columns = modelDf.columns;
        const colsCount = columns.length;

        if (colsCount < MIN_COLS_COUNT)
          throw new Error('incorrect columns count');

        // Scores
        const scoresBytes = packedModel.subarray(scoresStart, scoresStart + scoresDfBytesCount);
        const scores = DG.DataFrame.fromByteArray(scoresBytes);

        // Extract names of features
        const featureNames = columns.byName(TITLE.FEATURES).toList();

        // Extract parameters of the linear model
        const params = new Float32Array(rowCount);
        params.set(columns.byName(TITLE.REGR_COEFS).getRawData());

        // Extract loadings: all the columns after the names and coefficients ones
        const components = colsCount - SHIFT;
        const loadings = new Array<Float32Array>(components);

        for (let i = 0; i < components; ++i) {
          loadings[i] = new Float32Array(rowCount);
          loadings[i].set(columns.byIndex(i + SHIFT).getRawData());
        }

        this.specn = {
          params: params,
          loadings: loadings,
          names: featureNames,
          dim: rowCount - EXTRA_ROWS,
          components: components,
          scores: scores,
        };
      } catch (error) {
        throw new Error(`Failed to load model: ${(error instanceof Error ? error.message : 'the platform issue')}`);
      }
    }
  }

  /** Train model.
   * @param features feature columns
   * @param target column with the values to be predicted
   * @param components number of latent components
   */
  public async fit(features: DG.ColumnList, target: DG.Column, components: number): Promise<void> {
    const analysis = await getPlsAnalysis({
      table: DG.DataFrame.fromColumns([target]),
      features: features,
      predict: target,
      components: components,
      names: undefined,
    });

    // 1. Names of features
    const featureNames = features.names();
    featureNames.push('_'); // add extra item

    // 2. Regression coefficients
    const params = this.getRegrCoeffs(features, target, analysis.regressionCoefficients);

    // 3. Loadings
    const loadings = this.getLoadings(components, analysis.xLoadings);

    // 4. Model specification
    this.specn = {
      names: featureNames,
      params: params,
      loadings: loadings,
      components: components,
      dim: features.length,
      scores: this.getScoresDf(analysis),
    };

    // 5. Compute explained variances (stored in the extra items of the loadings)
    this.computeExplVars(target.length, components, analysis.yLoadings);
  } // fit

  /** Return x-loadings with extra items reserved for explained variances.
   * @param components number of latent components
   * @param loadingsCols columns with x-loadings, one per component
   * @throws Error if there are fewer loadings columns than components
   */
  private getLoadings(components: number, loadingsCols: DG.Column[]): Float32Array[] {
    if (loadingsCols.length < components)
      throw new Error('Failed to get loadings: incorrect number of loadings columns');

    const res = new Array<Float32Array>(components);
    const len = loadingsCols[0].length + EXTRA_ROWS;

    for (let i = 0; i < components; ++i) {
      res[i] = new Float32Array(len);
      res[i].set(loadingsCols[i].getRawData());
    }

    return res;
  }

  /** Return regression coefficients followed by the bias.
   * @param features feature columns
   * @param target column with the values to be predicted
   * @param regrCoefsCol column with regression coefficients obtained by PLS
   */
  private getRegrCoeffs(features: DG.ColumnList, target: DG.Column, regrCoefsCol: DG.Column): Float32Array {
    const dim = features.length;
    const params = new Float32Array(dim + EXTRA_ROWS);
    const paramsByPLS = regrCoefsCol.getRawData();

    // Sum of coefficients multiplied by the averages of the corresponding features
    let tmpSum = 0;

    for (let i = 0; i < dim; ++i) {
      params[i] = paramsByPLS[i];
      tmpSum += paramsByPLS[i] * features.byIndex(i).stats.avg;
    }

    // compute bias
    params[dim] = target.stats.avg - tmpSum;

    return params;
  }

  /** Compute explained variances and store them in the extra items of the loadings.
   * The value stored for a component is the running sum over this and all the previous components.
   * @param samplesCount number of samples
   * @param components number of latent components
   * @param yLoadings column with y-loadings
   * @throws Error if the model specification is not set
   */
  private computeExplVars(samplesCount: number, components: number, yLoadings: DG.Column): void {
    if (this.specn === null)
      throw new Error('Failed to compute explained variances');

    const raw = yLoadings.getRawData();
    const loadings = this.specn.loadings;
    const dim = loadings[0].length - EXTRA_ROWS;

    // Compute, source: the paper https://doi.org/10.1002/cem.2589
    let explVar = 0;

    for (let comp = 0; comp < components; ++comp) {
      explVar += raw[comp]**2 / samplesCount;
      loadings[comp][dim] = explVar;
    }
  }

  /** Return packed model.
   * Layout: sizes header (two 32-bit unsigned integers: bytes counts of the model and
   * scores dataframes), model dataframe bytes, scores dataframe bytes, zero padding.
   * @throws Error if the model is not trained
   */
  public toBytes(): Uint8Array {
    if (this.specn === null)
      throw new Error('Failed to pack untrained model');

    // 1. Store model params in dataframe
    const modelDf = DG.DataFrame.fromColumns([
      DG.Column.fromStrings(TITLE.FEATURES, this.specn.names),
      DG.Column.fromFloat32Array(TITLE.REGR_COEFS, this.specn.params),
    ]);

    this.specn.loadings.forEach((array, idx) => modelDf.columns.add(DG.Column.fromFloat32Array(
      `${TITLE.XLOADING}${idx + 1}`,
      array,
    )));

    // 2. Pack model and scores dataframes
    const modelDfBytes = modelDf.toByteArray();
    const modelDfBytesCount = modelDfBytes.length;

    const scoresBytes = this.specn.scores.toByteArray();
    const scoresBytesCount = scoresBytes.length;

    const requiredBytes = modelDfBytesCount + scoresBytesCount + BYTES_PER_SIZES;

    // The length is rounded up to a multiple of BLOCK_SIZE, plus one extra block
    const packedModel = new Uint8Array((Math.ceil(requiredBytes / BLOCK_SIZE) + 1) * BLOCK_SIZE);

    // 3. Sizes header: bytes counts of the model and scores dataframes
    const sizeArr = new Uint32Array(packedModel.buffer, 0, SIZE_ARR_LEN);
    sizeArr[MODEL_IDX] = modelDfBytesCount;
    sizeArr[SCORES_IDX] = scoresBytesCount;

    // 4. Store model's bytes
    packedModel.set(modelDfBytes, BYTES_PER_SIZES);

    // 5. Store scores bytes
    packedModel.set(scoresBytes, BYTES_PER_SIZES + modelDfBytesCount);

    return packedModel;
  } // toBytes

  /** Return prediction computed by the linear regression with the model's parameters.
   * @param features feature columns
   * @throws Error if the model is not trained
   */
  public predict(features: DG.ColumnList): DG.Column {
    if (this.specn === null)
      throw new Error('Predicting failed: model is not trained');

    return getPredictionByLinearRegression(features, this.specn.params);
  }

  /** Return loadings and regression coefficients viewers.
   * @returns the loadings scatterplot and the regression coefficients bar chart
   * @throws Error if the model is not trained
   */
  private loadingsParamsViewers(): DG.Viewer[] {
    if (this.specn === null)
      throw new Error('Failed to create loadings and parameters viewers: untrained model');

    const viewers: DG.Viewer[] = [];

    const dim = this.specn.dim;

    // Parameters and loadings dataframe (extra items of names, params & loadings are omitted)
    const loadingsDf = DG.DataFrame.fromColumns([
      DG.Column.fromStrings(TITLE.FEATURES, this.specn.names.slice(0, -1)),
      DG.Column.fromFloat32Array(TITLE.REGR_COEFS, this.specn.params, dim),
    ]);

    const columns = loadingsDf.columns;
    const shift = columns.length; // index of the first loadings column
    const components = this.specn.components;

    this.specn.loadings.forEach((arr, idx) => loadingsDf.columns.add(
      DG.Column.fromFloat32Array(`${TITLE.XLOADING}${idx + 1}`, arr, dim),
    ));

    // Loading scatterplot: the first loadings vs. the second ones (or vs. itself for a single component)
    viewers.push(DG.Viewer.scatterPlot(loadingsDf, {
      title: TITLE.LOADINGS,
      xColumnName: columns.byIndex(shift).name,
      yColumnName: columns.byIndex(shift + (components > 1 ? 1 : 0)).name,
      markerType: DG.MARKER_TYPE.CIRCLE,
      labels: TITLE.FEATURES,
      help: LINK.LOADINGS,
    }));

    // Regression coefficients barchart
    viewers.push(DG.Viewer.barChart(loadingsDf, {
      title: TITLE.REGR_COEFS,
      splitColumnName: TITLE.FEATURES,
      valueColumnName: TITLE.REGR_COEFS,
      valueAggrType: DG.AGG.AVG,
      help: LINK.COEFFS,
      showValueSelector: false,
      showStackSelector: false,
    }));

    return viewers;
  } // getLoadingsParamsViewers

  /** Return explained variances viewer.
   * @returns bar chart of the explained variances stored in the extra items of the loadings
   * @throws Error if the model is not trained
   */
  private explVarsViewer(): DG.Viewer {
    if (this.specn === null)
      throw new Error('Failed to create exaplained variances viewer: untrained model');

    const components = this.specn.components;
    const dim = this.specn.dim;
    const loadings = this.specn.loadings;

    const compNames = new Array<string>(components);
    const explVars = new Float32Array(components);

    for (let i = 0; i < components; ++i) {
      // The first category uses the singular title, the other ones use the plural title
      compNames[i] = `${(i === 0) ? RESULT_NAMES.COMP : RESULT_NAMES.COMPS} ${i + 1}`;
      explVars[i] = loadings[i][dim];
    }

    return DG.Viewer.barChart(DG.DataFrame.fromColumns([
      DG.Column.fromStrings(RESULT_NAMES.COMPS, compNames),
      DG.Column.fromFloat32Array(TITLE.EXPL_VAR, explVars),
    ]), {
      title: TITLE.EXPL_VAR,
      splitColumnName: RESULT_NAMES.COMPS,
      valueColumnName: TITLE.EXPL_VAR,
      valueAggrType: DG.AGG.AVG,
      help: LINK.EXPL_VARS,
      showCategorySelector: false,
      showStackSelector: false,
      showValueSelector: false,
    });
  }

  /** Returns viewers: loadings scatterplot, regression coefficients bar chart,
   * explained variances bar chart and scores scatterplot.
   * @throws Error if the model is not trained
   */
  public viewers(): DG.Viewer[] {
    if (this.specn === null)
      throw new Error('Failed to create viewers: untrained model');

    const viewers = this.loadingsParamsViewers();
    viewers.push(
      this.explVarsViewer(),
      this.getScoresScatter(),
    );

    return viewers;
  }

  /** Return dataframe with scores: t-scores columns followed by u-scores columns.
   * Note: the score columns of the given analysis are renamed in place.
   * @param analysis results of the PLS analysis
   */
  private getScoresDf(analysis: PlsOutput): DG.DataFrame {
    const tScores = analysis.tScores;
    const uScores = analysis.uScores;

    tScores.forEach((col, idx) => col.name = `${TITLE.XSCORE}${idx + 1}`);
    uScores.forEach((col, idx) => col.name = `${TITLE.YSCORE}${idx + 1}`);

    return DG.DataFrame.fromColumns(tScores.concat(uScores));
  }

  /** Return scores scatter: the first scores column vs. the second one, with formula lines.
   * @throws Error if the model is not trained
   */
  private getScoresScatter(): DG.Viewer {
    if (this.specn === null)
      throw new Error('Failed to create scores scatter: untrained model');

    const names = this.specn.scores.columns.names();

    const scatter = DG.Viewer.scatterPlot(this.specn.scores, {
      title: TITLE.SCORES,
      xColumnName: names[0],
      yColumnName: names[1],
      markerType: DG.MARKER_TYPE.CIRCLE,
      help: LINK.SCORES,
      showViewerFormulaLines: true,
    });

    scatter.meta.formulaLines.addAll(getLines(names));

    return scatter;
  }
}
|
package/src/pls/pls-tools.ts
CHANGED
|
@@ -30,9 +30,43 @@ export type PlsInput = {
|
|
|
30
30
|
features: DG.ColumnList,
|
|
31
31
|
predict: DG.Column,
|
|
32
32
|
components: number,
|
|
33
|
-
names : DG.Column |
|
|
33
|
+
names : DG.Column | undefined,
|
|
34
34
|
};
|
|
35
35
|
|
|
36
|
+
/** Return formula lines for the given column names.
 *
 * For each name, the line `${name} = 0` is created. For each ordered pair of names (x, y)
 * and each radius r from RADIUS, two lines are created: y = sqrt(r*r - x*x) and
 * y = -sqrt(r*r - x*x), with min and max set to -r and r.
 * @param names names of columns
 */
export function getLines(names: string[]): DG.FormulaLine[] {
  const result: DG.FormulaLine[] = [];

  // Reference to a column inside a formula
  const toRef = (name: string): string => '${' + name + '}';

  for (const xName of names) {
    const x = toRef(xName);

    // Axis
    result.push({type: 'line', formula: `${x} = 0`, width: LINE_WIDTH, visible: true, title: ' ', color: COLOR.AXIS});

    for (const yName of names) {
      const y = toRef(yName);

      for (const r of RADIUS) {
        // Upper half of the circle goes first, then the lower one
        for (const sign of ['', '-']) {
          result.push({
            type: 'line',
            formula: `${y} = ${sign}sqrt(${r*r} - ${x} * ${x})`,
            width: LINE_WIDTH,
            visible: true,
            title: ' ',
            min: -r,
            max: r,
            color: COLOR.CIRCLE,
          });
        }
      }
    }
  }

  return result;
}
|
|
69
|
+
|
|
36
70
|
/** Partial least square regression (PLS) */
|
|
37
71
|
export async function getPlsAnalysis(input: PlsInput): Promise<PlsOutput> {
|
|
38
72
|
checkWasmDimensionReducerInputs(input.features, input.components);
|
|
@@ -149,36 +183,7 @@ async function performMVA(input: PlsInput, analysisType: PLS_ANALYSIS): Promise<
|
|
|
149
183
|
});
|
|
150
184
|
|
|
151
185
|
// 4.3) create lines & circles
|
|
152
|
-
|
|
153
|
-
|
|
154
|
-
const addLine = (formula: string, radius: number) => {
|
|
155
|
-
lines.push({
|
|
156
|
-
type: 'line',
|
|
157
|
-
formula: formula,
|
|
158
|
-
width: LINE_WIDTH,
|
|
159
|
-
visible: true,
|
|
160
|
-
title: ' ',
|
|
161
|
-
min: -radius,
|
|
162
|
-
max: radius,
|
|
163
|
-
color: COLOR.CIRCLE,
|
|
164
|
-
});
|
|
165
|
-
};
|
|
166
|
-
|
|
167
|
-
scoreNames.forEach((xName) => {
|
|
168
|
-
const x = '${' + xName + '}';
|
|
169
|
-
lines.push({type: 'line', formula: `${x} = 0`, width: LINE_WIDTH, visible: true, title: ' ', color: COLOR.AXIS});
|
|
170
|
-
|
|
171
|
-
scoreNames.forEach((yName) => {
|
|
172
|
-
const y = '${' + yName + '}';
|
|
173
|
-
|
|
174
|
-
RADIUS.forEach((r) => {
|
|
175
|
-
addLine(y + ` = sqrt(${r*r} - ${x} * ${x})`, r);
|
|
176
|
-
addLine(y + ` = -sqrt(${r*r} - ${x} * ${x})`, r);
|
|
177
|
-
});
|
|
178
|
-
});
|
|
179
|
-
});
|
|
180
|
-
|
|
181
|
-
scoresScatter.meta.formulaLines.addAll(lines);
|
|
186
|
+
scoresScatter.meta.formulaLines.addAll(getLines(scoreNames));
|
|
182
187
|
view.addViewer(scoresScatter);
|
|
183
188
|
|
|
184
189
|
// 5. Explained Variances
|
|
@@ -334,8 +339,11 @@ export async function runMVA(analysisType: PLS_ANALYSIS): Promise<void> {
|
|
|
334
339
|
};
|
|
335
340
|
|
|
336
341
|
// names of samples
|
|
337
|
-
let names = (strCols.length > 0) ? strCols[0] :
|
|
338
|
-
const namesInputs = ui.input.column(TITLE.NAMES, {
|
|
342
|
+
let names = (strCols.length > 0) ? strCols[0] : undefined;
|
|
343
|
+
const namesInputs = ui.input.column(TITLE.NAMES, {
|
|
344
|
+
table: table,
|
|
345
|
+
value: names,
|
|
346
|
+
onValueChanged: () => names = predictInput.value ?? undefined,
|
|
339
347
|
filter: (col: DG.Column) => col.type === DG.COLUMN_TYPE.STRING},
|
|
340
348
|
);
|
|
341
349
|
namesInputs.setTooltip(HINT.NAMES);
|
package/src/regression.ts
CHANGED
|
@@ -191,7 +191,7 @@ async function getLinearRegressionParamsUsingPLS(features: DG.ColumnList,
|
|
|
191
191
|
features: features,
|
|
192
192
|
predict: targets,
|
|
193
193
|
components: components,
|
|
194
|
-
names:
|
|
194
|
+
names: undefined,
|
|
195
195
|
});
|
|
196
196
|
|
|
197
197
|
return plsAnalysis.regressionCoefficients.getRawData() as Float32Array;
|