@winm2m/inferential-stats-js 0.1.3 → 0.1.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +211 -693
- package/package.json +1 -1
package/README.md
CHANGED
|
@@ -12,10 +12,6 @@
|
|
|
12
12
|
## Table of Contents
|
|
13
13
|
|
|
14
14
|
- [Architecture Overview](#architecture-overview)
|
|
15
|
-
- [Installation](#installation)
|
|
16
|
-
- [Quick Start](#quick-start)
|
|
17
|
-
- [CDN / CodePen Usage](#cdn--codepen-usage)
|
|
18
|
-
- [API Reference](#api-reference)
|
|
19
15
|
- [Core Analysis Features — Mathematical & Technical Documentation](#core-analysis-features--mathematical--technical-documentation)
|
|
20
16
|
- [① Descriptive Statistics](#-descriptive-statistics)
|
|
21
17
|
- [② Compare Means](#-compare-means)
|
|
@@ -23,6 +19,10 @@
|
|
|
23
19
|
- [④ Classify](#-classify)
|
|
24
20
|
- [⑤ Dimension Reduction](#-dimension-reduction)
|
|
25
21
|
- [⑥ Scale](#-scale)
|
|
22
|
+
- [Installation](#installation)
|
|
23
|
+
- [Quick Start](#quick-start)
|
|
24
|
+
- [CDN / CodePen Usage](#cdn--codepen-usage)
|
|
25
|
+
- [API Reference](#api-reference)
|
|
26
26
|
- [Sample Data](#sample-data)
|
|
27
27
|
- [Progress Event Handling](#progress-event-handling)
|
|
28
28
|
- [License](#license)
|
|
@@ -41,14 +41,14 @@
|
|
|
41
41
|
│ │ (ESM / CJS) │ (Transferable) │ │
|
|
42
42
|
│ └───────────────────────┘ ▼ │
|
|
43
43
|
│ ┌─────────────────────┐ │
|
|
44
|
-
│ │ Web Worker
|
|
45
|
-
│ │ ┌────────────────┐
|
|
46
|
-
│ │ │ Pyodide WASM │
|
|
47
|
-
│ │ │ ┌───────────┐ │
|
|
48
|
-
│ │ │ │ Python
|
|
49
|
-
│ │ │ │ Runtime
|
|
50
|
-
│ │ │ └───────────┘ │
|
|
51
|
-
│ │ └────────────────┘
|
|
44
|
+
│ │ Web Worker │ │
|
|
45
|
+
│ │ ┌────────────────┐ │ │
|
|
46
|
+
│ │ │ Pyodide WASM │ │ │
|
|
47
|
+
│ │ │ ┌───────────┐ │ │ │
|
|
48
|
+
│ │ │ │ Python │ │ │ │
|
|
49
|
+
│ │ │ │ Runtime │ │ │ │
|
|
50
|
+
│ │ │ └───────────┘ │ │ │
|
|
51
|
+
│ │ └────────────────┘ │ │
|
|
52
52
|
│ └─────────────────────┘ │
|
|
53
53
|
└─────────────────────────────────────────────────────────┘
|
|
54
54
|
```
|
|
@@ -66,633 +66,12 @@
|
|
|
66
66
|
|
|
67
67
|
---
|
|
68
68
|
|
|
69
|
-
## Installation
|
|
70
|
-
|
|
71
|
-
```bash
|
|
72
|
-
npm install @winm2m/inferential-stats-js
|
|
73
|
-
```
|
|
74
|
-
|
|
75
|
-
> **Peer dependency (optional):** If you want explicit control over the Pyodide version, install `pyodide` (>= 0.26.0) as a peer dependency. Otherwise the SDK loads Pyodide from the jsDelivr CDN automatically.
|
|
76
|
-
|
|
77
|
-
---
|
|
78
|
-
|
|
79
|
-
## Quick Start
|
|
80
|
-
|
|
81
|
-
```typescript
|
|
82
|
-
import { InferentialStats, PROGRESS_EVENT_NAME } from '@winm2m/inferential-stats-js';
|
|
83
|
-
|
|
84
|
-
// 1. Listen for initialization progress
|
|
85
|
-
window.addEventListener(PROGRESS_EVENT_NAME, (e: Event) => {
|
|
86
|
-
const { stage, progress, message } = (e as CustomEvent).detail;
|
|
87
|
-
console.log(`[${stage}] ${progress}% — ${message}`);
|
|
88
|
-
});
|
|
89
|
-
|
|
90
|
-
// 2. Create an instance (pass the URL to the bundled worker)
|
|
91
|
-
const stats = new InferentialStats({
|
|
92
|
-
workerUrl: new URL('@winm2m/inferential-stats-js/worker', import.meta.url).href,
|
|
93
|
-
});
|
|
94
|
-
|
|
95
|
-
// 3. Initialize (loads Pyodide + Python packages inside the worker)
|
|
96
|
-
await stats.init();
|
|
97
|
-
|
|
98
|
-
// 4. Prepare your data
|
|
99
|
-
const data = [
|
|
100
|
-
{ group: 'A', score: 85 },
|
|
101
|
-
{ group: 'A', score: 90 },
|
|
102
|
-
{ group: 'B', score: 78 },
|
|
103
|
-
{ group: 'B', score: 82 },
|
|
104
|
-
// ... more rows
|
|
105
|
-
];
|
|
106
|
-
|
|
107
|
-
// 5. Run an analysis
|
|
108
|
-
const result = await stats.anovaOneway({
|
|
109
|
-
data,
|
|
110
|
-
variable: 'score',
|
|
111
|
-
groupVariable: 'group',
|
|
112
|
-
});
|
|
113
|
-
|
|
114
|
-
console.log(result);
|
|
115
|
-
// {
|
|
116
|
-
// success: true,
|
|
117
|
-
// data: { fStatistic: ..., pValue: ..., groupStats: [...], ... },
|
|
118
|
-
// executionTimeMs: 42
|
|
119
|
-
// }
|
|
120
|
-
|
|
121
|
-
// 6. Clean up when done
|
|
122
|
-
stats.destroy();
|
|
123
|
-
```
|
|
124
|
-
|
|
125
|
-
---
|
|
126
|
-
|
|
127
|
-
## CDN / CodePen Usage
|
|
128
|
-
|
|
129
|
-
You can use the SDK directly in a browser or CodePen with no build step. The snippet below loads the library from a CDN, fetches the sample dataset from GitHub Pages, and runs every analysis method in the SDK. Results are rendered as HTML tables.
|
|
130
|
-
|
|
131
|
-
```html
|
|
132
|
-
<!DOCTYPE html>
|
|
133
|
-
<html lang="en">
|
|
134
|
-
<head>
|
|
135
|
-
<meta charset="UTF-8" />
|
|
136
|
-
<title>inferential-stats-js CDN Demo</title>
|
|
137
|
-
</head>
|
|
138
|
-
<body>
|
|
139
|
-
<h1>inferential-stats-js — CDN Demo</h1>
|
|
140
|
-
<p id="status">Initializing...</p>
|
|
141
|
-
<div id="output"></div>
|
|
142
|
-
|
|
143
|
-
<style>
|
|
144
|
-
body { font-family: "IBM Plex Sans", "Segoe UI", sans-serif; margin: 24px; }
|
|
145
|
-
table { border-collapse: collapse; margin: 12px 0 24px; width: 100%; }
|
|
146
|
-
th, td { border: 1px solid #ddd; padding: 6px 10px; font-size: 14px; }
|
|
147
|
-
th { background: #f5f5f5; text-align: left; }
|
|
148
|
-
h2 { margin: 20px 0 8px; font-size: 18px; }
|
|
149
|
-
</style>
|
|
150
|
-
|
|
151
|
-
<!-- Load the worker script (global IIFE, no import needed) -->
|
|
152
|
-
<!-- The worker is loaded by URL below, not as a script tag -->
|
|
153
|
-
|
|
154
|
-
<script type="module">
|
|
155
|
-
// 1. Import the SDK from a CDN
|
|
156
|
-
import { InferentialStats, PROGRESS_EVENT_NAME } from 'https://unpkg.com/@winm2m/inferential-stats-js/dist/index.js';
|
|
157
|
-
|
|
158
|
-
const status = document.getElementById('status');
|
|
159
|
-
const output = document.getElementById('output');
|
|
160
|
-
|
|
161
|
-
const setStatus = (message) => {
|
|
162
|
-
if (status) {
|
|
163
|
-
status.textContent = message;
|
|
164
|
-
}
|
|
165
|
-
};
|
|
166
|
-
|
|
167
|
-
const renderTable = (title, headers, rows) => {
|
|
168
|
-
if (!output) return;
|
|
169
|
-
|
|
170
|
-
const section = document.createElement('section');
|
|
171
|
-
const heading = document.createElement('h2');
|
|
172
|
-
heading.textContent = title;
|
|
173
|
-
section.appendChild(heading);
|
|
174
|
-
|
|
175
|
-
const table = document.createElement('table');
|
|
176
|
-
const thead = document.createElement('thead');
|
|
177
|
-
const headerRow = document.createElement('tr');
|
|
178
|
-
headers.forEach((header) => {
|
|
179
|
-
const th = document.createElement('th');
|
|
180
|
-
th.textContent = header;
|
|
181
|
-
headerRow.appendChild(th);
|
|
182
|
-
});
|
|
183
|
-
thead.appendChild(headerRow);
|
|
184
|
-
table.appendChild(thead);
|
|
185
|
-
|
|
186
|
-
const tbody = document.createElement('tbody');
|
|
187
|
-
rows.forEach((cells) => {
|
|
188
|
-
const tr = document.createElement('tr');
|
|
189
|
-
cells.forEach((cell) => {
|
|
190
|
-
const td = document.createElement('td');
|
|
191
|
-
td.textContent = cell;
|
|
192
|
-
tr.appendChild(td);
|
|
193
|
-
});
|
|
194
|
-
tbody.appendChild(tr);
|
|
195
|
-
});
|
|
196
|
-
table.appendChild(tbody);
|
|
197
|
-
section.appendChild(table);
|
|
198
|
-
output.appendChild(section);
|
|
199
|
-
};
|
|
200
|
-
|
|
201
|
-
const renderKeyValueTable = (title, rows) => {
|
|
202
|
-
renderTable(title, ['Metric', 'Value'], rows);
|
|
203
|
-
};
|
|
204
|
-
|
|
205
|
-
const renderErrorTable = (title, message) => {
|
|
206
|
-
renderKeyValueTable(title, [['Error', message ?? 'Unknown error']]);
|
|
207
|
-
};
|
|
208
|
-
|
|
209
|
-
const formatNumber = (value, digits = 4) => Number(value).toFixed(digits);
|
|
210
|
-
|
|
211
|
-
// 2. Listen for progress events
|
|
212
|
-
window.addEventListener(PROGRESS_EVENT_NAME, (e) => {
|
|
213
|
-
const { stage, progress, message } = e.detail;
|
|
214
|
-
setStatus(`[${stage}] ${message} (${progress}%)`);
|
|
215
|
-
});
|
|
216
|
-
|
|
217
|
-
// 3. Create an instance pointing to the CDN-hosted worker
|
|
218
|
-
const stats = new InferentialStats({
|
|
219
|
-
workerUrl: 'https://unpkg.com/@winm2m/inferential-stats-js/dist/stats-worker.js',
|
|
220
|
-
});
|
|
221
|
-
|
|
222
|
-
try {
|
|
223
|
-
// 4. Initialize (downloads Pyodide WASM + Python packages)
|
|
224
|
-
await stats.init();
|
|
225
|
-
setStatus('Initialization complete. Running analyses...');
|
|
226
|
-
|
|
227
|
-
// 5. Fetch sample survey data from GitHub Pages
|
|
228
|
-
const response = await fetch(
|
|
229
|
-
'https://winm2m.github.io/inferential-stats-js/sample-survey-data.json'
|
|
230
|
-
);
|
|
231
|
-
const data = await response.json();
|
|
232
|
-
setStatus(`Loaded ${data.length} rows. Rendering tables...`);
|
|
233
|
-
|
|
234
|
-
const binaryData = data.map((row) => {
|
|
235
|
-
const musicScore = Number(row.music_satisfaction);
|
|
236
|
-
return {
|
|
237
|
-
...row,
|
|
238
|
-
is_high_music: Number.isFinite(musicScore) && musicScore >= 4 ? 1 : 0,
|
|
239
|
-
};
|
|
240
|
-
});
|
|
241
|
-
|
|
242
|
-
const sampledData = data.slice(0, 300);
|
|
243
|
-
|
|
244
|
-
// 6. Descriptive Statistics — Frequencies
|
|
245
|
-
const frequenciesResult = await stats.frequencies({
|
|
246
|
-
data,
|
|
247
|
-
variable: 'favorite_music',
|
|
248
|
-
});
|
|
249
|
-
if (frequenciesResult.success) {
|
|
250
|
-
const frequencyRows = frequenciesResult.data.frequencies
|
|
251
|
-
.slice(0, 6)
|
|
252
|
-
.map((item) => [
|
|
253
|
-
String(item.value),
|
|
254
|
-
String(item.count),
|
|
255
|
-
`${item.percentage.toFixed(2)}%`,
|
|
256
|
-
]);
|
|
257
|
-
renderTable(
|
|
258
|
-
'Descriptive Statistics — Frequencies (favorite_music, top 6)',
|
|
259
|
-
['Value', 'Count', 'Percent'],
|
|
260
|
-
frequencyRows
|
|
261
|
-
);
|
|
262
|
-
} else {
|
|
263
|
-
renderErrorTable('Descriptive Statistics — Frequencies (favorite_music, top 6)', frequenciesResult.error);
|
|
264
|
-
}
|
|
265
|
-
|
|
266
|
-
const descriptivesResult = await stats.descriptives({
|
|
267
|
-
data,
|
|
268
|
-
variables: [
|
|
269
|
-
'music_satisfaction',
|
|
270
|
-
'movie_satisfaction',
|
|
271
|
-
'art_satisfaction',
|
|
272
|
-
'weekly_hours_music',
|
|
273
|
-
'weekly_hours_movie',
|
|
274
|
-
'monthly_art_visits',
|
|
275
|
-
],
|
|
276
|
-
});
|
|
277
|
-
if (descriptivesResult.success) {
|
|
278
|
-
const descriptiveRows = descriptivesResult.data.statistics.map((stat) => [
|
|
279
|
-
stat.variable,
|
|
280
|
-
formatNumber(stat.mean),
|
|
281
|
-
formatNumber(stat.std),
|
|
282
|
-
formatNumber(stat.min),
|
|
283
|
-
formatNumber(stat.max),
|
|
284
|
-
]);
|
|
285
|
-
renderTable(
|
|
286
|
-
'Descriptive Statistics — Descriptives',
|
|
287
|
-
['Variable', 'Mean', 'Std', 'Min', 'Max'],
|
|
288
|
-
descriptiveRows
|
|
289
|
-
);
|
|
290
|
-
} else {
|
|
291
|
-
renderErrorTable('Descriptive Statistics — Descriptives', descriptivesResult.error);
|
|
292
|
-
}
|
|
293
|
-
|
|
294
|
-
const crosstabsResult = await stats.crosstabs({
|
|
295
|
-
data,
|
|
296
|
-
rowVariable: 'gender',
|
|
297
|
-
colVariable: 'favorite_music',
|
|
298
|
-
});
|
|
299
|
-
if (crosstabsResult.success) {
|
|
300
|
-
renderKeyValueTable('Descriptive Statistics — Crosstabs Summary (gender x favorite_music)', [
|
|
301
|
-
['Chi-square', formatNumber(crosstabsResult.data.chiSquare)],
|
|
302
|
-
['p-value', formatNumber(crosstabsResult.data.pValue)],
|
|
303
|
-
['Cramers V', formatNumber(crosstabsResult.data.cramersV)],
|
|
304
|
-
['df', String(crosstabsResult.data.degreesOfFreedom)],
|
|
305
|
-
]);
|
|
306
|
-
const crosstabRows = crosstabsResult.data.table.slice(0, 12).map((cell) => [
|
|
307
|
-
cell.row,
|
|
308
|
-
cell.col,
|
|
309
|
-
String(cell.observed),
|
|
310
|
-
formatNumber(cell.expected),
|
|
311
|
-
formatNumber(cell.rowPercentage),
|
|
312
|
-
formatNumber(cell.colPercentage),
|
|
313
|
-
formatNumber(cell.totalPercentage),
|
|
314
|
-
]);
|
|
315
|
-
renderTable(
|
|
316
|
-
'Descriptive Statistics — Crosstabs Cells (top 12)',
|
|
317
|
-
['Row', 'Col', 'Obs', 'Exp', 'Row %', 'Col %', 'Total %'],
|
|
318
|
-
crosstabRows
|
|
319
|
-
);
|
|
320
|
-
} else {
|
|
321
|
-
renderErrorTable('Descriptive Statistics — Crosstabs Summary (gender x favorite_music)', crosstabsResult.error);
|
|
322
|
-
}
|
|
323
|
-
|
|
324
|
-
const ttestIndependentResult = await stats.ttestIndependent({
|
|
325
|
-
data,
|
|
326
|
-
variable: 'music_satisfaction',
|
|
327
|
-
groupVariable: 'gender',
|
|
328
|
-
group1Value: 'Male',
|
|
329
|
-
group2Value: 'Female',
|
|
330
|
-
});
|
|
331
|
-
if (ttestIndependentResult.success) {
|
|
332
|
-
const levene = ttestIndependentResult.data.leveneTest;
|
|
333
|
-
const equal = ttestIndependentResult.data.equalVariance;
|
|
334
|
-
const unequal = ttestIndependentResult.data.unequalVariance;
|
|
335
|
-
renderKeyValueTable('Compare Means — Independent T-Test (music_satisfaction by gender)', [
|
|
336
|
-
['Levene p-value', formatNumber(levene.pValue)],
|
|
337
|
-
['Equal variance', String(levene.equalVariance)],
|
|
338
|
-
['t (equal var)', formatNumber(equal.tStatistic)],
|
|
339
|
-
['p (equal var)', formatNumber(equal.pValue)],
|
|
340
|
-
['t (unequal var)', formatNumber(unequal.tStatistic)],
|
|
341
|
-
['p (unequal var)', formatNumber(unequal.pValue)],
|
|
342
|
-
]);
|
|
343
|
-
} else {
|
|
344
|
-
renderErrorTable('Compare Means — Independent T-Test (music_satisfaction by gender)', ttestIndependentResult.error);
|
|
345
|
-
}
|
|
346
|
-
|
|
347
|
-
const ttestPairedResult = await stats.ttestPaired({
|
|
348
|
-
data,
|
|
349
|
-
variable1: 'music_satisfaction',
|
|
350
|
-
variable2: 'movie_satisfaction',
|
|
351
|
-
});
|
|
352
|
-
if (ttestPairedResult.success) {
|
|
353
|
-
renderKeyValueTable('Compare Means — Paired T-Test (music vs movie satisfaction)', [
|
|
354
|
-
['t-statistic', formatNumber(ttestPairedResult.data.tStatistic)],
|
|
355
|
-
['p-value', formatNumber(ttestPairedResult.data.pValue)],
|
|
356
|
-
['Mean diff', formatNumber(ttestPairedResult.data.meanDifference)],
|
|
357
|
-
['Std diff', formatNumber(ttestPairedResult.data.stdDifference)],
|
|
358
|
-
['n', String(ttestPairedResult.data.n)],
|
|
359
|
-
]);
|
|
360
|
-
} else {
|
|
361
|
-
renderErrorTable('Compare Means — Paired T-Test (music vs movie satisfaction)', ttestPairedResult.error);
|
|
362
|
-
}
|
|
363
|
-
|
|
364
|
-
const anovaResult = await stats.anovaOneway({
|
|
365
|
-
data,
|
|
366
|
-
variable: 'music_satisfaction',
|
|
367
|
-
groupVariable: 'age_group',
|
|
368
|
-
});
|
|
369
|
-
if (anovaResult.success) {
|
|
370
|
-
renderKeyValueTable('Compare Means — One-Way ANOVA (music_satisfaction by age_group)', [
|
|
371
|
-
['F-statistic', formatNumber(anovaResult.data.fStatistic)],
|
|
372
|
-
['p-value', formatNumber(anovaResult.data.pValue)],
|
|
373
|
-
['eta-squared', formatNumber(anovaResult.data.etaSquared)],
|
|
374
|
-
]);
|
|
375
|
-
} else {
|
|
376
|
-
renderErrorTable('Compare Means — One-Way ANOVA (music_satisfaction by age_group)', anovaResult.error);
|
|
377
|
-
}
|
|
378
|
-
|
|
379
|
-
const posthocResult = await stats.posthocTukey({
|
|
380
|
-
data,
|
|
381
|
-
variable: 'music_satisfaction',
|
|
382
|
-
groupVariable: 'age_group',
|
|
383
|
-
alpha: 0.05,
|
|
384
|
-
});
|
|
385
|
-
if (posthocResult.success) {
|
|
386
|
-
const posthocRows = posthocResult.data.comparisons.slice(0, 8).map((comp) => [
|
|
387
|
-
comp.group1,
|
|
388
|
-
comp.group2,
|
|
389
|
-
formatNumber(comp.meanDifference),
|
|
390
|
-
formatNumber(comp.pValue),
|
|
391
|
-
comp.reject ? 'Yes' : 'No',
|
|
392
|
-
]);
|
|
393
|
-
renderTable(
|
|
394
|
-
'Compare Means — Post-hoc Tukey (top 8 comparisons)',
|
|
395
|
-
['Group 1', 'Group 2', 'Mean diff', 'p-value', 'Reject'],
|
|
396
|
-
posthocRows
|
|
397
|
-
);
|
|
398
|
-
} else {
|
|
399
|
-
renderErrorTable('Compare Means — Post-hoc Tukey (top 8 comparisons)', posthocResult.error);
|
|
400
|
-
}
|
|
401
|
-
|
|
402
|
-
const regressionResult = await stats.linearRegression({
|
|
403
|
-
data,
|
|
404
|
-
dependentVariable: 'music_satisfaction',
|
|
405
|
-
independentVariables: ['weekly_hours_music', 'weekly_hours_movie'],
|
|
406
|
-
});
|
|
407
|
-
if (regressionResult.success) {
|
|
408
|
-
renderKeyValueTable('Regression — OLS (music_satisfaction ~ weekly_hours_music + weekly_hours_movie)', [
|
|
409
|
-
['R-squared', formatNumber(regressionResult.data.rSquared)],
|
|
410
|
-
['Adj. R-squared', formatNumber(regressionResult.data.adjustedRSquared)],
|
|
411
|
-
['F-statistic', formatNumber(regressionResult.data.fStatistic)],
|
|
412
|
-
['F p-value', formatNumber(regressionResult.data.fPValue)],
|
|
413
|
-
['Durbin-Watson', formatNumber(regressionResult.data.durbinWatson)],
|
|
414
|
-
]);
|
|
415
|
-
} else {
|
|
416
|
-
renderErrorTable('Regression — OLS (music_satisfaction ~ weekly_hours_music + weekly_hours_movie)', regressionResult.error);
|
|
417
|
-
}
|
|
418
|
-
|
|
419
|
-
const logisticBinaryResult = await stats.logisticBinary({
|
|
420
|
-
data: binaryData,
|
|
421
|
-
dependentVariable: 'is_high_music',
|
|
422
|
-
independentVariables: ['weekly_hours_music', 'weekly_hours_movie', 'monthly_art_visits'],
|
|
423
|
-
});
|
|
424
|
-
if (logisticBinaryResult.success) {
|
|
425
|
-
renderKeyValueTable('Regression — Binary Logistic (is_high_music)', [
|
|
426
|
-
['Pseudo R-squared', formatNumber(logisticBinaryResult.data.pseudoRSquared)],
|
|
427
|
-
['LLR p-value', formatNumber(logisticBinaryResult.data.llrPValue)],
|
|
428
|
-
['AIC', formatNumber(logisticBinaryResult.data.aic)],
|
|
429
|
-
['BIC', formatNumber(logisticBinaryResult.data.bic)],
|
|
430
|
-
['Converged', logisticBinaryResult.data.convergence ? 'Yes' : 'No'],
|
|
431
|
-
]);
|
|
432
|
-
const binaryCoefRows = logisticBinaryResult.data.coefficients.slice(0, 6).map((coef) => [
|
|
433
|
-
coef.variable,
|
|
434
|
-
formatNumber(coef.coefficient),
|
|
435
|
-
formatNumber(coef.oddsRatio),
|
|
436
|
-
formatNumber(coef.pValue),
|
|
437
|
-
]);
|
|
438
|
-
renderTable('Regression — Binary Logistic Coefficients (top 6)', ['Variable', 'Coef', 'Odds Ratio', 'p-value'], binaryCoefRows);
|
|
439
|
-
} else {
|
|
440
|
-
renderErrorTable('Regression — Binary Logistic (is_high_music)', logisticBinaryResult.error);
|
|
441
|
-
}
|
|
442
|
-
|
|
443
|
-
const logisticMultinomialResult = await stats.logisticMultinomial({
|
|
444
|
-
data,
|
|
445
|
-
dependentVariable: 'age_group',
|
|
446
|
-
independentVariables: ['music_satisfaction', 'movie_satisfaction', 'art_satisfaction'],
|
|
447
|
-
referenceCategory: '20s',
|
|
448
|
-
});
|
|
449
|
-
if (logisticMultinomialResult.success) {
|
|
450
|
-
renderKeyValueTable('Regression — Multinomial Logistic (age_group)', [
|
|
451
|
-
['Pseudo R-squared', formatNumber(logisticMultinomialResult.data.pseudoRSquared)],
|
|
452
|
-
['AIC', formatNumber(logisticMultinomialResult.data.aic)],
|
|
453
|
-
['BIC', formatNumber(logisticMultinomialResult.data.bic)],
|
|
454
|
-
['Reference', logisticMultinomialResult.data.referenceCategory],
|
|
455
|
-
]);
|
|
456
|
-
const multiRows = logisticMultinomialResult.data.coefficients.slice(0, 10).map((coef) => [
|
|
457
|
-
coef.category,
|
|
458
|
-
coef.variable,
|
|
459
|
-
formatNumber(coef.coefficient),
|
|
460
|
-
formatNumber(coef.oddsRatio),
|
|
461
|
-
]);
|
|
462
|
-
renderTable('Regression — Multinomial Coefficients (top 10)', ['Category', 'Variable', 'Coef', 'Odds Ratio'], multiRows);
|
|
463
|
-
} else {
|
|
464
|
-
renderErrorTable('Regression — Multinomial Logistic (age_group)', logisticMultinomialResult.error);
|
|
465
|
-
}
|
|
466
|
-
|
|
467
|
-
const kmeansResult = await stats.kmeans({
|
|
468
|
-
data,
|
|
469
|
-
variables: ['weekly_hours_music', 'weekly_hours_movie', 'monthly_art_visits'],
|
|
470
|
-
k: 3,
|
|
471
|
-
randomState: 42,
|
|
472
|
-
maxIterations: 100,
|
|
473
|
-
});
|
|
474
|
-
if (kmeansResult.success) {
|
|
475
|
-
const clusterRows = Object.entries(kmeansResult.data.clusterSizes).map(
|
|
476
|
-
([cluster, size]) => [`Cluster ${cluster}`, String(size)]
|
|
477
|
-
);
|
|
478
|
-
renderKeyValueTable('Classify — K-Means (k=3)', [
|
|
479
|
-
['Inertia', kmeansResult.data.inertia.toFixed(2)],
|
|
480
|
-
['Iterations', String(kmeansResult.data.iterations)],
|
|
481
|
-
...clusterRows,
|
|
482
|
-
]);
|
|
483
|
-
} else {
|
|
484
|
-
renderErrorTable('Classify — K-Means (k=3)', kmeansResult.error);
|
|
485
|
-
}
|
|
486
|
-
|
|
487
|
-
const hierarchicalResult = await stats.hierarchicalCluster({
|
|
488
|
-
data: sampledData,
|
|
489
|
-
variables: ['weekly_hours_music', 'weekly_hours_movie', 'monthly_art_visits'],
|
|
490
|
-
method: 'ward',
|
|
491
|
-
metric: 'euclidean',
|
|
492
|
-
nClusters: 3,
|
|
493
|
-
});
|
|
494
|
-
if (hierarchicalResult.success) {
|
|
495
|
-
const hierarchicalRows = Object.entries(hierarchicalResult.data.clusterSizes).map(
|
|
496
|
-
([cluster, size]) => [`Cluster ${cluster}`, String(size)]
|
|
497
|
-
);
|
|
498
|
-
renderKeyValueTable('Classify — Hierarchical Cluster (n=3)', [
|
|
499
|
-
['Clusters', String(hierarchicalResult.data.nClusters)],
|
|
500
|
-
...hierarchicalRows,
|
|
501
|
-
]);
|
|
502
|
-
} else {
|
|
503
|
-
renderErrorTable('Classify — Hierarchical Cluster (n=3)', hierarchicalResult.error);
|
|
504
|
-
}
|
|
505
|
-
|
|
506
|
-
const pcaResult = await stats.pca({
|
|
507
|
-
data,
|
|
508
|
-
variables: [
|
|
509
|
-
'music_satisfaction',
|
|
510
|
-
'movie_satisfaction',
|
|
511
|
-
'art_satisfaction',
|
|
512
|
-
'weekly_hours_music',
|
|
513
|
-
'weekly_hours_movie',
|
|
514
|
-
'monthly_art_visits',
|
|
515
|
-
],
|
|
516
|
-
nComponents: 3,
|
|
517
|
-
standardize: true,
|
|
518
|
-
});
|
|
519
|
-
if (pcaResult.success) {
|
|
520
|
-
const pcaRows = pcaResult.data.explainedVarianceRatio.map((value, index) => [
|
|
521
|
-
`PC${index + 1}`,
|
|
522
|
-
formatNumber(value),
|
|
523
|
-
]);
|
|
524
|
-
renderTable('Dimension Reduction — PCA (top 3 components)', ['Component', 'Explained Variance Ratio'], pcaRows);
|
|
525
|
-
} else {
|
|
526
|
-
renderErrorTable('Dimension Reduction — PCA (top 3 components)', pcaResult.error);
|
|
527
|
-
}
|
|
528
|
-
|
|
529
|
-
const efaResult = await stats.efa({
|
|
530
|
-
data,
|
|
531
|
-
variables: [
|
|
532
|
-
'music_satisfaction',
|
|
533
|
-
'movie_satisfaction',
|
|
534
|
-
'art_satisfaction',
|
|
535
|
-
'weekly_hours_music',
|
|
536
|
-
'weekly_hours_movie',
|
|
537
|
-
'monthly_art_visits',
|
|
538
|
-
],
|
|
539
|
-
nFactors: 3,
|
|
540
|
-
rotation: 'varimax',
|
|
541
|
-
});
|
|
542
|
-
if (efaResult.success) {
|
|
543
|
-
const efaHeaders = ['Variable', 'Factor 1', 'Factor 2', 'Factor 3'];
|
|
544
|
-
const efaRows = Object.entries(efaResult.data.loadings).map(([variable, loadings]) => [
|
|
545
|
-
variable,
|
|
546
|
-
formatNumber(loadings[0]),
|
|
547
|
-
formatNumber(loadings[1]),
|
|
548
|
-
formatNumber(loadings[2]),
|
|
549
|
-
]);
|
|
550
|
-
renderTable('Dimension Reduction — EFA Loadings (varimax, 3 factors)', efaHeaders, efaRows);
|
|
551
|
-
} else {
|
|
552
|
-
renderErrorTable('Dimension Reduction — EFA Loadings (varimax, 3 factors)', efaResult.error);
|
|
553
|
-
}
|
|
554
|
-
|
|
555
|
-
setStatus('Running MDS on sampled data (300 rows)...');
|
|
556
|
-
const mdsResult = await stats.mds({
|
|
557
|
-
data: sampledData,
|
|
558
|
-
variables: [
|
|
559
|
-
'music_satisfaction',
|
|
560
|
-
'movie_satisfaction',
|
|
561
|
-
'art_satisfaction',
|
|
562
|
-
'weekly_hours_music',
|
|
563
|
-
'weekly_hours_movie',
|
|
564
|
-
'monthly_art_visits',
|
|
565
|
-
],
|
|
566
|
-
nComponents: 2,
|
|
567
|
-
metric: true,
|
|
568
|
-
maxIterations: 100,
|
|
569
|
-
randomState: 42,
|
|
570
|
-
});
|
|
571
|
-
if (mdsResult.success) {
|
|
572
|
-
renderKeyValueTable('Dimension Reduction — MDS Summary', [
|
|
573
|
-
['Stress', formatNumber(mdsResult.data.stress)],
|
|
574
|
-
['Components', String(mdsResult.data.nComponents)],
|
|
575
|
-
]);
|
|
576
|
-
const mdsRows = mdsResult.data.coordinates.slice(0, 5).map((row, index) => [
|
|
577
|
-
String(index + 1),
|
|
578
|
-
formatNumber(row[0]),
|
|
579
|
-
formatNumber(row[1]),
|
|
580
|
-
]);
|
|
581
|
-
renderTable('Dimension Reduction — MDS Coordinates (first 5, sample 300)', ['Index', 'Dim 1', 'Dim 2'], mdsRows);
|
|
582
|
-
} else {
|
|
583
|
-
renderErrorTable('Dimension Reduction — MDS', mdsResult.error);
|
|
584
|
-
}
|
|
585
|
-
|
|
586
|
-
const alphaResult = await stats.cronbachAlpha({
|
|
587
|
-
data,
|
|
588
|
-
items: ['music_satisfaction', 'movie_satisfaction', 'art_satisfaction'],
|
|
589
|
-
});
|
|
590
|
-
if (alphaResult.success) {
|
|
591
|
-
renderKeyValueTable('Scale — Cronbach Alpha (satisfaction items)', [
|
|
592
|
-
['Alpha', formatNumber(alphaResult.data.alpha)],
|
|
593
|
-
['Standardized Alpha', formatNumber(alphaResult.data.standardizedAlpha)],
|
|
594
|
-
['Inter-item correlation mean', formatNumber(alphaResult.data.interItemCorrelationMean)],
|
|
595
|
-
['Items', String(alphaResult.data.nItems)],
|
|
596
|
-
['Observations', String(alphaResult.data.nObservations)],
|
|
597
|
-
]);
|
|
598
|
-
} else {
|
|
599
|
-
renderErrorTable('Scale — Cronbach Alpha (satisfaction items)', alphaResult.error);
|
|
600
|
-
}
|
|
601
|
-
|
|
602
|
-
setStatus('All analyses completed.');
|
|
603
|
-
|
|
604
|
-
} catch (err) {
|
|
605
|
-
setStatus('Error: ' + err.message);
|
|
606
|
-
} finally {
|
|
607
|
-
stats.destroy();
|
|
608
|
-
}
|
|
609
|
-
</script>
|
|
610
|
-
</body>
|
|
611
|
-
</html>
|
|
612
|
-
```
|
|
613
|
-
|
|
614
|
-
> **Tip:** Paste the JavaScript portion into the **JS panel** of CodePen (with the "JavaScript preprocessor" set to **None** or **Babel**) and the HTML into the **HTML panel**. The demo runs entirely in the browser.
|
|
615
|
-
>
|
|
616
|
-
> **Live Demo:** Try it out on CodePen: https://codepen.io/editor/YoungjuneKwon/pen/019d3c97-35c0-743c-ad43-78e02225b008
|
|
617
|
-
|
|
618
|
-
---
|
|
619
|
-
|
|
620
|
-
## API Reference
|
|
621
|
-
|
|
622
|
-
All analysis methods are async and return `Promise<AnalysisResult<T>>`:
|
|
623
|
-
|
|
624
|
-
```typescript
|
|
625
|
-
interface AnalysisResult<T> {
|
|
626
|
-
success: boolean;
|
|
627
|
-
data: T;
|
|
628
|
-
error?: string;
|
|
629
|
-
executionTimeMs: number;
|
|
630
|
-
}
|
|
631
|
-
```
|
|
632
|
-
|
|
633
|
-
### Lifecycle Methods
|
|
634
|
-
|
|
635
|
-
| Method | Description |
|
|
636
|
-
|---|---|
|
|
637
|
-
| `new InferentialStats(config)` | Create an instance. `config.workerUrl` is required. Optional: `config.pyodideUrl`, `config.eventTarget`. |
|
|
638
|
-
| `init(): Promise<void>` | Load Pyodide and install Python packages inside the Web Worker. |
|
|
639
|
-
| `isInitialized(): boolean` | Returns `true` if the worker is ready. |
|
|
640
|
-
| `destroy(): void` | Terminate the Web Worker and release resources. |
|
|
641
|
-
|
|
642
|
-
### Analysis Methods (16 total)
|
|
643
|
-
|
|
644
|
-
#### Descriptive Statistics
|
|
645
|
-
|
|
646
|
-
| # | Method | Input → Output | Description |
|
|
647
|
-
|---|---|---|---|
|
|
648
|
-
| 1 | `frequencies(input)` | `FrequenciesInput` → `FrequenciesOutput` | Frequency distribution and relative percentages for a categorical variable. |
|
|
649
|
-
| 2 | `descriptives(input)` | `DescriptivesInput` → `DescriptivesOutput` | Summary statistics (mean, std, min, max, quartiles, skewness, kurtosis) for numeric variables. |
|
|
650
|
-
| 3 | `crosstabs(input)` | `CrosstabsInput` → `CrosstabsOutput` | Cross-tabulation with observed/expected counts, Chi-square test, and Cramér's V. |
|
|
651
|
-
|
|
652
|
-
#### Compare Means
|
|
653
|
-
|
|
654
|
-
| # | Method | Input → Output | Description |
|
|
655
|
-
|---|---|---|---|
|
|
656
|
-
| 4 | `ttestIndependent(input)` | `TTestIndependentInput` → `TTestIndependentOutput` | Independent-samples t-test with Levene's equality-of-variances test. |
|
|
657
|
-
| 5 | `ttestPaired(input)` | `TTestPairedInput` → `TTestPairedOutput` | Paired-samples t-test for dependent observations. |
|
|
658
|
-
| 6 | `anovaOneway(input)` | `AnovaInput` → `AnovaOutput` | One-way ANOVA with group descriptives and eta-squared effect size. |
|
|
659
|
-
| 7 | `posthocTukey(input)` | `PostHocInput` → `PostHocOutput` | Post-hoc Tukey HSD pairwise comparisons following ANOVA. |
|
|
660
|
-
|
|
661
|
-
#### Regression
|
|
662
|
-
|
|
663
|
-
| # | Method | Input → Output | Description |
|
|
664
|
-
|---|---|---|---|
|
|
665
|
-
| 8 | `linearRegression(input)` | `LinearRegressionInput` → `LinearRegressionOutput` | OLS linear regression with coefficients, R², F-test, and Durbin-Watson statistic. |
|
|
666
|
-
| 9 | `logisticBinary(input)` | `LogisticBinaryInput` → `LogisticBinaryOutput` | Binary logistic regression with odds ratios, pseudo-R², and model fit statistics. |
|
|
667
|
-
| 10 | `logisticMultinomial(input)` | `MultinomialLogisticInput` → `MultinomialLogisticOutput` | Multinomial logistic regression with per-category coefficients and odds ratios. |
|
|
668
|
-
|
|
669
|
-
#### Classify
|
|
670
|
-
|
|
671
|
-
| # | Method | Input → Output | Description |
|
|
672
|
-
|---|---|---|---|
|
|
673
|
-
| 11 | `kmeans(input)` | `KMeansInput` → `KMeansOutput` | K-Means clustering with cluster centers, labels, and inertia. |
|
|
674
|
-
| 12 | `hierarchicalCluster(input)` | `HierarchicalClusterInput` → `HierarchicalClusterOutput` | Agglomerative hierarchical clustering with linkage matrix and dendrogram data. |
|
|
675
|
-
|
|
676
|
-
#### Dimension Reduction
|
|
677
|
-
|
|
678
|
-
| # | Method | Input → Output | Description |
|
|
679
|
-
|---|---|---|---|
|
|
680
|
-
| 13 | `efa(input)` | `EFAInput` → `EFAOutput` | Exploratory Factor Analysis with rotation, KMO, and Bartlett's test. |
|
|
681
|
-
| 14 | `pca(input)` | `PCAInput` → `PCAOutput` | Principal Component Analysis with loadings and explained variance. |
|
|
682
|
-
| 15 | `mds(input)` | `MDSInput` → `MDSOutput` | Multidimensional Scaling with stress value and coordinate output. |
|
|
683
|
-
|
|
684
|
-
#### Scale
|
|
685
|
-
|
|
686
|
-
| # | Method | Input → Output | Description |
|
|
687
|
-
|---|---|---|---|
|
|
688
|
-
| 16 | `cronbachAlpha(input)` | `CronbachAlphaInput` → `CronbachAlphaOutput` | Reliability analysis with Cronbach's alpha, item-total correlations, and alpha-if-deleted. |
|
|
689
|
-
|
|
690
|
-
---
|
|
691
|
-
|
|
692
69
|
## Core Analysis Features — Mathematical & Technical Documentation
|
|
693
70
|
|
|
694
71
|
This section documents the mathematical foundations and internal Python implementations of all 16 analyses.
|
|
695
72
|
|
|
73
|
+
> **Note on math rendering:** Equations are rendered as images via `latex.codecogs.com` so they display correctly on npm.
|
|
74
|
+
|
|
696
75
|
---
|
|
697
76
|
|
|
698
77
|
### ① Descriptive Statistics
|
|
@@ -705,9 +84,9 @@ Computes a frequency distribution for a categorical variable, including absolute
|
|
|
705
84
|
|
|
706
85
|
**Relative frequency:**
|
|
707
86
|
|
|
708
|
-
|
|
87
|
+

|
|
709
88
|
|
|
710
|
-
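where $n_i$ is the count of category $i$ and $N$ is the total number of observations. Cumulative percentage is the running sum of $f_i$.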
|
|
89
|
+
where  is the count of category  and  is the total number of observations. Cumulative percentage is the running sum of .
|
|
711
90
|
|
|
712
91
|
---
|
|
713
92
|
|
|
@@ -719,19 +98,19 @@ Produces summary statistics for one or more numeric variables: count, mean, stan
|
|
|
719
98
|
|
|
720
99
|
**Arithmetic mean:**
|
|
721
100
|
|
|
722
|
-
|
|
101
|
+

|
|
723
102
|
|
|
724
103
|
**Sample standard deviation (Bessel-corrected):**
|
|
725
104
|
|
|
726
|
-
|
|
105
|
+
%5E2%7D)
|
|
727
106
|
|
|
728
107
|
**Skewness (Fisher):**
|
|
729
108
|
|
|
730
|
-
|
|
109
|
+
%5Ek)
|
|
731
110
|
|
|
732
111
|
**Excess kurtosis (Fisher):**
|
|
733
112
|
|
|
734
|
-
|
|
113
|
+

|
|
735
114
|
|
|
736
115
|
---
|
|
737
116
|
|
|
@@ -743,15 +122,15 @@ Cross-tabulates two categorical variables and tests for independence using Pears
|
|
|
743
122
|
|
|
744
123
|
**Pearson's Chi-square statistic:**
|
|
745
124
|
|
|
746
|
-
|
|
125
|
+
%5E2%7D%7BE_%7Bij%7D%7D)
|
|
747
126
|
|
|
748
|
-
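where $O_{ij}$ is the observed frequency in cell ($i,j$) and $E_{ij} = \frac{R_i C_j}{N}$ is the expected frequency under independence.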
|
|
127
|
+
where  is the observed frequency in cell () and  is the expected frequency under independence.
|
|
749
128
|
|
|
750
129
|
**Cramér's V:**
|
|
751
130
|
|
|
752
|
-
|
|
131
|
+
%7D%7D)
|
|
753
132
|
|
|
754
|
-
where $k = \min(r, c)$.
|
|
133
|
+
where ).
|
|
755
134
|
|
|
756
135
|
---
|
|
757
136
|
|
|
@@ -765,15 +144,15 @@ Compares the means of a numeric variable between two independent groups. Automat
|
|
|
765
144
|
|
|
766
145
|
**T-statistic (equal variance assumed):**
|
|
767
146
|
|
|
768
|
-
|
|
147
|
+

|
|
769
148
|
|
|
770
149
|
**Pooled standard deviation:**
|
|
771
150
|
|
|
772
|
-
|
|
151
|
+
s_1%5E2%2B(n_2-1)s_2%5E2%7D%7Bn_1%2Bn_2-2%7D%7D)
|
|
773
152
|
|
|
774
|
-
**Degrees of freedom:** $df = n_1 + n_2 - 2$
|
|
153
|
+
**Degrees of freedom:** 
|
|
775
154
|
|
|
776
|
-
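When Levene's test is significant ($p < 0.05$), Welch's t-test is recommended, which uses the Welch–Satterthwaite approximation for degrees of freedom.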
|
|
155
|
+
When Levene's test is significant (), Welch's t-test is recommended, which uses the Welch–Satterthwaite approximation for degrees of freedom.
|
|
777
156
|
|
|
778
157
|
---
|
|
779
158
|
|
|
@@ -785,11 +164,11 @@ Tests whether the mean difference between two paired measurements is significant
|
|
|
785
164
|
|
|
786
165
|
**T-statistic:**
|
|
787
166
|
|
|
788
|
-
|
|
167
|
+

|
|
789
168
|
|
|
790
|
-
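where $\bar{D} = \frac{1}{n}\sum_{i=1}^{n}(X_{1i} - X_{2i})$ is the mean difference and $S_D$ is the standard deviation of the differences.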
|
|
169
|
+
where ) is the mean difference and  is the standard deviation of the differences.
|
|
791
170
|
|
|
792
|
-
**Degrees of freedom:** $df = n - 1$
|
|
171
|
+
**Degrees of freedom:** 
|
|
793
172
|
|
|
794
173
|
---
|
|
795
174
|
|
|
@@ -801,23 +180,23 @@ Tests whether the means of a numeric variable differ significantly across three
|
|
|
801
180
|
|
|
802
181
|
**F-statistic:**
|
|
803
182
|
|
|
804
|
-
|
|
183
|
+

|
|
805
184
|
|
|
806
185
|
**Sum of Squares Between Groups:**
|
|
807
186
|
|
|
808
|
-
|
|
187
|
+
%5E2)
|
|
809
188
|
|
|
810
189
|
**Sum of Squares Within Groups:**
|
|
811
190
|
|
|
812
|
-
|
|
191
|
+
%5E2)
|
|
813
192
|
|
|
814
193
|
**Mean Squares:**
|
|
815
194
|
|
|
816
|
-
|
|
195
|
+

|
|
817
196
|
|
|
818
197
|
**Effect size (Eta-squared):**
|
|
819
198
|
|
|
820
|
-
|
|
199
|
+

|
|
821
200
|
|
|
822
201
|
---
|
|
823
202
|
|
|
@@ -829,9 +208,9 @@ Performs pairwise comparisons of group means following a significant ANOVA resul
|
|
|
829
208
|
|
|
830
209
|
**Studentized range statistic:**
|
|
831
210
|
|
|
832
|
-
|
|
211
|
+

|
|
833
212
|
|
|
834
|
-
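where $MS_W$ is the within-group mean square from the ANOVA and $n$ is the harmonic mean of group sizes. The critical $q$ value is obtained from the Studentized Range distribution with $k$ groups and $N - k$ degrees of freedom.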
|
|
213
|
+
where  is the within-group mean square from the ANOVA and  is the harmonic mean of group sizes. The critical  value is obtained from the Studentized Range distribution with  groups and  degrees of freedom.
|
|
835
214
|
|
|
836
215
|
---
|
|
837
216
|
|
|
@@ -839,43 +218,43 @@ where $MS_W$ is the within-group mean square from the ANOVA and $n$ is the harmo
|
|
|
839
218
|
|
|
840
219
|
#### Linear Regression (OLS)
|
|
841
220
|
|
|
842
|
-
Fits an Ordinary Least Squares regression model with one or more independent variables. Reports regression coefficients, standard errors, t-statistics, p-values, confidence intervals, $R^2$, adjusted $R^2$, F-test, and the Durbin-Watson statistic for autocorrelation detection.
|
|
221
|
+
Fits an Ordinary Least Squares regression model with one or more independent variables. Reports regression coefficients, standard errors, t-statistics, p-values, confidence intervals, , adjusted , F-test, and the Durbin-Watson statistic for autocorrelation detection.
|
|
843
222
|
|
|
844
223
|
**Python implementation:** `statsmodels.api.OLS`
|
|
845
224
|
|
|
846
225
|
**Model:**
|
|
847
226
|
|
|
848
|
-
|
|
227
|
+

|
|
849
228
|
|
|
850
|
-
where $\varepsilon \sim N(0, \sigma^2)$.
|
|
229
|
+
where ).
|
|
851
230
|
|
|
852
231
|
**OLS estimator:**
|
|
853
232
|
|
|
854
|
-
|
|
233
|
+
%5E%7B-1%7DX%5ETY)
|
|
855
234
|
|
|
856
235
|
**Coefficient of determination:**
|
|
857
236
|
|
|
858
|
-
|
|
237
|
+

|
|
859
238
|
|
|
860
|
-
where
|
|
239
|
+
where %5E2) and %5E2).
|
|
861
240
|
|
|
862
241
|
---
|
|
863
242
|
|
|
864
243
|
#### Binary Logistic Regression
|
|
865
244
|
|
|
866
|
-
Models the probability of a binary outcome as a function of one or more independent variables. Reports coefficients (log-odds), odds ratios, z-statistics, p-values, pseudo
|
|
245
|
+
Models the probability of a binary outcome as a function of one or more independent variables. Reports coefficients (log-odds), odds ratios, z-statistics, p-values, pseudo-, AIC, and BIC.
|
|
867
246
|
|
|
868
247
|
**Python implementation:** `statsmodels.discrete.discrete_model.Logit`
|
|
869
248
|
|
|
870
249
|
**Logit link function:**
|
|
871
250
|
|
|
872
|
-
|
|
251
|
+
%3D%5Cbeta_0%2B%5Cbeta_1X_1%2B%5Ccdots%2B%5Cbeta_pX_p)
|
|
873
252
|
|
|
874
253
|
**Predicted probability:**
|
|
875
254
|
|
|
876
|
-
|
|
255
|
+
%3D%5Cfrac%7B1%7D%7B1%2Be%5E%7B-(%5Cbeta_0%2B%5Cbeta_1X_1%2B%5Ccdots%2B%5Cbeta_pX_p)%7D%7D)
|
|
877
256
|
|
|
878
|
-
Coefficients are estimated by Maximum Likelihood Estimation (MLE). The odds ratio for predictor $j$ is $OR_j = e^{\beta_j}$.
|
|
257
|
+
Coefficients are estimated by Maximum Likelihood Estimation (MLE). The odds ratio for predictor j is .
|
|
879
258
|
|
|
880
259
|
---
|
|
881
260
|
|
|
@@ -885,15 +264,15 @@ Extends binary logistic regression to outcomes with more than two unordered cate
|
|
|
885
264
|
|
|
886
265
|
**Python implementation:** `sklearn.linear_model.LogisticRegression(multi_class='multinomial')`
|
|
887
266
|
|
|
888
|
-
**Log-odds relative to reference category $K$:**
|
|
267
|
+
**Log-odds relative to reference category :**
|
|
889
268
|
|
|
890
|
-
|
|
269
|
+
%7D%7BP(Y%3DK)%7D%5Cright)%3D%5Cbeta_%7Bk0%7D%2B%5Cbeta_%7Bk1%7DX_1%2B%5Ccdots%2B%5Cbeta_%7Bkp%7DX_p)
|
|
891
270
|
|
|
892
|
-
for each category $k = 1, \ldots, K-1$.
|
|
271
|
+
for each category .
|
|
893
272
|
|
|
894
273
|
**Predicted probability via softmax:**
|
|
895
274
|
|
|
896
|
-
|
|
275
|
+
%3D%5Cfrac%7Be%5E%7B%5Cbeta_%7Bk0%7D%2B%5Cbeta_%7Bk1%7DX_1%2B%5Ccdots%2B%5Cbeta_%7Bkp%7DX_p%7D%7D%7B%5Csum_%7Bj%3D1%7D%5E%7BK%7De%5E%7B%5Cbeta_%7Bj0%7D%2B%5Cbeta_%7Bj1%7DX_1%2B%5Ccdots%2B%5Cbeta_%7Bjp%7DX_p%7D%7D)
|
|
897
276
|
|
|
898
277
|
---
|
|
899
278
|
|
|
@@ -901,15 +280,15 @@ $$P(Y=k|X) = \frac{e^{\beta_{k0} + \beta_{k1}X_1 + \cdots + \beta_{kp}X_p}}{\sum
|
|
|
901
280
|
|
|
902
281
|
#### K-Means Clustering
|
|
903
282
|
|
|
904
|
-
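Partitions observations into $K$ clusters by iteratively assigning points to the nearest centroid and updating centroids until convergence.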
|
|
283
|
+
Partitions observations into  clusters by iteratively assigning points to the nearest centroid and updating centroids until convergence.
|
|
905
284
|
|
|
906
285
|
**Python implementation:** `sklearn.cluster.KMeans`
|
|
907
286
|
|
|
908
287
|
**Objective function (inertia):**
|
|
909
288
|
|
|
910
|
-
|
|
289
|
+

|
|
911
290
|
|
|
912
|
-
where
|
|
291
|
+
where  is the set of observations in cluster j and  is the centroid. The algorithm minimizes J using Lloyd's algorithm (Expectation-Maximization style).
|
|
913
292
|
|
|
914
293
|
---
|
|
915
294
|
|
|
@@ -921,9 +300,9 @@ Builds a hierarchy of clusters using a bottom-up approach. Supports Ward, comple
|
|
|
921
300
|
|
|
922
301
|
**Ward's minimum variance method** (default):
|
|
923
302
|
|
|
924
|
-
|
|
303
|
+
%3D%5Cfrac%7Bn_A%20n_B%7D%7Bn_A%2Bn_B%7D%5C%7C%5Cbar%7Bx%7D_A-%5Cbar%7Bx%7D_B%5C%7C%5E2)
|
|
925
304
|
|
|
926
|
-
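At each step, the pair of clusters $(A, B)$ that produces the smallest increase in total within-cluster variance is merged. Ward's method tends to produce compact, equally sized clusters.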
|
|
305
|
+
At each step, the pair of clusters (A, B) that produces the smallest increase in total within-cluster variance is merged. Ward's method tends to produce compact, equally sized clusters.
|
|
927
306
|
|
|
928
307
|
---
|
|
929
308
|
|
|
@@ -937,15 +316,15 @@ Discovers latent factors underlying a set of observed variables. Supports varima
|
|
|
937
316
|
|
|
938
317
|
**Factor model:**
|
|
939
318
|
|
|
940
|
-
|
|
319
|
+

|
|
941
320
|
|
|
942
|
-
where
|
|
321
|
+
where  is the observed variable vector,  is the matrix of factor loadings,  is the vector of latent factors, and  is the unique variance.
|
|
943
322
|
|
|
944
323
|
**Kaiser-Meyer-Olkin (KMO) measure:**
|
|
945
324
|
|
|
946
|
-
|
|
325
|
+

|
|
947
326
|
|
|
948
|
-
where
|
|
327
|
+
where  are elements of the correlation matrix and  are elements of the partial correlation matrix. KMO values above 0.6 are generally considered acceptable for factor analysis.
|
|
949
328
|
|
|
950
329
|
---
|
|
951
330
|
|
|
@@ -955,15 +334,15 @@ Finds orthogonal components that maximize variance in the data. Reports componen
|
|
|
955
334
|
|
|
956
335
|
**Python implementation:** `sklearn.decomposition.PCA`
|
|
957
336
|
|
|
958
|
-
**Objective:** Find the weight vector $w$ that maximizes projected variance:
|
|
337
|
+
**Objective:** Find the weight vector  that maximizes projected variance:
|
|
959
338
|
|
|
960
|
-
|
|
339
|
+
%5Cto%5Cmax%5Cquad%5Ctext%7Bsubject%20to%7D%5Cquad%5C%7Cw%5C%7C%3D1)
|
|
961
340
|
|
|
962
|
-
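This is equivalent to finding the eigenvectors of the covariance matrix $\Sigma = \frac{1}{n-1}X^TX$. The eigenvalues $\lambda_i$ represent the variance explained by each component.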
|
|
341
|
+
This is equivalent to finding the eigenvectors of the covariance matrix . The eigenvalues  represent the variance explained by each component.
|
|
963
342
|
|
|
964
343
|
**Explained variance ratio:**
|
|
965
344
|
|
|
966
|
-
|
|
345
|
+

|
|
967
346
|
|
|
968
347
|
---
|
|
969
348
|
|
|
@@ -975,9 +354,9 @@ Projects high-dimensional data into a lower-dimensional space (typically 2D) whi
|
|
|
975
354
|
|
|
976
355
|
**Stress function (Kruskal's Stress-1):**
|
|
977
356
|
|
|
978
|
-
|
|
357
|
+
%5E2%7D%7B%5Csum_%7Bi%3Cj%7Dd_%7Bij%7D%5E2%7D%7D)
|
|
979
358
|
|
|
980
|
-
where
|
|
359
|
+
where  is the distance in the reduced space and  is the original distance (or a monotonic transformation for non-metric MDS). A stress value below 0.1 is generally considered a good fit.
|
|
981
360
|
|
|
982
361
|
---
|
|
983
362
|
|
|
@@ -991,17 +370,17 @@ Measures the internal consistency (reliability) of a set of scale items. Reports
|
|
|
991
370
|
|
|
992
371
|
**Cronbach's alpha (raw):**
|
|
993
372
|
|
|
994
|
-
|
|
373
|
+
)
|
|
995
374
|
|
|
996
|
-
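where $k$ is the number of items, $\sigma_{Y_i}^2$ is the variance of item $i$, and $\sigma_X^2$ is the variance of the total score.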
|
|
375
|
+
where  is the number of items,  is the variance of item i, and  is the variance of the total score.
|
|
997
376
|
|
|
998
377
|
**Standardized alpha (based on mean inter-item correlation):**
|
|
999
378
|
|
|
1000
|
-
|
|
379
|
+
%5Cbar%7Br%7D%7D)
|
|
1001
380
|
|
|
1002
|
-
where $\bar{r}$ is the mean of all pairwise Pearson correlations among items.
|
|
381
|
+
where  is the mean of all pairwise Pearson correlations among items.
|
|
1003
382
|
|
|
1004
|
-
|
|
|
383
|
+
| Alpha Range | Interpretation |
|
|
1005
384
|
|---|---|
|
|
1006
385
|
| ≥ 0.9 | Excellent |
|
|
1007
386
|
| 0.8 – 0.9 | Good |
|
|
@@ -1011,6 +390,145 @@ where $\bar{r}$ is the mean of all pairwise Pearson correlations among items.
|
|
|
1011
390
|
|
|
1012
391
|
---
|
|
1013
392
|
|
|
393
|
+
## Installation
|
|
394
|
+
|
|
395
|
+
```bash
|
|
396
|
+
npm install @winm2m/inferential-stats-js
|
|
397
|
+
```
|
|
398
|
+
|
|
399
|
+
> **Peer dependency (optional):** If you want explicit control over the Pyodide version, install `pyodide` (>= 0.26.0) as a peer dependency. Otherwise the SDK loads Pyodide from the jsDelivr CDN automatically.
|
|
400
|
+
|
|
401
|
+
---
|
|
402
|
+
|
|
403
|
+
## Quick Start
|
|
404
|
+
|
|
405
|
+
```typescript
|
|
406
|
+
import { InferentialStats, PROGRESS_EVENT_NAME } from '@winm2m/inferential-stats-js';
|
|
407
|
+
|
|
408
|
+
// 1. Listen for initialization progress
|
|
409
|
+
window.addEventListener(PROGRESS_EVENT_NAME, (e: Event) => {
|
|
410
|
+
const { stage, progress, message } = (e as CustomEvent).detail;
|
|
411
|
+
console.log(`[${stage}] ${progress}% — ${message}`);
|
|
412
|
+
});
|
|
413
|
+
|
|
414
|
+
// 2. Create an instance (pass the URL to the bundled worker)
|
|
415
|
+
const stats = new InferentialStats({
|
|
416
|
+
workerUrl: new URL('@winm2m/inferential-stats-js/worker', import.meta.url).href,
|
|
417
|
+
});
|
|
418
|
+
|
|
419
|
+
// 3. Initialize (loads Pyodide + Python packages inside the worker)
|
|
420
|
+
await stats.init();
|
|
421
|
+
|
|
422
|
+
// 4. Prepare your data
|
|
423
|
+
const data = [
|
|
424
|
+
{ group: 'A', score: 85 },
|
|
425
|
+
{ group: 'A', score: 90 },
|
|
426
|
+
{ group: 'B', score: 78 },
|
|
427
|
+
{ group: 'B', score: 82 },
|
|
428
|
+
// ... more rows
|
|
429
|
+
];
|
|
430
|
+
|
|
431
|
+
// 5. Run an analysis
|
|
432
|
+
const result = await stats.anovaOneway({
|
|
433
|
+
data,
|
|
434
|
+
variable: 'score',
|
|
435
|
+
groupVariable: 'group',
|
|
436
|
+
});
|
|
437
|
+
|
|
438
|
+
console.log(result);
|
|
439
|
+
// {
|
|
440
|
+
// success: true,
|
|
441
|
+
// data: { fStatistic: ..., pValue: ..., groupStats: [...], ... },
|
|
442
|
+
// executionTimeMs: 42
|
|
443
|
+
// }
|
|
444
|
+
|
|
445
|
+
// 6. Clean up when done
|
|
446
|
+
stats.destroy();
|
|
447
|
+
```
|
|
448
|
+
|
|
449
|
+
---
|
|
450
|
+
|
|
451
|
+
## CDN / CodePen Usage
|
|
452
|
+
|
|
453
|
+
You can use the SDK directly in a browser or CodePen with no build step. The CodePen demo runs the same code as the local demo page listed below; only the import paths differ, pointing to the CDN instead of local files.
|
|
454
|
+
|
|
455
|
+
- **Local demo source:** `src/dev/demo.html`
|
|
456
|
+
- **CodePen live demo:** https://codepen.io/editor/YoungjuneKwon/pen/019d3c97-35c0-743c-ad43-78e02225b008
|
|
457
|
+
|
|
458
|
+
---
|
|
459
|
+
|
|
460
|
+
## API Reference
|
|
461
|
+
|
|
462
|
+
All analysis methods are async and return `Promise<AnalysisResult<T>>`:
|
|
463
|
+
|
|
464
|
+
```typescript
|
|
465
|
+
interface AnalysisResult<T> {
|
|
466
|
+
success: boolean;
|
|
467
|
+
data: T;
|
|
468
|
+
error?: string;
|
|
469
|
+
executionTimeMs: number;
|
|
470
|
+
}
|
|
471
|
+
```
|
|
472
|
+
|
|
473
|
+
### Lifecycle Methods
|
|
474
|
+
|
|
475
|
+
| Method | Description |
|
|
476
|
+
|---|---|
|
|
477
|
+
| `new InferentialStats(config)` | Create an instance. `config.workerUrl` is required. Optional: `config.pyodideUrl`, `config.eventTarget`. |
|
|
478
|
+
| `init(): Promise<void>` | Load Pyodide and install Python packages inside the Web Worker. |
|
|
479
|
+
| `isInitialized(): boolean` | Returns `true` if the worker is ready. |
|
|
480
|
+
| `destroy(): void` | Terminate the Web Worker and release resources. |
|
|
481
|
+
|
|
482
|
+
### Analysis Methods (16 total)
|
|
483
|
+
|
|
484
|
+
#### Descriptive Statistics
|
|
485
|
+
|
|
486
|
+
| # | Method | Input → Output | Description |
|
|
487
|
+
|---|---|---|---|
|
|
488
|
+
| 1 | `frequencies(input)` | `FrequenciesInput` → `FrequenciesOutput` | Frequency distribution and relative percentages for a categorical variable. |
|
|
489
|
+
| 2 | `descriptives(input)` | `DescriptivesInput` → `DescriptivesOutput` | Summary statistics (mean, std, min, max, quartiles, skewness, kurtosis) for numeric variables. |
|
|
490
|
+
| 3 | `crosstabs(input)` | `CrosstabsInput` → `CrosstabsOutput` | Cross-tabulation with observed/expected counts, Chi-square test, and Cramér's V. |
|
|
491
|
+
|
|
492
|
+
#### Compare Means
|
|
493
|
+
|
|
494
|
+
| # | Method | Input → Output | Description |
|
|
495
|
+
|---|---|---|---|
|
|
496
|
+
| 4 | `ttestIndependent(input)` | `TTestIndependentInput` → `TTestIndependentOutput` | Independent-samples t-test with Levene's equality-of-variances test. |
|
|
497
|
+
| 5 | `ttestPaired(input)` | `TTestPairedInput` → `TTestPairedOutput` | Paired-samples t-test for dependent observations. |
|
|
498
|
+
| 6 | `anovaOneway(input)` | `AnovaInput` → `AnovaOutput` | One-way ANOVA with group descriptives and eta-squared effect size. |
|
|
499
|
+
| 7 | `posthocTukey(input)` | `PostHocInput` → `PostHocOutput` | Post-hoc Tukey HSD pairwise comparisons following ANOVA. |
|
|
500
|
+
|
|
501
|
+
#### Regression
|
|
502
|
+
|
|
503
|
+
| # | Method | Input → Output | Description |
|
|
504
|
+
|---|---|---|---|
|
|
505
|
+
| 8 | `linearRegression(input)` | `LinearRegressionInput` → `LinearRegressionOutput` | OLS linear regression with coefficients, R², F-test, and Durbin-Watson statistic. |
|
|
506
|
+
| 9 | `logisticBinary(input)` | `LogisticBinaryInput` → `LogisticBinaryOutput` | Binary logistic regression with odds ratios, pseudo-R², and model fit statistics. |
|
|
507
|
+
| 10 | `logisticMultinomial(input)` | `MultinomialLogisticInput` → `MultinomialLogisticOutput` | Multinomial logistic regression with per-category coefficients and odds ratios. |
|
|
508
|
+
|
|
509
|
+
#### Classify
|
|
510
|
+
|
|
511
|
+
| # | Method | Input → Output | Description |
|
|
512
|
+
|---|---|---|---|
|
|
513
|
+
| 11 | `kmeans(input)` | `KMeansInput` → `KMeansOutput` | K-Means clustering with cluster centers, labels, and inertia. |
|
|
514
|
+
| 12 | `hierarchicalCluster(input)` | `HierarchicalClusterInput` → `HierarchicalClusterOutput` | Agglomerative hierarchical clustering with linkage matrix and dendrogram data. |
|
|
515
|
+
|
|
516
|
+
#### Dimension Reduction
|
|
517
|
+
|
|
518
|
+
| # | Method | Input → Output | Description |
|
|
519
|
+
|---|---|---|---|
|
|
520
|
+
| 13 | `efa(input)` | `EFAInput` → `EFAOutput` | Exploratory Factor Analysis with rotation, KMO, and Bartlett's test. |
|
|
521
|
+
| 14 | `pca(input)` | `PCAInput` → `PCAOutput` | Principal Component Analysis with loadings and explained variance. |
|
|
522
|
+
| 15 | `mds(input)` | `MDSInput` → `MDSOutput` | Multidimensional Scaling with stress value and coordinate output. |
|
|
523
|
+
|
|
524
|
+
#### Scale
|
|
525
|
+
|
|
526
|
+
| # | Method | Input → Output | Description |
|
|
527
|
+
|---|---|---|---|
|
|
528
|
+
| 16 | `cronbachAlpha(input)` | `CronbachAlphaInput` → `CronbachAlphaOutput` | Reliability analysis with Cronbach's alpha, item-total correlations, and alpha-if-deleted. |
|
|
529
|
+
|
|
530
|
+
---
|
|
531
|
+
|
|
1014
532
|
## Sample Data
|
|
1015
533
|
|
|
1016
534
|
The repository includes a ready-to-use sample dataset at `docs/sample-survey-data.json`, also hosted on GitHub Pages at:
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@winm2m/inferential-stats-js",
|
|
3
|
-
"version": "0.1.3",
|
|
3
|
+
"version": "0.1.5",
|
|
4
4
|
"description": "A headless JavaScript SDK for advanced statistical analysis in the browser using WebAssembly (Pyodide). Performs SPSS-level inferential statistics entirely client-side with no backend required.",
|
|
5
5
|
"author": "Youngjune Kwon <yjkwon@winm2m.com>",
|
|
6
6
|
"license": "MIT",
|