nodepyx 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +22 -0
- package/README.md +399 -0
- package/binding.gyp +73 -0
- package/dist/core/PyCallable.d.ts +65 -0
- package/dist/core/PyCallable.d.ts.map +1 -0
- package/dist/core/PyCallable.js +109 -0
- package/dist/core/PyCallable.js.map +1 -0
- package/dist/core/PyContext.d.ts +76 -0
- package/dist/core/PyContext.d.ts.map +1 -0
- package/dist/core/PyContext.js +228 -0
- package/dist/core/PyContext.js.map +1 -0
- package/dist/core/PyIterator.d.ts +84 -0
- package/dist/core/PyIterator.d.ts.map +1 -0
- package/dist/core/PyIterator.js +243 -0
- package/dist/core/PyIterator.js.map +1 -0
- package/dist/core/PyModule.d.ts +55 -0
- package/dist/core/PyModule.d.ts.map +1 -0
- package/dist/core/PyModule.js +172 -0
- package/dist/core/PyModule.js.map +1 -0
- package/dist/core/PyProxy.d.ts +65 -0
- package/dist/core/PyProxy.d.ts.map +1 -0
- package/dist/core/PyProxy.js +483 -0
- package/dist/core/PyProxy.js.map +1 -0
- package/dist/core/PyRuntime.d.ts +105 -0
- package/dist/core/PyRuntime.d.ts.map +1 -0
- package/dist/core/PyRuntime.js +438 -0
- package/dist/core/PyRuntime.js.map +1 -0
- package/dist/env/CondaManager.d.ts +118 -0
- package/dist/env/CondaManager.d.ts.map +1 -0
- package/dist/env/CondaManager.js +401 -0
- package/dist/env/CondaManager.js.map +1 -0
- package/dist/env/PackageInstaller.d.ts +233 -0
- package/dist/env/PackageInstaller.d.ts.map +1 -0
- package/dist/env/PackageInstaller.js +609 -0
- package/dist/env/PackageInstaller.js.map +1 -0
- package/dist/env/PythonDetector.d.ts +103 -0
- package/dist/env/PythonDetector.d.ts.map +1 -0
- package/dist/env/PythonDetector.js +381 -0
- package/dist/env/PythonDetector.js.map +1 -0
- package/dist/env/VenvManager.d.ts +117 -0
- package/dist/env/VenvManager.d.ts.map +1 -0
- package/dist/env/VenvManager.js +331 -0
- package/dist/env/VenvManager.js.map +1 -0
- package/dist/index.d.ts +169 -0
- package/dist/index.d.ts.map +1 -0
- package/dist/index.js +393 -0
- package/dist/index.js.map +1 -0
- package/dist/plugins/Plugin.interface.d.ts +41 -0
- package/dist/plugins/Plugin.interface.d.ts.map +1 -0
- package/dist/plugins/Plugin.interface.js +12 -0
- package/dist/plugins/Plugin.interface.js.map +1 -0
- package/dist/plugins/PluginManager.d.ts +26 -0
- package/dist/plugins/PluginManager.d.ts.map +1 -0
- package/dist/plugins/PluginManager.js +174 -0
- package/dist/plugins/PluginManager.js.map +1 -0
- package/dist/plugins/builtin/NumpyPlugin.d.ts +17 -0
- package/dist/plugins/builtin/NumpyPlugin.d.ts.map +1 -0
- package/dist/plugins/builtin/NumpyPlugin.js +41 -0
- package/dist/plugins/builtin/NumpyPlugin.js.map +1 -0
- package/dist/plugins/builtin/PandasPlugin.d.ts +19 -0
- package/dist/plugins/builtin/PandasPlugin.d.ts.map +1 -0
- package/dist/plugins/builtin/PandasPlugin.js +57 -0
- package/dist/plugins/builtin/PandasPlugin.js.map +1 -0
- package/dist/plugins/builtin/TorchPlugin.d.ts +23 -0
- package/dist/plugins/builtin/TorchPlugin.d.ts.map +1 -0
- package/dist/plugins/builtin/TorchPlugin.js +50 -0
- package/dist/plugins/builtin/TorchPlugin.js.map +1 -0
- package/dist/plugins/index.d.ts +7 -0
- package/dist/plugins/index.d.ts.map +1 -0
- package/dist/plugins/index.js +12 -0
- package/dist/plugins/index.js.map +1 -0
- package/dist/serialization/DataFrameBridge.d.ts +141 -0
- package/dist/serialization/DataFrameBridge.d.ts.map +1 -0
- package/dist/serialization/DataFrameBridge.js +355 -0
- package/dist/serialization/DataFrameBridge.js.map +1 -0
- package/dist/serialization/MsgPackSerializer.d.ts +45 -0
- package/dist/serialization/MsgPackSerializer.d.ts.map +1 -0
- package/dist/serialization/MsgPackSerializer.js +242 -0
- package/dist/serialization/MsgPackSerializer.js.map +1 -0
- package/dist/serialization/NumpyBridge.d.ts +96 -0
- package/dist/serialization/NumpyBridge.d.ts.map +1 -0
- package/dist/serialization/NumpyBridge.js +323 -0
- package/dist/serialization/NumpyBridge.js.map +1 -0
- package/dist/serialization/Serializer.d.ts +78 -0
- package/dist/serialization/Serializer.d.ts.map +1 -0
- package/dist/serialization/Serializer.js +281 -0
- package/dist/serialization/Serializer.js.map +1 -0
- package/dist/types/PythonTypeMapper.d.ts +87 -0
- package/dist/types/PythonTypeMapper.d.ts.map +1 -0
- package/dist/types/PythonTypeMapper.js +449 -0
- package/dist/types/PythonTypeMapper.js.map +1 -0
- package/dist/types/StubCache.d.ts +109 -0
- package/dist/types/StubCache.d.ts.map +1 -0
- package/dist/types/StubCache.js +333 -0
- package/dist/types/StubCache.js.map +1 -0
- package/dist/types/TypeGenerator.d.ts +139 -0
- package/dist/types/TypeGenerator.d.ts.map +1 -0
- package/dist/types/TypeGenerator.js +372 -0
- package/dist/types/TypeGenerator.js.map +1 -0
- package/dist/types/addon.d.ts +114 -0
- package/dist/types/addon.d.ts.map +1 -0
- package/dist/types/addon.js +32 -0
- package/dist/types/addon.js.map +1 -0
- package/dist/types/config.d.ts +175 -0
- package/dist/types/config.d.ts.map +1 -0
- package/dist/types/config.js +35 -0
- package/dist/types/config.js.map +1 -0
- package/dist/types/index.d.ts +10 -0
- package/dist/types/index.d.ts.map +1 -0
- package/dist/types/index.js +12 -0
- package/dist/types/index.js.map +1 -0
- package/dist/types/python.d.ts +235 -0
- package/dist/types/python.d.ts.map +1 -0
- package/dist/types/python.js +7 -0
- package/dist/types/python.js.map +1 -0
- package/dist/utils/ErrorTranslator.d.ts +83 -0
- package/dist/utils/ErrorTranslator.d.ts.map +1 -0
- package/dist/utils/ErrorTranslator.js +210 -0
- package/dist/utils/ErrorTranslator.js.map +1 -0
- package/dist/utils/Logger.d.ts +27 -0
- package/dist/utils/Logger.d.ts.map +1 -0
- package/dist/utils/Logger.js +115 -0
- package/dist/utils/Logger.js.map +1 -0
- package/dist/utils/MemoryMonitor.d.ts +44 -0
- package/dist/utils/MemoryMonitor.d.ts.map +1 -0
- package/dist/utils/MemoryMonitor.js +143 -0
- package/dist/utils/MemoryMonitor.js.map +1 -0
- package/package.json +177 -0
- package/python/error_handler.py +433 -0
- package/python/nodepyx_runtime.py +575 -0
- package/python/serializer.py +379 -0
- package/python/type_inspector.py +288 -0
- package/scripts/build-native.js +68 -0
- package/scripts/download-prebuilds.js +99 -0
- package/scripts/generate-stubs.js +405 -0
- package/scripts/install.js +260 -0
- package/src/core/PyCallable.ts +137 -0
- package/src/core/PyContext.ts +296 -0
- package/src/core/PyIterator.ts +294 -0
- package/src/core/PyModule.ts +194 -0
- package/src/core/PyProxy.ts +605 -0
- package/src/core/PyRuntime.ts +504 -0
- package/src/env/CondaManager.ts +451 -0
- package/src/env/PackageInstaller.ts +738 -0
- package/src/env/PythonDetector.ts +414 -0
- package/src/env/VenvManager.ts +396 -0
- package/src/index.ts +425 -0
- package/src/native/gil_guard.cpp +26 -0
- package/src/native/gil_guard.h +175 -0
- package/src/native/nodepyx_addon.cpp +886 -0
- package/src/native/python_bridge.cpp +790 -0
- package/src/native/python_bridge.h +257 -0
- package/src/native/thread_pool.cpp +336 -0
- package/src/native/thread_pool.h +175 -0
- package/src/native/type_converter.cpp +901 -0
- package/src/native/type_converter.h +272 -0
- package/src/nextjs/PyProvider.tsx +123 -0
- package/src/nextjs/index.ts +21 -0
- package/src/nextjs/usePython.ts +106 -0
- package/src/nextjs/withnodepyx.ts +88 -0
- package/src/plugins/Plugin.interface.ts +51 -0
- package/src/plugins/PluginManager.ts +155 -0
- package/src/plugins/builtin/NumpyPlugin.ts +36 -0
- package/src/plugins/builtin/PandasPlugin.ts +49 -0
- package/src/plugins/builtin/TorchPlugin.ts +56 -0
- package/src/plugins/index.ts +7 -0
- package/src/serialization/DataFrameBridge.ts +398 -0
- package/src/serialization/MsgPackSerializer.ts +220 -0
- package/src/serialization/NumpyBridge.ts +332 -0
- package/src/serialization/Serializer.ts +320 -0
- package/src/types/PythonTypeMapper.ts +495 -0
- package/src/types/StubCache.ts +340 -0
- package/src/types/TypeGenerator.ts +491 -0
- package/src/types/addon.ts +170 -0
- package/src/types/config.ts +226 -0
- package/src/types/index.ts +55 -0
- package/src/types/python.ts +309 -0
- package/src/types/stubs/numpy.d.ts +441 -0
- package/src/types/stubs/pandas.d.ts +575 -0
- package/src/types/stubs/sklearn.d.ts +728 -0
- package/src/types/stubs/torch.d.ts +694 -0
- package/src/utils/ErrorTranslator.ts +220 -0
- package/src/utils/Logger.ts +119 -0
- package/src/utils/MemoryMonitor.ts +175 -0
|
@@ -0,0 +1,398 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* nodepyx — DataFrameBridge
|
|
3
|
+
* Converts Pandas DataFrames and Series to/from JavaScript objects.
|
|
4
|
+
*
|
|
5
|
+
* ─── Wire Protocol ──────────────────────────────────────────────────────────
|
|
6
|
+
*
|
|
7
|
+
* DataFrames use the "split" orientation of pandas JSON:
|
|
8
|
+
* {
|
|
9
|
+
* "columns": ["col1", "col2", ...],
|
|
10
|
+
* "data": [[row0_val0, row0_val1], [row1_val0, row1_val1], ...],
|
|
11
|
+
* "index": [0, 1, 2, ...],
|
|
12
|
+
* "dtypes": {"col1": "int64", "col2": "object", ...},
|
|
13
|
+
* "shape": [nrows, ncols]
|
|
14
|
+
* }
|
|
15
|
+
*
|
|
16
|
+
* For large DataFrames (>64KB) the JSON is embedded in a MessagePack
|
|
17
|
+
* binary to reduce parse overhead. The format field in SerializedValue
|
|
18
|
+
* tells the bridge which path to take:
|
|
19
|
+
* format === 'pandas_dataframe' → DataFrame decode
|
|
20
|
+
* format === 'pandas_series' → Series decode
|
|
21
|
+
*
|
|
22
|
+
* Series wire format:
|
|
23
|
+
* {
|
|
24
|
+
* "name": "my_series",
|
|
25
|
+
* "data": [1, 2, 3, 4],
|
|
26
|
+
* "index": [0, 1, 2, 3],
|
|
27
|
+
* "dtype": "int64",
|
|
28
|
+
* "length": 4
|
|
29
|
+
* }
|
|
30
|
+
*
|
|
31
|
+
* ─── JavaScript representation ───────────────────────────────────────────────
|
|
32
|
+
*
|
|
33
|
+
* DataFrameResult (records orientation is built client-side):
|
|
34
|
+
* {
|
|
35
|
+
* columns: string[],
|
|
36
|
+
* data: Record<string, unknown>[], ← row objects
|
|
37
|
+
* index: unknown[],
|
|
38
|
+
* dtypes: Record<string, string>,
|
|
39
|
+
* shape: [number, number]
|
|
40
|
+
* }
|
|
41
|
+
*
|
|
42
|
+
* SeriesResult:
|
|
43
|
+
* {
|
|
44
|
+
* name: string,
|
|
45
|
+
* data: unknown[],
|
|
46
|
+
* index: unknown[],
|
|
47
|
+
* dtype: string,
|
|
48
|
+
* length: number
|
|
49
|
+
* }
|
|
50
|
+
*/
|
|
51
|
+
|
|
52
|
+
import * as msgpack from '@msgpack/msgpack';
|
|
53
|
+
import type {
|
|
54
|
+
SerializedValue,
|
|
55
|
+
SerializedFormat,
|
|
56
|
+
DataFrameResult,
|
|
57
|
+
SeriesResult,
|
|
58
|
+
} from '../types/python';
|
|
59
|
+
import { Logger } from '../utils/Logger';
|
|
60
|
+
|
|
61
|
+
// Module-level Logger instance constructed with the label 'DataFrameBridge';
// the deserialize* methods in this file report decode failures through it.
const logger = new Logger('DataFrameBridge');
|
|
62
|
+
|
|
63
|
+
// ─── Wire schemas ─────────────────────────────────────────────────────────────
|
|
64
|
+
|
|
65
|
+
/**
 * Wire shape of a DataFrame in "split" orientation: column labels, a
 * row-major 2-D array of cell values, and the row index.
 */
interface DataFrameWire {
  /** Column labels, in the order cells appear inside each row of `data`. */
  columns: Array<string>;
  /** Row-major cell values: `data[row][col]`. */
  data: Array<Array<unknown>>;
  /** One index label per row. */
  index: Array<unknown>;
  /** Optional map of column label → dtype string (e.g. "int64"). */
  dtypes?: Record<string, string>;
  /** Optional `[nrows, ncols]` pair. */
  shape?: [number, number];
}
|
|
72
|
+
|
|
73
|
+
/**
 * Wire shape of a Series: a named 1-D sequence of values with its index.
 */
interface SeriesWire {
  /** Series name. */
  name: string;
  /** The series values, in order. */
  data: Array<unknown>;
  /** One index label per value. */
  index: Array<unknown>;
  /** dtype string (e.g. "int64"). */
  dtype: string;
  /** Number of values. */
  length: number;
}
|
|
80
|
+
|
|
81
|
+
// ─── Serialization for JS → Python ───────────────────────────────────────────
|
|
82
|
+
|
|
83
|
+
/** Options accepted when serializing a records array for Python. */
interface DataFrameSerializeOptions {
  /**
   * Maximum number of rows to send; rows beyond the limit are dropped.
   * When omitted, every row is serialized.
   */
  maxRows?: number;

  /**
   * Explicitly choose MessagePack (`true`) or JSON (`false`) for the wire
   * payload. When omitted, MessagePack is selected once the JSON text is
   * larger than 64 KB.
   */
  forceMsgPack?: boolean;
}
|
|
95
|
+
|
|
96
|
+
/**
 * DataFrameBridge — bidirectional conversion for Pandas DataFrames and Series.
 *
 * Python → JS: `deserializeDataFrame` / `deserializeSeries` decode a
 * SerializedValue whose `data` is either a JSON string or a Uint8Array
 * (MessagePack first, UTF-8 JSON as fallback). Decode failures are logged and
 * an empty result is returned instead of throwing.
 *
 * JS → Python: `serializeDataFrame` / `serializeSeries` build the wire object
 * and encode it as JSON, or as MessagePack when forced or when the JSON text
 * is longer than 65,536 characters.
 *
 * @example
 * ```typescript
 * const bridge = new DataFrameBridge();
 *
 * // Deserialize a DataFrame received from Python
 * const df: DataFrameResult = bridge.deserializeDataFrame(sv);
 * console.log(df.columns);   // ['city', 'population']
 * console.log(df.data[0]);   // { city: 'Algiers', population: 3500000 }
 *
 * // Serialize a records-array back to Python
 * const sv2 = bridge.serializeDataFrame(df.data, df.columns);
 * ```
 */
export class DataFrameBridge {
  /** JSON text longer than this (in UTF-16 code units) is sent as MessagePack. */
  private static readonly MSGPACK_THRESHOLD = 65_536;

  // ─── Deserialization (Python → JS) ────────────────────────────────────────

  /**
   * Deserialize a PANDAS_DATAFRAME SerializedValue → DataFrameResult.
   *
   * Never throws: on any decode/build error the failure is logged and an
   * empty DataFrameResult (shape `[0, 0]`) is returned.
   */
  deserializeDataFrame(sv: SerializedValue): DataFrameResult {
    try {
      const wire = this._decodeWire<DataFrameWire>(sv);
      return this._buildDataFrameResult(wire);
    } catch (err) {
      logger.error('DataFrameBridge.deserializeDataFrame failed', err);
      return { columns: [], data: [], records: [], index: [], dtypes: {}, shape: [0, 0] };
    }
  }

  /**
   * Deserialize a PANDAS_SERIES SerializedValue → SeriesResult.
   *
   * Never throws: on any decode/build error the failure is logged and an
   * empty SeriesResult (dtype `'object'`, length 0) is returned.
   */
  deserializeSeries(sv: SerializedValue): SeriesResult {
    try {
      const wire = this._decodeWire<SeriesWire>(sv);
      return this._buildSeriesResult(wire);
    } catch (err) {
      logger.error('DataFrameBridge.deserializeSeries failed', err);
      return { name: '', data: [], values: [], index: [], dtype: 'object', length: 0 };
    }
  }

  // ─── Serialization (JS → Python) ─────────────────────────────────────────

  /**
   * Serialize a records-array (array of row objects) into a split-orientation
   * SerializedValue.
   *
   * Cells that are `null`/`undefined` (including columns missing from a row)
   * are sent as `null`. The index is always 0..n-1 and `dtypes` is sent empty.
   *
   * @param records - array of row objects (e.g. [{col1: val1, col2: val2}, ...])
   * @param columns - explicit column order; inferred from the first record if omitted
   * @param options - row limit and wire-format selection
   * @returns the encoded value, tagged with format `'dataframe'`
   */
  serializeDataFrame(
    records: Record<string, unknown>[],
    columns?: string[],
    options: DataFrameSerializeOptions = {},
  ): SerializedValue {
    const cols = columns ?? (records[0] ? Object.keys(records[0]) : []);
    const { maxRows } = options;
    // slice() treats a negative end as "count from the end", which would drop
    // trailing rows instead of limiting the row count — clamp to zero.
    const rows = maxRows !== undefined ? records.slice(0, Math.max(0, maxRows)) : records;

    // Build split-orientation wire object
    const wire: DataFrameWire = {
      columns: cols,
      data: rows.map(row => cols.map(col => row[col] ?? null)),
      index: rows.map((_, i) => i),
      dtypes: {},
      shape: [rows.length, cols.length],
    };

    // NOTE(review): the tag sent here is 'dataframe' while the file header
    // describes 'pandas_dataframe' — confirm which one the Python side expects.
    return this._encodeWire(wire, 'dataframe', options.forceMsgPack);
  }

  /**
   * Serialize a plain array of values into a Series SerializedValue.
   *
   * @param data  - the series values
   * @param name  - series name (defaults to the empty string)
   * @param index - optional explicit index; defaults to 0..n-1
   * @returns the encoded value, tagged with format `'series'`; the dtype is
   *          inferred from all non-null values in `data`
   */
  serializeSeries(
    data: unknown[],
    name: string = '',
    index?: unknown[],
  ): SerializedValue {
    const wire: SeriesWire = {
      name,
      data,
      index: index ?? data.map((_, i) => i),
      dtype: this._inferDtype(data),
      length: data.length,
    };

    return this._encodeWire(wire, 'series');
  }

  // ─── DataFrame transformation helpers ────────────────────────────────────

  /**
   * Convert a DataFrameResult to column arrays keyed by column name.
   * Useful for charting libraries.
   */
  static toColumnArrays(
    df: DataFrameResult,
  ): Record<string, unknown[]> {
    const result: Record<string, unknown[]> = {};
    for (const col of df.columns) {
      result[col] = df.data.map(row => row[col]);
    }
    return result;
  }

  /**
   * Convert a DataFrameResult to a 2-D raw array (no column keys).
   * Row-major: result[row][colIndex], columns ordered as in `df.columns`.
   */
  static toMatrix(df: DataFrameResult): unknown[][] {
    return df.data.map(row => df.columns.map(col => row[col]));
  }

  /**
   * Filter rows of a DataFrameResult.
   *
   * @param df        - source frame (not mutated)
   * @param predicate - called with each row object and its index label;
   *                    rows for which it returns true are kept
   * @returns a new frame with matching rows, their index labels, and an
   *          updated row count in `shape`
   */
  static filterRows(
    df: DataFrameResult,
    predicate: (row: Record<string, unknown>, index: unknown) => boolean,
  ): DataFrameResult {
    const filteredData: Record<string, unknown>[] = [];
    const filteredIndex: unknown[] = [];

    df.data.forEach((row, i) => {
      const idx = df.index[i];
      if (predicate(row, idx)) {
        filteredData.push(row);
        filteredIndex.push(idx);
      }
    });

    return {
      ...df,
      data: filteredData,
      records: filteredData,
      index: filteredIndex,
      shape: [filteredData.length, df.shape[1]],
    };
  }

  /**
   * Select specific columns from a DataFrameResult.
   *
   * Requested names that are not in `df.columns` are ignored; the result
   * keeps the order given in `cols`. Missing dtypes default to `'object'`.
   */
  static selectColumns(df: DataFrameResult, cols: string[]): DataFrameResult {
    const known = new Set(df.columns);
    const validCols = cols.filter(c => known.has(c));
    const data = df.data.map(row => Object.fromEntries(validCols.map(c => [c, row[c]])));
    return {
      columns: validCols,
      data,
      records: data,
      index: df.index,
      dtypes: Object.fromEntries(validCols.map(c => [c, df.dtypes[c] ?? 'object'])),
      shape: [df.shape[0], validCols.length],
    };
  }

  /**
   * Compute basic descriptive stats for a numeric column in a DataFrameResult.
   *
   * Missing cells (`null`, `undefined`, blank strings) and anything that does
   * not coerce to a finite number are skipped rather than counted as 0.
   * `std` is the population standard deviation (divides by `count`).
   * Returns all zeros when the column has no usable values.
   */
  static describeColumn(df: DataFrameResult, col: string): {
    count: number;
    mean: number;
    std: number;
    min: number;
    max: number;
    sum: number;
  } {
    const values: number[] = [];
    let sum = 0;
    let min = Infinity;
    let max = -Infinity;

    // Single pass instead of Math.min(...values): spreading a large column
    // into call arguments overflows the engine's argument limit.
    for (const row of df.data) {
      const raw = row[col];
      // Number(null) and Number('') are both 0 — treat them as missing.
      if (raw === null || raw === undefined) {continue;}
      if (typeof raw === 'string' && raw.trim() === '') {continue;}

      const v = Number(raw);
      if (!Number.isFinite(v)) {continue;}

      values.push(v);
      sum += v;
      if (v < min) {min = v;}
      if (v > max) {max = v;}
    }

    const count = values.length;
    if (count === 0) {return { count: 0, mean: 0, std: 0, min: 0, max: 0, sum: 0 };}

    const mean = sum / count;
    let squaredDiffs = 0;
    for (const v of values) {
      squaredDiffs += (v - mean) ** 2;
    }
    const std = Math.sqrt(squaredDiffs / count);

    return { count, mean, std, min, max, sum };
  }

  // ─── Private helpers ──────────────────────────────────────────────────────

  /**
   * Decode `sv.data` into a wire object.
   *
   * Strings are parsed as JSON. Uint8Arrays are tried as MessagePack first
   * and, if that fails or yields a non-object, as UTF-8 JSON text.
   *
   * @throws Error when `data` is empty/missing, of an unsupported type, or
   *         does not decode to an object
   */
  private _decodeWire<T>(sv: SerializedValue): T {
    const data = sv.data;
    if (!data) {throw new Error('SerializedValue.data is null');}

    if (typeof data === 'string') {
      return this._requireObject<T>(JSON.parse(data));
    }

    if (data instanceof Uint8Array) {
      // Attempt MessagePack first
      try {
        const decoded = msgpack.decode(data);
        if (decoded && typeof decoded === 'object') {return decoded as T;}
      } catch {
        // Fall through to UTF-8 JSON
      }
      const text = new TextDecoder().decode(data);
      return this._requireObject<T>(JSON.parse(text));
    }

    throw new Error(`DataFrameBridge: unsupported data type: ${typeof data}`);
  }

  /** Reject payloads that parsed successfully but are not objects (e.g. `null`, `5`). */
  private _requireObject<T>(parsed: unknown): T {
    if (parsed === null || typeof parsed !== 'object') {
      throw new Error('DataFrameBridge: decoded payload is not an object');
    }
    return parsed as T;
  }

  /**
   * Encode a wire object as JSON text or MessagePack bytes.
   *
   * @param wire         - the DataFrame/Series wire object
   * @param format       - tag stored in the result's `format` field
   * @param forceMsgPack - `true`/`false` picks the encoding explicitly; when
   *                       omitted, MessagePack is used if the JSON text is
   *                       longer than MSGPACK_THRESHOLD
   * @returns SerializedValue whose `metadata.length` is `wire.data.length`
   */
  private _encodeWire(
    wire: DataFrameWire | SeriesWire,
    format: 'dataframe' | 'pandas_dataframe' | 'series' | 'pandas_series',
    forceMsgPack?: boolean,
  ): SerializedValue {
    const length = wire.data.length;

    // Only stringify when the JSON is actually needed: to measure the size
    // for the automatic choice, or as the payload itself.
    let json: string | undefined;
    let useMsgPack = forceMsgPack;
    if (useMsgPack === undefined) {
      json = JSON.stringify(wire);
      useMsgPack = json.length > DataFrameBridge.MSGPACK_THRESHOLD;
    }

    if (useMsgPack) {
      return {
        format: format as SerializedFormat,
        data: msgpack.encode(wire),
        metadata: { length },
      };
    }

    return {
      format: format as SerializedFormat,
      data: json ?? JSON.stringify(wire),
      metadata: { length },
    };
  }

  /**
   * Turn a split-orientation wire object into a DataFrameResult with
   * record-style rows. Missing wire fields fall back to empty values, the
   * index defaults to 0..n-1, and absent cells become `null`.
   */
  private _buildDataFrameResult(wire: DataFrameWire): DataFrameResult {
    const columns = wire.columns ?? [];
    const rawData = wire.data ?? [];
    const index = wire.index ?? rawData.map((_, i) => i);
    const dtypes = wire.dtypes ?? {};

    // Convert 2-D array → records
    const records: Record<string, unknown>[] = rawData.map(row => {
      const obj: Record<string, unknown> = {};
      columns.forEach((name, c) => {
        obj[name] = row[c] ?? null;
      });
      return obj;
    });

    return {
      columns,
      data: records,
      records,
      index,
      dtypes,
      shape: wire.shape ?? [rawData.length, columns.length],
    };
  }

  /**
   * Turn a Series wire object into a SeriesResult. Values are read from
   * `data`, or from a `values` key when `data` is absent.
   */
  private _buildSeriesResult(wire: SeriesWire): SeriesResult {
    // Support both 'data' and 'values' as the series values key
    const withValues = wire as SeriesWire & { values?: unknown[] };
    const seriesData: unknown[] = wire.data ?? withValues.values ?? [];
    return {
      name: wire.name ?? '',
      data: seriesData,
      values: seriesData,
      index: wire.index ?? [],
      dtype: wire.dtype ?? 'object',
      length: wire.length ?? seriesData.length,
    };
  }

  /**
   * Infer a pandas dtype string from JS values.
   *
   * All non-null values are inspected (not just the first one):
   *   - only integers            → 'int64'
   *   - numbers, any non-integer → 'float64'
   *   - only booleans            → 'bool'
   *   - only Date instances      → 'datetime64[ns]'
   *   - anything else, mixed kinds, or no non-null values → 'object'
   */
  private _inferDtype(data: unknown[]): string {
    type Kind = 'int' | 'float' | 'bool' | 'datetime';
    let kind: Kind | undefined;

    for (const value of data) {
      if (value === null || value === undefined) {continue;}

      let current: Kind;
      if (typeof value === 'number') {
        current = Number.isInteger(value) ? 'int' : 'float';
      } else if (typeof value === 'boolean') {
        current = 'bool';
      } else if (value instanceof Date) {
        current = 'datetime';
      } else {
        return 'object';
      }

      if (kind === undefined || kind === current) {
        kind = current;
        continue;
      }

      // int + float widens to float; every other mixture is heterogeneous.
      const bothNumeric =
        (kind === 'int' || kind === 'float') && (current === 'int' || current === 'float');
      if (!bothNumeric) {return 'object';}
      kind = 'float';
    }

    switch (kind) {
      case 'int': return 'int64';
      case 'float': return 'float64';
      case 'bool': return 'bool';
      case 'datetime': return 'datetime64[ns]';
      default: return 'object';
    }
  }
}
|
|
398
|
+
|
|
@@ -0,0 +1,220 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* nodepyx — MsgPackSerializer
|
|
3
|
+
* MessagePack encoder/decoder for high-performance binary serialization.
|
|
4
|
+
* Used for large arrays, complex objects, and bulk data transfer.
|
|
5
|
+
*/
|
|
6
|
+
|
|
7
|
+
import * as msgpack from '@msgpack/msgpack';
|
|
8
|
+
import { Logger } from '../utils/Logger';
|
|
9
|
+
|
|
10
|
+
// Module-level Logger instance constructed with the label 'MsgPackSerializer';
// encode/decode failures in this file are reported through it.
const logger = new Logger('MsgPackSerializer');
|
|
11
|
+
|
|
12
|
+
/**
 * MessagePack extension type codes used for Python values that have no
 * native MessagePack representation.
 *
 *   1  → Python datetime
 *   2  → Python complex number
 *   3  → Python set
 *   4  → Python tuple
 *   5  → Python bytes
 *   10 → NUMPY_DTYPE (presumably a NumPy dtype descriptor — confirm against
 *        the Python serializer)
 */
enum MsgPackExtType {
  DATETIME = 1,
  COMPLEX = 2,
  SET = 3,
  TUPLE = 4,
  BYTES = 5,
  NUMPY_DTYPE = 10,
}
|
|
28
|
+
|
|
29
|
+
/**
 * MsgPackSerializer — encodes/decodes JavaScript ↔ MessagePack binary.
 *
 * Extension codecs registered by this class:
 *   - DATETIME: JS Date ↔ 8-byte big-endian float64 of epoch milliseconds
 *   - COMPLEX:  `{real, imag}` (both numbers, no other keys) ↔ two
 *               big-endian float64 values
 *   - SET:      JS Set ↔ nested MessagePack array
 *   - TUPLE:    decode-only; payload is a nested MessagePack value returned
 *               as a JS Array (immutable semantics lost, acceptable)
 *
 * No codec is registered here for the BYTES or NUMPY_DTYPE extension types.
 *
 * Error policy: `encode`, `decode` and `decodeStream` log failures and return
 * a fallback value instead of throwing.
 *
 * @example
 * ```typescript
 * const serializer = new MsgPackSerializer();
 * const encoded = serializer.encode({ key: 'value', nums: [1, 2, 3] });
 * const decoded = serializer.decode(encoded);
 * ```
 */
export class MsgPackSerializer {
  private readonly _encoderOptions: msgpack.EncoderOptions;
  private readonly _decoderOptions: msgpack.DecoderOptions;

  constructor() {
    // The codec only holds the registered handlers, so a single instance
    // serves both the encoder and the decoder.
    const extensionCodec = this._createExtensionCodec();

    this._encoderOptions = {
      extensionCodec,
      forceIntegerToFloat: false,
      forceFloat32: false,
      sortKeys: false,
      maxDepth: 64,
    };

    this._decoderOptions = {
      extensionCodec,
      // rawBinaryType: 'Uint8Array',
      useBigInt64: false,
    };
  }

  /**
   * Encode a JavaScript value to MessagePack binary.
   *
   * Never throws. If encoding fails the error is logged and the value is
   * retried as a MessagePack-encoded JSON string; if that also fails an
   * empty Uint8Array is returned.
   */
  encode(value: unknown): Uint8Array {
    try {
      return msgpack.encode(value, this._encoderOptions);
    } catch (err) {
      logger.error('MsgPack encoding failed', err);
      // Fallback: try encoding as JSON string
      try {
        const json = JSON.stringify(value);
        return msgpack.encode(json, this._encoderOptions);
      } catch {
        // Last resort: empty bytes
        return new Uint8Array(0);
      }
    }
  }

  /**
   * Decode a MessagePack binary to a JavaScript value.
   *
   * @returns the decoded value, or `null` when `data` is empty/missing or
   *          decoding fails (the failure is logged)
   */
  decode(data: Uint8Array): unknown {
    if (!data || data.length === 0) {return null;}
    try {
      return msgpack.decode(data, this._decoderOptions);
    } catch (err) {
      logger.error('MsgPack decoding failed', err);
      return null;
    }
  }

  /**
   * Encode a sequence of values back-to-back into one buffer
   * (for streaming large datasets). Each value is encoded with `encode`,
   * so the same fallback rules apply per item.
   */
  encodeStream(values: Iterable<unknown>): Uint8Array {
    const chunks: Uint8Array[] = [];
    let totalLength = 0;

    for (const value of values) {
      const encoded = this.encode(value);
      chunks.push(encoded);
      totalLength += encoded.length;
    }

    // Concatenate all chunks
    const result = new Uint8Array(totalLength);
    let offset = 0;
    for (const chunk of chunks) {
      result.set(chunk, offset);
      offset += chunk.length;
    }

    return result;
  }

  /**
   * Decode a buffer of back-to-back MessagePack values, yielding each one.
   *
   * Yields nothing for empty input. If decoding fails part-way, the error is
   * logged and iteration ends; items decoded before the failure have already
   * been yielded.
   */
  *decodeStream(data: Uint8Array): Iterable<unknown> {
    if (!data || data.length === 0) {return;}

    try {
      for (const item of msgpack.decodeMulti(data, this._decoderOptions)) {
        yield item;
      }
    } catch (err) {
      logger.error('MsgPack stream decoding failed', err);
    }
  }

  // ─── Extension Codec ────────────────────────────────────────────────────────

  /**
   * True only for objects shaped exactly like a decoded complex number:
   * numeric `real` and `imag` and no other own enumerable keys. Anything
   * looser would silently drop extra properties or encode NaN.
   */
  private static _isComplex(value: unknown): value is { real: number; imag: number } {
    if (value === null || typeof value !== 'object' || Array.isArray(value)) {return false;}
    const candidate = value as Record<string, unknown>;
    return (
      typeof candidate['real'] === 'number' &&
      typeof candidate['imag'] === 'number' &&
      Object.keys(candidate).length === 2
    );
  }

  /** Build the extension codec with the datetime, complex, set and tuple handlers. */
  private _createExtensionCodec(): msgpack.ExtensionCodec {
    const codec = new msgpack.ExtensionCodec();

    // ── Python datetime → JS Date ──────────────────────────────────────────
    codec.register({
      type: MsgPackExtType.DATETIME,
      encode: (value: unknown): Uint8Array | null => {
        if (!(value instanceof Date)) {return null;}
        const buf = new ArrayBuffer(8);
        new DataView(buf).setFloat64(0, value.getTime(), false);
        return new Uint8Array(buf);
      },
      decode: (data: Uint8Array): Date => {
        const ms = new DataView(data.buffer, data.byteOffset, data.byteLength).getFloat64(0, false);
        return new Date(ms);
      },
    });

    // ── Python complex → {real, imag} ─────────────────────────────────────
    codec.register({
      type: MsgPackExtType.COMPLEX,
      encode: (value: unknown): Uint8Array | null => {
        if (!MsgPackSerializer._isComplex(value)) {return null;}
        const buf = new ArrayBuffer(16);
        const dv = new DataView(buf);
        dv.setFloat64(0, value.real, false);
        dv.setFloat64(8, value.imag, false);
        return new Uint8Array(buf);
      },
      decode: (data: Uint8Array): { real: number; imag: number } => {
        const dv = new DataView(data.buffer, data.byteOffset, data.byteLength);
        return {
          real: dv.getFloat64(0, false),
          imag: dv.getFloat64(8, false),
        };
      },
    });

    // ── Python set → JS Set ───────────────────────────────────────────────
    codec.register({
      type: MsgPackExtType.SET,
      encode: (value: unknown): Uint8Array | null => {
        if (!(value instanceof Set)) {return null;}
        // Encode directly rather than via this.encode(): its JSON-string /
        // empty-bytes fallback would produce a payload that is not an array.
        // A failure here propagates to the outer encode(), which logs it.
        return msgpack.encode(Array.from(value), this._encoderOptions);
      },
      decode: (data: Uint8Array): Set<unknown> => {
        const decoded = this.decode(data);
        if (!Array.isArray(decoded)) {
          // new Set('abc') would yield a set of characters and new Set(5)
          // would throw; treat any non-array payload as corrupt.
          logger.error('MsgPack set payload is not an array', decoded);
          return new Set();
        }
        return new Set(decoded);
      },
    });

    // ── Python tuple → JS Array ───────────────────────────────────────────
    codec.register({
      type: MsgPackExtType.TUPLE,
      encode: (_value: unknown): Uint8Array | null => {
        // Tuples are encoded as arrays by Python side
        return null;
      },
      decode: (data: Uint8Array): unknown[] => {
        const decoded = this.decode(data);
        return Array.isArray(decoded) ? decoded : [decoded];
      },
    });

    return codec;
  }
}
|
|
220
|
+
|