@isidorus/cpu 0.0.0-alpha.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (60) hide show
  1. package/README.md +47 -0
  2. package/binding.gyp +103 -0
  3. package/dist/ts/_native.d.ts +13 -0
  4. package/dist/ts/_native.d.ts.map +1 -0
  5. package/dist/ts/_native.js +22 -0
  6. package/dist/ts/_native.js.map +1 -0
  7. package/dist/ts/graph.d.ts +91 -0
  8. package/dist/ts/graph.d.ts.map +1 -0
  9. package/dist/ts/graph.js +95 -0
  10. package/dist/ts/graph.js.map +1 -0
  11. package/dist/ts/index.d.ts +47 -0
  12. package/dist/ts/index.d.ts.map +1 -0
  13. package/dist/ts/index.js +58 -0
  14. package/dist/ts/index.js.map +1 -0
  15. package/dist/ts/inference-pool.d.ts +84 -0
  16. package/dist/ts/inference-pool.d.ts.map +1 -0
  17. package/dist/ts/inference-pool.js +625 -0
  18. package/dist/ts/inference-pool.js.map +1 -0
  19. package/dist/ts/inference_pool.d.ts +99 -0
  20. package/dist/ts/inference_pool.d.ts.map +1 -0
  21. package/dist/ts/inference_pool.js +370 -0
  22. package/dist/ts/inference_pool.js.map +1 -0
  23. package/dist/ts/install-libtensorflow.d.ts +34 -0
  24. package/dist/ts/install-libtensorflow.d.ts.map +1 -0
  25. package/dist/ts/install-libtensorflow.js +254 -0
  26. package/dist/ts/install-libtensorflow.js.map +1 -0
  27. package/dist/ts/ops/array_ops.d.ts +29 -0
  28. package/dist/ts/ops/array_ops.d.ts.map +1 -0
  29. package/dist/ts/ops/array_ops.js +54 -0
  30. package/dist/ts/ops/array_ops.js.map +1 -0
  31. package/dist/ts/ops/index.d.ts +5 -0
  32. package/dist/ts/ops/index.d.ts.map +1 -0
  33. package/dist/ts/ops/index.js +5 -0
  34. package/dist/ts/ops/index.js.map +1 -0
  35. package/dist/ts/ops/math_ops.d.ts +96 -0
  36. package/dist/ts/ops/math_ops.d.ts.map +1 -0
  37. package/dist/ts/ops/math_ops.js +277 -0
  38. package/dist/ts/ops/math_ops.js.map +1 -0
  39. package/dist/ts/ops/nn_ops.d.ts +130 -0
  40. package/dist/ts/ops/nn_ops.d.ts.map +1 -0
  41. package/dist/ts/ops/nn_ops.js +340 -0
  42. package/dist/ts/ops/nn_ops.js.map +1 -0
  43. package/dist/ts/ops/variable_ops.d.ts +128 -0
  44. package/dist/ts/ops/variable_ops.d.ts.map +1 -0
  45. package/dist/ts/ops/variable_ops.js +267 -0
  46. package/dist/ts/ops/variable_ops.js.map +1 -0
  47. package/dist/ts/session.d.ts +83 -0
  48. package/dist/ts/session.d.ts.map +1 -0
  49. package/dist/ts/session.js +81 -0
  50. package/dist/ts/session.js.map +1 -0
  51. package/package.json +63 -0
  52. package/scripts/install.js +100 -0
  53. package/scripts/test-install.js +82 -0
  54. package/scripts/test.js +45 -0
  55. package/src/native/addon.cc +12 -0
  56. package/src/native/graph.cc +442 -0
  57. package/src/native/graph.h +52 -0
  58. package/src/native/platform_tf.h +8 -0
  59. package/src/native/session.cc +716 -0
  60. package/src/native/session.h +92 -0
@@ -0,0 +1,716 @@
1
+ #include "session.h"
2
+ #include "graph.h"
3
+ #include <uv.h>
4
+ #include <cstring>
5
+ #include <thread>
6
+ #include <cstdio>
7
+
8
+ // ---------------------------------------------------------------------------
9
+ // ConfigProto — minimal binary encoding for TF thread configuration.
10
+ //
11
+ // Proto3 wire format:
12
+ // field 2 (intra_op_parallelism_threads) : varint tag=0x10
13
+ // field 5 (inter_op_parallelism_threads) : varint tag=0x28
14
+ // ---------------------------------------------------------------------------
15
+
16
// Upper bound for a value that still encodes as a single proto varint byte.
static constexpr int MAX_VARINT1 = 127;

// Serialize a minimal ConfigProto carrying only the two thread-count
// fields (proto3 wire format):
//   field 2  intra_op_parallelism_threads  -> tag byte 0x10
//   field 5  inter_op_parallelism_threads  -> tag byte 0x28
// Both values are clamped to [1, MAX_VARINT1] so each fits in one varint
// byte. `buf` must provide at least 4 bytes; `len` receives the encoded
// length (always 4).
static void make_config_proto(
    uint8_t *buf,
    size_t &len,
    int intra_op,
    int inter_op)
{
    const auto clamp_one_byte = [](int v) {
        if (v > MAX_VARINT1)
            v = MAX_VARINT1;
        return v < 1 ? 1 : v;
    };
    const int intra = clamp_one_byte(intra_op);
    const int inter = clamp_one_byte(inter_op);

    size_t pos = 0;
    buf[pos++] = 0x10; // tag: field 2, wire type varint
    buf[pos++] = static_cast<uint8_t>(intra);
    buf[pos++] = 0x28; // tag: field 5, wire type varint
    buf[pos++] = static_cast<uint8_t>(inter);
    len = pos;
}
38
+
39
+ // ---------------------------------------------------------------------------
40
+ // CPU affinity helpers
41
+ //
42
+ // Design:
43
+ // affinity_mask_all() — bitmask of all online cores
44
+ // affinity_mask_range(f, n) — bits [f, f+n) set
45
+ // affinity_get() — current thread's affinity
46
+ // affinity_set(mask) — set current thread's affinity
47
+ //
48
+ // These are called from OnRunWork (libuv thread pool thread) immediately
49
+ // before and after TF_SessionRun. TF's eigen threadpool inherits the
50
+ // calling thread's affinity when it spawns new threads, so restricting
51
+ // the libuv worker's affinity before TF_SessionRun pins TF's threads
52
+ // to the designated cores.
53
+ //
54
+ // After TF_SessionRun returns, the libuv worker's affinity is restored
55
+ // to full_affinity_mask so it can service unrelated work (I/O callbacks,
56
+ // other native addons) on any core.
57
+ // ---------------------------------------------------------------------------
58
+
59
+ #ifdef _WIN32
60
+
61
+ AffinityMask affinity_mask_all()
62
+ {
63
+ DWORD_PTR proc_mask = 0, sys_mask = 0;
64
+ if (GetProcessAffinityMask(GetCurrentProcess(), &proc_mask, &sys_mask))
65
+ return static_cast<AffinityMask>(proc_mask);
66
+ // Fallback: all 64 bits set.
67
+ return static_cast<AffinityMask>(~0ULL);
68
+ }
69
+
70
+ AffinityMask affinity_mask_range(int first_core, int num_cores)
71
+ {
72
+ AffinityMask mask = 0;
73
+ for (int i = first_core; i < first_core + num_cores; ++i)
74
+ mask |= (AffinityMask(1) << i);
75
+ return mask;
76
+ }
77
+
78
+ AffinityMask affinity_get()
79
+ {
80
+ // Windows has no GetThreadAffinityMask. We set to full and get back
81
+ // the previous value as the return of SetThreadAffinityMask.
82
+ AffinityMask full = affinity_mask_all();
83
+ DWORD_PTR prev = SetThreadAffinityMask(GetCurrentThread(),
84
+ static_cast<DWORD_PTR>(full));
85
+ if (prev)
86
+ {
87
+ // Restore immediately — we only wanted to read.
88
+ SetThreadAffinityMask(GetCurrentThread(), prev);
89
+ return static_cast<AffinityMask>(prev);
90
+ }
91
+ return full;
92
+ }
93
+
94
+ bool affinity_set(AffinityMask mask)
95
+ {
96
+ return SetThreadAffinityMask(
97
+ GetCurrentThread(),
98
+ static_cast<DWORD_PTR>(mask)) != 0;
99
+ }
100
+
101
+ #else // POSIX (Linux, macOS partial)
102
+
103
+ AffinityMask affinity_mask_all()
104
+ {
105
+ cpu_set_t cs;
106
+ CPU_ZERO(&cs);
107
+ if (sched_getaffinity(0, sizeof(cs), &cs) != 0)
108
+ {
109
+ // Fallback: mark all 64 bits.
110
+ return static_cast<AffinityMask>(~0ULL);
111
+ }
112
+ AffinityMask mask = 0;
113
+ int count = static_cast<int>(sizeof(AffinityMask) * 8);
114
+ for (int i = 0; i < count; ++i)
115
+ if (CPU_ISSET(i, &cs))
116
+ mask |= (AffinityMask(1) << i);
117
+ return mask;
118
+ }
119
+
120
+ AffinityMask affinity_mask_range(int first_core, int num_cores)
121
+ {
122
+ AffinityMask mask = 0;
123
+ for (int i = first_core; i < first_core + num_cores; ++i)
124
+ mask |= (AffinityMask(1) << i);
125
+ return mask;
126
+ }
127
+
128
+ AffinityMask affinity_get()
129
+ {
130
+ cpu_set_t cs;
131
+ CPU_ZERO(&cs);
132
+ sched_getaffinity(0, sizeof(cs), &cs);
133
+ AffinityMask mask = 0;
134
+ int count = static_cast<int>(sizeof(AffinityMask) * 8);
135
+ for (int i = 0; i < count; ++i)
136
+ if (CPU_ISSET(i, &cs))
137
+ mask |= (AffinityMask(1) << i);
138
+ return mask;
139
+ }
140
+
141
+ bool affinity_set(AffinityMask mask)
142
+ {
143
+ cpu_set_t cs;
144
+ CPU_ZERO(&cs);
145
+ int count = static_cast<int>(sizeof(AffinityMask) * 8);
146
+ for (int i = 0; i < count; ++i)
147
+ if (mask & (AffinityMask(1) << i))
148
+ CPU_SET(i, &cs);
149
+ return pthread_setaffinity_np(pthread_self(), sizeof(cs), &cs) == 0;
150
+ }
151
+
152
+ #endif // _WIN32
153
+
154
+ // ---------------------------------------------------------------------------
155
+ // SessionWrap
156
+ // ---------------------------------------------------------------------------
157
+
158
// Registers the "Session" JS class on `exports`: two run methods, an
// explicit destroy(), and four read-only accessors exposing the thread
// and affinity configuration chosen at construction time.
Napi::Object SessionWrap::Init(Napi::Env env, Napi::Object exports)
{
    Napi::Function func = DefineClass(env, "Session", {
        InstanceMethod<&SessionWrap::Run>("run"),
        InstanceMethod<&SessionWrap::RunAsync>("runAsync"),
        InstanceMethod<&SessionWrap::Destroy>("destroy"),
        InstanceAccessor<&SessionWrap::IntraOpThreads>("intraOpThreads"),
        InstanceAccessor<&SessionWrap::InterOpThreads>("interOpThreads"),
        InstanceAccessor<&SessionWrap::TfAffinityMask>("tfAffinityMask"),
        InstanceAccessor<&SessionWrap::FullAffinityMask>("fullAffinityMask"),
    });
    // Persist the constructor in the env instance data.
    // NOTE(review): relies on SetInstanceData's default finalizer to free
    // `ctor` at environment teardown — confirm no other code also calls
    // SetInstanceData for this env (it would clobber this pointer).
    auto *ctor = new Napi::FunctionReference(Napi::Persistent(func));
    env.SetInstanceData<Napi::FunctionReference>(ctor);
    exports.Set("Session", func);
    return exports;
}
174
+
175
+ // ---------------------------------------------------------------------------
176
+ // Constructor
177
+ //
178
+ // Options:
179
+ // strategy?: "worker-pool" | "tf-parallel"
180
+ // intraOpThreads?: number (overrides strategy default)
181
+ // interOpThreads?: number (overrides strategy default)
182
+ // reserveCores?: number (reserve first N cores for event loop / other libs)
183
+ //
184
+ // CPU affinity model:
185
+ //
186
+ // reserveCores = R means:
187
+ // Cores 0..(R-1) → reserved for event loop, libuv I/O, opencv, etc.
188
+ // Cores R..(N-1) → given to TF_SessionRun via thread affinity
189
+ //
190
+ // Before each TF_SessionRun:
191
+ // libuv worker's affinity → tf_affinity_mask_ (cores R..N-1)
192
+ // TF's eigen threads inherit this affinity when spawned
193
+ //
194
+ // After each TF_SessionRun:
195
+ // libuv worker's affinity → full_affinity_mask_ (all cores)
196
+ // Worker returns to normal scheduling
197
+ //
198
+ // reserveCores = 0 (default): no affinity restriction, TF may use any core.
199
+ // ---------------------------------------------------------------------------
200
// Constructor: Session(graph: Graph, options?).
// Parses thread/affinity options, builds a minimal ConfigProto, and
// creates the TF_Session. Throws a JS exception (and returns with
// session_ == nullptr) on any failure.
SessionWrap::SessionWrap(const Napi::CallbackInfo &info)
    : Napi::ObjectWrap<SessionWrap>(info)
{
    Napi::Env env = info.Env();

    if (info.Length() < 1 || !info[0].IsObject())
    {
        Napi::TypeError::New(env, "Session(graph: Graph, options?)")
            .ThrowAsJavaScriptException();
        return;
    }

    GraphWrap *gw = Napi::ObjectWrap<GraphWrap>::Unwrap(
        info[0].As<Napi::Object>());
    if (!gw || !gw->GetGraph())
    {
        Napi::Error::New(env, "Invalid or destroyed Graph")
            .ThrowAsJavaScriptException();
        return;
    }

    // Borrow the TF_Graph* and hold a JS reference so the Graph wrapper
    // (and hence the TF_Graph) outlives this session.
    graph_ = gw->GetGraph();
    graph_ref_ = Napi::ObjectReference::New(info[0].As<Napi::Object>(), 1);

    // ── Thread count defaults ───────────────────────────────────────────────
    int intra_op = 1;
    int inter_op = 1;
    int reserve_cores = 0;

    if (info.Length() >= 2 && info[1].IsObject())
    {
        auto opts = info[1].As<Napi::Object>();

        // Strategy sets thread count defaults.
        // NOTE(review): a non-string `strategy` value is coerced via
        // As<Napi::String>() without a type check — confirm the TS layer
        // guarantees a string here.
        if (opts.Has("strategy"))
        {
            std::string strat = opts.Get("strategy")
                                    .As<Napi::String>()
                                    .Utf8Value();
            if (strat == "tf-parallel")
            {
                // Use all hardware threads for intra-op parallelism;
                // hardware_concurrency() may return 0, hence the fallback.
                unsigned hw = std::thread::hardware_concurrency();
                intra_op = hw > 0 ? static_cast<int>(hw) : 4;
                inter_op = 1;
            }
        }

        // Explicit values always override strategy defaults.
        if (opts.Has("intraOpThreads"))
            intra_op = opts.Get("intraOpThreads").As<Napi::Number>().Int32Value();
        if (opts.Has("interOpThreads"))
            inter_op = opts.Get("interOpThreads").As<Napi::Number>().Int32Value();
        if (opts.Has("reserveCores"))
            reserve_cores = opts.Get("reserveCores")
                                .As<Napi::Number>()
                                .Int32Value();
    }

    intra_op_threads_ = intra_op;
    inter_op_threads_ = inter_op;

    // ── Affinity masks ──────────────────────────────────────────────────────
    full_affinity_mask_ = affinity_mask_all();

    if (reserve_cores > 0)
    {
        // Mask width in bits — the upper bound on representable core indices.
        int total_cores = static_cast<int>(
            sizeof(AffinityMask) * 8);

        // Count actual online cores from the full mask.
        int online = 0;
        for (int i = 0; i < total_cores; ++i)
            if (full_affinity_mask_ & (AffinityMask(1) << i))
                ++online;

        // TF always keeps at least one core even if reserveCores >= online.
        int tf_cores = online - reserve_cores;
        if (tf_cores < 1)
            tf_cores = 1;

        // TF gets the LAST tf_cores online cores (highest indices).
        // Reserved cores are the FIRST reserve_cores online cores.
        // This keeps core 0 (event loop affinity default) reserved.
        AffinityMask tf_mask = 0;
        int assigned = 0;
        for (int i = total_cores - 1; i >= 0 && assigned < tf_cores; --i)
        {
            if (full_affinity_mask_ & (AffinityMask(1) << i))
            {
                tf_mask |= (AffinityMask(1) << i);
                ++assigned;
            }
        }
        tf_affinity_mask_ = tf_mask;

        fprintf(stderr,
                "[isidorus] affinity: reserving %d core(s), "
                "TF mask=0x%llx full mask=0x%llx\n",
                reserve_cores,
                static_cast<unsigned long long>(tf_affinity_mask_),
                static_cast<unsigned long long>(full_affinity_mask_));
    }
    // tf_affinity_mask_ = 0 means "no restriction" — checked in OnRunWork.

    // ── ConfigProto and session creation ────────────────────────────────────
    // 4-byte buffer is exactly the size make_config_proto emits.
    uint8_t config_buf[4];
    size_t config_len = 0;
    make_config_proto(config_buf, config_len, intra_op, inter_op);

    TF_SessionOptions *opts = TF_NewSessionOptions();
    StatusGuard config_status;
    TF_SetConfig(opts, config_buf, config_len, config_status.s);
    if (!config_status.ok())
    {
        TF_DeleteSessionOptions(opts);
        Napi::Error::New(env,
            "TF_SetConfig failed: " + config_status.message())
            .ThrowAsJavaScriptException();
        return;
    }

    StatusGuard status;
    session_ = TF_NewSession(graph_, opts, status.s);
    // Options are copied by TF_NewSession; safe to delete either way.
    TF_DeleteSessionOptions(opts);

    if (!status.ok() || !session_)
    {
        Napi::Error::New(env,
            "TF_NewSession failed: " + status.message())
            .ThrowAsJavaScriptException();
        return;
    }
}
332
+
333
+ SessionWrap::~SessionWrap() { cleanup(); }
334
+
335
+ void SessionWrap::cleanup()
336
+ {
337
+ if (session_)
338
+ {
339
+ StatusGuard s;
340
+ TF_CloseSession(session_, s.s);
341
+ TF_DeleteSession(session_, s.s);
342
+ session_ = nullptr;
343
+ }
344
+ graph_ = nullptr;
345
+
346
+ if (!graph_ref_.IsEmpty())
347
+ {
348
+ graph_ref_.Reset();
349
+ }
350
+ }
351
+
352
+ Napi::Value SessionWrap::IntraOpThreads(const Napi::CallbackInfo &info)
353
+ {
354
+ return Napi::Number::New(info.Env(), intra_op_threads_);
355
+ }
356
+ Napi::Value SessionWrap::InterOpThreads(const Napi::CallbackInfo &info)
357
+ {
358
+ return Napi::Number::New(info.Env(), inter_op_threads_);
359
+ }
360
+ Napi::Value SessionWrap::TfAffinityMask(const Napi::CallbackInfo &info)
361
+ {
362
+ return Napi::Number::New(info.Env(),
363
+ static_cast<double>(tf_affinity_mask_));
364
+ }
365
+ Napi::Value SessionWrap::FullAffinityMask(const Napi::CallbackInfo &info)
366
+ {
367
+ return Napi::Number::New(info.Env(),
368
+ static_cast<double>(full_affinity_mask_));
369
+ }
370
+
371
+ // ---------------------------------------------------------------------------
372
+ // Feed / fetch / output helpers
373
+ // ---------------------------------------------------------------------------
374
+
375
+ static bool parse_feeds(
376
+ TF_Graph *graph,
377
+ Napi::Array feeds_arr,
378
+ std::vector<TF_Output> &inputs,
379
+ std::vector<TF_Tensor *> &input_tensors,
380
+ std::string &error)
381
+ {
382
+ for (uint32_t i = 0; i < feeds_arr.Length(); ++i)
383
+ {
384
+ auto feed = feeds_arr.Get(i).As<Napi::Object>();
385
+ std::string n = feed.Get("opName").As<Napi::String>().Utf8Value();
386
+ int idx = feed.Get("index").As<Napi::Number>().Int32Value();
387
+ TF_Operation *op = TF_GraphOperationByName(graph, n.c_str());
388
+ if (!op)
389
+ {
390
+ error = "Feed op not found: " + n;
391
+ return false;
392
+ }
393
+ inputs.push_back({op, idx});
394
+
395
+ auto t = feed.Get("tensor").As<Napi::Object>();
396
+ auto dtype = static_cast<TF_DataType>(
397
+ t.Get("dtype").As<Napi::Number>().Int32Value());
398
+ auto data = t.Get("data").As<Napi::Buffer<uint8_t>>();
399
+ auto darr = t.Get("shape").As<Napi::Array>();
400
+ std::vector<int64_t> dims(darr.Length());
401
+ for (uint32_t j = 0; j < darr.Length(); ++j)
402
+ dims[j] = darr.Get(j).As<Napi::Number>().Int64Value();
403
+
404
+ TF_Tensor *tensor = TF_AllocateTensor(
405
+ dtype, dims.data(), static_cast<int>(dims.size()),
406
+ data.ByteLength());
407
+ if (!tensor)
408
+ {
409
+ error = "TF_AllocateTensor failed";
410
+ return false;
411
+ }
412
+ std::memcpy(TF_TensorData(tensor), data.Data(), data.ByteLength());
413
+ input_tensors.push_back(tensor);
414
+ }
415
+ return true;
416
+ }
417
+
418
+ static bool parse_fetches(
419
+ TF_Graph *graph,
420
+ Napi::Array fetches_arr,
421
+ std::vector<TF_Output> &outputs,
422
+ std::string &error)
423
+ {
424
+ for (uint32_t i = 0; i < fetches_arr.Length(); ++i)
425
+ {
426
+ auto fetch = fetches_arr.Get(i).As<Napi::Object>();
427
+ std::string n = fetch.Get("opName").As<Napi::String>().Utf8Value();
428
+ int idx = fetch.Get("index").As<Napi::Number>().Int32Value();
429
+ TF_Operation *op = TF_GraphOperationByName(graph, n.c_str());
430
+ if (!op)
431
+ {
432
+ error = "Fetch op not found: " + n;
433
+ return false;
434
+ }
435
+ outputs.push_back({op, idx});
436
+ }
437
+ return true;
438
+ }
439
+
440
+ static Napi::Array pack_outputs(
441
+ Napi::Env env,
442
+ std::vector<TF_Tensor *> &output_tensors)
443
+ {
444
+ Napi::Array result = Napi::Array::New(env, output_tensors.size());
445
+ for (size_t i = 0; i < output_tensors.size(); ++i)
446
+ {
447
+ TF_Tensor *t = output_tensors[i];
448
+ if (!t)
449
+ {
450
+ result.Set(i, env.Null());
451
+ continue;
452
+ }
453
+
454
+ Napi::Object obj = Napi::Object::New(env);
455
+ TF_DataType dtype = TF_TensorType(t);
456
+ int ndims = TF_NumDims(t);
457
+ size_t nb = TF_TensorByteSize(t);
458
+
459
+ obj.Set("dtype", Napi::Number::New(env, static_cast<double>(dtype)));
460
+ Napi::Array shape = Napi::Array::New(env, ndims);
461
+ for (int j = 0; j < ndims; ++j)
462
+ shape.Set(j, Napi::Number::New(env,
463
+ static_cast<double>(TF_Dim(t, j))));
464
+ obj.Set("shape", shape);
465
+
466
+ auto buf = Napi::Buffer<uint8_t>::Copy(
467
+ env,
468
+ reinterpret_cast<const uint8_t *>(TF_TensorData(t)),
469
+ nb);
470
+ obj.Set("data", buf);
471
+ TF_DeleteTensor(t);
472
+ result.Set(i, obj);
473
+ }
474
+ return result;
475
+ }
476
+
477
+ // ---------------------------------------------------------------------------
478
+ // run — synchronous (no affinity — runs on event loop thread)
479
+ // ---------------------------------------------------------------------------
480
+ Napi::Value SessionWrap::Run(const Napi::CallbackInfo &info)
481
+ {
482
+ Napi::Env env = info.Env();
483
+ auto deferred = Napi::Promise::Deferred::New(env);
484
+
485
+ if (!session_)
486
+ {
487
+ deferred.Reject(Napi::Error::New(env, "Session destroyed").Value());
488
+ return deferred.Promise();
489
+ }
490
+
491
+ std::vector<TF_Output> tf_inputs, tf_outputs;
492
+ std::vector<TF_Tensor *> tf_input_tensors;
493
+ std::string error;
494
+
495
+ if (!parse_feeds(graph_, info[0].As<Napi::Array>(),
496
+ tf_inputs, tf_input_tensors, error) ||
497
+ !parse_fetches(graph_, info[1].As<Napi::Array>(),
498
+ tf_outputs, error))
499
+ {
500
+ for (auto *t : tf_input_tensors)
501
+ TF_DeleteTensor(t);
502
+ deferred.Reject(Napi::Error::New(env, error).Value());
503
+ return deferred.Promise();
504
+ }
505
+
506
+ std::vector<TF_Operation *> target_ops;
507
+ if (info.Length() >= 3 && info[2].IsArray())
508
+ {
509
+ auto targets = info[2].As<Napi::Array>();
510
+ for (uint32_t i = 0; i < targets.Length(); ++i)
511
+ {
512
+ std::string n = targets.Get(i).As<Napi::String>().Utf8Value();
513
+ TF_Operation *op = TF_GraphOperationByName(graph_, n.c_str());
514
+ if (op)
515
+ target_ops.push_back(op);
516
+ }
517
+ }
518
+
519
+ std::vector<TF_Tensor *> output_tensors(tf_outputs.size(), nullptr);
520
+ StatusGuard status;
521
+ TF_SessionRun(
522
+ session_, nullptr,
523
+ tf_inputs.data(), tf_input_tensors.data(),
524
+ static_cast<int>(tf_inputs.size()),
525
+ tf_outputs.data(), output_tensors.data(),
526
+ static_cast<int>(tf_outputs.size()),
527
+ target_ops.data(), static_cast<int>(target_ops.size()),
528
+ nullptr, status.s);
529
+
530
+ for (auto *t : tf_input_tensors)
531
+ TF_DeleteTensor(t);
532
+
533
+ if (!status.ok())
534
+ {
535
+ for (auto *t : output_tensors)
536
+ if (t)
537
+ TF_DeleteTensor(t);
538
+ deferred.Reject(Napi::Error::New(env,
539
+ "TF_SessionRun failed: " + status.message())
540
+ .Value());
541
+ return deferred.Promise();
542
+ }
543
+
544
+ deferred.Resolve(pack_outputs(env, output_tensors));
545
+ return deferred.Promise();
546
+ }
547
+
548
+ // ---------------------------------------------------------------------------
549
+ // runAsync — TF_SessionRun on libuv thread pool with affinity fencing
550
+ //
551
+ // Affinity fencing in OnRunWork:
552
+ //
553
+ // 1. Save the libuv worker's current affinity (full_affinity_mask).
554
+ // 2. Set the worker's affinity to tf_affinity_mask (TF cores only).
555
+ // 3. Call TF_SessionRun.
556
+ // TF's eigen threadpool spawns threads that inherit this affinity.
557
+ // All TF compute stays on the designated cores.
558
+ // 4. Restore the worker's affinity to full_affinity_mask.
559
+ // The libuv worker returns to unrestricted scheduling.
560
+ //
561
+ // If tf_affinity_mask == 0, affinity fencing is skipped entirely.
562
+ // ---------------------------------------------------------------------------
563
// Heap-allocated context for one runAsync() call. Created on the event
// loop thread, handed to the libuv pool in RunAsync, and deleted in
// OnRunAfter (or in RunAsync's own error paths).
struct RunCtx
{
    // MUST remain the first member: OnRunWork/OnRunAfter reinterpret_cast
    // the uv_work_t* they receive back to RunCtx*.
    uv_work_t req;
    TF_Session *session;             // borrowed from SessionWrap, not owned
    TF_Graph *graph;                 // borrowed; kept alive via self_ref
    AffinityMask tf_affinity_mask;   // 0 => skip affinity fencing entirely
    AffinityMask full_affinity_mask; // restored after TF_SessionRun
    std::vector<TF_Output> tf_inputs;
    std::vector<TF_Tensor *> tf_input_tensors; // owned; freed in OnRunAfter
    std::vector<TF_Output> tf_outputs;
    std::vector<TF_Operation *> target_ops;
    std::vector<TF_Tensor *> output_tensors;   // filled by TF on success
    bool ok = true;                  // set false by OnRunWork on TF error
    std::string error_message;       // valid only when ok == false
    Napi::Promise::Deferred deferred;
    Napi::ObjectReference self_ref;  // keeps the Session JS object alive
    explicit RunCtx(Napi::Env env)
        : req{}, session(nullptr), graph(nullptr),
          tf_affinity_mask(0), full_affinity_mask(0),
          deferred(Napi::Promise::Deferred::New(env)) {}
};
584
+
585
// Runs on a libuv thread-pool thread. No JS heap access allowed here —
// everything needed was copied into RunCtx on the loop thread.
// `req` is the first member of RunCtx, so the cast below recovers the
// owning context.
static void OnRunWork(uv_work_t *req)
{
    auto *ctx = reinterpret_cast<RunCtx *>(req);
    // Pre-size the output slots; TF fills them in on success.
    ctx->output_tensors.assign(ctx->tf_outputs.size(), nullptr);

    // ── Affinity fence in ────────────────────────────────────────────────────
    // Pin this libuv worker to TF cores so TF's eigen threads inherit
    // the restricted affinity when they are spawned by TF_SessionRun.
    bool affinity_applied = false;
    if (ctx->tf_affinity_mask != 0)
    {
        affinity_applied = affinity_set(ctx->tf_affinity_mask);
    }

    StatusGuard status;
    TF_SessionRun(
        ctx->session, nullptr,
        ctx->tf_inputs.data(), ctx->tf_input_tensors.data(),
        static_cast<int>(ctx->tf_inputs.size()),
        ctx->tf_outputs.data(), ctx->output_tensors.data(),
        static_cast<int>(ctx->tf_outputs.size()),
        ctx->target_ops.data(), static_cast<int>(ctx->target_ops.size()),
        nullptr, status.s);

    // ── Affinity fence out ───────────────────────────────────────────────────
    // Restore unrestricted scheduling so this libuv worker can service
    // other work (I/O callbacks, opencv, etc.) on any core.
    // Only restore if the fence-in actually succeeded.
    if (affinity_applied)
    {
        affinity_set(ctx->full_affinity_mask);
    }

    // Record failure for OnRunAfter; the promise is settled on the loop
    // thread, never here.
    if (!status.ok())
    {
        ctx->ok = false;
        ctx->error_message = status.message();
    }
}
623
+
624
// Runs back on the event loop after OnRunWork completes. Frees the input
// tensors, settles the promise (rejecting on TF error, resolving with
// packed outputs otherwise), drops the keep-alive ref on the Session
// wrapper, and deletes the context.
static void OnRunAfter(uv_work_t *req, int)
{
    auto *ctx = reinterpret_cast<RunCtx *>(req);
    Napi::Env env = ctx->deferred.Env();
    Napi::HandleScope scope(env);
    // Inputs were copied into TF during the run; safe to free now.
    for (auto *t : ctx->tf_input_tensors)
        TF_DeleteTensor(t);
    if (!ctx->ok)
    {
        // A failed run may still have produced some outputs — free them.
        for (auto *t : ctx->output_tensors)
            if (t)
                TF_DeleteTensor(t);
        ctx->deferred.Reject(Napi::Error::New(env,
            "TF_SessionRun failed: " + ctx->error_message)
            .Value());
    }
    else
    {
        // pack_outputs takes ownership of (and frees) the output tensors.
        ctx->deferred.Resolve(pack_outputs(env, ctx->output_tensors));
    }
    ctx->self_ref.Unref();
    delete ctx;
}
647
+
648
+ Napi::Value SessionWrap::RunAsync(const Napi::CallbackInfo &info)
649
+ {
650
+ Napi::Env env = info.Env();
651
+ if (!session_)
652
+ {
653
+ auto d = Napi::Promise::Deferred::New(env);
654
+ d.Reject(Napi::Error::New(env, "Session destroyed").Value());
655
+ return d.Promise();
656
+ }
657
+
658
+ auto *ctx = new RunCtx(env);
659
+ ctx->session = session_;
660
+ ctx->graph = graph_;
661
+
662
+ // Pass affinity masks so OnRunWork can fence without touching the JS heap.
663
+ ctx->tf_affinity_mask = tf_affinity_mask_;
664
+ ctx->full_affinity_mask = full_affinity_mask_;
665
+
666
+ ctx->self_ref = Napi::ObjectReference::New(
667
+ info.This().As<Napi::Object>(), 1);
668
+
669
+ std::string error;
670
+ if (!parse_feeds(graph_, info[0].As<Napi::Array>(),
671
+ ctx->tf_inputs, ctx->tf_input_tensors, error) ||
672
+ !parse_fetches(graph_, info[1].As<Napi::Array>(),
673
+ ctx->tf_outputs, error))
674
+ {
675
+ auto promise = ctx->deferred.Promise();
676
+ ctx->deferred.Reject(Napi::Error::New(env, error).Value());
677
+ ctx->self_ref.Unref();
678
+ delete ctx;
679
+ return promise;
680
+ }
681
+
682
+ if (info.Length() >= 3 && info[2].IsArray())
683
+ {
684
+ auto targets = info[2].As<Napi::Array>();
685
+ for (uint32_t i = 0; i < targets.Length(); ++i)
686
+ {
687
+ std::string n = targets.Get(i).As<Napi::String>().Utf8Value();
688
+ TF_Operation *op = TF_GraphOperationByName(graph_, n.c_str());
689
+ if (op)
690
+ ctx->target_ops.push_back(op);
691
+ }
692
+ }
693
+
694
+ uv_loop_t *loop = nullptr;
695
+ if (napi_get_uv_event_loop(env, &loop) != napi_ok || !loop ||
696
+ uv_queue_work(loop, &ctx->req, OnRunWork, OnRunAfter) != 0)
697
+ {
698
+ for (auto *t : ctx->tf_input_tensors)
699
+ TF_DeleteTensor(t);
700
+ auto promise = ctx->deferred.Promise();
701
+ ctx->deferred.Reject(Napi::Error::New(env,
702
+ "Failed to queue runAsync work")
703
+ .Value());
704
+ ctx->self_ref.Unref();
705
+ delete ctx;
706
+ return promise;
707
+ }
708
+
709
+ return ctx->deferred.Promise();
710
+ }
711
+
712
// destroy(): releases the TF session immediately (idempotent; see cleanup).
// NOTE(review): nothing guards against destroy() while a runAsync() is
// still in flight on the libuv pool — the worker would then run with a
// dangling TF_Session*. Confirm callers only destroy after pending runs
// settle.
Napi::Value SessionWrap::Destroy(const Napi::CallbackInfo &info)
{
    cleanup();
    return info.Env().Undefined();
}